// Copied from https://github.com/rust-lang/cargo/blob/367fd9f213750cd40317803dd0a5a3ce3f0c676d/src/cargo/util/frontmatter.rs #![expect(dead_code)] // avoid editing #![expect(unreachable_pub)] // avoid editing #![expect(clippy::useless_format)] // avoid editing type Span = std::ops::Range; #[derive(Debug)] pub struct ScriptSource<'s> { /// The full file raw: &'s str, /// The `#!/usr/bin/env cargo` line, if present shebang: Option, /// The code fence opener (`---`) open: Option, /// Trailing text after `ScriptSource::open` that identifies the meaning of /// `ScriptSource::frontmatter` info: Option, /// The lines between `ScriptSource::open` and `ScriptSource::close` frontmatter: Option, /// The code fence closer (`---`) close: Option, /// All content after the frontmatter and shebang content: Span, } impl<'s> ScriptSource<'s> { pub fn parse(raw: &'s str) -> Result { use winnow::stream::FindSlice as _; use winnow::stream::Location as _; use winnow::stream::Offset as _; use winnow::stream::Stream as _; let content_end = raw.len(); let mut source = Self { raw, shebang: None, open: None, info: None, frontmatter: None, close: None, content: 0..content_end, }; let mut input = winnow::stream::LocatingSlice::new(raw); if let Some(shebang_end) = strip_shebang(input.as_ref()) { let shebang_start = input.current_token_start(); let _ = input.next_slice(shebang_end); let shebang_end = input.current_token_start(); source.shebang = Some(shebang_start..shebang_end); source.content = shebang_end..content_end; } // Whitespace may precede a frontmatter but must end with a newline if let Some(nl_end) = strip_ws_lines(input.as_ref()) { let _ = input.next_slice(nl_end); } // Opens with a line that starts with 3 or more `-` followed by an optional identifier const FENCE_CHAR: char = '-'; let fence_length = input .as_ref() .char_indices() .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i)) .unwrap_or_else(|| input.eof_offset()); let open_start = input.current_token_start(); let fence_pattern = input.next_slice(fence_length); let open_end = input.current_token_start(); match fence_length { 0 => { return Ok(source); } 1 | 2 => { // either not a frontmatter or invalid frontmatter opening return Err(FrontmatterError::new( format!( "found {fence_length} `{FENCE_CHAR}` in rust frontmatter, expected at least 3" ), raw.len()..raw.len(), ).push_visible_span(open_start..open_end)); } _ => {} } source.open = Some(open_start..open_end); let Some(info_nl) = input.find_slice("\n") else { return Err(FrontmatterError::new( format!("unclosed frontmatter; expected `{fence_pattern}`"), raw.len()..raw.len(), ) .push_visible_span(open_start..open_end)); }; let info = input.next_slice(info_nl.start); let info = info.strip_suffix('\r').unwrap_or(info); // already excludes `\n` let info = info.trim_matches(is_horizontal_whitespace); if !info.is_empty() { let info_start = info.offset_from(&raw); let info_end = info_start + info.len(); source.info = Some(info_start..info_end); } // Ends with a line that starts with a matching number of `-` only followed by whitespace let nl_fence_pattern = format!("\n{fence_pattern}"); let Some(frontmatter_nl) = input.find_slice(nl_fence_pattern.as_str()) else { for len in (2..(nl_fence_pattern.len() - 1)).rev() { let Some(frontmatter_nl) = input.find_slice(&nl_fence_pattern[0..len]) else { continue; }; let _ = input.next_slice(frontmatter_nl.start + 1); let close_start = input.current_token_start(); let _ = input.next_slice(len); let close_end = input.current_token_start(); let fewer_dashes = fence_length - len; return Err(FrontmatterError::new( format!( "closing code fence has {fewer_dashes} less `-` than the opening fence" ), close_start..close_end, ) .push_visible_span(open_start..open_end)); } return Err(FrontmatterError::new( format!("unclosed frontmatter; expected `{fence_pattern}`"), raw.len()..raw.len(), ) .push_visible_span(open_start..open_end)); }; let frontmatter_start = input.current_token_start() + 1; // skip nl from infostring let _ = input.next_slice(frontmatter_nl.start + 1); let frontmatter_end = input.current_token_start(); source.frontmatter = Some(frontmatter_start..frontmatter_end); let close_start = input.current_token_start(); let _ = input.next_slice(fence_length); let close_end = input.current_token_start(); source.close = Some(close_start..close_end); let nl = input.find_slice("\n"); let after_closing_fence = input.next_slice(nl.map(|span| span.end).unwrap_or_else(|| input.eof_offset())); let content_start = input.current_token_start(); let extra_dashes = after_closing_fence.chars().take_while(|b| *b == FENCE_CHAR).count(); if 0 < extra_dashes { let extra_start = close_end; let extra_end = extra_start + extra_dashes; return Err(FrontmatterError::new( format!("closing code fence has {extra_dashes} more `-` than the opening fence"), extra_start..extra_end, ) .push_visible_span(open_start..open_end)); } else { let after_closing_fence = strip_newline(after_closing_fence); let after_closing_fence = after_closing_fence.trim_matches(is_horizontal_whitespace); if !after_closing_fence.is_empty() { // extra characters beyond the original fence pattern let after_start = after_closing_fence.offset_from(&raw); let after_end = after_start + after_closing_fence.len(); return Err(FrontmatterError::new( format!("unexpected characters after frontmatter close"), after_start..after_end, ) .push_visible_span(open_start..open_end)); } } source.content = content_start..content_end; if let Some(nl_end) = strip_ws_lines(input.as_ref()) { let _ = input.next_slice(nl_end); } let fence_length = input .as_ref() .char_indices() .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i)) .unwrap_or_else(|| input.eof_offset()); if 0 < fence_length { let fence_start = input.current_token_start(); let fence_end = fence_start + fence_length; return Err(FrontmatterError::new( format!("only one frontmatter is supported"), fence_start..fence_end, ) .push_visible_span(open_start..open_end) .push_visible_span(close_start..close_end)); } Ok(source) } pub fn shebang(&self) -> Option<&'s str> { self.shebang.clone().map(|span| &self.raw[span]) } pub fn shebang_span(&self) -> Option { self.shebang.clone() } pub fn open_span(&self) -> Option { self.open.clone() } pub fn info(&self) -> Option<&'s str> { self.info.clone().map(|span| &self.raw[span]) } pub fn info_span(&self) -> Option { self.info.clone() } pub fn frontmatter(&self) -> Option<&'s str> { self.frontmatter.clone().map(|span| &self.raw[span]) } pub fn frontmatter_span(&self) -> Option { self.frontmatter.clone() } pub fn close_span(&self) -> Option { self.close.clone() } pub fn content(&self) -> &'s str { &self.raw[self.content.clone()] } pub fn content_span(&self) -> Span { self.content.clone() } } /// Returns the index after the shebang line, if present pub fn strip_shebang(input: &str) -> Option { // See rust-lang/rust's compiler/rustc_lexer/src/lib.rs's `strip_shebang` // Shebang must start with `#!` literally, without any preceding whitespace. // For simplicity we consider any line starting with `#!` a shebang, // regardless of restrictions put on shebangs by specific platforms. if let Some(rest) = input.strip_prefix("#!") { // Ok, this is a shebang but if the next non-whitespace token is `[`, // then it may be valid Rust code, so consider it Rust code. // // NOTE: rustc considers line and block comments to be whitespace but to avoid // any more awareness of Rust grammar, we are excluding it. if !rest.trim_start().starts_with('[') { // No other choice than to consider this a shebang. let newline_end = input.find('\n').map(|pos| pos + 1).unwrap_or(input.len()); return Some(newline_end); } } None } /// Returns the index after any lines with only whitespace, if present pub fn strip_ws_lines(input: &str) -> Option { let ws_end = input.find(|c| !is_whitespace(c)).unwrap_or(input.len()); if ws_end == 0 { return None; } let nl_start = input[0..ws_end].rfind('\n')?; let nl_end = nl_start + 1; Some(nl_end) } /// True if `c` is considered a whitespace according to Rust language definition. /// See [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html) /// for definitions of these classes. fn is_whitespace(c: char) -> bool { // This is Pattern_White_Space. // // Note that this set is stable (ie, it doesn't change with different // Unicode versions), so it's ok to just hard-code the values. matches!( c, // End-of-line characters | '\u{000A}' // line feed (\n) | '\u{000B}' // vertical tab | '\u{000C}' // form feed | '\u{000D}' // carriage return (\r) | '\u{0085}' // next line (from latin1) | '\u{2028}' // LINE SEPARATOR | '\u{2029}' // PARAGRAPH SEPARATOR // `Default_Ignorable_Code_Point` characters | '\u{200E}' // LEFT-TO-RIGHT MARK | '\u{200F}' // RIGHT-TO-LEFT MARK // Horizontal space characters | '\u{0009}' // tab (\t) | '\u{0020}' // space ) } /// True if `c` is considered horizontal whitespace according to Rust language definition. fn is_horizontal_whitespace(c: char) -> bool { // This is Pattern_White_Space. // // Note that this set is stable (ie, it doesn't change with different // Unicode versions), so it's ok to just hard-code the values. matches!( c, // Horizontal space characters '\u{0009}' // tab (\t) | '\u{0020}' // space ) } fn strip_newline(text: &str) -> &str { text.strip_suffix("\r\n").or_else(|| text.strip_suffix('\n')).unwrap_or(text) } #[derive(Debug)] pub struct FrontmatterError { message: String, primary_span: Span, visible_spans: Vec, } impl FrontmatterError { pub fn new(message: impl Into, span: Span) -> Self { Self { message: message.into(), primary_span: span, visible_spans: Vec::new() } } pub fn push_visible_span(mut self, span: Span) -> Self { self.visible_spans.push(span); self } pub fn message(&self) -> &str { self.message.as_str() } pub fn primary_span(&self) -> Span { self.primary_span.clone() } pub fn visible_spans(&self) -> &[Span] { &self.visible_spans } } impl std::fmt::Display for FrontmatterError { fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.message.fmt(fmt) } } impl std::error::Error for FrontmatterError {}