Unnamed repository; edit this file 'description' to name the repository.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
// Copied from https://github.com/rust-lang/cargo/blob/367fd9f213750cd40317803dd0a5a3ce3f0c676d/src/cargo/util/frontmatter.rs
#![expect(dead_code)] // avoid editing
#![expect(unreachable_pub)] // avoid editing
#![expect(clippy::useless_format)] // avoid editing

type Span = std::ops::Range<usize>;

#[derive(Debug)]
pub struct ScriptSource<'s> {
    /// The full file
    raw: &'s str,
    /// The `#!/usr/bin/env cargo` line, if present
    shebang: Option<Span>,
    /// The code fence opener (`---`)
    open: Option<Span>,
    /// Trailing text after `ScriptSource::open` that identifies the meaning of
    /// `ScriptSource::frontmatter`
    info: Option<Span>,
    /// The lines between `ScriptSource::open` and `ScriptSource::close`
    frontmatter: Option<Span>,
    /// The code fence closer (`---`)
    close: Option<Span>,
    /// All content after the frontmatter and shebang
    content: Span,
}

impl<'s> ScriptSource<'s> {
    pub fn parse(raw: &'s str) -> Result<Self, FrontmatterError> {
        use winnow::stream::FindSlice as _;
        use winnow::stream::Location as _;
        use winnow::stream::Offset as _;
        use winnow::stream::Stream as _;

        let content_end = raw.len();
        let mut source = Self {
            raw,
            shebang: None,
            open: None,
            info: None,
            frontmatter: None,
            close: None,
            content: 0..content_end,
        };

        let mut input = winnow::stream::LocatingSlice::new(raw);

        if let Some(shebang_end) = strip_shebang(input.as_ref()) {
            let shebang_start = input.current_token_start();
            let _ = input.next_slice(shebang_end);
            let shebang_end = input.current_token_start();
            source.shebang = Some(shebang_start..shebang_end);
            source.content = shebang_end..content_end;
        }

        // Whitespace may precede a frontmatter but must end with a newline
        if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
            let _ = input.next_slice(nl_end);
        }

        // Opens with a line that starts with 3 or more `-` followed by an optional identifier
        const FENCE_CHAR: char = '-';
        let fence_length = input
            .as_ref()
            .char_indices()
            .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
            .unwrap_or_else(|| input.eof_offset());
        let open_start = input.current_token_start();
        let fence_pattern = input.next_slice(fence_length);
        let open_end = input.current_token_start();
        match fence_length {
            0 => {
                return Ok(source);
            }
            1 | 2 => {
                // either not a frontmatter or invalid frontmatter opening
                return Err(FrontmatterError::new(
                    format!(
                        "found {fence_length} `{FENCE_CHAR}` in rust frontmatter, expected at least 3"
                    ),
                    raw.len()..raw.len(),
                ).push_visible_span(open_start..open_end));
            }
            _ => {}
        }
        source.open = Some(open_start..open_end);
        let Some(info_nl) = input.find_slice("\n") else {
            return Err(FrontmatterError::new(
                format!("unclosed frontmatter; expected `{fence_pattern}`"),
                raw.len()..raw.len(),
            )
            .push_visible_span(open_start..open_end));
        };
        let info = input.next_slice(info_nl.start);
        let info = info.strip_suffix('\r').unwrap_or(info); // already excludes `\n`
        let info = info.trim_matches(is_horizontal_whitespace);
        if !info.is_empty() {
            let info_start = info.offset_from(&raw);
            let info_end = info_start + info.len();
            source.info = Some(info_start..info_end);
        }

        // Ends with a line that starts with a matching number of `-` only followed by whitespace
        let nl_fence_pattern = format!("\n{fence_pattern}");
        let Some(frontmatter_nl) = input.find_slice(nl_fence_pattern.as_str()) else {
            for len in (2..(nl_fence_pattern.len() - 1)).rev() {
                let Some(frontmatter_nl) = input.find_slice(&nl_fence_pattern[0..len]) else {
                    continue;
                };
                let _ = input.next_slice(frontmatter_nl.start + 1);
                let close_start = input.current_token_start();
                let _ = input.next_slice(len);
                let close_end = input.current_token_start();
                let fewer_dashes = fence_length - len;
                return Err(FrontmatterError::new(
                    format!(
                        "closing code fence has {fewer_dashes} less `-` than the opening fence"
                    ),
                    close_start..close_end,
                )
                .push_visible_span(open_start..open_end));
            }
            return Err(FrontmatterError::new(
                format!("unclosed frontmatter; expected `{fence_pattern}`"),
                raw.len()..raw.len(),
            )
            .push_visible_span(open_start..open_end));
        };
        let frontmatter_start = input.current_token_start() + 1; // skip nl from infostring
        let _ = input.next_slice(frontmatter_nl.start + 1);
        let frontmatter_end = input.current_token_start();
        source.frontmatter = Some(frontmatter_start..frontmatter_end);
        let close_start = input.current_token_start();
        let _ = input.next_slice(fence_length);
        let close_end = input.current_token_start();
        source.close = Some(close_start..close_end);

        let nl = input.find_slice("\n");
        let after_closing_fence =
            input.next_slice(nl.map(|span| span.end).unwrap_or_else(|| input.eof_offset()));
        let content_start = input.current_token_start();
        let extra_dashes = after_closing_fence.chars().take_while(|b| *b == FENCE_CHAR).count();
        if 0 < extra_dashes {
            let extra_start = close_end;
            let extra_end = extra_start + extra_dashes;
            return Err(FrontmatterError::new(
                format!("closing code fence has {extra_dashes} more `-` than the opening fence"),
                extra_start..extra_end,
            )
            .push_visible_span(open_start..open_end));
        } else {
            let after_closing_fence = strip_newline(after_closing_fence);
            let after_closing_fence = after_closing_fence.trim_matches(is_horizontal_whitespace);
            if !after_closing_fence.is_empty() {
                // extra characters beyond the original fence pattern
                let after_start = after_closing_fence.offset_from(&raw);
                let after_end = after_start + after_closing_fence.len();
                return Err(FrontmatterError::new(
                    format!("unexpected characters after frontmatter close"),
                    after_start..after_end,
                )
                .push_visible_span(open_start..open_end));
            }
        }

        source.content = content_start..content_end;

        if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
            let _ = input.next_slice(nl_end);
        }
        let fence_length = input
            .as_ref()
            .char_indices()
            .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
            .unwrap_or_else(|| input.eof_offset());
        if 0 < fence_length {
            let fence_start = input.current_token_start();
            let fence_end = fence_start + fence_length;
            return Err(FrontmatterError::new(
                format!("only one frontmatter is supported"),
                fence_start..fence_end,
            )
            .push_visible_span(open_start..open_end)
            .push_visible_span(close_start..close_end));
        }

        Ok(source)
    }

    pub fn shebang(&self) -> Option<&'s str> {
        self.shebang.clone().map(|span| &self.raw[span])
    }

    pub fn shebang_span(&self) -> Option<Span> {
        self.shebang.clone()
    }

    pub fn open_span(&self) -> Option<Span> {
        self.open.clone()
    }

    pub fn info(&self) -> Option<&'s str> {
        self.info.clone().map(|span| &self.raw[span])
    }

    pub fn info_span(&self) -> Option<Span> {
        self.info.clone()
    }

    pub fn frontmatter(&self) -> Option<&'s str> {
        self.frontmatter.clone().map(|span| &self.raw[span])
    }

    pub fn frontmatter_span(&self) -> Option<Span> {
        self.frontmatter.clone()
    }

    pub fn close_span(&self) -> Option<Span> {
        self.close.clone()
    }

    pub fn content(&self) -> &'s str {
        &self.raw[self.content.clone()]
    }

    pub fn content_span(&self) -> Span {
        self.content.clone()
    }
}

/// Returns the index after the shebang line, if present
pub fn strip_shebang(input: &str) -> Option<usize> {
    // See rust-lang/rust's compiler/rustc_lexer/src/lib.rs's `strip_shebang`
    // Shebang must start with `#!` literally, without any preceding whitespace.
    // For simplicity we consider any line starting with `#!` a shebang,
    // regardless of restrictions put on shebangs by specific platforms.
    if let Some(rest) = input.strip_prefix("#!") {
        // Ok, this is a shebang but if the next non-whitespace token is `[`,
        // then it may be valid Rust code, so consider it Rust code.
        //
        // NOTE: rustc considers line and block comments to be whitespace but to avoid
        // any more awareness of Rust grammar, we are excluding it.
        if !rest.trim_start().starts_with('[') {
            // No other choice than to consider this a shebang.
            let newline_end = input.find('\n').map(|pos| pos + 1).unwrap_or(input.len());
            return Some(newline_end);
        }
    }
    None
}

/// Returns the index after any lines with only whitespace, if present
pub fn strip_ws_lines(input: &str) -> Option<usize> {
    let ws_end = input.find(|c| !is_whitespace(c)).unwrap_or(input.len());
    if ws_end == 0 {
        return None;
    }

    let nl_start = input[0..ws_end].rfind('\n')?;
    let nl_end = nl_start + 1;
    Some(nl_end)
}

/// True if `c` is considered a whitespace according to Rust language definition.
/// See [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
/// for definitions of these classes.
fn is_whitespace(c: char) -> bool {
    // This is Pattern_White_Space.
    //
    // Note that this set is stable (ie, it doesn't change with different
    // Unicode versions), so it's ok to just hard-code the values.

    matches!(
        c,
        // End-of-line characters
        | '\u{000A}' // line feed (\n)
        | '\u{000B}' // vertical tab
        | '\u{000C}' // form feed
        | '\u{000D}' // carriage return (\r)
        | '\u{0085}' // next line (from latin1)
        | '\u{2028}' // LINE SEPARATOR
        | '\u{2029}' // PARAGRAPH SEPARATOR

        // `Default_Ignorable_Code_Point` characters
        | '\u{200E}' // LEFT-TO-RIGHT MARK
        | '\u{200F}' // RIGHT-TO-LEFT MARK

        // Horizontal space characters
        | '\u{0009}'   // tab (\t)
        | '\u{0020}' // space
    )
}

/// True if `c` is considered horizontal whitespace according to Rust language definition.
fn is_horizontal_whitespace(c: char) -> bool {
    // This is Pattern_White_Space.
    //
    // Note that this set is stable (ie, it doesn't change with different
    // Unicode versions), so it's ok to just hard-code the values.

    matches!(
        c,
        // Horizontal space characters
        '\u{0009}'   // tab (\t)
        | '\u{0020}' // space
    )
}

fn strip_newline(text: &str) -> &str {
    text.strip_suffix("\r\n").or_else(|| text.strip_suffix('\n')).unwrap_or(text)
}

#[derive(Debug)]
pub struct FrontmatterError {
    message: String,
    primary_span: Span,
    visible_spans: Vec<Span>,
}

impl FrontmatterError {
    pub fn new(message: impl Into<String>, span: Span) -> Self {
        Self { message: message.into(), primary_span: span, visible_spans: Vec::new() }
    }

    pub fn push_visible_span(mut self, span: Span) -> Self {
        self.visible_spans.push(span);
        self
    }

    pub fn message(&self) -> &str {
        self.message.as_str()
    }

    pub fn primary_span(&self) -> Span {
        self.primary_span.clone()
    }

    pub fn visible_spans(&self) -> &[Span] {
        &self.visible_spans
    }
}

impl std::fmt::Display for FrontmatterError {
    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.message.fmt(fmt)
    }
}

impl std::error::Error for FrontmatterError {}