Unnamed repository; edit this file 'description' to name the repository.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
//! This module provides a way to construct a `File`.
//! It is intended to be completely decoupled from the
//! parser, so as to allow to evolve the tree representation
//! and the parser algorithm independently.
use std::{mem, num::NonZeroU32};

use crate::{
    SyntaxKind::{self, *},
    output::Output,
};

/// `Parser` produces a flat list of `Event`s.
/// They are converted to a tree-structure in
/// a separate pass, via `TreeBuilder`.
///
/// Kept to 8 bytes: error messages live in a side table on the `Parser`
/// (the `errors` vec) and `Event::Error` only stores an index into it.
/// `forward_parent` uses `NonZeroU32` so `Option` is niche-optimised away
/// (the offset is always ≥ 1 because the forward parent sits later in the
/// event stream).
#[derive(Debug, PartialEq, Clone, Copy)]
pub(crate) enum Event {
    /// This event signifies the start of the node.
    /// It should be either abandoned (in which case the
    /// `kind` is `TOMBSTONE`, and the event is ignored),
    /// or completed via a `Finish` event.
    ///
    /// All tokens between a `Start` and a `Finish` would
    /// become the children of the respective node.
    ///
    /// For left-recursive syntactic constructs, the parser produces
    /// a child node before it sees a parent. `forward_parent`
    /// saves the position of current event's parent.
    ///
    /// Consider this path
    ///
    /// foo::bar
    ///
    /// The events for it would look like this:
    ///
    /// ```text
    /// START(PATH) IDENT('foo') FINISH START(PATH) T![::] IDENT('bar') FINISH
    ///       |                          /\
    ///       |                          |
    ///       +------forward-parent------+
    /// ```
    ///
    /// And the tree would look like this
    ///
    /// ```text
    ///    +--PATH---------+
    ///    |   |           |
    ///    |   |           |
    ///    |  '::'       'bar'
    ///    |
    ///   PATH
    ///    |
    ///   'foo'
    /// ```
    ///
    /// See also `CompletedMarker::precede`.
    Start { kind: SyntaxKind, forward_parent: Option<NonZeroU32> },

    /// Complete the previous `Start` event
    Finish,

    /// Produce a single leaf-element.
    /// `n_raw_tokens` is used to glue complex contextual tokens.
    /// For example, lexer tokenizes `>>` as `>`, `>`, and
    /// `n_raw_tokens = 2` is used to produced a single `>>`.
    Token { kind: SyntaxKind, n_raw_tokens: u8 },
    /// When we parse `foo.0.0` or `foo. 0. 0` the lexer will hand us a float literal
    /// instead of an integer literal followed by a dot as the lexer has no contextual knowledge.
    /// This event instructs whatever consumes the events to split the float literal into
    /// the corresponding parts.
    FloatSplitHack { ends_in_dot: bool },
    /// Index into the parser's side `errors` vec.
    Error { err: u32 },
}

impl Event {
    pub(crate) fn tombstone() -> Self {
        Event::Start { kind: TOMBSTONE, forward_parent: None }
    }
}

/// Generate the syntax tree with the control of events. `errors` is the
/// side table of error messages built up alongside the `events` stream.
pub(super) fn process(mut events: Vec<Event>, mut errors: Vec<String>) -> Output {
    // Each event becomes roughly one u32 in Output, so preallocate to avoid
    // the amortized grow-one churn we used to see in Output::enter_node.
    let mut res = Output::with_event_capacity(events.len());
    let mut forward_parents = Vec::new();

    for i in 0..events.len() {
        match events[i] {
            Event::Start { kind, forward_parent } => {
                // For events[A, B, C], B is A's forward_parent, C is B's forward_parent,
                // in the normal control flow, the parent-child relation: `A -> B -> C`,
                // while with the magic forward_parent, it writes: `C <- B <- A`.

                // append `A` into parents.
                forward_parents.push(kind);
                let mut idx = i;
                let mut fp = forward_parent;
                while let Some(fwd) = fp {
                    idx += fwd.get() as usize;
                    // append `A`'s forward_parent `B`
                    fp = match mem::replace(&mut events[idx], Event::tombstone()) {
                        Event::Start { kind, forward_parent } => {
                            forward_parents.push(kind);
                            forward_parent
                        }
                        _ => unreachable!(),
                    };
                    // append `B`'s forward_parent `C` in the next stage.
                }

                for kind in forward_parents.drain(..).rev() {
                    if kind != TOMBSTONE {
                        res.enter_node(kind);
                    }
                }
            }
            Event::Finish => res.leave_node(),
            Event::Token { kind, n_raw_tokens } => {
                res.token(kind, n_raw_tokens);
            }
            Event::FloatSplitHack { ends_in_dot } => {
                res.float_split_hack(ends_in_dot);
                let ev = mem::replace(&mut events[i + 1], Event::tombstone());
                assert!(matches!(ev, Event::Finish), "{ev:?}");
            }
            Event::Error { err } => {
                // Move the string out of the side table; each index is visited
                // exactly once, so swapping with an empty String is cheap and
                // avoids any clone.
                let msg = mem::take(&mut errors[err as usize]);
                res.error(msg);
            }
        }
    }

    res
}