//! This module provides a way to construct a `File`.
//! It is intended to be completely decoupled from the
//! parser, so as to allow to evolve the tree representation
//! and the parser algorithm independently.
use std::{mem, num::NonZeroU32};
use crate::{
SyntaxKind::{self, *},
output::Output,
};
/// `Parser` produces a flat list of `Event`s.
/// They are converted to a tree-structure in
/// a separate pass, via `TreeBuilder`.
///
/// Kept to 8 bytes: error messages live in a side table on the `Parser`
/// (the `errors` vec) and `Event::Error` only stores an index into it.
/// `forward_parent` uses `NonZeroU32` so `Option` is niche-optimised away
/// (the offset is always ≥ 1 because the forward parent sits later in the
/// event stream).
#[derive(Debug, PartialEq, Clone, Copy)]
pub(crate) enum Event {
/// This event signifies the start of the node.
/// It should be either abandoned (in which case the
/// `kind` is `TOMBSTONE`, and the event is ignored),
/// or completed via a `Finish` event.
///
/// All tokens between a `Start` and a `Finish` would
/// become the children of the respective node.
///
/// For left-recursive syntactic constructs, the parser produces
/// a child node before it sees a parent. `forward_parent`
/// saves the position of current event's parent.
///
/// Consider this path
///
/// foo::bar
///
/// The events for it would look like this:
///
/// ```text
/// START(PATH) IDENT('foo') FINISH START(PATH) T![::] IDENT('bar') FINISH
/// | /\
/// | |
/// +------forward-parent------+
/// ```
///
/// And the tree would look like this
///
/// ```text
/// +--PATH---------+
/// | | |
/// | | |
/// | '::' 'bar'
/// |
/// PATH
/// |
/// 'foo'
/// ```
///
/// See also `CompletedMarker::precede`.
Start { kind: SyntaxKind, forward_parent: Option<NonZeroU32> },
/// Complete the previous `Start` event
Finish,
/// Produce a single leaf-element.
/// `n_raw_tokens` is used to glue complex contextual tokens.
/// For example, lexer tokenizes `>>` as `>`, `>`, and
/// `n_raw_tokens = 2` is used to produced a single `>>`.
Token { kind: SyntaxKind, n_raw_tokens: u8 },
/// When we parse `foo.0.0` or `foo. 0. 0` the lexer will hand us a float literal
/// instead of an integer literal followed by a dot as the lexer has no contextual knowledge.
/// This event instructs whatever consumes the events to split the float literal into
/// the corresponding parts.
FloatSplitHack { ends_in_dot: bool },
/// Index into the parser's side `errors` vec.
Error { err: u32 },
}
impl Event {
pub(crate) fn tombstone() -> Self {
Event::Start { kind: TOMBSTONE, forward_parent: None }
}
}
/// Generate the syntax tree with the control of events. `errors` is the
/// side table of error messages built up alongside the `events` stream.
pub(super) fn process(mut events: Vec<Event>, mut errors: Vec<String>) -> Output {
// Each event becomes roughly one u32 in Output, so preallocate to avoid
// the amortized grow-one churn we used to see in Output::enter_node.
let mut res = Output::with_event_capacity(events.len());
let mut forward_parents = Vec::new();
for i in 0..events.len() {
match events[i] {
Event::Start { kind, forward_parent } => {
// For events[A, B, C], B is A's forward_parent, C is B's forward_parent,
// in the normal control flow, the parent-child relation: `A -> B -> C`,
// while with the magic forward_parent, it writes: `C <- B <- A`.
// append `A` into parents.
forward_parents.push(kind);
let mut idx = i;
let mut fp = forward_parent;
while let Some(fwd) = fp {
idx += fwd.get() as usize;
// append `A`'s forward_parent `B`
fp = match mem::replace(&mut events[idx], Event::tombstone()) {
Event::Start { kind, forward_parent } => {
forward_parents.push(kind);
forward_parent
}
_ => unreachable!(),
};
// append `B`'s forward_parent `C` in the next stage.
}
for kind in forward_parents.drain(..).rev() {
if kind != TOMBSTONE {
res.enter_node(kind);
}
}
}
Event::Finish => res.leave_node(),
Event::Token { kind, n_raw_tokens } => {
res.token(kind, n_raw_tokens);
}
Event::FloatSplitHack { ends_in_dot } => {
res.float_split_hack(ends_in_dot);
let ev = mem::replace(&mut events[i + 1], Event::tombstone());
assert!(matches!(ev, Event::Finish), "{ev:?}");
}
Event::Error { err } => {
// Move the string out of the side table; each index is visited
// exactly once, so swapping with an empty String is cheap and
// avoids any clone.
let msg = mem::take(&mut errors[err as usize]);
res.error(msg);
}
}
}
res
}