Unnamed repository; edit this file 'description' to name the repository.
perf: optimize allocation strategies of output/parser/event
Lander Brandt 5 weeks ago
parent 251df51 · commit ec88d12
-rw-r--r--crates/parser/src/event.rs46
-rw-r--r--crates/parser/src/lib.rs12
-rw-r--r--crates/parser/src/output.rs8
-rw-r--r--crates/parser/src/parser.rs36
4 files changed, 67 insertions, 35 deletions
diff --git a/crates/parser/src/event.rs b/crates/parser/src/event.rs
index 5be9cb2a24..9177f4210b 100644
--- a/crates/parser/src/event.rs
+++ b/crates/parser/src/event.rs
@@ -2,7 +2,7 @@
//! It is intended to be completely decoupled from the
//! parser, so as to allow to evolve the tree representation
//! and the parser algorithm independently.
-use std::mem;
+use std::{mem, num::NonZeroU32};
use crate::{
SyntaxKind::{self, *},
@@ -12,6 +12,12 @@ use crate::{
/// `Parser` produces a flat list of `Event`s.
/// They are converted to a tree-structure in
/// a separate pass, via `TreeBuilder`.
+///
+/// Kept to 8 bytes: error messages live in a side table on the `Parser`
+/// (the `errors` vec) and `Event::Error` only stores an index into it.
+/// `forward_parent` uses `NonZeroU32` so `Option` is niche-optimised away
+/// (the offset is always ≥ 1 because the forward parent sits later in the
+/// event stream).
#[derive(Debug, PartialEq)]
pub(crate) enum Event {
/// This event signifies the start of the node.
@@ -53,10 +59,7 @@ pub(crate) enum Event {
/// ```
///
/// See also `CompletedMarker::precede`.
- Start {
- kind: SyntaxKind,
- forward_parent: Option<u32>,
- },
+ Start { kind: SyntaxKind, forward_parent: Option<NonZeroU32> },
/// Complete the previous `Start` event
Finish,
@@ -65,20 +68,14 @@ pub(crate) enum Event {
/// `n_raw_tokens` is used to glue complex contextual tokens.
/// For example, lexer tokenizes `>>` as `>`, `>`, and
/// `n_raw_tokens = 2` is used to produced a single `>>`.
- Token {
- kind: SyntaxKind,
- n_raw_tokens: u8,
- },
+ Token { kind: SyntaxKind, n_raw_tokens: u8 },
/// When we parse `foo.0.0` or `foo. 0. 0` the lexer will hand us a float literal
/// instead of an integer literal followed by a dot as the lexer has no contextual knowledge.
/// This event instructs whatever consumes the events to split the float literal into
/// the corresponding parts.
- FloatSplitHack {
- ends_in_dot: bool,
- },
- Error {
- msg: String,
- },
+ FloatSplitHack { ends_in_dot: bool },
+ /// Index into the parser's side `errors` vec.
+ Error { err: u32 },
}
impl Event {
@@ -87,9 +84,12 @@ impl Event {
}
}
-/// Generate the syntax tree with the control of events.
-pub(super) fn process(mut events: Vec<Event>) -> Output {
- let mut res = Output::default();
+/// Generate the syntax tree with the control of events. `errors` is the
+/// side table of error messages built up alongside the `events` stream.
+pub(super) fn process(mut events: Vec<Event>, mut errors: Vec<String>) -> Output {
+ // Each event becomes roughly one u32 in Output, so preallocate to avoid
+ // the amortized grow-one churn we used to see in Output::enter_node.
+ let mut res = Output::with_event_capacity(events.len());
let mut forward_parents = Vec::new();
for i in 0..events.len() {
@@ -104,7 +104,7 @@ pub(super) fn process(mut events: Vec<Event>) -> Output {
let mut idx = i;
let mut fp = forward_parent;
while let Some(fwd) = fp {
- idx += fwd as usize;
+ idx += fwd.get() as usize;
// append `A`'s forward_parent `B`
fp = match mem::replace(&mut events[idx], Event::tombstone()) {
Event::Start { kind, forward_parent } => {
@@ -131,7 +131,13 @@ pub(super) fn process(mut events: Vec<Event>) -> Output {
let ev = mem::replace(&mut events[i + 1], Event::tombstone());
assert!(matches!(ev, Event::Finish), "{ev:?}");
}
- Event::Error { msg } => res.error(msg),
+ Event::Error { err } => {
+ // Move the string out of the side table; each index is visited
+ // exactly once, so swapping with an empty String is cheap and
+ // avoids any clone.
+ let msg = mem::take(&mut errors[err as usize]);
+ res.error(msg);
+ }
}
}
diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs
index 4478bf4e37..3e0e0d4c42 100644
--- a/crates/parser/src/lib.rs
+++ b/crates/parser/src/lib.rs
@@ -104,8 +104,8 @@ impl TopEntryPoint {
};
let mut p = parser::Parser::new(input);
entry_point(&mut p);
- let events = p.finish();
- let res = event::process(events);
+ let (events, errors) = p.finish();
+ let res = event::process(events, errors);
if cfg!(debug_assertions) {
let mut depth = 0;
@@ -169,8 +169,8 @@ impl PrefixEntryPoint {
};
let mut p = parser::Parser::new(input);
entry_point(&mut p);
- let events = p.finish();
- event::process(events)
+ let (events, errors) = p.finish();
+ event::process(events, errors)
}
}
@@ -195,7 +195,7 @@ impl Reparser {
let Reparser(r) = self;
let mut p = parser::Parser::new(tokens);
r(&mut p);
- let events = p.finish();
- event::process(events)
+ let (events, errors) = p.finish();
+ event::process(events, errors)
}
}
diff --git a/crates/parser/src/output.rs b/crates/parser/src/output.rs
index 0ea15a656c..2f09b11218 100644
--- a/crates/parser/src/output.rs
+++ b/crates/parser/src/output.rs
@@ -33,6 +33,14 @@ pub enum Step<'a> {
}
impl Output {
+ /// Preallocate the event buffer. Each `Event` in the input stream maps to
+ /// roughly one `u32` in the output, so passing the event count as the
+ /// initial capacity avoids most of the amortized `Vec::grow` allocations
+ /// during `event::process`.
+ pub(crate) fn with_event_capacity(cap: usize) -> Self {
+ Output { event: Vec::with_capacity(cap), error: Vec::new() }
+ }
+
const EVENT_MASK: u32 = 0b1;
const TAG_MASK: u32 = 0x0000_00F0;
const N_INPUT_TOKEN_MASK: u32 = 0x0000_FF00;
diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs
index 4557078de9..57c501eda2 100644
--- a/crates/parser/src/parser.rs
+++ b/crates/parser/src/parser.rs
@@ -1,6 +1,6 @@
//! See [`Parser`].
-use std::cell::Cell;
+use std::{cell::Cell, num::NonZeroU32};
use drop_bomb::DropBomb;
@@ -12,6 +12,14 @@ use crate::{
input::Input,
};
+/// Build a forward-parent offset. The offset is always ≥ 1 because the
+/// forward-parent event is created *after* the event it forwards to, so
+/// `NonZeroU32` is always valid here. Panics only on a parser bug.
+#[inline]
+fn fwd_parent(offset: u32) -> NonZeroU32 {
+ NonZeroU32::new(offset).expect("forward-parent offset must be non-zero")
+}
+
/// `Parser` struct provides the low-level API for
/// navigating through the stream of tokens and
/// constructing the parse tree. The actual parsing
@@ -25,6 +33,9 @@ pub(crate) struct Parser<'t> {
inp: &'t Input,
pos: usize,
events: Vec<Event>,
+ /// Side table of error messages. `Event::Error { err }` carries an index
+ /// into this vec, keeping `Event` itself a flat 8-byte enum.
+ errors: Vec<String>,
steps: Cell<u32>,
}
@@ -32,11 +43,17 @@ const PARSER_STEP_LIMIT: usize = if cfg!(debug_assertions) { 150_000 } else { 15
impl<'t> Parser<'t> {
pub(super) fn new(inp: &'t Input) -> Parser<'t> {
- Parser { inp, pos: 0, events: Vec::with_capacity(2 * inp.len()), steps: Cell::new(0) }
+ Parser {
+ inp,
+ pos: 0,
+ events: Vec::with_capacity(2 * inp.len()),
+ errors: Vec::new(),
+ steps: Cell::new(0),
+ }
}
- pub(crate) fn finish(self) -> Vec<Event> {
- self.events
+ pub(crate) fn finish(self) -> (Vec<Event>, Vec<String>) {
+ (self.events, self.errors)
}
/// Returns the kind of the current token.
@@ -206,7 +223,7 @@ impl<'t> Parser<'t> {
match &mut self.events[idx] {
Event::Start { forward_parent, kind } => {
*kind = SyntaxKind::FIELD_EXPR;
- *forward_parent = Some(new_marker.pos - marker.pos);
+ *forward_parent = Some(fwd_parent(new_marker.pos - marker.pos));
}
_ => unreachable!(),
}
@@ -237,8 +254,9 @@ impl<'t> Parser<'t> {
/// structured errors with spans and notes, like rustc
/// does.
pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
- let msg = message.into();
- self.push_event(Event::Error { msg });
+ let err = self.errors.len() as u32;
+ self.errors.push(message.into());
+ self.push_event(Event::Error { err });
}
/// Consume the next token if it is `kind` or emit an error
@@ -366,7 +384,7 @@ impl CompletedMarker {
let idx = self.start_pos as usize;
match &mut p.events[idx] {
Event::Start { forward_parent, .. } => {
- *forward_parent = Some(new_pos.pos - self.start_pos);
+ *forward_parent = Some(fwd_parent(new_pos.pos - self.start_pos));
}
_ => unreachable!(),
}
@@ -379,7 +397,7 @@ impl CompletedMarker {
let idx = m.pos as usize;
match &mut p.events[idx] {
Event::Start { forward_parent, .. } => {
- *forward_parent = Some(self.start_pos - m.pos);
+ *forward_parent = Some(fwd_parent(self.start_pos - m.pos));
}
_ => unreachable!(),
}