Diffstat (limited to 'crates/mbe/src/syntax_bridge.rs')
-rw-r--r--	crates/mbe/src/syntax_bridge.rs	1041
1 file changed, 469 insertions(+), 572 deletions(-)
diff --git a/crates/mbe/src/syntax_bridge.rs b/crates/mbe/src/syntax_bridge.rs
index 7b9bb61e69..1c46471a38 100644
--- a/crates/mbe/src/syntax_bridge.rs
+++ b/crates/mbe/src/syntax_bridge.rs
@@ -1,98 +1,102 @@
 //! Conversions between [`SyntaxNode`] and [`tt::TokenTree`].
 
-use rustc_hash::FxHashMap;
-use stdx::{always, non_empty_vec::NonEmptyVec};
+use rustc_hash::{FxHashMap, FxHashSet};
+use stdx::{never, non_empty_vec::NonEmptyVec};
 use syntax::{
     ast::{self, make::tokens::doc_comment},
     AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement, SyntaxKind, SyntaxKind::*,
     SyntaxNode, SyntaxToken, SyntaxTreeBuilder, TextRange, TextSize, WalkEvent, T,
 };
-
-use crate::{
-    to_parser_input::to_parser_input,
-    tt::{
-        self,
-        buffer::{Cursor, TokenBuffer},
-    },
-    tt_iter::TtIter,
-    TokenMap,
+use tt::{
+    buffer::{Cursor, TokenBuffer},
+    Span, SpanData, SyntaxContext,
 };
 
+use crate::{to_parser_input::to_parser_input, tt_iter::TtIter, SpanMap};
+
 #[cfg(test)]
 mod tests;
 
-/// Convert the syntax node to a `TokenTree` (what macro
-/// will consume).
-pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> (tt::Subtree, TokenMap) {
-    let (subtree, token_map, _) = syntax_node_to_token_tree_with_modifications(
-        node,
-        Default::default(),
-        0,
-        Default::default(),
-        Default::default(),
-    );
-    (subtree, token_map)
+pub trait SpanMapper<S: Span> {
+    fn span_for(&self, range: TextRange) -> S;
 }
 
-/// Convert the syntax node to a `TokenTree` (what macro will consume)
-/// with the censored range excluded.
-pub fn syntax_node_to_token_tree_with_modifications(
-    node: &SyntaxNode,
-    existing_token_map: TokenMap,
-    next_id: u32,
-    replace: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
-    append: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
-) -> (tt::Subtree, TokenMap, u32) {
-    let global_offset = node.text_range().start();
-    let mut c = Converter::new(node, global_offset, existing_token_map, next_id, replace, append);
-    let subtree = convert_tokens(&mut c);
-    c.id_alloc.map.shrink_to_fit();
-    always!(c.replace.is_empty(), "replace: {:?}", c.replace);
-    always!(c.append.is_empty(), "append: {:?}", c.append);
-    (subtree, c.id_alloc.map, c.id_alloc.next_id)
+impl<S: Span> SpanMapper<S> for SpanMap<S> {
+    fn span_for(&self, range: TextRange) -> S {
+        self.span_at(range.start())
+    }
 }
 
-/// Convert the syntax node to a `TokenTree` (what macro
-/// will consume).
-pub fn syntax_node_to_token_map(node: &SyntaxNode) -> TokenMap {
-    syntax_node_to_token_map_with_modifications(
-        node,
-        Default::default(),
-        0,
-        Default::default(),
-        Default::default(),
-    )
-    .0
+impl<S: Span, SM: SpanMapper<S>> SpanMapper<S> for &SM {
+    fn span_for(&self, range: TextRange) -> S {
+        SM::span_for(self, range)
+    }
 }
 
-/// Convert the syntax node to a `TokenTree` (what macro will consume)
-/// with the censored range excluded.
-pub fn syntax_node_to_token_map_with_modifications(
-    node: &SyntaxNode,
-    existing_token_map: TokenMap,
-    next_id: u32,
-    replace: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
-    append: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
-) -> (TokenMap, u32) {
-    let global_offset = node.text_range().start();
-    let mut c = Converter::new(node, global_offset, existing_token_map, next_id, replace, append);
-    collect_tokens(&mut c);
-    c.id_alloc.map.shrink_to_fit();
-    always!(c.replace.is_empty(), "replace: {:?}", c.replace);
-    always!(c.append.is_empty(), "append: {:?}", c.append);
-    (c.id_alloc.map, c.id_alloc.next_id)
+/// Dummy things for testing where spans don't matter.
+pub(crate) mod dummy_test_span_utils {
+    use super::*;
+
+    pub type DummyTestSpanData = tt::SpanData<DummyTestSpanAnchor, DummyTestSyntaxContext>;
+    pub const DUMMY: DummyTestSpanData = DummyTestSpanData::DUMMY;
+
+    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+    pub struct DummyTestSpanAnchor;
+    impl tt::SpanAnchor for DummyTestSpanAnchor {
+        const DUMMY: Self = DummyTestSpanAnchor;
+    }
+    #[derive(Debug, Copy, Clone, PartialEq, Eq)]
+    pub struct DummyTestSyntaxContext;
+    impl SyntaxContext for DummyTestSyntaxContext {
+        const DUMMY: Self = DummyTestSyntaxContext;
+    }
+
+    pub struct DummyTestSpanMap;
+
+    impl SpanMapper<tt::SpanData<DummyTestSpanAnchor, DummyTestSyntaxContext>> for DummyTestSpanMap {
+        fn span_for(
+            &self,
+            range: syntax::TextRange,
+        ) -> tt::SpanData<DummyTestSpanAnchor, DummyTestSyntaxContext> {
+            tt::SpanData { range, anchor: DummyTestSpanAnchor, ctx: DummyTestSyntaxContext }
+        }
+    }
 }
 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
-pub struct SyntheticTokenId(pub u32);
+/// Converts a syntax tree to a [`tt::Subtree`] using the provided span map to populate the
+/// subtree's spans.
+pub fn syntax_node_to_token_tree<Anchor, Ctx, SpanMap>(
+    node: &SyntaxNode,
+    map: SpanMap,
+) -> tt::Subtree<SpanData<Anchor, Ctx>>
+where
+    SpanData<Anchor, Ctx>: Span,
+    Anchor: Copy,
+    Ctx: SyntaxContext,
+    SpanMap: SpanMapper<SpanData<Anchor, Ctx>>,
+{
+    let mut c = Converter::new(node, map, Default::default(), Default::default());
+    convert_tokens(&mut c)
+}
 
-#[derive(Debug, Clone)]
-pub struct SyntheticToken {
-    pub kind: SyntaxKind,
-    pub text: SmolStr,
-    pub range: TextRange,
-    pub id: SyntheticTokenId,
+/// Converts a syntax tree to a [`tt::Subtree`] using the provided span map to populate the
+/// subtree's spans. Additionally, using the append and remove parameters, tokens can be
+/// injected into or hidden from the output.
+pub fn syntax_node_to_token_tree_modified<Anchor, Ctx, SpanMap>(
+    node: &SyntaxNode,
+    map: SpanMap,
+    append: FxHashMap<SyntaxElement, Vec<tt::Leaf<SpanData<Anchor, Ctx>>>>,
+    remove: FxHashSet<SyntaxNode>,
+) -> tt::Subtree<SpanData<Anchor, Ctx>>
+where
+    SpanMap: SpanMapper<SpanData<Anchor, Ctx>>,
+    SpanData<Anchor, Ctx>: Span,
+    Anchor: Copy,
+    Ctx: SyntaxContext,
+{
+    let mut c = Converter::new(node, map, append, remove);
+    convert_tokens(&mut c)
 }
 
 // The following items are what `rustc` macro can be parsed into :
@@ -107,10 +111,17 @@ pub struct SyntheticToken {
 //  * AssocItems(SmallVec<[ast::AssocItem; 1]>)
 //  * ForeignItems(SmallVec<[ast::ForeignItem; 1]>
 
-pub fn token_tree_to_syntax_node(
-    tt: &tt::Subtree,
+/// Converts a [`tt::Subtree`] back to a [`SyntaxNode`].
+/// The produced `SpanMap` contains a mapping from the syntax node's offsets to the subtree's spans.
+pub fn token_tree_to_syntax_node<Anchor, Ctx>(
+    tt: &tt::Subtree<SpanData<Anchor, Ctx>>,
     entry_point: parser::TopEntryPoint,
-) -> (Parse<SyntaxNode>, TokenMap) {
+) -> (Parse<SyntaxNode>, SpanMap<SpanData<Anchor, Ctx>>)
+where
+    SpanData<Anchor, Ctx>: Span,
+    Anchor: Copy,
+    Ctx: SyntaxContext,
+{
     let buffer = match tt {
         tt::Subtree {
             delimiter: tt::Delimiter { kind: tt::DelimiterKind::Invisible, .. },
@@ -137,29 +148,41 @@ pub fn token_tree_to_syntax_node(
     tree_sink.finish()
 }
 
-/// Convert a string to a `TokenTree`
-pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> {
+/// Convert a string to a `TokenTree`. The spans of the subtree will be anchored to the provided
+/// anchor with the given context.
+pub fn parse_to_token_tree<Anchor, Ctx>(
+    anchor: Anchor,
+    ctx: Ctx,
+    text: &str,
+) -> Option<tt::Subtree<SpanData<Anchor, Ctx>>>
+where
+    SpanData<Anchor, Ctx>: Span,
+    Anchor: Copy,
+    Ctx: SyntaxContext,
+{
     let lexed = parser::LexedStr::new(text);
     if lexed.errors().next().is_some() {
         return None;
     }
+    let mut conv = RawConverter { lexed, pos: 0, anchor, ctx };
+    Some(convert_tokens(&mut conv))
+}
 
-    let mut conv = RawConverter {
-        lexed,
-        pos: 0,
-        id_alloc: TokenIdAlloc {
-            map: Default::default(),
-            global_offset: TextSize::default(),
-            next_id: 0,
-        },
-    };
-
-    let subtree = convert_tokens(&mut conv);
-    Some((subtree, conv.id_alloc.map))
+/// Convert a string to a `TokenTree`. The passed span will be used for all spans of the produced subtree.
+pub fn parse_to_token_tree_static_span<S>(span: S, text: &str) -> Option<tt::Subtree<S>>
+where
+    S: Span,
+{
+    let lexed = parser::LexedStr::new(text);
+    if lexed.errors().next().is_some() {
+        return None;
+    }
+    let mut conv = StaticRawConverter { lexed, pos: 0, span };
+    Some(convert_tokens(&mut conv))
 }
 
 /// Split token tree with separate expr: $($e:expr)SEP*
-pub fn parse_exprs_with_sep(tt: &tt::Subtree, sep: char) -> Vec<tt::Subtree> {
+pub fn parse_exprs_with_sep<S: Span>(tt: &tt::Subtree<S>, sep: char) -> Vec<tt::Subtree<S>> {
     if tt.token_trees.is_empty() {
         return Vec::new();
     }
@@ -172,10 +195,7 @@ pub fn parse_exprs_with_sep(tt: &tt::Subtree, sep: char) -> Vec<tt::Subtree> {
 
         res.push(match expanded.value {
             None => break,
-            Some(tt @ tt::TokenTree::Leaf(_)) => {
-                tt::Subtree { delimiter: tt::Delimiter::unspecified(), token_trees: vec![tt] }
-            }
-            Some(tt::TokenTree::Subtree(tt)) => tt,
+            Some(tt) => tt.subtree_or_wrap(),
         });
 
         let mut fork = iter.clone();
@@ -187,7 +207,7 @@ pub fn parse_exprs_with_sep(tt: &tt::Subtree, sep: char) -> Vec<tt::Subtree> {
 
     if iter.peek_n(0).is_some() {
         res.push(tt::Subtree {
-            delimiter: tt::Delimiter::unspecified(),
+            delimiter: tt::Delimiter::DUMMY_INVISIBLE,
            token_trees: iter.cloned().collect(),
         });
    }
@@ -195,136 +215,118 @@ pub fn parse_exprs_with_sep(tt: &tt::Subtree, sep: char) -> Vec<tt::Subtree> {
 
     res
 }
 
-fn convert_tokens<C: TokenConverter>(conv: &mut C) -> tt::Subtree {
-    struct StackEntry {
-        subtree: tt::Subtree,
-        idx: usize,
-        open_range: TextRange,
-    }
-
-    let entry = StackEntry {
-        subtree: tt::Subtree { delimiter: tt::Delimiter::unspecified(), token_trees: vec![] },
-        // never used (delimiter is `None`)
-        idx: !0,
-        open_range: TextRange::empty(TextSize::of('.')),
-    };
+fn convert_tokens<S, C>(conv: &mut C) -> tt::Subtree<S>
+where
+    C: TokenConverter<S>,
+    S: Span,
+{
+    let entry = tt::Subtree { delimiter: tt::Delimiter::DUMMY_INVISIBLE, token_trees: vec![] };
     let mut stack = NonEmptyVec::new(entry);
 
-    loop {
-        let StackEntry { subtree, .. } = stack.last_mut();
-        let result = &mut subtree.token_trees;
-        let (token, range) = match conv.bump() {
-            Some(it) => it,
-            None => break,
-        };
-        let synth_id = token.synthetic_id(conv);
-
-        let kind = token.kind(conv);
-        if kind == COMMENT {
-            // Since `convert_doc_comment` can fail, we need to peek the next id, so that we can
-            // figure out which token id to use for the doc comment, if it is converted successfully.
-            let next_id = conv.id_alloc().peek_next_id();
-            if let Some(tokens) = conv.convert_doc_comment(&token, next_id) {
-                let id = conv.id_alloc().alloc(range, synth_id);
-                debug_assert_eq!(id, next_id);
-                result.extend(tokens);
-            }
-            continue;
-        }
-        let tt = if kind.is_punct() && kind != UNDERSCORE {
-            if synth_id.is_none() {
-                assert_eq!(range.len(), TextSize::of('.'));
-            }
-
-            let expected = match subtree.delimiter.kind {
-                tt::DelimiterKind::Parenthesis => Some(T![')']),
-                tt::DelimiterKind::Brace => Some(T!['}']),
-                tt::DelimiterKind::Bracket => Some(T![']']),
-                tt::DelimiterKind::Invisible => None,
-            };
-
-            if let Some(expected) = expected {
-                if kind == expected {
-                    if let Some(entry) = stack.pop() {
-                        conv.id_alloc().close_delim(entry.idx, Some(range));
-                        stack.last_mut().subtree.token_trees.push(entry.subtree.into());
+    while let Some((token, abs_range)) = conv.bump() {
+        let tt::Subtree { delimiter, token_trees: result } = stack.last_mut();
+
+        let tt = match token.as_leaf() {
+            Some(leaf) => tt::TokenTree::Leaf(leaf.clone()),
+            None => match token.kind(conv) {
+                // Desugar doc comments into doc attributes
+                COMMENT => {
+                    let span = conv.span_for(abs_range);
+                    if let Some(tokens) = conv.convert_doc_comment(&token, span) {
+                        result.extend(tokens);
                     }
                     continue;
                 }
-            }
-
-            let delim = match kind {
-                T!['('] => Some(tt::DelimiterKind::Parenthesis),
-                T!['{'] => Some(tt::DelimiterKind::Brace),
-                T!['['] => Some(tt::DelimiterKind::Bracket),
-                _ => None,
-            };
+                kind if kind.is_punct() && kind != UNDERSCORE => {
+                    let expected = match delimiter.kind {
+                        tt::DelimiterKind::Parenthesis => Some(T![')']),
+                        tt::DelimiterKind::Brace => Some(T!['}']),
+                        tt::DelimiterKind::Bracket => Some(T![']']),
+                        tt::DelimiterKind::Invisible => None,
+                    };
+
+                    // Current token is a closing delimiter that we expect, fix up the closing span
+                    // and end the subtree here
+                    if matches!(expected, Some(expected) if expected == kind) {
+                        if let Some(mut subtree) = stack.pop() {
+                            subtree.delimiter.close = conv.span_for(abs_range);
+                            stack.last_mut().token_trees.push(subtree.into());
+                        }
+                        continue;
+                    }
 
-            if let Some(kind) = delim {
-                let (id, idx) = conv.id_alloc().open_delim(range, synth_id);
-                let subtree = tt::Subtree {
-                    delimiter: tt::Delimiter { open: id, close: tt::TokenId::UNSPECIFIED, kind },
-                    token_trees: vec![],
-                };
-                stack.push(StackEntry { subtree, idx, open_range: range });
-                continue;
-            }
+                    let delim = match kind {
+                        T!['('] => Some(tt::DelimiterKind::Parenthesis),
+                        T!['{'] => Some(tt::DelimiterKind::Brace),
+                        T!['['] => Some(tt::DelimiterKind::Bracket),
+                        _ => None,
+                    };
+
+                    // Start a new subtree
+                    if let Some(kind) = delim {
+                        let open = conv.span_for(abs_range);
+                        stack.push(tt::Subtree {
+                            delimiter: tt::Delimiter {
+                                open,
+                                // will be overwritten on subtree close above
+                                close: open,
+                                kind,
+                            },
+                            token_trees: vec![],
+                        });
+                        continue;
+                    }
 
-            let spacing = match conv.peek().map(|next| next.kind(conv)) {
-                Some(kind) if is_single_token_op(kind) => tt::Spacing::Joint,
-                _ => tt::Spacing::Alone,
-            };
-            let char = match token.to_char(conv) {
-                Some(c) => c,
-                None => {
-                    panic!("Token from lexer must be single char: token = {token:#?}");
+                    let spacing = match conv.peek().map(|next| next.kind(conv)) {
+                        Some(kind) if is_single_token_op(kind) => tt::Spacing::Joint,
+                        _ => tt::Spacing::Alone,
+                    };
+                    let Some(char) = token.to_char(conv) else {
+                        panic!("Token from lexer must be single char: token = {token:#?}")
+                    };
+                    tt::Leaf::from(tt::Punct { char, spacing, span: conv.span_for(abs_range) })
+                        .into()
                 }
-            };
-            tt::Leaf::from(tt::Punct {
-                char,
-                spacing,
-                span: conv.id_alloc().alloc(range, synth_id),
-            })
-            .into()
-        } else {
-            macro_rules! make_leaf {
-                ($i:ident) => {
-                    tt::$i {
-                        span: conv.id_alloc().alloc(range, synth_id),
-                        text: token.to_text(conv),
+                kind => {
+                    macro_rules! make_leaf {
+                        ($i:ident) => {
+                            tt::$i { span: conv.span_for(abs_range), text: token.to_text(conv) }
+                                .into()
+                        };
                     }
-                    .into()
-                };
-            }
-            let leaf: tt::Leaf = match kind {
-                T![true] | T![false] => make_leaf!(Ident),
-                IDENT => make_leaf!(Ident),
-                UNDERSCORE => make_leaf!(Ident),
-                k if k.is_keyword() => make_leaf!(Ident),
-                k if k.is_literal() => make_leaf!(Literal),
-                LIFETIME_IDENT => {
-                    let char_unit = TextSize::of('\'');
-                    let r = TextRange::at(range.start(), char_unit);
-                    let apostrophe = tt::Leaf::from(tt::Punct {
-                        char: '\'',
-                        spacing: tt::Spacing::Joint,
-                        span: conv.id_alloc().alloc(r, synth_id),
-                    });
-                    result.push(apostrophe.into());
-
-                    let r = TextRange::at(range.start() + char_unit, range.len() - char_unit);
-                    let ident = tt::Leaf::from(tt::Ident {
-                        text: SmolStr::new(&token.to_text(conv)[1..]),
-                        span: conv.id_alloc().alloc(r, synth_id),
-                    });
-                    result.push(ident.into());
-                    continue;
-                }
-                _ => continue,
-            };
+                    let leaf: tt::Leaf<_> = match kind {
+                        T![true] | T![false] => make_leaf!(Ident),
+                        IDENT => make_leaf!(Ident),
+                        UNDERSCORE => make_leaf!(Ident),
+                        k if k.is_keyword() => make_leaf!(Ident),
+                        k if k.is_literal() => make_leaf!(Literal),
+                        LIFETIME_IDENT => {
+                            let apostrophe = tt::Leaf::from(tt::Punct {
+                                char: '\'',
+                                spacing: tt::Spacing::Joint,
+                                span: conv
+                                    .span_for(TextRange::at(abs_range.start(), TextSize::of('\''))),
+                            });
+                            result.push(apostrophe.into());
+
+                            let ident = tt::Leaf::from(tt::Ident {
+                                text: SmolStr::new(&token.to_text(conv)[1..]),
+                                span: conv.span_for(TextRange::at(
+                                    abs_range.start() + TextSize::of('\''),
+                                    abs_range.end(),
+                                )),
+                            });
+                            result.push(ident.into());
+                            continue;
+                        }
+                        _ => continue,
+                    };
 
-            leaf.into()
+                    leaf.into()
+                }
+            },
         };
 
         result.push(tt);
     }
 
@@ -334,10 +336,9 @@ fn convert_tokens<C: TokenConverter>(conv: &mut C) -> tt::Subtree {
     while let Some(entry) = stack.pop() {
         let parent = stack.last_mut();
 
-        conv.id_alloc().close_delim(entry.idx, None);
-        let leaf: tt::Leaf = tt::Punct {
-            span: conv.id_alloc().alloc(entry.open_range, None),
-            char: match entry.subtree.delimiter.kind {
+        let leaf: tt::Leaf<_> = tt::Punct {
+            span: entry.delimiter.open,
+            char: match entry.delimiter.kind {
                 tt::DelimiterKind::Parenthesis => '(',
                 tt::DelimiterKind::Brace => '{',
                 tt::DelimiterKind::Bracket => '[',
@@ -346,11 +347,11 @@ fn convert_tokens<C: TokenConverter>(conv: &mut C) -> tt::Subtree {
             spacing: tt::Spacing::Alone,
         }
         .into();
-        parent.subtree.token_trees.push(leaf.into());
-        parent.subtree.token_trees.extend(entry.subtree.token_trees);
+        parent.token_trees.push(leaf.into());
+        parent.token_trees.extend(entry.token_trees);
     }
 
-    let subtree = stack.into_last().subtree;
+    let subtree = stack.into_last();
     if let [tt::TokenTree::Subtree(first)] = &*subtree.token_trees {
         first.clone()
     } else {
@@ -358,111 +359,6 @@ fn convert_tokens<C: TokenConverter>(conv: &mut C) -> tt::Subtree {
    }
 }
 
-fn collect_tokens<C: TokenConverter>(conv: &mut C) {
-    struct StackEntry {
-        idx: usize,
-        open_range: TextRange,
-        delimiter: tt::DelimiterKind,
-    }
-
-    let entry = StackEntry {
-        delimiter: tt::DelimiterKind::Invisible,
-        // never used (delimiter is `None`)
-        idx: !0,
-        open_range: TextRange::empty(TextSize::of('.')),
-    };
-    let mut stack = NonEmptyVec::new(entry);
-
-    loop {
-        let StackEntry { delimiter, .. } = stack.last_mut();
-        let (token, range) = match conv.bump() {
-            Some(it) => it,
-            None => break,
-        };
-        let synth_id = token.synthetic_id(conv);
-
-        let kind = token.kind(conv);
-        if kind == COMMENT {
-            // Since `convert_doc_comment` can fail, we need to peek the next id, so that we can
-            // figure out which token id to use for the doc comment, if it is converted successfully.
-            let next_id = conv.id_alloc().peek_next_id();
-            if let Some(_tokens) = conv.convert_doc_comment(&token, next_id) {
-                let id = conv.id_alloc().alloc(range, synth_id);
-                debug_assert_eq!(id, next_id);
-            }
-            continue;
-        }
-        if kind.is_punct() && kind != UNDERSCORE {
-            if synth_id.is_none() {
-                assert_eq!(range.len(), TextSize::of('.'));
-            }
-
-            let expected = match delimiter {
-                tt::DelimiterKind::Parenthesis => Some(T![')']),
-                tt::DelimiterKind::Brace => Some(T!['}']),
-                tt::DelimiterKind::Bracket => Some(T![']']),
-                tt::DelimiterKind::Invisible => None,
-            };
-
-            if let Some(expected) = expected {
-                if kind == expected {
-                    if let Some(entry) = stack.pop() {
-                        conv.id_alloc().close_delim(entry.idx, Some(range));
-                    }
-                    continue;
-                }
-            }
-
-            let delim = match kind {
-                T!['('] => Some(tt::DelimiterKind::Parenthesis),
-                T!['{'] => Some(tt::DelimiterKind::Brace),
-                T!['['] => Some(tt::DelimiterKind::Bracket),
-                _ => None,
-            };
-
-            if let Some(kind) = delim {
-                let (_id, idx) = conv.id_alloc().open_delim(range, synth_id);
-
-                stack.push(StackEntry { idx, open_range: range, delimiter: kind });
-                continue;
-            }
-
-            conv.id_alloc().alloc(range, synth_id);
-        } else {
-            macro_rules! make_leaf {
-                ($i:ident) => {{
-                    conv.id_alloc().alloc(range, synth_id);
-                }};
-            }
-            match kind {
-                T![true] | T![false] => make_leaf!(Ident),
-                IDENT => make_leaf!(Ident),
-                UNDERSCORE => make_leaf!(Ident),
-                k if k.is_keyword() => make_leaf!(Ident),
-                k if k.is_literal() => make_leaf!(Literal),
-                LIFETIME_IDENT => {
-                    let char_unit = TextSize::of('\'');
-                    let r = TextRange::at(range.start(), char_unit);
-                    conv.id_alloc().alloc(r, synth_id);
-
-                    let r = TextRange::at(range.start() + char_unit, range.len() - char_unit);
-                    conv.id_alloc().alloc(r, synth_id);
-                    continue;
-                }
-                _ => continue,
-            };
-        };
-
-        // If we get here, we've consumed all input tokens.
-        // We might have more than one subtree in the stack, if the delimiters are improperly balanced.
-        // Merge them so we're left with one.
-        while let Some(entry) = stack.pop() {
-            conv.id_alloc().close_delim(entry.idx, None);
-            conv.id_alloc().alloc(entry.open_range, None);
-        }
-    }
-}
-
 fn is_single_token_op(kind: SyntaxKind) -> bool {
     matches!(
         kind,
@@ -511,162 +407,126 @@ fn doc_comment_text(comment: &ast::Comment) -> SmolStr {
     text.into()
 }
 
-fn convert_doc_comment(
+fn convert_doc_comment<S: Copy>(
     token: &syntax::SyntaxToken,
-    span: tt::TokenId,
-) -> Option<Vec<tt::TokenTree>> {
+    span: S,
+) -> Option<Vec<tt::TokenTree<S>>> {
     cov_mark::hit!(test_meta_doc_comments);
     let comment = ast::Comment::cast(token.clone())?;
     let doc = comment.kind().doc?;
 
-    // Make `doc="\" Comments\""
-    let meta_tkns =
-        vec![mk_ident("doc", span), mk_punct('=', span), mk_doc_literal(&comment, span)];
+    let mk_ident =
+        |s: &str| tt::TokenTree::from(tt::Leaf::from(tt::Ident { text: s.into(), span }));
 
-    // Make `#![]`
-    let mut token_trees = Vec::with_capacity(3);
-    token_trees.push(mk_punct('#', span));
-    if let ast::CommentPlacement::Inner = doc {
-        token_trees.push(mk_punct('!', span));
-    }
-    token_trees.push(tt::TokenTree::from(tt::Subtree {
-        delimiter: tt::Delimiter { open: span, close: span, kind: tt::DelimiterKind::Bracket },
-        token_trees: meta_tkns,
-    }));
-
-    return Some(token_trees);
-
-    // Helper functions
-    fn mk_ident(s: &str, span: tt::TokenId) -> tt::TokenTree {
-        tt::TokenTree::from(tt::Leaf::from(tt::Ident { text: s.into(), span }))
-    }
-
-    fn mk_punct(c: char, span: tt::TokenId) -> tt::TokenTree {
+    let mk_punct = |c: char| {
         tt::TokenTree::from(tt::Leaf::from(tt::Punct {
             char: c,
             spacing: tt::Spacing::Alone,
             span,
        }))
-    }
+    };
 
-    fn mk_doc_literal(comment: &ast::Comment, span: tt::TokenId) -> tt::TokenTree {
+    let mk_doc_literal = |comment: &ast::Comment| {
         let lit = tt::Literal { text: doc_comment_text(comment), span };
 
         tt::TokenTree::from(tt::Leaf::from(lit))
-    }
-}
-
-struct TokenIdAlloc {
-    map: TokenMap,
-    global_offset: TextSize,
-    next_id: u32,
-}
-
-impl TokenIdAlloc {
-    fn alloc(
-        &mut self,
-        absolute_range: TextRange,
-        synthetic_id: Option<SyntheticTokenId>,
-    ) -> tt::TokenId {
-        let relative_range = absolute_range - self.global_offset;
-        let token_id = tt::TokenId(self.next_id);
-        self.next_id += 1;
-        self.map.insert(token_id, relative_range);
-        if let Some(id) = synthetic_id {
-            self.map.insert_synthetic(token_id, id);
-        }
-        token_id
-    }
+    };
 
-    fn open_delim(
-        &mut self,
-        open_abs_range: TextRange,
-        synthetic_id: Option<SyntheticTokenId>,
-    ) -> (tt::TokenId, usize) {
-        let token_id = tt::TokenId(self.next_id);
-        self.next_id += 1;
-        let idx = self.map.insert_delim(
-            token_id,
-            open_abs_range - self.global_offset,
-            open_abs_range - self.global_offset,
-        );
-        if let Some(id) = synthetic_id {
-            self.map.insert_synthetic(token_id, id);
-        }
-        (token_id, idx)
-    }
+    // Make `doc="\" Comments\""
+    let meta_tkns = vec![mk_ident("doc"), mk_punct('='), mk_doc_literal(&comment)];
 
-    fn close_delim(&mut self, idx: usize, close_abs_range: Option<TextRange>) {
-        match close_abs_range {
-            None => {
-                self.map.remove_delim(idx);
-            }
-            Some(close) => {
-                self.map.update_close_delim(idx, close - self.global_offset);
-            }
-        }
+    // Make `#![]`
+    let mut token_trees = Vec::with_capacity(3);
+    token_trees.push(mk_punct('#'));
+    if let ast::CommentPlacement::Inner = doc {
+        token_trees.push(mk_punct('!'));
    }
+    token_trees.push(tt::TokenTree::from(tt::Subtree {
+        delimiter: tt::Delimiter { open: span, close: span, kind: tt::DelimiterKind::Bracket },
+        token_trees: meta_tkns,
+    }));
 
-    fn peek_next_id(&self) -> tt::TokenId {
-        tt::TokenId(self.next_id)
-    }
+    Some(token_trees)
 }
 
 /// A raw token (straight from lexer) converter
-struct RawConverter<'a> {
+struct RawConverter<'a, Anchor, Ctx> {
+    lexed: parser::LexedStr<'a>,
+    pos: usize,
+    anchor: Anchor,
+    ctx: Ctx,
+}
+/// A raw token (straight from lexer) converter that gives every token the same span.
+struct StaticRawConverter<'a, S> {
     lexed: parser::LexedStr<'a>,
     pos: usize,
-    id_alloc: TokenIdAlloc,
+    span: S,
 }
 
-trait SrcToken<Ctx>: std::fmt::Debug {
+trait SrcToken<Ctx, S>: std::fmt::Debug {
     fn kind(&self, ctx: &Ctx) -> SyntaxKind;
 
     fn to_char(&self, ctx: &Ctx) -> Option<char>;
 
     fn to_text(&self, ctx: &Ctx) -> SmolStr;
 
-    fn synthetic_id(&self, ctx: &Ctx) -> Option<SyntheticTokenId>;
+    fn as_leaf(&self) -> Option<&tt::Leaf<S>> {
+        None
+    }
 }
 
-trait TokenConverter: Sized {
-    type Token: SrcToken<Self>;
+trait TokenConverter<S>: Sized {
+    type Token: SrcToken<Self, S>;
 
-    fn convert_doc_comment(
-        &self,
-        token: &Self::Token,
-        span: tt::TokenId,
-    ) -> Option<Vec<tt::TokenTree>>;
+    fn convert_doc_comment(&self, token: &Self::Token, span: S) -> Option<Vec<tt::TokenTree<S>>>;
 
     fn bump(&mut self) -> Option<(Self::Token, TextRange)>;
 
     fn peek(&self) -> Option<Self::Token>;
 
-    fn id_alloc(&mut self) -> &mut TokenIdAlloc;
+    fn span_for(&self, range: TextRange) -> S;
 }
 
-impl SrcToken<RawConverter<'_>> for usize {
-    fn kind(&self, ctx: &RawConverter<'_>) -> SyntaxKind {
+impl<Anchor, S, Ctx> SrcToken<RawConverter<'_, Anchor, Ctx>, S> for usize {
+    fn kind(&self, ctx: &RawConverter<'_, Anchor, Ctx>) -> SyntaxKind {
         ctx.lexed.kind(*self)
     }
 
-    fn to_char(&self, ctx: &RawConverter<'_>) -> Option<char> {
+    fn to_char(&self, ctx: &RawConverter<'_, Anchor, Ctx>) -> Option<char> {
         ctx.lexed.text(*self).chars().next()
     }
 
-    fn to_text(&self, ctx: &RawConverter<'_>) -> SmolStr {
+    fn to_text(&self, ctx: &RawConverter<'_, Anchor, Ctx>) -> SmolStr {
         ctx.lexed.text(*self).into()
     }
+}
 
-    fn synthetic_id(&self, _ctx: &RawConverter<'_>) -> Option<SyntheticTokenId> {
-        None
+impl<S: Span> SrcToken<StaticRawConverter<'_, S>, S> for usize {
+    fn kind(&self, ctx: &StaticRawConverter<'_, S>) -> SyntaxKind {
+        ctx.lexed.kind(*self)
+    }
+
+    fn to_char(&self, ctx: &StaticRawConverter<'_, S>) -> Option<char> {
+        ctx.lexed.text(*self).chars().next()
+    }
+
+    fn to_text(&self, ctx: &StaticRawConverter<'_, S>) -> SmolStr {
+        ctx.lexed.text(*self).into()
     }
 }
 
-impl TokenConverter for RawConverter<'_> {
+impl<Anchor: Copy, Ctx: SyntaxContext> TokenConverter<SpanData<Anchor, Ctx>>
+    for RawConverter<'_, Anchor, Ctx>
+where
+    SpanData<Anchor, Ctx>: Span,
+{
     type Token = usize;
 
-    fn convert_doc_comment(&self, &token: &usize, span: tt::TokenId) -> Option<Vec<tt::TokenTree>> {
+    fn convert_doc_comment(
+        &self,
+        &token: &usize,
+        span: SpanData<Anchor, Ctx>,
+    ) -> Option<Vec<tt::TokenTree<SpanData<Anchor, Ctx>>>> {
        let text = self.lexed.text(token);
        convert_doc_comment(&doc_comment(text), span)
    }
@@ -678,7 +538,7 @@ impl TokenConverter for RawConverter<'_> {
         let token = self.pos;
         self.pos += 1;
         let range = self.lexed.text_range(token);
-        let range = TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());
+        let range = TextRange::new(range.start.try_into().ok()?, range.end.try_into().ok()?);
 
         Some((token, range))
     }
@@ -690,137 +550,184 @@ impl TokenConverter for RawConverter<'_> {
         Some(self.pos)
     }
 
-    fn id_alloc(&mut self) -> &mut TokenIdAlloc {
-        &mut self.id_alloc
+    fn span_for(&self, range: TextRange) -> SpanData<Anchor, Ctx> {
+        SpanData { range, anchor: self.anchor, ctx: self.ctx }
     }
 }
 
-struct Converter {
-    id_alloc: TokenIdAlloc,
+impl<S> TokenConverter<S> for StaticRawConverter<'_, S>
+where
+    S: Span,
+{
+    type Token = usize;
+
+    fn convert_doc_comment(&self, &token: &usize, span: S) -> Option<Vec<tt::TokenTree<S>>> {
+        let text = self.lexed.text(token);
+        convert_doc_comment(&doc_comment(text), span)
+    }
+
+    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
+        if self.pos == self.lexed.len() {
+            return None;
+        }
+        let token = self.pos;
+        self.pos += 1;
+        let range = self.lexed.text_range(token);
+        let range = TextRange::new(range.start.try_into().ok()?, range.end.try_into().ok()?);
+
+        Some((token, range))
+    }
+
+    fn peek(&self) -> Option<Self::Token> {
+        if self.pos == self.lexed.len() {
+            return None;
+        }
+        Some(self.pos)
+    }
+
+    fn span_for(&self, _: TextRange) -> S {
+        self.span
+    }
+}
+
+struct Converter<SpanMap, S> {
     current: Option<SyntaxToken>,
-    current_synthetic: Vec<SyntheticToken>,
+    current_leafs: Vec<tt::Leaf<S>>,
     preorder: PreorderWithTokens,
-    replace: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
-    append: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
     range: TextRange,
     punct_offset: Option<(SyntaxToken, TextSize)>,
+    /// Used to make the emitted text ranges in the spans relative to the span anchor.
+    map: SpanMap,
+    append: FxHashMap<SyntaxElement, Vec<tt::Leaf<S>>>,
+    remove: FxHashSet<SyntaxNode>,
 }
 
-impl Converter {
+impl<SpanMap, S> Converter<SpanMap, S> {
     fn new(
         node: &SyntaxNode,
-        global_offset: TextSize,
-        existing_token_map: TokenMap,
-        next_id: u32,
-        mut replace: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
-        mut append: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
-    ) -> Converter {
-        let range = node.text_range();
-        let mut preorder = node.preorder_with_tokens();
-        let (first, synthetic) = Self::next_token(&mut preorder, &mut replace, &mut append);
-        Converter {
-            id_alloc: { TokenIdAlloc { map: existing_token_map, global_offset, next_id } },
-            current: first,
-            current_synthetic: synthetic,
-            preorder,
-            range,
-            replace,
-            append,
             punct_offset: None,
-        }
-    }
-
-    fn next_token(
-        preorder: &mut PreorderWithTokens,
-        replace: &mut FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
-        append: &mut FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
-    ) -> (Option<SyntaxToken>, Vec<SyntheticToken>) {
-        while let Some(ev) = preorder.next() {
-            let ele = match ev {
-                WalkEvent::Enter(ele) => ele,
-                WalkEvent::Leave(ele) => {
-                    if let Some(mut v) = append.remove(&ele) {
-                        if !v.is_empty() {
-                            v.reverse();
-                            return (None, v);
-                        }
+        map: SpanMap,
+        append: FxHashMap<SyntaxElement, Vec<tt::Leaf<S>>>,
+        remove: FxHashSet<SyntaxNode>,
+    ) -> Self {
+        let mut this = Converter {
+            current: None,
+            preorder: node.preorder_with_tokens(),
+            range: node.text_range(),
+            punct_offset: None,
+            map,
+            append,
+            remove,
+            current_leafs: vec![],
+        };
+        let first = this.next_token();
+        this.current = first;
+        this
+    }
+
+    fn next_token(&mut self) -> Option<SyntaxToken> {
+        // while let Some(ev) = self.preorder.next() {
+        //     match ev {
+        //         WalkEvent::Enter(SyntaxElement::Token(t)) => {
+        //             if let Some(leafs) = self.append.remove(&t.clone().into()) {
+        //                 self.current_leafs.extend(leafs);
+        //             }
+        //             return Some(t);
+        //         }
+        //         WalkEvent::Enter(SyntaxElement::Node(n)) if self.remove.contains(&n) => {
+        //             self.preorder.skip_subtree();
+        //             if let Some(leafs) = self.append.remove(&n.into()) {
+        //                 self.current_leafs.extend(leafs);
+        //             }
+        //         }
        //         _ => (),
        //     }
        // }
        // None;
 
        while let Some(ev) = self.preorder.next() {
            match ev {
+                WalkEvent::Enter(SyntaxElement::Token(t)) => return Some(t),
+                WalkEvent::Enter(SyntaxElement::Node(n)) if self.remove.contains(&n) => {
+                    self.preorder.skip_subtree();
+                    if let Some(mut v) = self.append.remove(&n.into()) {
+                        v.reverse();
+                        self.current_leafs.extend(v);
+                        return None;
+                    }
                 }
-                    continue;
-                }
-            };
-            if let Some(mut v) = replace.remove(&ele) {
-                preorder.skip_subtree();
-                if !v.is_empty() {
-                    v.reverse();
-                    return (None, v);
+                WalkEvent::Enter(SyntaxElement::Node(_)) => (),
+                WalkEvent::Leave(ele) => {
+                    if let Some(mut v) = self.append.remove(&ele) {
+                        v.reverse();
+                        self.current_leafs.extend(v);
+                        return None;
+                    }
                 }
             }
-            match ele {
-                SyntaxElement::Token(t) => return (Some(t), Vec::new()),
-                _ => {}
-            }
         }
-        (None, Vec::new())
+        None
     }
 }
 
 #[derive(Debug)]
-enum SynToken {
+enum SynToken<S> {
     Ordinary(SyntaxToken),
-    // FIXME is this supposed to be `Punct`?
-    Punch(SyntaxToken, TextSize),
-    Synthetic(SyntheticToken),
+    Punct { token: SyntaxToken, offset: usize },
+    Leaf(tt::Leaf<S>),
 }
 
-impl SynToken {
-    fn token(&self) -> Option<&SyntaxToken> {
+impl<S> SynToken<S> {
+    fn token(&self) -> &SyntaxToken {
         match self {
-            SynToken::Ordinary(it) | SynToken::Punch(it, _) => Some(it),
-            SynToken::Synthetic(_) => None,
+            SynToken::Ordinary(it) | SynToken::Punct { token: it, offset: _ } => it,
+            SynToken::Leaf(_) => unreachable!(),
         }
     }
 }
 
-impl SrcToken<Converter> for SynToken {
-    fn kind(&self, ctx: &Converter) -> SyntaxKind {
+impl<SpanMap, S: std::fmt::Debug> SrcToken<Converter<SpanMap, S>, S> for SynToken<S> {
+    fn kind(&self, ctx: &Converter<SpanMap, S>) -> SyntaxKind {
         match self {
             SynToken::Ordinary(token) => token.kind(),
-            SynToken::Punch(..) => SyntaxKind::from_char(self.to_char(ctx).unwrap()).unwrap(),
-            SynToken::Synthetic(token) => token.kind,
+            SynToken::Punct { .. } => SyntaxKind::from_char(self.to_char(ctx).unwrap()).unwrap(),
+            SynToken::Leaf(_) => {
+                never!();
+                SyntaxKind::ERROR
+            }
         }
     }
-    fn to_char(&self, _ctx: &Converter) -> Option<char> {
+    fn to_char(&self, _ctx: &Converter<SpanMap, S>) -> Option<char> {
         match self {
             SynToken::Ordinary(_) => None,
-            SynToken::Punch(it, i) => it.text().chars().nth((*i).into()),
-            SynToken::Synthetic(token) if token.text.len() == 1 => token.text.chars().next(),
-            SynToken::Synthetic(_) => None,
+            SynToken::Punct { token: it, offset: i } => it.text().chars().nth(*i),
+            SynToken::Leaf(_) => None,
         }
     }
-    fn to_text(&self, _ctx: &Converter) -> SmolStr {
+    fn to_text(&self, _ctx: &Converter<SpanMap, S>) -> SmolStr {
         match self {
-            SynToken::Ordinary(token) => token.text().into(),
-            SynToken::Punch(token, _) => token.text().into(),
-            SynToken::Synthetic(token) => token.text.clone(),
+            SynToken::Ordinary(token) | SynToken::Punct { token, offset: _ } => token.text().into(),
+            SynToken::Leaf(_) => {
+                never!();
+                "".into()
+            }
         }
     }
-
-    fn synthetic_id(&self, _ctx: &Converter) -> Option<SyntheticTokenId> {
+    fn as_leaf(&self) -> Option<&tt::Leaf<S>> {
         match self {
-            SynToken::Synthetic(token) => Some(token.id),
-            _ => None,
+            SynToken::Ordinary(_) | SynToken::Punct { .. } => None,
+            SynToken::Leaf(it) => Some(it),
         }
     }
 }
 
-impl TokenConverter for Converter {
-    type Token = SynToken;
-    fn convert_doc_comment(
-        &self,
-        token: &Self::Token,
-        span: tt::TokenId,
-    ) -> Option<Vec<tt::TokenTree>> {
-        convert_doc_comment(token.token()?, span)
+impl<S, SpanMap> TokenConverter<S> for Converter<SpanMap, S>
+where
+    S: Span,
+    SpanMap: SpanMapper<S>,
+{
    type Token = SynToken<S>;
    fn convert_doc_comment(&self, token: &Self::Token, span: S) -> Option<Vec<tt::TokenTree<S>>> {
        convert_doc_comment(token.token(), span)
     }
 
     fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
@@ -830,34 +737,31 @@ impl TokenConverter for Converter {
                 let range = punct.text_range();
                 self.punct_offset = Some((punct.clone(), offset));
                 let range = TextRange::at(range.start() + offset, TextSize::of('.'));
-                return Some((SynToken::Punch(punct, offset), range));
+                return Some((
+                    SynToken::Punct { token: punct, offset: u32::from(offset) as usize },
+                    range,
+                ));
             }
         }
 
-        if let Some(synth_token) = self.current_synthetic.pop() {
-            if self.current_synthetic.is_empty() {
-                let (new_current, new_synth) =
-                    Self::next_token(&mut self.preorder, &mut self.replace, &mut self.append);
-                self.current = new_current;
-                self.current_synthetic = new_synth;
+        if let Some(leaf) = self.current_leafs.pop() {
+            if self.current_leafs.is_empty() {
+                self.current = self.next_token();
             }
-            let range = synth_token.range;
-            return Some((SynToken::Synthetic(synth_token), range));
+            return Some((SynToken::Leaf(leaf), TextRange::empty(TextSize::new(0))));
         }
 
         let curr = self.current.clone()?;
         if !self.range.contains_range(curr.text_range()) {
             return None;
         }
-        let (new_current, new_synth) =
-            Self::next_token(&mut self.preorder, &mut self.replace, &mut self.append);
-        self.current = new_current;
-        self.current_synthetic = new_synth;
+
+        self.current = self.next_token();
         let token = if curr.kind().is_punct() {
             self.punct_offset = Some((curr.clone(), 0.into()));
             let range = curr.text_range();
             let range = TextRange::at(range.start(), TextSize::of('.'));
-            (SynToken::Punch(curr, 0.into()), range)
+            (SynToken::Punct { token: curr, offset: 0 as usize }, range)
         } else {
             self.punct_offset = None;
             let range = curr.text_range();
@@ -871,55 +775,55 @@ impl TokenConverter for Converter {
         if let Some((punct, mut offset)) = self.punct_offset.clone() {
             offset += TextSize::of('.');
             if usize::from(offset) < punct.text().len() {
-                return Some(SynToken::Punch(punct, offset));
+                return Some(SynToken::Punct { token: punct, offset: usize::from(offset) });
             }
         }
 
-        if let Some(synth_token) = self.current_synthetic.last() {
-            return Some(SynToken::Synthetic(synth_token.clone()));
-        }
-
         let curr = self.current.clone()?;
         if !self.range.contains_range(curr.text_range()) {
             return None;
         }
 
         let token = if curr.kind().is_punct() {
-            SynToken::Punch(curr, 0.into())
+            SynToken::Punct { token: curr, offset: 0 as usize }
         } else {
             SynToken::Ordinary(curr)
         };
         Some(token)
     }
 
-    fn id_alloc(&mut self) -> &mut TokenIdAlloc {
-        &mut self.id_alloc
+    fn span_for(&self, range: TextRange) -> S {
+        self.map.span_for(range)
     }
 }
 
-struct TtTreeSink<'a> {
+struct TtTreeSink<'a, Anchor, Ctx>
+where
+    SpanData<Anchor, Ctx>: Span,
+{
     buf: String,
-    cursor: Cursor<'a>,
-    open_delims: FxHashMap<tt::TokenId, TextSize>,
+    cursor: Cursor<'a, SpanData<Anchor, Ctx>>,
     text_pos: TextSize,
     inner: SyntaxTreeBuilder,
-    token_map: TokenMap,
+    token_map: SpanMap<SpanData<Anchor, Ctx>>,
 }
 
-impl<'a> TtTreeSink<'a> {
-    fn new(cursor: Cursor<'a>) -> Self {
+impl<'a, Anchor, Ctx> TtTreeSink<'a, Anchor, Ctx>
+where
+    SpanData<Anchor, Ctx>: Span,
+{
+    fn new(cursor: Cursor<'a, SpanData<Anchor, Ctx>>) -> Self {
         TtTreeSink {
             buf: String::new(),
             cursor,
-            open_delims: FxHashMap::default(),
             text_pos: 0.into(),
             inner: SyntaxTreeBuilder::default(),
-            token_map: TokenMap::default(),
+            token_map: SpanMap::empty(),
         }
     }
 
-    fn finish(mut self) -> (Parse<SyntaxNode>, TokenMap) {
-        self.token_map.shrink_to_fit();
+    fn finish(mut self) -> (Parse<SyntaxNode>, SpanMap<SpanData<Anchor, Ctx>>) {
+        self.token_map.finish();
         (self.inner.finish(), self.token_map)
     }
 }
@@ -936,27 +840,34 @@ fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> Option<&'static str> {
     Some(&texts[idx..texts.len() - (1 - idx)])
 }
 
-impl TtTreeSink<'_> {
+impl<Anchor, Ctx> TtTreeSink<'_, Anchor, Ctx>
+where
+    SpanData<Anchor, Ctx>: Span,
+{
     /// Parses a float literal as if it was a one to two name ref nodes with a dot inbetween.
     /// This occurs when a float literal is used as a field access.
     fn float_split(&mut self, has_pseudo_dot: bool) {
-        let (text, _span) = match self.cursor.token_tree() {
+        let (text, span) = match self.cursor.token_tree() {
             Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Literal(lit), _)) => {
                 (lit.text.as_str(), lit.span)
             }
             _ => unreachable!(),
         };
+        // FIXME: Span splitting
         match text.split_once('.') {
             Some((left, right)) => {
                 assert!(!left.is_empty());
+
                 self.inner.start_node(SyntaxKind::NAME_REF);
                 self.inner.token(SyntaxKind::INT_NUMBER, left);
                 self.inner.finish_node();
+                self.token_map.push(self.text_pos + TextSize::of(left), span);
 
                 // here we move the exit up, the original exit has been deleted in process
                 self.inner.finish_node();
 
                 self.inner.token(SyntaxKind::DOT, ".");
+                self.token_map.push(self.text_pos + TextSize::of(left) + TextSize::of("."), span);
 
                 if has_pseudo_dot {
                     assert!(right.is_empty(), "{left}.{right}");
@@ -964,11 +875,13 @@ impl TtTreeSink<'_> {
                     assert!(!right.is_empty(), "{left}.{right}");
                     self.inner.start_node(SyntaxKind::NAME_REF);
                     self.inner.token(SyntaxKind::INT_NUMBER, right);
+                    self.token_map.push(self.text_pos + TextSize::of(text), span);
                     self.inner.finish_node();
 
                     // the parser creates an unbalanced start node, we are required to close it here
                     self.inner.finish_node();
                 }
+                self.text_pos += TextSize::of(text);
             }
             None => unreachable!(),
         }
@@ -987,11 +900,11 @@ impl TtTreeSink<'_> {
                 break;
             }
             last = self.cursor;
-            let text: &str = loop {
+            let (text, span) = loop {
                 break match self.cursor.token_tree() {
                     Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => {
                         // Mark the range if needed
-                        let (text, id) = match leaf {
+                        let (text, span) = match leaf {
                             tt::Leaf::Ident(ident) => (ident.text.as_str(), ident.span),
                             tt::Leaf::Punct(punct) => {
                                 assert!(punct.char.is_ascii());
@@ -1003,18 +916,13 @@ impl TtTreeSink<'_> {
                             }
                             tt::Leaf::Literal(lit) => (lit.text.as_str(), lit.span),
                         };
-                        let range = TextRange::at(self.text_pos, TextSize::of(text));
-                        self.token_map.insert(id, range);
                         self.cursor = self.cursor.bump();
-                        text
+                        (text, span)
                    }
                    Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
                        self.cursor = self.cursor.subtree().unwrap();
                        match delim_to_str(subtree.delimiter.kind, false) {
-                            Some(it) => {
-                                self.open_delims.insert(subtree.delimiter.open, self.text_pos);
-                                it
-                            }
+                            Some(it) => (it, subtree.delimiter.open),
                            None => continue,
                        }
                    }
                    None => {
                        let parent = self.cursor.end().unwrap();
                        self.cursor = self.cursor.bump();
                        match delim_to_str(parent.delimiter.kind, true) {
-                            Some(it) => {
-                                if let Some(open_delim) =
-                                    self.open_delims.get(&parent.delimiter.open)
-                                {
-                                    let open_range = TextRange::at(*open_delim, TextSize::of('('));
-                                    let close_range =
-                                        TextRange::at(self.text_pos, TextSize::of('('));
-                                    self.token_map.insert_delim(
-                                        parent.delimiter.open,
-                                        open_range,
-                                        close_range,
-                                    );
-                                }
-                                it
-                            }
+                            Some(it) => (it, parent.delimiter.close),
                            None => continue,
                        }
                    }
                };
            };
 
             self.buf += text;
             self.text_pos += TextSize::of(text);
+            self.token_map.push(self.text_pos, span);
         }
 
         self.inner.token(kind, self.buf.as_str());
         self.buf.clear();
+        // FIXME: Emitting whitespace for this is really just a hack, we should get rid of it.
         // Add whitespace between adjoint puncts
         let next = last.bump();
         if let (
@@ -1063,6 +959,7 @@ impl TtTreeSink<'_> {
             if curr.spacing == tt::Spacing::Alone && curr.char != ';' && next.char != '\'' {
                 self.inner.token(WHITESPACE, " ");
                 self.text_pos += TextSize::of(' ');
+                self.token_map.push(self.text_pos, curr.span);
             }
         }
     }
 }
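Usage sketch (not part of the commit): with the token-id allocator gone, callers choose the span policy by passing a `SpanMapper` into the conversion. A minimal round trip through the reworked API, assuming the test-only `dummy_test_span_utils` module introduced above and `SourceFile::parse` from the syntax crate; `round_trip_sketch` is a hypothetical helper:

    use syntax::SourceFile;

    use crate::syntax_bridge::{
        dummy_test_span_utils::DummyTestSpanMap, syntax_node_to_token_tree,
        token_tree_to_syntax_node,
    };

    fn round_trip_sketch(source: &str) {
        let parse = SourceFile::parse(source);
        // DummyTestSpanMap hands every token a span anchored to the dummy anchor;
        // real callers would substitute a map anchored to the owning file.
        let subtree = syntax_node_to_token_tree(&parse.syntax_node(), DummyTestSpanMap);

        // Converting back yields the reparsed tree plus a SpanMap recording, for
        // each offset in the rendered text, the span the originating token carried.
        let (reparsed, span_map) =
            token_tree_to_syntax_node(&subtree, parser::TopEntryPoint::SourceFile);
        let _ = (reparsed, span_map);
    }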
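The two string-based constructors differ only in how spans are produced: `parse_to_token_tree` anchors each token's range to the given anchor and context, while `parse_to_token_tree_static_span` stamps one fixed span on everything. A sketch of the static variant, reusing the dummy span from the same test module (an assumption for illustration, not committed code):

    use crate::syntax_bridge::{dummy_test_span_utils::DUMMY, parse_to_token_tree_static_span};

    fn parse_fixture_sketch() {
        // Every leaf of the produced subtree carries the same DUMMY span, which is
        // what span-agnostic callers such as tests want; lexing errors surface as None.
        let tt = parse_to_token_tree_static_span(DUMMY, "struct S;")
            .expect("fixture lexes without errors");
        assert!(!tt.token_trees.is_empty());
    }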
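Finally, `syntax_node_to_token_tree_modified` replaces the old replace/append machinery: `remove` hides whole nodes from the output and `append` injects extra leaves once an element is left. A hedged sketch under the same dummy-span assumption; `censor_sketch` and the injected identifier are illustrative only:

    use rustc_hash::{FxHashMap, FxHashSet};
    use syntax::SourceFile;

    use crate::syntax_bridge::{
        dummy_test_span_utils::{DummyTestSpanMap, DUMMY},
        syntax_node_to_token_tree_modified,
    };

    fn censor_sketch(source: &str) {
        let parse = SourceFile::parse(source);
        let node = parse.syntax_node();

        // Hide the first child node from the output entirely...
        let mut remove = FxHashSet::default();
        if let Some(child) = node.first_child() {
            remove.insert(child);
        }

        // ...and inject an identifier leaf when the root node is left.
        let mut append = FxHashMap::default();
        append.insert(
            node.clone().into(),
            vec![tt::Leaf::from(tt::Ident { text: "injected".into(), span: DUMMY })],
        );

        let subtree = syntax_node_to_token_tree_modified(&node, DummyTestSpanMap, append, remove);
        let _ = subtree;
    }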