Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'crates/syntax-bridge/src/lib.rs')
| -rw-r--r-- | crates/syntax-bridge/src/lib.rs | 1047 |
1 files changed, 1047 insertions, 0 deletions
diff --git a/crates/syntax-bridge/src/lib.rs b/crates/syntax-bridge/src/lib.rs new file mode 100644 index 0000000000..b0afd245c5 --- /dev/null +++ b/crates/syntax-bridge/src/lib.rs @@ -0,0 +1,1047 @@ +//! Conversions between [`SyntaxNode`] and [`tt::TokenTree`]. + +use std::fmt; + +use intern::Symbol; +use rustc_hash::{FxHashMap, FxHashSet}; +use span::{Edition, SpanAnchor, SpanData, SpanMap}; +use stdx::{format_to, never, non_empty_vec::NonEmptyVec}; +use syntax::{ + ast::{self, make::tokens::doc_comment}, + format_smolstr, AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement, + SyntaxKind::{self, *}, + SyntaxNode, SyntaxToken, SyntaxTreeBuilder, TextRange, TextSize, WalkEvent, T, +}; +use tt::{ + buffer::{Cursor, TokenBuffer}, + token_to_literal, +}; + +mod to_parser_input; +pub use to_parser_input::to_parser_input; +// FIXME: we probably should re-think `token_tree_to_syntax_node` interfaces +pub use ::parser::TopEntryPoint; + +#[cfg(test)] +mod tests; + +pub trait SpanMapper<S> { + fn span_for(&self, range: TextRange) -> S; +} + +impl<S> SpanMapper<SpanData<S>> for SpanMap<S> +where + SpanData<S>: Copy, +{ + fn span_for(&self, range: TextRange) -> SpanData<S> { + self.span_at(range.start()) + } +} + +impl<S: Copy, SM: SpanMapper<S>> SpanMapper<S> for &SM { + fn span_for(&self, range: TextRange) -> S { + SM::span_for(self, range) + } +} + +/// Dummy things for testing where spans don't matter. +pub mod dummy_test_span_utils { + + use span::{Span, SyntaxContextId}; + + use super::*; + + pub const DUMMY: Span = Span { + range: TextRange::empty(TextSize::new(0)), + anchor: span::SpanAnchor { + file_id: span::EditionedFileId::new( + span::FileId::from_raw(0xe4e4e), + span::Edition::CURRENT, + ), + ast_id: span::ROOT_ERASED_FILE_AST_ID, + }, + ctx: SyntaxContextId::ROOT, + }; + + pub struct DummyTestSpanMap; + + impl SpanMapper<Span> for DummyTestSpanMap { + fn span_for(&self, range: syntax::TextRange) -> Span { + Span { + range, + anchor: span::SpanAnchor { + file_id: span::EditionedFileId::new( + span::FileId::from_raw(0xe4e4e), + span::Edition::CURRENT, + ), + ast_id: span::ROOT_ERASED_FILE_AST_ID, + }, + ctx: SyntaxContextId::ROOT, + } + } + } +} + +/// Doc comment desugaring differs between mbe and proc-macros. +#[derive(Copy, Clone, PartialEq, Eq)] +pub enum DocCommentDesugarMode { + /// Desugars doc comments as quoted raw strings + Mbe, + /// Desugars doc comments as quoted strings + ProcMacro, +} + +/// Converts a syntax tree to a [`tt::Subtree`] using the provided span map to populate the +/// subtree's spans. +pub fn syntax_node_to_token_tree<Ctx, SpanMap>( + node: &SyntaxNode, + map: SpanMap, + span: SpanData<Ctx>, + mode: DocCommentDesugarMode, +) -> tt::Subtree<SpanData<Ctx>> +where + SpanData<Ctx>: Copy + fmt::Debug, + SpanMap: SpanMapper<SpanData<Ctx>>, +{ + let mut c = Converter::new(node, map, Default::default(), Default::default(), span, mode); + convert_tokens(&mut c) +} + +/// Converts a syntax tree to a [`tt::Subtree`] using the provided span map to populate the +/// subtree's spans. Additionally using the append and remove parameters, the additional tokens can +/// be injected or hidden from the output. +pub fn syntax_node_to_token_tree_modified<Ctx, SpanMap>( + node: &SyntaxNode, + map: SpanMap, + append: FxHashMap<SyntaxElement, Vec<tt::Leaf<SpanData<Ctx>>>>, + remove: FxHashSet<SyntaxElement>, + call_site: SpanData<Ctx>, + mode: DocCommentDesugarMode, +) -> tt::Subtree<SpanData<Ctx>> +where + SpanMap: SpanMapper<SpanData<Ctx>>, + SpanData<Ctx>: Copy + fmt::Debug, +{ + let mut c = Converter::new(node, map, append, remove, call_site, mode); + convert_tokens(&mut c) +} + +// The following items are what `rustc` macro can be parsed into : +// link: https://github.com/rust-lang/rust/blob/9ebf47851a357faa4cd97f4b1dc7835f6376e639/src/libsyntax/ext/expand.rs#L141 +// * Expr(P<ast::Expr>) -> token_tree_to_expr +// * Pat(P<ast::Pat>) -> token_tree_to_pat +// * Ty(P<ast::Ty>) -> token_tree_to_ty +// * Stmts(SmallVec<[ast::Stmt; 1]>) -> token_tree_to_stmts +// * Items(SmallVec<[P<ast::Item>; 1]>) -> token_tree_to_items +// +// * TraitItems(SmallVec<[ast::TraitItem; 1]>) +// * AssocItems(SmallVec<[ast::AssocItem; 1]>) +// * ForeignItems(SmallVec<[ast::ForeignItem; 1]> + +/// Converts a [`tt::Subtree`] back to a [`SyntaxNode`]. +/// The produced `SpanMap` contains a mapping from the syntax nodes offsets to the subtree's spans. +pub fn token_tree_to_syntax_node<Ctx>( + tt: &tt::Subtree<SpanData<Ctx>>, + entry_point: parser::TopEntryPoint, + edition: parser::Edition, +) -> (Parse<SyntaxNode>, SpanMap<Ctx>) +where + SpanData<Ctx>: Copy + fmt::Debug, +{ + let buffer = match tt { + tt::Subtree { + delimiter: tt::Delimiter { kind: tt::DelimiterKind::Invisible, .. }, + token_trees, + } => TokenBuffer::from_tokens(token_trees), + _ => TokenBuffer::from_subtree(tt), + }; + let parser_input = to_parser_input(edition, &buffer); + let parser_output = entry_point.parse(&parser_input, edition); + let mut tree_sink = TtTreeSink::new(buffer.begin()); + for event in parser_output.iter() { + match event { + parser::Step::Token { kind, n_input_tokens: n_raw_tokens } => { + tree_sink.token(kind, n_raw_tokens) + } + parser::Step::FloatSplit { ends_in_dot: has_pseudo_dot } => { + tree_sink.float_split(has_pseudo_dot) + } + parser::Step::Enter { kind } => tree_sink.start_node(kind), + parser::Step::Exit => tree_sink.finish_node(), + parser::Step::Error { msg } => tree_sink.error(msg.to_owned()), + } + } + tree_sink.finish() +} + +/// Convert a string to a `TokenTree`. The spans of the subtree will be anchored to the provided +/// anchor with the given context. +pub fn parse_to_token_tree<Ctx>( + edition: Edition, + anchor: SpanAnchor, + ctx: Ctx, + text: &str, +) -> Option<tt::Subtree<SpanData<Ctx>>> +where + SpanData<Ctx>: Copy + fmt::Debug, + Ctx: Copy, +{ + let lexed = parser::LexedStr::new(edition, text); + if lexed.errors().next().is_some() { + return None; + } + let mut conv = + RawConverter { lexed, anchor, pos: 0, ctx, mode: DocCommentDesugarMode::ProcMacro }; + Some(convert_tokens(&mut conv)) +} + +/// Convert a string to a `TokenTree`. The passed span will be used for all spans of the produced subtree. +pub fn parse_to_token_tree_static_span<S>( + edition: Edition, + span: S, + text: &str, +) -> Option<tt::Subtree<S>> +where + S: Copy + fmt::Debug, +{ + let lexed = parser::LexedStr::new(edition, text); + if lexed.errors().next().is_some() { + return None; + } + let mut conv = + StaticRawConverter { lexed, pos: 0, span, mode: DocCommentDesugarMode::ProcMacro }; + Some(convert_tokens(&mut conv)) +} + +fn convert_tokens<S, C>(conv: &mut C) -> tt::Subtree<S> +where + C: TokenConverter<S>, + S: Copy + fmt::Debug, + C::Token: fmt::Debug, +{ + let entry = tt::SubtreeBuilder { + delimiter: tt::Delimiter::invisible_spanned(conv.call_site()), + token_trees: vec![], + }; + let mut stack = NonEmptyVec::new(entry); + + while let Some((token, abs_range)) = conv.bump() { + let tt::SubtreeBuilder { delimiter, token_trees } = stack.last_mut(); + + let tt = match token.as_leaf() { + Some(leaf) => tt::TokenTree::Leaf(leaf.clone()), + None => match token.kind(conv) { + // Desugar doc comments into doc attributes + COMMENT => { + let span = conv.span_for(abs_range); + if let Some(tokens) = conv.convert_doc_comment(&token, span) { + token_trees.extend(tokens); + } + continue; + } + kind if kind.is_punct() && kind != UNDERSCORE => { + let expected = match delimiter.kind { + tt::DelimiterKind::Parenthesis => Some(T![')']), + tt::DelimiterKind::Brace => Some(T!['}']), + tt::DelimiterKind::Bracket => Some(T![']']), + tt::DelimiterKind::Invisible => None, + }; + + // Current token is a closing delimiter that we expect, fix up the closing span + // and end the subtree here + if matches!(expected, Some(expected) if expected == kind) { + if let Some(mut subtree) = stack.pop() { + subtree.delimiter.close = conv.span_for(abs_range); + stack.last_mut().token_trees.push(subtree.build().into()); + } + continue; + } + + let delim = match kind { + T!['('] => Some(tt::DelimiterKind::Parenthesis), + T!['{'] => Some(tt::DelimiterKind::Brace), + T!['['] => Some(tt::DelimiterKind::Bracket), + _ => None, + }; + + // Start a new subtree + if let Some(kind) = delim { + let open = conv.span_for(abs_range); + stack.push(tt::SubtreeBuilder { + delimiter: tt::Delimiter { + open, + // will be overwritten on subtree close above + close: open, + kind, + }, + token_trees: vec![], + }); + continue; + } + + let spacing = match conv.peek().map(|next| next.kind(conv)) { + Some(kind) if is_single_token_op(kind) => tt::Spacing::Joint, + _ => tt::Spacing::Alone, + }; + let Some(char) = token.to_char(conv) else { + panic!("Token from lexer must be single char: token = {token:#?}") + }; + tt::Leaf::from(tt::Punct { char, spacing, span: conv.span_for(abs_range) }) + .into() + } + kind => { + macro_rules! make_ident { + () => { + tt::Ident { + span: conv.span_for(abs_range), + sym: Symbol::intern(&token.to_text(conv)), + is_raw: tt::IdentIsRaw::No, + } + .into() + }; + } + let leaf: tt::Leaf<_> = match kind { + T![true] | T![false] => make_ident!(), + IDENT => { + let text = token.to_text(conv); + tt::Ident::new(&text, conv.span_for(abs_range)).into() + } + UNDERSCORE => make_ident!(), + k if k.is_keyword() => make_ident!(), + k if k.is_literal() => { + let text = token.to_text(conv); + let span = conv.span_for(abs_range); + token_to_literal(&text, span).into() + } + LIFETIME_IDENT => { + let apostrophe = tt::Leaf::from(tt::Punct { + char: '\'', + spacing: tt::Spacing::Joint, + span: conv + .span_for(TextRange::at(abs_range.start(), TextSize::of('\''))), + }); + token_trees.push(apostrophe.into()); + + let ident = tt::Leaf::from(tt::Ident { + sym: Symbol::intern(&token.to_text(conv)[1..]), + span: conv.span_for(TextRange::new( + abs_range.start() + TextSize::of('\''), + abs_range.end(), + )), + is_raw: tt::IdentIsRaw::No, + }); + token_trees.push(ident.into()); + continue; + } + _ => continue, + }; + + leaf.into() + } + }, + }; + + token_trees.push(tt); + } + + // If we get here, we've consumed all input tokens. + // We might have more than one subtree in the stack, if the delimiters are improperly balanced. + // Merge them so we're left with one. + while let Some(entry) = stack.pop() { + let parent = stack.last_mut(); + + let leaf: tt::Leaf<_> = tt::Punct { + span: entry.delimiter.open, + char: match entry.delimiter.kind { + tt::DelimiterKind::Parenthesis => '(', + tt::DelimiterKind::Brace => '{', + tt::DelimiterKind::Bracket => '[', + tt::DelimiterKind::Invisible => '$', + }, + spacing: tt::Spacing::Alone, + } + .into(); + parent.token_trees.push(leaf.into()); + parent.token_trees.extend(entry.token_trees); + } + + let subtree = stack.into_last().build(); + if let [tt::TokenTree::Subtree(first)] = &*subtree.token_trees { + first.clone() + } else { + subtree + } +} + +fn is_single_token_op(kind: SyntaxKind) -> bool { + matches!( + kind, + EQ | L_ANGLE + | R_ANGLE + | BANG + | AMP + | PIPE + | TILDE + | AT + | DOT + | COMMA + | SEMICOLON + | COLON + | POUND + | DOLLAR + | QUESTION + | PLUS + | MINUS + | STAR + | SLASH + | PERCENT + | CARET + // LIFETIME_IDENT will be split into a sequence of `'` (a single quote) and an + // identifier. + | LIFETIME_IDENT + ) +} + +/// Returns the textual content of a doc comment block as a quoted string +/// That is, strips leading `///` (or `/**`, etc) +/// and strips the ending `*/` +/// And then quote the string, which is needed to convert to `tt::Literal` +/// +/// Note that proc-macros desugar with string literals where as macro_rules macros desugar with raw string literals. +pub fn desugar_doc_comment_text(text: &str, mode: DocCommentDesugarMode) -> (Symbol, tt::LitKind) { + match mode { + DocCommentDesugarMode::Mbe => { + let mut num_of_hashes = 0; + let mut count = 0; + for ch in text.chars() { + count = match ch { + '"' => 1, + '#' if count > 0 => count + 1, + _ => 0, + }; + num_of_hashes = num_of_hashes.max(count); + } + + // Quote raw string with delimiters + (Symbol::intern(text), tt::LitKind::StrRaw(num_of_hashes)) + } + // Quote string with delimiters + DocCommentDesugarMode::ProcMacro => { + (Symbol::intern(&format_smolstr!("{}", text.escape_debug())), tt::LitKind::Str) + } + } +} + +fn convert_doc_comment<S: Copy>( + token: &syntax::SyntaxToken, + span: S, + mode: DocCommentDesugarMode, +) -> Option<Vec<tt::TokenTree<S>>> { + let comment = ast::Comment::cast(token.clone())?; + let doc = comment.kind().doc?; + + let mk_ident = |s: &str| { + tt::TokenTree::from(tt::Leaf::from(tt::Ident { + sym: Symbol::intern(s), + span, + is_raw: tt::IdentIsRaw::No, + })) + }; + + let mk_punct = |c: char| { + tt::TokenTree::from(tt::Leaf::from(tt::Punct { + char: c, + spacing: tt::Spacing::Alone, + span, + })) + }; + + let mk_doc_literal = |comment: &ast::Comment| { + let prefix_len = comment.prefix().len(); + let mut text = &comment.text()[prefix_len..]; + + // Remove ending "*/" + if comment.kind().shape == ast::CommentShape::Block { + text = &text[0..text.len() - 2]; + } + let (text, kind) = desugar_doc_comment_text(text, mode); + let lit = tt::Literal { symbol: text, span, kind, suffix: None }; + + tt::TokenTree::from(tt::Leaf::from(lit)) + }; + + // Make `doc="\" Comments\"" + let meta_tkns = Box::new([mk_ident("doc"), mk_punct('='), mk_doc_literal(&comment)]); + + // Make `#![]` + let mut token_trees = Vec::with_capacity(3); + token_trees.push(mk_punct('#')); + if let ast::CommentPlacement::Inner = doc { + token_trees.push(mk_punct('!')); + } + token_trees.push(tt::TokenTree::from(tt::Subtree { + delimiter: tt::Delimiter { open: span, close: span, kind: tt::DelimiterKind::Bracket }, + token_trees: meta_tkns, + })); + + Some(token_trees) +} + +/// A raw token (straight from lexer) converter +struct RawConverter<'a, Ctx> { + lexed: parser::LexedStr<'a>, + pos: usize, + anchor: SpanAnchor, + ctx: Ctx, + mode: DocCommentDesugarMode, +} +/// A raw token (straight from lexer) converter that gives every token the same span. +struct StaticRawConverter<'a, S> { + lexed: parser::LexedStr<'a>, + pos: usize, + span: S, + mode: DocCommentDesugarMode, +} + +trait SrcToken<Ctx, S> { + fn kind(&self, ctx: &Ctx) -> SyntaxKind; + + fn to_char(&self, ctx: &Ctx) -> Option<char>; + + fn to_text(&self, ctx: &Ctx) -> SmolStr; + + fn as_leaf(&self) -> Option<&tt::Leaf<S>> { + None + } +} + +trait TokenConverter<S>: Sized { + type Token: SrcToken<Self, S>; + + fn convert_doc_comment(&self, token: &Self::Token, span: S) -> Option<Vec<tt::TokenTree<S>>>; + + fn bump(&mut self) -> Option<(Self::Token, TextRange)>; + + fn peek(&self) -> Option<Self::Token>; + + fn span_for(&self, range: TextRange) -> S; + + fn call_site(&self) -> S; +} + +impl<S, Ctx> SrcToken<RawConverter<'_, Ctx>, S> for usize { + fn kind(&self, ctx: &RawConverter<'_, Ctx>) -> SyntaxKind { + ctx.lexed.kind(*self) + } + + fn to_char(&self, ctx: &RawConverter<'_, Ctx>) -> Option<char> { + ctx.lexed.text(*self).chars().next() + } + + fn to_text(&self, ctx: &RawConverter<'_, Ctx>) -> SmolStr { + ctx.lexed.text(*self).into() + } +} + +impl<S: Copy> SrcToken<StaticRawConverter<'_, S>, S> for usize { + fn kind(&self, ctx: &StaticRawConverter<'_, S>) -> SyntaxKind { + ctx.lexed.kind(*self) + } + + fn to_char(&self, ctx: &StaticRawConverter<'_, S>) -> Option<char> { + ctx.lexed.text(*self).chars().next() + } + + fn to_text(&self, ctx: &StaticRawConverter<'_, S>) -> SmolStr { + ctx.lexed.text(*self).into() + } +} + +impl<Ctx: Copy> TokenConverter<SpanData<Ctx>> for RawConverter<'_, Ctx> +where + SpanData<Ctx>: Copy, +{ + type Token = usize; + + fn convert_doc_comment( + &self, + &token: &usize, + span: SpanData<Ctx>, + ) -> Option<Vec<tt::TokenTree<SpanData<Ctx>>>> { + let text = self.lexed.text(token); + convert_doc_comment(&doc_comment(text), span, self.mode) + } + + fn bump(&mut self) -> Option<(Self::Token, TextRange)> { + if self.pos == self.lexed.len() { + return None; + } + let token = self.pos; + self.pos += 1; + let range = self.lexed.text_range(token); + let range = TextRange::new(range.start.try_into().ok()?, range.end.try_into().ok()?); + + Some((token, range)) + } + + fn peek(&self) -> Option<Self::Token> { + if self.pos == self.lexed.len() { + return None; + } + Some(self.pos) + } + + fn span_for(&self, range: TextRange) -> SpanData<Ctx> { + SpanData { range, anchor: self.anchor, ctx: self.ctx } + } + + fn call_site(&self) -> SpanData<Ctx> { + SpanData { range: TextRange::empty(0.into()), anchor: self.anchor, ctx: self.ctx } + } +} + +impl<S> TokenConverter<S> for StaticRawConverter<'_, S> +where + S: Copy, +{ + type Token = usize; + + fn convert_doc_comment(&self, &token: &usize, span: S) -> Option<Vec<tt::TokenTree<S>>> { + let text = self.lexed.text(token); + convert_doc_comment(&doc_comment(text), span, self.mode) + } + + fn bump(&mut self) -> Option<(Self::Token, TextRange)> { + if self.pos == self.lexed.len() { + return None; + } + let token = self.pos; + self.pos += 1; + let range = self.lexed.text_range(token); + let range = TextRange::new(range.start.try_into().ok()?, range.end.try_into().ok()?); + + Some((token, range)) + } + + fn peek(&self) -> Option<Self::Token> { + if self.pos == self.lexed.len() { + return None; + } + Some(self.pos) + } + + fn span_for(&self, _: TextRange) -> S { + self.span + } + + fn call_site(&self) -> S { + self.span + } +} + +struct Converter<SpanMap, S> { + current: Option<SyntaxToken>, + current_leaves: Vec<tt::Leaf<S>>, + preorder: PreorderWithTokens, + range: TextRange, + punct_offset: Option<(SyntaxToken, TextSize)>, + /// Used to make the emitted text ranges in the spans relative to the span anchor. + map: SpanMap, + append: FxHashMap<SyntaxElement, Vec<tt::Leaf<S>>>, + remove: FxHashSet<SyntaxElement>, + call_site: S, + mode: DocCommentDesugarMode, +} + +impl<SpanMap, S> Converter<SpanMap, S> { + fn new( + node: &SyntaxNode, + map: SpanMap, + append: FxHashMap<SyntaxElement, Vec<tt::Leaf<S>>>, + remove: FxHashSet<SyntaxElement>, + call_site: S, + mode: DocCommentDesugarMode, + ) -> Self { + let mut this = Converter { + current: None, + preorder: node.preorder_with_tokens(), + range: node.text_range(), + punct_offset: None, + map, + append, + remove, + call_site, + current_leaves: vec![], + mode, + }; + let first = this.next_token(); + this.current = first; + this + } + + fn next_token(&mut self) -> Option<SyntaxToken> { + while let Some(ev) = self.preorder.next() { + match ev { + WalkEvent::Enter(token) => { + if self.remove.contains(&token) { + match token { + syntax::NodeOrToken::Token(_) => { + continue; + } + node => { + self.preorder.skip_subtree(); + if let Some(mut v) = self.append.remove(&node) { + v.reverse(); + self.current_leaves.extend(v); + return None; + } + } + } + } else if let syntax::NodeOrToken::Token(token) = token { + return Some(token); + } + } + WalkEvent::Leave(ele) => { + if let Some(mut v) = self.append.remove(&ele) { + v.reverse(); + self.current_leaves.extend(v); + return None; + } + } + } + } + None + } +} + +#[derive(Debug)] +enum SynToken<S> { + Ordinary(SyntaxToken), + Punct { token: SyntaxToken, offset: usize }, + Leaf(tt::Leaf<S>), +} + +impl<S> SynToken<S> { + fn token(&self) -> &SyntaxToken { + match self { + SynToken::Ordinary(it) | SynToken::Punct { token: it, offset: _ } => it, + SynToken::Leaf(_) => unreachable!(), + } + } +} + +impl<SpanMap, S> SrcToken<Converter<SpanMap, S>, S> for SynToken<S> { + fn kind(&self, _ctx: &Converter<SpanMap, S>) -> SyntaxKind { + match self { + SynToken::Ordinary(token) => token.kind(), + SynToken::Punct { token, offset: i } => { + SyntaxKind::from_char(token.text().chars().nth(*i).unwrap()).unwrap() + } + SynToken::Leaf(_) => { + never!(); + SyntaxKind::ERROR + } + } + } + fn to_char(&self, _ctx: &Converter<SpanMap, S>) -> Option<char> { + match self { + SynToken::Ordinary(_) => None, + SynToken::Punct { token: it, offset: i } => it.text().chars().nth(*i), + SynToken::Leaf(_) => None, + } + } + fn to_text(&self, _ctx: &Converter<SpanMap, S>) -> SmolStr { + match self { + SynToken::Ordinary(token) | SynToken::Punct { token, offset: _ } => token.text().into(), + SynToken::Leaf(_) => { + never!(); + "".into() + } + } + } + fn as_leaf(&self) -> Option<&tt::Leaf<S>> { + match self { + SynToken::Ordinary(_) | SynToken::Punct { .. } => None, + SynToken::Leaf(it) => Some(it), + } + } +} + +impl<S, SpanMap> TokenConverter<S> for Converter<SpanMap, S> +where + S: Copy, + SpanMap: SpanMapper<S>, +{ + type Token = SynToken<S>; + fn convert_doc_comment(&self, token: &Self::Token, span: S) -> Option<Vec<tt::TokenTree<S>>> { + convert_doc_comment(token.token(), span, self.mode) + } + + fn bump(&mut self) -> Option<(Self::Token, TextRange)> { + if let Some((punct, offset)) = self.punct_offset.clone() { + if usize::from(offset) + 1 < punct.text().len() { + let offset = offset + TextSize::of('.'); + let range = punct.text_range(); + self.punct_offset = Some((punct.clone(), offset)); + let range = TextRange::at(range.start() + offset, TextSize::of('.')); + return Some(( + SynToken::Punct { token: punct, offset: u32::from(offset) as usize }, + range, + )); + } + } + + if let Some(leaf) = self.current_leaves.pop() { + if self.current_leaves.is_empty() { + self.current = self.next_token(); + } + return Some((SynToken::Leaf(leaf), TextRange::empty(TextSize::new(0)))); + } + + let curr = self.current.clone()?; + if !self.range.contains_range(curr.text_range()) { + return None; + } + + self.current = self.next_token(); + let token = if curr.kind().is_punct() { + self.punct_offset = Some((curr.clone(), 0.into())); + let range = curr.text_range(); + let range = TextRange::at(range.start(), TextSize::of('.')); + (SynToken::Punct { token: curr, offset: 0_usize }, range) + } else { + self.punct_offset = None; + let range = curr.text_range(); + (SynToken::Ordinary(curr), range) + }; + + Some(token) + } + + fn peek(&self) -> Option<Self::Token> { + if let Some((punct, mut offset)) = self.punct_offset.clone() { + offset += TextSize::of('.'); + if usize::from(offset) < punct.text().len() { + return Some(SynToken::Punct { token: punct, offset: usize::from(offset) }); + } + } + + let curr = self.current.clone()?; + if !self.range.contains_range(curr.text_range()) { + return None; + } + + let token = if curr.kind().is_punct() { + SynToken::Punct { token: curr, offset: 0_usize } + } else { + SynToken::Ordinary(curr) + }; + Some(token) + } + + fn span_for(&self, range: TextRange) -> S { + self.map.span_for(range) + } + fn call_site(&self) -> S { + self.call_site + } +} + +struct TtTreeSink<'a, Ctx> +where + SpanData<Ctx>: Copy, +{ + buf: String, + cursor: Cursor<'a, SpanData<Ctx>>, + text_pos: TextSize, + inner: SyntaxTreeBuilder, + token_map: SpanMap<Ctx>, +} + +impl<'a, Ctx> TtTreeSink<'a, Ctx> +where + SpanData<Ctx>: Copy, +{ + fn new(cursor: Cursor<'a, SpanData<Ctx>>) -> Self { + TtTreeSink { + buf: String::new(), + cursor, + text_pos: 0.into(), + inner: SyntaxTreeBuilder::default(), + token_map: SpanMap::empty(), + } + } + + fn finish(mut self) -> (Parse<SyntaxNode>, SpanMap<Ctx>) { + self.token_map.finish(); + (self.inner.finish(), self.token_map) + } +} + +fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> Option<&'static str> { + let texts = match d { + tt::DelimiterKind::Parenthesis => "()", + tt::DelimiterKind::Brace => "{}", + tt::DelimiterKind::Bracket => "[]", + tt::DelimiterKind::Invisible => return None, + }; + + let idx = closing as usize; + Some(&texts[idx..texts.len() - (1 - idx)]) +} + +impl<Ctx> TtTreeSink<'_, Ctx> +where + SpanData<Ctx>: Copy + fmt::Debug, +{ + /// Parses a float literal as if it was a one to two name ref nodes with a dot inbetween. + /// This occurs when a float literal is used as a field access. + fn float_split(&mut self, has_pseudo_dot: bool) { + let (text, span) = match self.cursor.token_tree() { + Some(tt::buffer::TokenTreeRef::Leaf( + tt::Leaf::Literal(tt::Literal { + symbol: text, + span, + kind: tt::LitKind::Float, + suffix: _, + }), + _, + )) => (text.as_str(), *span), + tt => unreachable!("{tt:?}"), + }; + // FIXME: Span splitting + match text.split_once('.') { + Some((left, right)) => { + assert!(!left.is_empty()); + + self.inner.start_node(SyntaxKind::NAME_REF); + self.inner.token(SyntaxKind::INT_NUMBER, left); + self.inner.finish_node(); + self.token_map.push(self.text_pos + TextSize::of(left), span); + + // here we move the exit up, the original exit has been deleted in process + self.inner.finish_node(); + + self.inner.token(SyntaxKind::DOT, "."); + self.token_map.push(self.text_pos + TextSize::of(left) + TextSize::of("."), span); + + if has_pseudo_dot { + assert!(right.is_empty(), "{left}.{right}"); + } else { + assert!(!right.is_empty(), "{left}.{right}"); + self.inner.start_node(SyntaxKind::NAME_REF); + self.inner.token(SyntaxKind::INT_NUMBER, right); + self.token_map.push(self.text_pos + TextSize::of(text), span); + self.inner.finish_node(); + + // the parser creates an unbalanced start node, we are required to close it here + self.inner.finish_node(); + } + self.text_pos += TextSize::of(text); + } + None => unreachable!(), + } + self.cursor = self.cursor.bump(); + } + + fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) { + if kind == LIFETIME_IDENT { + n_tokens = 2; + } + + let mut last = self.cursor; + 'tokens: for _ in 0..n_tokens { + let tmp: u8; + if self.cursor.eof() { + break; + } + last = self.cursor; + let (text, span) = loop { + break match self.cursor.token_tree() { + Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => match leaf { + tt::Leaf::Ident(ident) => { + if ident.is_raw.yes() { + self.buf.push_str("r#"); + self.text_pos += TextSize::of("r#"); + } + let r = (ident.sym.as_str(), ident.span); + self.cursor = self.cursor.bump(); + r + } + tt::Leaf::Punct(punct) => { + assert!(punct.char.is_ascii()); + tmp = punct.char as u8; + let r = ( + std::str::from_utf8(std::slice::from_ref(&tmp)).unwrap(), + punct.span, + ); + self.cursor = self.cursor.bump(); + r + } + tt::Leaf::Literal(lit) => { + let buf_l = self.buf.len(); + format_to!(self.buf, "{lit}"); + debug_assert_ne!(self.buf.len() - buf_l, 0); + self.text_pos += TextSize::new((self.buf.len() - buf_l) as u32); + self.token_map.push(self.text_pos, lit.span); + self.cursor = self.cursor.bump(); + continue 'tokens; + } + }, + Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => { + self.cursor = self.cursor.subtree().unwrap(); + match delim_to_str(subtree.delimiter.kind, false) { + Some(it) => (it, subtree.delimiter.open), + None => continue, + } + } + None => { + let parent = self.cursor.end().unwrap(); + self.cursor = self.cursor.bump(); + match delim_to_str(parent.delimiter.kind, true) { + Some(it) => (it, parent.delimiter.close), + None => continue, + } + } + }; + }; + self.buf += text; + self.text_pos += TextSize::of(text); + self.token_map.push(self.text_pos, span); + } + + self.inner.token(kind, self.buf.as_str()); + self.buf.clear(); + // FIXME: Emitting whitespace for this is really just a hack, we should get rid of it. + // Add whitespace between adjoint puncts + let next = last.bump(); + if let ( + Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(curr), _)), + Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(next), _)), + ) = (last.token_tree(), next.token_tree()) + { + // Note: We always assume the semi-colon would be the last token in + // other parts of RA such that we don't add whitespace here. + // + // When `next` is a `Punct` of `'`, that's a part of a lifetime identifier so we don't + // need to add whitespace either. + if curr.spacing == tt::Spacing::Alone && curr.char != ';' && next.char != '\'' { + self.inner.token(WHITESPACE, " "); + self.text_pos += TextSize::of(' '); + self.token_map.push(self.text_pos, curr.span); + } + } + } + + fn start_node(&mut self, kind: SyntaxKind) { + self.inner.start_node(kind); + } + + fn finish_node(&mut self) { + self.inner.finish_node(); + } + + fn error(&mut self, error: String) { + self.inner.error(error, self.text_pos) + } +} |