Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'crates/proc-macro-api/src/legacy_protocol/msg/flat.rs')
| -rw-r--r-- | crates/proc-macro-api/src/legacy_protocol/msg/flat.rs | 628 |
1 files changed, 628 insertions, 0 deletions
//! Serialization-friendly representation of `tt::TopSubtree`.
//!
//! It is possible to serialize `TopSubtree` recursively, as a tree, but using
//! arbitrary-nested trees in JSON is problematic, as they can cause the JSON
//! parser to overflow the stack.
//!
//! Additionally, such implementation would be pretty verbose, and we do care
//! about performance here a bit.
//!
//! So what this module does is dumping a `tt::TopSubtree` into a bunch of flat
//! array of numbers. See the test in the parent module to get an example
//! output.
//!
//! ```json
//! {
//!   // Array of subtrees, each subtree is represented by 4 numbers:
//!   // id of delimiter, delimiter kind, index of first child in `token_tree`,
//!   // index of last child in `token_tree`
//!   "subtree":[4294967295,0,0,5,2,2,5,5],
//!   // 2 ints per literal: [token id, index into `text`]
//!   "literal":[4294967295,1],
//!   // 3 ints per punct: [token id, char, spacing]
//!   "punct":[4294967295,64,1],
//!   // 2 ints per ident: [token id, index into `text`]
//!   "ident": [0,0,1,1],
//!   // children of all subtrees, concatenated. Each child is represented as `index << 2 | tag`
//!   // where tag denotes one of subtree, literal, punct or ident.
//!   "token_tree":[3,7,1,4],
//!   // Strings shared by idents and literals
//!   "text": ["struct","Foo"]
//! }
//! ```
//!
//! We probably should replace most of the code here with bincode someday, but,
//! as we don't have bincode in Cargo.toml yet, lets stick with serde_json for
//! the time being.

use std::collections::VecDeque;

use intern::Symbol;
use rustc_hash::FxHashMap;
use serde_derive::{Deserialize, Serialize};
use span::{
    EditionedFileId, ErasedFileAstId, Span, SpanAnchor, SyntaxContextId, TextRange, TokenId,
};

use crate::legacy_protocol::msg::{ENCODE_CLOSE_SPAN_VERSION, EXTENDED_LEAF_DATA};

/// Interning table for spans: the insertion index of a `Span` in this set is
/// used as its wire-format `TokenId` (see `InternableSpan for Span` below).
pub type SpanDataIndexMap =
    indexmap::IndexSet<Span, std::hash::BuildHasherDefault<rustc_hash::FxHasher>>;

/// Flattens the span table into a `Vec<u32>`, 5 numbers per span:
/// `[file id, erased AST id, range start, range end, syntax context]`.
pub fn serialize_span_data_index_map(map: &SpanDataIndexMap) -> Vec<u32> {
    map.iter()
        .flat_map(|span| {
            [
                span.anchor.file_id.as_u32(),
                span.anchor.ast_id.into_raw(),
                span.range.start().into(),
                span.range.end().into(),
                span.ctx.into_u32(),
            ]
        })
        .collect()
}

/// Inverse of [`serialize_span_data_index_map`]: rebuilds the span table from
/// the flat 5-numbers-per-span encoding. The input length must be a multiple
/// of 5 (checked only in debug builds).
pub fn deserialize_span_data_index_map(map: &[u32]) -> SpanDataIndexMap {
    debug_assert!(map.len() % 5 == 0);
    map.chunks_exact(5)
        .map(|span| {
            let &[file_id, ast_id, start, end, e] = span else { unreachable!() };
            Span {
                anchor: SpanAnchor {
                    file_id: EditionedFileId::from_raw(file_id),
                    ast_id: ErasedFileAstId::from_raw(ast_id),
                },
                range: TextRange::new(start.into(), end.into()),
                ctx: SyntaxContextId::from_u32(e),
            }
        })
        .collect()
}

/// The serialization-friendly flat form of a token tree; see the module docs
/// for the meaning of each array. Field layouts are version-dependent
/// (`ENCODE_CLOSE_SPAN_VERSION`, `EXTENDED_LEAF_DATA`).
#[derive(Serialize, Deserialize, Debug)]
pub struct FlatTree {
    subtree: Vec<u32>,
    literal: Vec<u32>,
    punct: Vec<u32>,
    ident: Vec<u32>,
    token_tree: Vec<u32>,
    text: Vec<String>,
}

/// In-memory form of one subtree entry before packing into `u32`s.
/// `tt` is the `[start, end)` range of this subtree's children in `token_tree`.
struct SubtreeRepr {
    open: TokenId,
    close: TokenId,
    kind: tt::DelimiterKind,
    tt: [u32; 2],
}

/// In-memory form of one literal entry. `text`/`suffix` index into `text`;
/// `suffix == !0` means "no suffix". `kind` packs `tt::LitKind` plus the raw-string
/// hash count into little-endian bytes (see `Writer::subtree`).
struct LiteralRepr {
    id: TokenId,
    text: u32,
    suffix: u32,
    kind: u16,
}

/// In-memory form of one punctuation entry.
struct PunctRepr {
    id: TokenId,
    char: char,
    spacing: tt::Spacing,
}

/// In-memory form of one identifier entry. `text` indexes into `text`.
struct IdentRepr {
    id: TokenId,
    text: u32,
    is_raw: bool,
}

impl FlatTree {
    /// Flattens `subtree`, interning every `Span` into `span_data_table` so the
    /// wire format only carries `TokenId`s. Encoding details are gated on
    /// `version` (close spans, extended literal/ident data).
    pub fn new(
        subtree: tt::SubtreeView<'_, Span>,
        version: u32,
        span_data_table: &mut SpanDataIndexMap,
    ) -> FlatTree {
        let mut w = Writer {
            string_table: FxHashMap::default(),
            work: VecDeque::new(),
            span_data_table,

            subtree: Vec::new(),
            literal: Vec::new(),
            punct: Vec::new(),
            ident: Vec::new(),
            token_tree: Vec::new(),
            text: Vec::new(),
            version,
        };
        w.write(subtree);

        FlatTree {
            subtree: if version >= ENCODE_CLOSE_SPAN_VERSION {
                write_vec(w.subtree, SubtreeRepr::write_with_close_span)
            } else {
                write_vec(w.subtree, SubtreeRepr::write)
            },
            literal: if version >= EXTENDED_LEAF_DATA {
                write_vec(w.literal, LiteralRepr::write_with_kind)
            } else {
                write_vec(w.literal, LiteralRepr::write)
            },
            punct: write_vec(w.punct, PunctRepr::write),
            ident: if version >= EXTENDED_LEAF_DATA {
                write_vec(w.ident, IdentRepr::write_with_rawness)
            } else {
                write_vec(w.ident, IdentRepr::write)
            },
            token_tree: w.token_tree,
            text: w.text,
        }
    }

    /// Like [`FlatTree::new`], but for trees whose spans already *are*
    /// `TokenId`s, so no span table is needed (the unit table of
    /// `InternableSpan for TokenId` is used).
    pub fn new_raw(subtree: tt::SubtreeView<'_, TokenId>, version: u32) -> FlatTree {
        let mut w = Writer {
            string_table: FxHashMap::default(),
            work: VecDeque::new(),
            span_data_table: &mut (),

            subtree: Vec::new(),
            literal: Vec::new(),
            punct: Vec::new(),
            ident: Vec::new(),
            token_tree: Vec::new(),
            text: Vec::new(),
            version,
        };
        w.write(subtree);

        FlatTree {
            subtree: if version >= ENCODE_CLOSE_SPAN_VERSION {
                write_vec(w.subtree, SubtreeRepr::write_with_close_span)
            } else {
                write_vec(w.subtree, SubtreeRepr::write)
            },
            literal: if version >= EXTENDED_LEAF_DATA {
                write_vec(w.literal, LiteralRepr::write_with_kind)
            } else {
                write_vec(w.literal, LiteralRepr::write)
            },
            punct: write_vec(w.punct, PunctRepr::write),
            ident: if version >= EXTENDED_LEAF_DATA {
                write_vec(w.ident, IdentRepr::write_with_rawness)
            } else {
                write_vec(w.ident, IdentRepr::write)
            },
            token_tree: w.token_tree,
            text: w.text,
        }
    }

    /// Rebuilds the tree, resolving every `TokenId` back to a real `Span`
    /// through `span_data_table`. `version` must match the one used to encode.
    pub fn to_subtree_resolved(
        self,
        version: u32,
        span_data_table: &SpanDataIndexMap,
    ) -> tt::TopSubtree<Span> {
        Reader {
            subtree: if version >= ENCODE_CLOSE_SPAN_VERSION {
                read_vec(self.subtree, SubtreeRepr::read_with_close_span)
            } else {
                read_vec(self.subtree, SubtreeRepr::read)
            },
            literal: if version >= EXTENDED_LEAF_DATA {
                read_vec(self.literal, LiteralRepr::read_with_kind)
            } else {
                read_vec(self.literal, LiteralRepr::read)
            },
            punct: read_vec(self.punct, PunctRepr::read),
            ident: if version >= EXTENDED_LEAF_DATA {
                read_vec(self.ident, IdentRepr::read_with_rawness)
            } else {
                read_vec(self.ident, IdentRepr::read)
            },
            token_tree: self.token_tree,
            text: self.text,
            span_data_table,
            version,
        }
        .read()
    }

    /// Rebuilds the tree keeping spans as raw `TokenId`s (no span table).
    pub fn to_subtree_unresolved(self, version: u32) -> tt::TopSubtree<TokenId> {
        Reader {
            subtree: if version >= ENCODE_CLOSE_SPAN_VERSION {
                read_vec(self.subtree, SubtreeRepr::read_with_close_span)
            } else {
                read_vec(self.subtree, SubtreeRepr::read)
            },
            literal: if version >= EXTENDED_LEAF_DATA {
                read_vec(self.literal, LiteralRepr::read_with_kind)
            } else {
                read_vec(self.literal, LiteralRepr::read)
            },
            punct: read_vec(self.punct, PunctRepr::read),
            ident: if version >= EXTENDED_LEAF_DATA {
                read_vec(self.ident, IdentRepr::read_with_rawness)
            } else {
                read_vec(self.ident, IdentRepr::read)
            },
            token_tree: self.token_tree,
            text: self.text,
            span_data_table: &(),
            version,
        }
        .read()
    }
}

/// Decodes a flat `u32` array into reprs, `N` numbers per item; panics via the
/// assert if the input length is not a multiple of `N`.
fn read_vec<T, F: Fn([u32; N]) -> T, const N: usize>(xs: Vec<u32>, f: F) -> Vec<T> {
    let mut chunks = xs.chunks_exact(N);
    let res = chunks.by_ref().map(|chunk| f(chunk.try_into().unwrap())).collect();
    assert!(chunks.remainder().is_empty());
    res
}

/// Encodes reprs into a flat `u32` array, `N` numbers per item.
fn write_vec<T, F: Fn(T) -> [u32; N], const N: usize>(xs: Vec<T>, f: F) -> Vec<u32> {
    xs.into_iter().flat_map(f).collect()
}

impl SubtreeRepr {
    /// Legacy 4-number encoding: `[open span, kind, children start, children end]`.
    /// The close span is not transmitted.
    fn write(self) -> [u32; 4] {
        let kind = match self.kind {
            tt::DelimiterKind::Invisible => 0,
            tt::DelimiterKind::Parenthesis => 1,
            tt::DelimiterKind::Brace => 2,
            tt::DelimiterKind::Bracket => 3,
        };
        [self.open.0, kind, self.tt[0], self.tt[1]]
    }
    fn read([open, kind, lo, len]: [u32; 4]) -> SubtreeRepr {
        let kind = match kind {
            0 => tt::DelimiterKind::Invisible,
            1 => tt::DelimiterKind::Parenthesis,
            2 => tt::DelimiterKind::Brace,
            3 => tt::DelimiterKind::Bracket,
            other => panic!("bad kind {other}"),
        };
        // No close span on the wire in this version; `!0` marks it as absent.
        SubtreeRepr { open: TokenId(open), close: TokenId(!0), kind, tt: [lo, len] }
    }
    /// 5-number encoding used from `ENCODE_CLOSE_SPAN_VERSION` on:
    /// `[open span, close span, kind, children start, children end]`.
    fn write_with_close_span(self) -> [u32; 5] {
        let kind = match self.kind {
            tt::DelimiterKind::Invisible => 0,
            tt::DelimiterKind::Parenthesis => 1,
            tt::DelimiterKind::Brace => 2,
            tt::DelimiterKind::Bracket => 3,
        };
        [self.open.0, self.close.0, kind, self.tt[0], self.tt[1]]
    }
    fn read_with_close_span([open, close, kind, lo, len]: [u32; 5]) -> SubtreeRepr {
        let kind = match kind {
            0 => tt::DelimiterKind::Invisible,
            1 => tt::DelimiterKind::Parenthesis,
            2 => tt::DelimiterKind::Brace,
            3 => tt::DelimiterKind::Bracket,
            other => panic!("bad kind {other}"),
        };
        SubtreeRepr { open: TokenId(open), close: TokenId(close), kind, tt: [lo, len] }
    }
}

impl LiteralRepr {
    /// Legacy 2-number encoding: `[span, text index]`; kind/suffix are folded
    /// into the text itself on the write side.
    fn write(self) -> [u32; 2] {
        [self.id.0, self.text]
    }
    fn read([id, text]: [u32; 2]) -> LiteralRepr {
        LiteralRepr { id: TokenId(id), text, kind: 0, suffix: !0 }
    }
    /// 4-number encoding used from `EXTENDED_LEAF_DATA` on:
    /// `[span, text index, packed kind, suffix index]`.
    fn write_with_kind(self) -> [u32; 4] {
        [self.id.0, self.text, self.kind as u32, self.suffix]
    }
    fn read_with_kind([id, text, kind, suffix]: [u32; 4]) -> LiteralRepr {
        LiteralRepr { id: TokenId(id), text, kind: kind as u16, suffix }
    }
}

impl PunctRepr {
    fn write(self) -> [u32; 3] {
        // `JointHidden` is collapsed to `Alone` on the wire.
        let spacing = match self.spacing {
            tt::Spacing::Alone | tt::Spacing::JointHidden => 0,
            tt::Spacing::Joint => 1,
        };
        [self.id.0, self.char as u32, spacing]
    }
    fn read([id, char, spacing]: [u32; 3]) -> PunctRepr {
        let spacing = match spacing {
            0 => tt::Spacing::Alone,
            1 => tt::Spacing::Joint,
            other => panic!("bad spacing {other}"),
        };
        PunctRepr { id: TokenId(id), char: char.try_into().unwrap(), spacing }
    }
}

impl IdentRepr {
    /// Legacy 2-number encoding: `[span, text index]`; rawness is encoded in
    /// the text (`r#` prefix) by the write side.
    fn write(self) -> [u32; 2] {
        [self.id.0, self.text]
    }
    fn read(data: [u32; 2]) -> IdentRepr {
        IdentRepr { id: TokenId(data[0]), text: data[1], is_raw: false }
    }
    /// 3-number encoding used from `EXTENDED_LEAF_DATA` on:
    /// `[span, text index, is_raw flag]`.
    fn write_with_rawness(self) -> [u32; 3] {
        [self.id.0, self.text, self.is_raw as u32]
    }
    fn read_with_rawness([id, text, is_raw]: [u32; 3]) -> IdentRepr {
        IdentRepr { id: TokenId(id), text, is_raw: is_raw == 1 }
    }
}

/// Abstracts over the two span flavors the protocol handles: real `Span`s
/// (interned through a `SpanDataIndexMap`) and already-raw `TokenId`s
/// (identity mapping, unit table).
trait InternableSpan: Copy {
    type Table;
    fn token_id_of(table: &mut Self::Table, s: Self) -> TokenId;
    fn span_for_token_id(table: &Self::Table, id: TokenId) -> Self;
}

impl InternableSpan for TokenId {
    type Table = ();
    fn token_id_of((): &mut Self::Table, token_id: Self) -> TokenId {
        token_id
    }

    fn span_for_token_id((): &Self::Table, id: TokenId) -> Self {
        id
    }
}
impl InternableSpan for Span {
    type Table = SpanDataIndexMap;
    fn token_id_of(table: &mut Self::Table, span: Self) -> TokenId {
        // The set index of the (deduplicated) span is its token id.
        TokenId(table.insert_full(span).0 as u32)
    }
    fn span_for_token_id(table: &Self::Table, id: TokenId) -> Self {
        // Out-of-range ids fall back to the first table entry rather than panicking.
        *table.get_index(id.0 as usize).unwrap_or_else(|| &table[0])
    }
}

/// Flattens a token tree breadth-first: `work` queues subtrees still to be
/// serialized, `string_table` deduplicates ident/literal text into `text`.
struct Writer<'a, 'span, S: InternableSpan> {
    work: VecDeque<(usize, tt::iter::TtIter<'a, S>)>,
    string_table: FxHashMap<std::borrow::Cow<'a, str>, u32>,
    span_data_table: &'span mut S::Table,
    version: u32,

    subtree: Vec<SubtreeRepr>,
    literal: Vec<LiteralRepr>,
    punct: Vec<PunctRepr>,
    ident: Vec<IdentRepr>,
    token_tree: Vec<u32>,
    text: Vec<String>,
}

impl<'a, S: InternableSpan> Writer<'a, '_, S> {
    /// Entry point: enqueues the root subtree, then drains the work queue,
    /// serializing one subtree's direct children per iteration.
    fn write(&mut self, root: tt::SubtreeView<'a, S>) {
        let subtree = root.top_subtree();
        self.enqueue(subtree, root.iter());
        while let Some((idx, subtree)) = self.work.pop_front() {
            self.subtree(idx, subtree);
        }
    }

    fn token_id_of(&mut self, span: S) -> TokenId {
        S::token_id_of(self.span_data_table, span)
    }

    /// Serializes the direct children of subtree number `idx`, writing each
    /// child as `index << 2 | tag` into the pre-reserved `token_tree` slots.
    fn subtree(&mut self, idx: usize, subtree: tt::iter::TtIter<'a, S>) {
        let mut first_tt = self.token_tree.len();
        let n_tt = subtree.clone().count(); // FIXME: `count()` walks over the entire iterator.
        // Reserve this subtree's child slots up front so nested subtrees
        // (serialized later via the queue) land after them.
        self.token_tree.resize(first_tt + n_tt, !0);

        self.subtree[idx].tt = [first_tt as u32, (first_tt + n_tt) as u32];

        for child in subtree {
            let idx_tag = match child {
                tt::iter::TtElement::Subtree(subtree, subtree_iter) => {
                    // Tag 0b00: subtree. Its contents are serialized later.
                    let idx = self.enqueue(subtree, subtree_iter);
                    idx << 2
                }
                tt::iter::TtElement::Leaf(leaf) => match leaf {
                    tt::Leaf::Literal(lit) => {
                        let idx = self.literal.len() as u32;
                        let id = self.token_id_of(lit.span);
                        let (text, suffix) = if self.version >= EXTENDED_LEAF_DATA {
                            (
                                self.intern(lit.symbol.as_str()),
                                lit.suffix.as_ref().map(|s| self.intern(s.as_str())).unwrap_or(!0),
                            )
                        } else {
                            // Old protocol: ship the rendered literal (with quotes,
                            // suffix etc.) as plain text.
                            (self.intern_owned(format!("{lit}")), !0)
                        };
                        self.literal.push(LiteralRepr {
                            id,
                            text,
                            // Low byte: literal kind discriminant; high byte: `#`
                            // count for raw string flavors.
                            kind: u16::from_le_bytes(match lit.kind {
                                tt::LitKind::Err(_) => [0, 0],
                                tt::LitKind::Byte => [1, 0],
                                tt::LitKind::Char => [2, 0],
                                tt::LitKind::Integer => [3, 0],
                                tt::LitKind::Float => [4, 0],
                                tt::LitKind::Str => [5, 0],
                                tt::LitKind::StrRaw(r) => [6, r],
                                tt::LitKind::ByteStr => [7, 0],
                                tt::LitKind::ByteStrRaw(r) => [8, r],
                                tt::LitKind::CStr => [9, 0],
                                tt::LitKind::CStrRaw(r) => [10, r],
                            }),
                            suffix,
                        });
                        (idx << 2) | 0b01
                    }
                    tt::Leaf::Punct(punct) => {
                        let idx = self.punct.len() as u32;
                        let id = self.token_id_of(punct.span);
                        self.punct.push(PunctRepr { char: punct.char, spacing: punct.spacing, id });
                        (idx << 2) | 0b10
                    }
                    tt::Leaf::Ident(ident) => {
                        let idx = self.ident.len() as u32;
                        let id = self.token_id_of(ident.span);
                        let text = if self.version >= EXTENDED_LEAF_DATA {
                            self.intern(ident.sym.as_str())
                        } else if ident.is_raw.yes() {
                            // Old protocol has no rawness flag; bake `r#` into the text.
                            self.intern_owned(format!("r#{}", ident.sym.as_str(),))
                        } else {
                            self.intern(ident.sym.as_str())
                        };
                        self.ident.push(IdentRepr { id, text, is_raw: ident.is_raw.yes() });
                        (idx << 2) | 0b11
                    }
                },
            };
            self.token_tree[first_tt] = idx_tag;
            first_tt += 1;
        }
    }

    /// Registers a subtree repr (children range filled in later) and queues its
    /// contents for serialization; returns the subtree's index.
    fn enqueue(&mut self, subtree: &'a tt::Subtree<S>, contents: tt::iter::TtIter<'a, S>) -> u32 {
        let idx = self.subtree.len();
        let open = self.token_id_of(subtree.delimiter.open);
        let close = self.token_id_of(subtree.delimiter.close);
        let delimiter_kind = subtree.delimiter.kind;
        self.subtree.push(SubtreeRepr { open, close, kind: delimiter_kind, tt: [!0, !0] });
        self.work.push_back((idx, contents));
        idx as u32
    }

    /// Interns a borrowed string, returning its index in `text`.
    pub(crate) fn intern(&mut self, text: &'a str) -> u32 {
        let table = &mut self.text;
        *self.string_table.entry(text.into()).or_insert_with(|| {
            let idx = table.len();
            table.push(text.to_owned());
            idx as u32
        })
    }

    /// Interns an owned string (used when the text had to be rendered), returning
    /// its index in `text`.
    pub(crate) fn intern_owned(&mut self, text: String) -> u32 {
        let table = &mut self.text;
        *self.string_table.entry(text.clone().into()).or_insert_with(|| {
            let idx = table.len();
            table.push(text);
            idx as u32
        })
    }
}

/// Decoded flat arrays plus the span table, ready to be reassembled into a
/// `tt::TopSubtree` by [`Reader::read`].
struct Reader<'span, S: InternableSpan> {
    version: u32,
    subtree: Vec<SubtreeRepr>,
    literal: Vec<LiteralRepr>,
    punct: Vec<PunctRepr>,
    ident: Vec<IdentRepr>,
    token_tree: Vec<u32>,
    text: Vec<String>,
    span_data_table: &'span S::Table,
}

impl<S: InternableSpan> Reader<'_, S> {
    /// Reassembles the tree. Subtrees are processed in *reverse* index order so
    /// that by the time a parent consumes a child subtree (tag 0b00), the
    /// child's own token list has already been built.
    pub(crate) fn read(self) -> tt::TopSubtree<S> {
        let mut res: Vec<Option<(tt::Delimiter<S>, Vec<tt::TokenTree<S>>)>> =
            vec![None; self.subtree.len()];
        let read_span = |id| S::span_for_token_id(self.span_data_table, id);
        for i in (0..self.subtree.len()).rev() {
            let repr = &self.subtree[i];
            let token_trees = &self.token_tree[repr.tt[0] as usize..repr.tt[1] as usize];
            let delimiter = tt::Delimiter {
                open: read_span(repr.open),
                close: read_span(repr.close),
                kind: repr.kind,
            };
            let mut s = Vec::new();
            for &idx_tag in token_trees {
                let tag = idx_tag & 0b11;
                let idx = (idx_tag >> 2) as usize;
                match tag {
                    // XXX: we iterate subtrees in reverse to guarantee
                    // that this unwrap doesn't fire.
                    0b00 => {
                        let (delimiter, subtree) = res[idx].take().unwrap();
                        // `TopSubtree` stores trees pre-order flattened: the
                        // subtree node (carrying its child count) is followed
                        // inline by its children.
                        s.push(tt::TokenTree::Subtree(tt::Subtree {
                            delimiter,
                            len: subtree.len() as u32,
                        }));
                        s.extend(subtree)
                    }
                    0b01 => {
                        use tt::LitKind::*;
                        let repr = &self.literal[idx];
                        let text = self.text[repr.text as usize].as_str();
                        let span = read_span(repr.id);
                        s.push(
                            tt::Leaf::Literal(if self.version >= EXTENDED_LEAF_DATA {
                                tt::Literal {
                                    symbol: Symbol::intern(text),
                                    span,
                                    // Inverse of the packing in `Writer::subtree`.
                                    kind: match u16::to_le_bytes(repr.kind) {
                                        [0, _] => Err(()),
                                        [1, _] => Byte,
                                        [2, _] => Char,
                                        [3, _] => Integer,
                                        [4, _] => Float,
                                        [5, _] => Str,
                                        [6, r] => StrRaw(r),
                                        [7, _] => ByteStr,
                                        [8, r] => ByteStrRaw(r),
                                        [9, _] => CStr,
                                        [10, r] => CStrRaw(r),
                                        _ => unreachable!(),
                                    },
                                    suffix: if repr.suffix != !0 {
                                        Some(Symbol::intern(
                                            self.text[repr.suffix as usize].as_str(),
                                        ))
                                    } else {
                                        None
                                    },
                                }
                            } else {
                                // Old protocol: re-parse kind/suffix out of the
                                // rendered literal text.
                                tt::token_to_literal(text, span)
                            })
                            .into(),
                        )
                    }
                    0b10 => {
                        let repr = &self.punct[idx];
                        s.push(
                            tt::Leaf::Punct(tt::Punct {
                                char: repr.char,
                                spacing: repr.spacing,
                                span: read_span(repr.id),
                            })
                            .into(),
                        )
                    }
                    0b11 => {
                        let repr = &self.ident[idx];
                        let text = self.text[repr.text as usize].as_str();
                        let (is_raw, text) = if self.version >= EXTENDED_LEAF_DATA {
                            (
                                if repr.is_raw { tt::IdentIsRaw::Yes } else { tt::IdentIsRaw::No },
                                text,
                            )
                        } else {
                            // Old protocol: rawness is encoded as an `r#` prefix.
                            tt::IdentIsRaw::split_from_symbol(text)
                        };
                        s.push(
                            tt::Leaf::Ident(tt::Ident {
                                sym: Symbol::intern(text),
                                span: read_span(repr.id),
                                is_raw,
                            })
                            .into(),
                        )
                    }
                    other => panic!("bad tag: {other}"),
                }
            }
            res[i] = Some((delimiter, s));
        }

        // Subtree 0 is the root; prepend its node to its flattened children.
        let (delimiter, mut res) = res[0].take().unwrap();
        res.insert(0, tt::TokenTree::Subtree(tt::Subtree { delimiter, len: res.len() as u32 }));
        tt::TopSubtree(res.into_boxed_slice())
    }
}