Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'crates/proc-macro-api/src/msg/flat.rs')
| -rw-r--r-- | crates/proc-macro-api/src/msg/flat.rs | 328 |
1 files changed, 328 insertions, 0 deletions
diff --git a/crates/proc-macro-api/src/msg/flat.rs b/crates/proc-macro-api/src/msg/flat.rs new file mode 100644 index 0000000000..8437444e18 --- /dev/null +++ b/crates/proc-macro-api/src/msg/flat.rs @@ -0,0 +1,328 @@ +//! Serialization-friendly representation of `tt::Subtree`. +//! +//! It is possible to serialize `Subtree` as is, as a tree, but using +//! arbitrary-nested trees in JSON is problematic, as they can cause the JSON +//! parser to overflow the stack. +//! +//! Additionally, such implementation would be pretty verbose, and we do care +//! about performance here a bit. +//! +//! So what this module does is dumping a `tt::Subtree` into a bunch of flat +//! array of numbers. See the test in the parent module to get an example +//! output. +//! +//! ```json +//! { +//! // Array of subtrees, each subtree is represented by 4 numbers: +//! // id of delimiter, delimiter kind, index of first child in `token_tree`, +//! // index of last child in `token_tree` +//! "subtree":[4294967295,0,0,5,2,2,5,5], +//! // 2 ints per literal: [token id, index into `text`] +//! "literal":[4294967295,1], +//! // 3 ints per punct: [token id, char, spacing] +//! "punct":[4294967295,64,1], +//! // 2 ints per ident: [token id, index into `text`] +//! "ident": [0,0,1,1], +//! // children of all subtrees, concatenated. Each child is represented as `index << 2 | tag` +//! // where tag denotes one of subtree, literal, punct or ident. +//! "token_tree":[3,7,1,4], +//! // Strings shared by idents and literals +//! "text": ["struct","Foo"] +//! } +//! ``` +//! +//! We probably should replace most of the code here with bincode someday, but, +//! as we don't have bincode in Cargo.toml yet, lets stick with serde_json for +//! the time being. + +use std::{ + collections::{HashMap, VecDeque}, + convert::TryInto, +}; + +use serde::{Deserialize, Serialize}; +use tt::TokenId; + +#[derive(Serialize, Deserialize, Debug)] +pub struct FlatTree { + subtree: Vec<u32>, + literal: Vec<u32>, + punct: Vec<u32>, + ident: Vec<u32>, + token_tree: Vec<u32>, + text: Vec<String>, +} + +struct SubtreeRepr { + id: tt::TokenId, + kind: Option<tt::DelimiterKind>, + tt: [u32; 2], +} + +struct LiteralRepr { + id: tt::TokenId, + text: u32, +} + +struct PunctRepr { + id: tt::TokenId, + char: char, + spacing: tt::Spacing, +} + +struct IdentRepr { + id: tt::TokenId, + text: u32, +} + +impl FlatTree { + pub fn new(subtree: &tt::Subtree) -> FlatTree { + let mut w = Writer { + string_table: HashMap::new(), + work: VecDeque::new(), + + subtree: Vec::new(), + literal: Vec::new(), + punct: Vec::new(), + ident: Vec::new(), + token_tree: Vec::new(), + text: Vec::new(), + }; + w.write(subtree); + + return FlatTree { + subtree: write_vec(w.subtree, SubtreeRepr::write), + literal: write_vec(w.literal, LiteralRepr::write), + punct: write_vec(w.punct, PunctRepr::write), + ident: write_vec(w.ident, IdentRepr::write), + token_tree: w.token_tree, + text: w.text, + }; + + fn write_vec<T, F: Fn(T) -> [u32; N], const N: usize>(xs: Vec<T>, f: F) -> Vec<u32> { + xs.into_iter().flat_map(f).collect() + } + } + + pub fn to_subtree(self) -> tt::Subtree { + return Reader { + subtree: read_vec(self.subtree, SubtreeRepr::read), + literal: read_vec(self.literal, LiteralRepr::read), + punct: read_vec(self.punct, PunctRepr::read), + ident: read_vec(self.ident, IdentRepr::read), + token_tree: self.token_tree, + text: self.text, + } + .read(); + + fn read_vec<T, F: Fn([u32; N]) -> T, const N: usize>(xs: Vec<u32>, f: F) -> Vec<T> { + let mut chunks = xs.chunks_exact(N); + let res = chunks.by_ref().map(|chunk| f(chunk.try_into().unwrap())).collect(); + assert!(chunks.remainder().is_empty()); + res + } + } +} + +impl SubtreeRepr { + fn write(self) -> [u32; 4] { + let kind = match self.kind { + None => 0, + Some(tt::DelimiterKind::Parenthesis) => 1, + Some(tt::DelimiterKind::Brace) => 2, + Some(tt::DelimiterKind::Bracket) => 3, + }; + [self.id.0, kind, self.tt[0], self.tt[1]] + } + fn read([id, kind, lo, len]: [u32; 4]) -> SubtreeRepr { + let kind = match kind { + 0 => None, + 1 => Some(tt::DelimiterKind::Parenthesis), + 2 => Some(tt::DelimiterKind::Brace), + 3 => Some(tt::DelimiterKind::Bracket), + other => panic!("bad kind {}", other), + }; + SubtreeRepr { id: TokenId(id), kind, tt: [lo, len] } + } +} + +impl LiteralRepr { + fn write(self) -> [u32; 2] { + [self.id.0, self.text] + } + fn read([id, text]: [u32; 2]) -> LiteralRepr { + LiteralRepr { id: TokenId(id), text } + } +} + +impl PunctRepr { + fn write(self) -> [u32; 3] { + let spacing = match self.spacing { + tt::Spacing::Alone => 0, + tt::Spacing::Joint => 1, + }; + [self.id.0, self.char as u32, spacing] + } + fn read([id, char, spacing]: [u32; 3]) -> PunctRepr { + let spacing = match spacing { + 0 => tt::Spacing::Alone, + 1 => tt::Spacing::Joint, + other => panic!("bad spacing {}", other), + }; + PunctRepr { id: TokenId(id), char: char.try_into().unwrap(), spacing } + } +} + +impl IdentRepr { + fn write(self) -> [u32; 2] { + [self.id.0, self.text] + } + fn read(data: [u32; 2]) -> IdentRepr { + IdentRepr { id: TokenId(data[0]), text: data[1] } + } +} + +struct Writer<'a> { + work: VecDeque<(usize, &'a tt::Subtree)>, + string_table: HashMap<&'a str, u32>, + + subtree: Vec<SubtreeRepr>, + literal: Vec<LiteralRepr>, + punct: Vec<PunctRepr>, + ident: Vec<IdentRepr>, + token_tree: Vec<u32>, + text: Vec<String>, +} + +impl<'a> Writer<'a> { + fn write(&mut self, root: &'a tt::Subtree) { + self.enqueue(root); + while let Some((idx, subtree)) = self.work.pop_front() { + self.subtree(idx, subtree); + } + } + + fn subtree(&mut self, idx: usize, subtree: &'a tt::Subtree) { + let mut first_tt = self.token_tree.len(); + let n_tt = subtree.token_trees.len(); + self.token_tree.resize(first_tt + n_tt, !0); + + self.subtree[idx].tt = [first_tt as u32, (first_tt + n_tt) as u32]; + + for child in &subtree.token_trees { + let idx_tag = match child { + tt::TokenTree::Subtree(it) => { + let idx = self.enqueue(it); + idx << 2 | 0b00 + } + tt::TokenTree::Leaf(leaf) => match leaf { + tt::Leaf::Literal(lit) => { + let idx = self.literal.len() as u32; + let text = self.intern(&lit.text); + self.literal.push(LiteralRepr { id: lit.id, text }); + idx << 2 | 0b01 + } + tt::Leaf::Punct(punct) => { + let idx = self.punct.len() as u32; + self.punct.push(PunctRepr { + char: punct.char, + spacing: punct.spacing, + id: punct.id, + }); + idx << 2 | 0b10 + } + tt::Leaf::Ident(ident) => { + let idx = self.ident.len() as u32; + let text = self.intern(&ident.text); + self.ident.push(IdentRepr { id: ident.id, text }); + idx << 2 | 0b11 + } + }, + }; + self.token_tree[first_tt] = idx_tag; + first_tt += 1; + } + } + + fn enqueue(&mut self, subtree: &'a tt::Subtree) -> u32 { + let idx = self.subtree.len(); + let delimiter_id = subtree.delimiter.map_or(TokenId::unspecified(), |it| it.id); + let delimiter_kind = subtree.delimiter.map(|it| it.kind); + self.subtree.push(SubtreeRepr { id: delimiter_id, kind: delimiter_kind, tt: [!0, !0] }); + self.work.push_back((idx, subtree)); + idx as u32 + } + + pub(crate) fn intern(&mut self, text: &'a str) -> u32 { + let table = &mut self.text; + *self.string_table.entry(text).or_insert_with(|| { + let idx = table.len(); + table.push(text.to_string()); + idx as u32 + }) + } +} + +struct Reader { + subtree: Vec<SubtreeRepr>, + literal: Vec<LiteralRepr>, + punct: Vec<PunctRepr>, + ident: Vec<IdentRepr>, + token_tree: Vec<u32>, + text: Vec<String>, +} + +impl Reader { + pub(crate) fn read(self) -> tt::Subtree { + let mut res: Vec<Option<tt::Subtree>> = vec![None; self.subtree.len()]; + for i in (0..self.subtree.len()).rev() { + let repr = &self.subtree[i]; + let token_trees = &self.token_tree[repr.tt[0] as usize..repr.tt[1] as usize]; + let s = tt::Subtree { + delimiter: repr.kind.map(|kind| tt::Delimiter { id: repr.id, kind }), + token_trees: token_trees + .iter() + .copied() + .map(|idx_tag| { + let tag = idx_tag & 0b11; + let idx = (idx_tag >> 2) as usize; + match tag { + // XXX: we iterate subtrees in reverse to guarantee + // that this unwrap doesn't fire. + 0b00 => res[idx].take().unwrap().into(), + 0b01 => { + let repr = &self.literal[idx]; + tt::Leaf::Literal(tt::Literal { + text: self.text[repr.text as usize].as_str().into(), + id: repr.id, + }) + .into() + } + 0b10 => { + let repr = &self.punct[idx]; + tt::Leaf::Punct(tt::Punct { + char: repr.char, + spacing: repr.spacing, + id: repr.id, + }) + .into() + } + 0b11 => { + let repr = &self.ident[idx]; + tt::Leaf::Ident(tt::Ident { + text: self.text[repr.text as usize].as_str().into(), + id: repr.id, + }) + .into() + } + other => panic!("bad tag: {}", other), + } + }) + .collect(), + }; + res[i] = Some(s); + } + + res[0].take().unwrap() + } +} |