Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'crates/tt/src/lib.rs')
| -rw-r--r-- | crates/tt/src/lib.rs | 202 |
1 files changed, 188 insertions, 14 deletions
diff --git a/crates/tt/src/lib.rs b/crates/tt/src/lib.rs index 369744d0e9..24fd0abada 100644 --- a/crates/tt/src/lib.rs +++ b/crates/tt/src/lib.rs @@ -12,8 +12,54 @@ use stdx::impl_from; pub use smol_str::SmolStr; pub use text_size::{TextRange, TextSize}; +#[derive(Clone, PartialEq, Debug)] +pub struct Lit { + pub kind: LitKind, + pub symbol: SmolStr, + pub suffix: Option<SmolStr>, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum IdentIsRaw { + No, + Yes, +} +impl IdentIsRaw { + pub fn yes(self) -> bool { + matches!(self, IdentIsRaw::Yes) + } + pub fn as_str(self) -> &'static str { + match self { + IdentIsRaw::No => "", + IdentIsRaw::Yes => "r#", + } + } + pub fn split_from_symbol(sym: &str) -> (Self, &str) { + if let Some(sym) = sym.strip_prefix("r#") { + (IdentIsRaw::Yes, sym) + } else { + (IdentIsRaw::No, sym) + } + } +} + +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +pub enum LitKind { + Byte, + Char, + Integer, // e.g. `1`, `1u8`, `1f32` + Float, // e.g. `1.`, `1.0`, `1e3f32` + Str, + StrRaw(u8), // raw string delimited by `n` hash symbols + ByteStr, + ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols + CStr, + CStrRaw(u8), + Err(()), +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum TokenTree<S> { +pub enum TokenTree<S = u32> { Leaf(Leaf<S>), Subtree(Subtree<S>), } @@ -103,6 +149,15 @@ pub struct DelimSpan<S> { pub close: S, } +impl<Span: Copy> DelimSpan<Span> { + pub fn from_single(sp: Span) -> Self { + DelimSpan { open: sp, close: sp } + } + + pub fn from_pair(open: Span, close: Span) -> Self { + DelimSpan { open, close } + } +} #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct Delimiter<S> { pub open: S, @@ -134,8 +189,11 @@ pub enum DelimiterKind { #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Literal<S> { + // escaped pub text: SmolStr, pub span: S, + pub kind: LitKind, + pub suffix: Option<Box<SmolStr>>, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -145,23 +203,85 @@ pub struct Punct<S> { pub span: S, } +/// Indicates whether a token can join with the following token to form a +/// compound token. Used for conversions to `proc_macro::Spacing`. Also used to +/// guide pretty-printing, which is where the `JointHidden` value (which isn't +/// part of `proc_macro::Spacing`) comes in useful. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Spacing { + /// The token cannot join with the following token to form a compound + /// token. + /// + /// In token streams parsed from source code, the compiler will use `Alone` + /// for any token immediately followed by whitespace, a non-doc comment, or + /// EOF. + /// + /// When constructing token streams within the compiler, use this for each + /// token that (a) should be pretty-printed with a space after it, or (b) + /// is the last token in the stream. (In the latter case the choice of + /// spacing doesn't matter because it is never used for the last token. We + /// arbitrarily use `Alone`.) + /// + /// Converts to `proc_macro::Spacing::Alone`, and + /// `proc_macro::Spacing::Alone` converts back to this. Alone, - /// Whether the following token is joint to this one. + + /// The token can join with the following token to form a compound token. + /// + /// In token streams parsed from source code, the compiler will use `Joint` + /// for any token immediately followed by punctuation (as determined by + /// `Token::is_punct`). + /// + /// When constructing token streams within the compiler, use this for each + /// token that (a) should be pretty-printed without a space after it, and + /// (b) is followed by a punctuation token. + /// + /// Converts to `proc_macro::Spacing::Joint`, and + /// `proc_macro::Spacing::Joint` converts back to this. Joint, + + /// The token can join with the following token to form a compound token, + /// but this will not be visible at the proc macro level. (This is what the + /// `Hidden` means; see below.) + /// + /// In token streams parsed from source code, the compiler will use + /// `JointHidden` for any token immediately followed by anything not + /// covered by the `Alone` and `Joint` cases: an identifier, lifetime, + /// literal, delimiter, doc comment. + /// + /// When constructing token streams, use this for each token that (a) + /// should be pretty-printed without a space after it, and (b) is followed + /// by a non-punctuation token. + /// + /// Converts to `proc_macro::Spacing::Alone`, but + /// `proc_macro::Spacing::Alone` converts back to `token::Spacing::Alone`. + /// Because of that, pretty-printing of `TokenStream`s produced by proc + /// macros is unavoidably uglier (with more whitespace between tokens) than + /// pretty-printing of `TokenStream`'s produced by other means (i.e. parsed + /// source code, internally constructed token streams, and token streams + /// produced by declarative macros). + JointHidden, } +/// Identifier or keyword. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -/// Identifier or keyword. Unlike rustc, we keep "r#" prefix when it represents a raw identifier. pub struct Ident<S> { pub text: SmolStr, pub span: S, + pub is_raw: IdentIsRaw, } impl<S> Ident<S> { - pub fn new(text: impl Into<SmolStr>, span: S) -> Self { - Ident { text: text.into(), span } + pub fn new(text: impl Into<SmolStr> + AsRef<str>, span: S) -> Self { + let t = text.as_ref(); + // let raw_stripped = IdentIsRaw::split_from_symbol(text.as_ref()); + let raw_stripped = t.strip_prefix("r#"); + let is_raw = if raw_stripped.is_none() { IdentIsRaw::No } else { IdentIsRaw::Yes }; + let text = match raw_stripped { + Some(derawed) => derawed.into(), + None => text.into(), + }; + Ident { text, span, is_raw } } } @@ -207,22 +327,35 @@ fn print_debug_token<S: fmt::Debug>( match tkn { TokenTree::Leaf(leaf) => match leaf { Leaf::Literal(lit) => { - write!(f, "{}LITERAL {}", align, lit.text)?; - fmt::Debug::fmt(&lit.span, f)?; + write!( + f, + "{}LITERAL {:?} {}{} {:#?}", + align, + lit.kind, + lit.text, + lit.suffix.as_ref().map(|it| &***it).unwrap_or(""), + lit.span + )?; } Leaf::Punct(punct) => { write!( f, - "{}PUNCH {} [{}] ", + "{}PUNCH {} [{}] {:#?}", align, punct.char, if punct.spacing == Spacing::Alone { "alone" } else { "joint" }, + punct.span )?; - fmt::Debug::fmt(&punct.span, f)?; } Leaf::Ident(ident) => { - write!(f, "{}IDENT {} ", align, ident.text)?; - fmt::Debug::fmt(&ident.span, f)?; + write!( + f, + "{}IDENT {}{} {:#?}", + align, + ident.is_raw.as_str(), + ident.text, + ident.span + )?; } }, TokenTree::Subtree(subtree) => { @@ -288,13 +421,52 @@ impl<S> fmt::Display for Leaf<S> { impl<S> fmt::Display for Ident<S> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.is_raw.as_str(), f)?; fmt::Display::fmt(&self.text, f) } } impl<S> fmt::Display for Literal<S> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&self.text, f) + match self.kind { + LitKind::Byte => write!(f, "b'{}'", self.text), + LitKind::Char => write!(f, "'{}'", self.text), + LitKind::Integer | LitKind::Float | LitKind::Err(_) => write!(f, "{}", self.text), + LitKind::Str => write!(f, "\"{}\"", self.text), + LitKind::ByteStr => write!(f, "b\"{}\"", self.text), + LitKind::CStr => write!(f, "c\"{}\"", self.text), + LitKind::StrRaw(num_of_hashes) => { + let num_of_hashes = num_of_hashes as usize; + write!( + f, + r#"r{0:#<num_of_hashes$}"{text}"{0:#<num_of_hashes$}"#, + "", + text = self.text + ) + } + LitKind::ByteStrRaw(num_of_hashes) => { + let num_of_hashes = num_of_hashes as usize; + write!( + f, + r#"br{0:#<num_of_hashes$}"{text}"{0:#<num_of_hashes$}"#, + "", + text = self.text + ) + } + LitKind::CStrRaw(num_of_hashes) => { + let num_of_hashes = num_of_hashes as usize; + write!( + f, + r#"cr{0:#<num_of_hashes$}"{text}"{0:#<num_of_hashes$}"#, + "", + text = self.text + ) + } + }?; + if let Some(suffix) = &self.suffix { + write!(f, "{}", suffix)?; + } + Ok(()) } } @@ -339,7 +511,7 @@ impl<S> Subtree<S> { let s = match it { Leaf::Literal(it) => it.text.to_string(), Leaf::Punct(it) => it.char.to_string(), - Leaf::Ident(it) => it.text.to_string(), + Leaf::Ident(it) => format!("{}{}", it.is_raw.as_str(), it.text), }; match (it, last) { (Leaf::Ident(_), Some(&TokenTree::Leaf(Leaf::Ident(_)))) => { @@ -369,7 +541,9 @@ impl<S> Subtree<S> { pub fn pretty<S>(tkns: &[TokenTree<S>]) -> String { fn tokentree_to_text<S>(tkn: &TokenTree<S>) -> String { match tkn { - TokenTree::Leaf(Leaf::Ident(ident)) => ident.text.clone().into(), + TokenTree::Leaf(Leaf::Ident(ident)) => { + format!("{}{}", ident.is_raw.as_str(), ident.text) + } TokenTree::Leaf(Leaf::Literal(literal)) => literal.text.clone().into(), TokenTree::Leaf(Leaf::Punct(punct)) => format!("{}", punct.char), TokenTree::Subtree(subtree) => { |