Unnamed repository; edit this file 'description' to name the repository.
Auto merge of #17601 - Veykril:proc-macro-fix, r=Veykril
Fix incorrect encoding of literals in the proc-macro-api on version 4. Quick follow-up on https://github.com/rust-lang/rust-analyzer/pull/17559, which broke things.
bors 2024-07-15
parent f913901 · parent 05ce57e · commit 0c95aaa
-rw-r--r--Cargo.lock3
-rw-r--r--crates/hir-expand/src/attrs.rs7
-rw-r--r--crates/hir-expand/src/lib.rs2
-rw-r--r--crates/mbe/Cargo.toml3
-rw-r--r--crates/mbe/src/lib.rs9
-rw-r--r--crates/mbe/src/syntax_bridge.rs53
-rw-r--r--crates/proc-macro-api/Cargo.toml2
-rw-r--r--crates/proc-macro-api/src/msg.rs53
-rw-r--r--crates/proc-macro-api/src/msg/flat.rs118
-rw-r--r--crates/proc-macro-srv/Cargo.toml2
-rw-r--r--crates/tt/Cargo.toml4
-rw-r--r--crates/tt/src/lib.rs61
12 files changed, 183 insertions, 134 deletions
diff --git a/Cargo.lock b/Cargo.lock
index e43f712a6e..500a150b57 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1046,7 +1046,6 @@ dependencies = [
"arrayvec",
"cov-mark",
"parser",
- "ra-ap-rustc_lexer",
"rustc-hash",
"smallvec",
"span",
@@ -1326,6 +1325,7 @@ dependencies = [
"base-db",
"indexmap",
"la-arena 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "mbe",
"paths",
"rustc-hash",
"serde",
@@ -2218,6 +2218,7 @@ name = "tt"
version = "0.0.0"
dependencies = [
"arrayvec",
+ "ra-ap-rustc_lexer",
"smol_str",
"stdx",
"text-size",
diff --git a/crates/hir-expand/src/attrs.rs b/crates/hir-expand/src/attrs.rs
index 4fce7c1fde..49a104fa11 100644
--- a/crates/hir-expand/src/attrs.rs
+++ b/crates/hir-expand/src/attrs.rs
@@ -5,9 +5,10 @@ use base_db::CrateId;
use cfg::CfgExpr;
use either::Either;
use intern::{sym, Interned};
+
use mbe::{
- desugar_doc_comment_text, syntax_node_to_token_tree, token_to_literal, DelimiterKind,
- DocCommentDesugarMode, Punct,
+ desugar_doc_comment_text, syntax_node_to_token_tree, DelimiterKind, DocCommentDesugarMode,
+ Punct,
};
use smallvec::{smallvec, SmallVec};
use span::{Span, SyntaxContextId};
@@ -20,7 +21,7 @@ use crate::{
db::ExpandDatabase,
mod_path::ModPath,
span_map::SpanMapRef,
- tt::{self, Subtree},
+ tt::{self, token_to_literal, Subtree},
InFile,
};
diff --git a/crates/hir-expand/src/lib.rs b/crates/hir-expand/src/lib.rs
index c4921da610..3460d1ca3d 100644
--- a/crates/hir-expand/src/lib.rs
+++ b/crates/hir-expand/src/lib.rs
@@ -59,7 +59,7 @@ pub use span::{HirFileId, MacroCallId, MacroFileId};
pub mod tt {
pub use span::Span;
- pub use tt::{DelimiterKind, IdentIsRaw, LitKind, Spacing};
+ pub use tt::{token_to_literal, DelimiterKind, IdentIsRaw, LitKind, Spacing};
pub type Delimiter = ::tt::Delimiter<Span>;
pub type DelimSpan = ::tt::DelimSpan<Span>;
diff --git a/crates/mbe/Cargo.toml b/crates/mbe/Cargo.toml
index 7ce8aadfb3..1002de2104 100644
--- a/crates/mbe/Cargo.toml
+++ b/crates/mbe/Cargo.toml
@@ -17,7 +17,6 @@ rustc-hash.workspace = true
smallvec.workspace = true
tracing.workspace = true
arrayvec.workspace = true
-ra-ap-rustc_lexer.workspace = true
# local deps
syntax.workspace = true
@@ -30,7 +29,7 @@ span.workspace = true
test-utils.workspace = true
[features]
-in-rust-tree = ["parser/in-rust-tree", "syntax/in-rust-tree"]
+in-rust-tree = ["parser/in-rust-tree", "tt/in-rust-tree", "syntax/in-rust-tree"]
[lints]
workspace = true
diff --git a/crates/mbe/src/lib.rs b/crates/mbe/src/lib.rs
index 8ab9269e95..44b056a1ac 100644
--- a/crates/mbe/src/lib.rs
+++ b/crates/mbe/src/lib.rs
@@ -6,13 +6,6 @@
//! The tests for this functionality live in another crate:
//! `hir_def::macro_expansion_tests::mbe`.
-#![cfg_attr(feature = "in-rust-tree", feature(rustc_private))]
-
-#[cfg(not(feature = "in-rust-tree"))]
-extern crate ra_ap_rustc_lexer as rustc_lexer;
-#[cfg(feature = "in-rust-tree")]
-extern crate rustc_lexer;
-
mod expander;
mod parser;
mod syntax_bridge;
@@ -36,7 +29,7 @@ pub use tt::{Delimiter, DelimiterKind, Punct};
pub use crate::syntax_bridge::{
desugar_doc_comment_text, parse_exprs_with_sep, parse_to_token_tree,
parse_to_token_tree_static_span, syntax_node_to_token_tree, syntax_node_to_token_tree_modified,
- token_to_literal, token_tree_to_syntax_node, DocCommentDesugarMode, SpanMapper,
+ token_tree_to_syntax_node, DocCommentDesugarMode, SpanMapper,
};
pub use crate::syntax_bridge::dummy_test_span_utils::*;
diff --git a/crates/mbe/src/syntax_bridge.rs b/crates/mbe/src/syntax_bridge.rs
index 3feddba210..4d66464932 100644
--- a/crates/mbe/src/syntax_bridge.rs
+++ b/crates/mbe/src/syntax_bridge.rs
@@ -4,7 +4,7 @@ use std::fmt;
use rustc_hash::{FxHashMap, FxHashSet};
use span::{Edition, SpanAnchor, SpanData, SpanMap};
-use stdx::{format_to, itertools::Itertools, never, non_empty_vec::NonEmptyVec};
+use stdx::{format_to, never, non_empty_vec::NonEmptyVec};
use syntax::{
ast::{self, make::tokens::doc_comment},
format_smolstr, AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement,
@@ -14,6 +14,7 @@ use syntax::{
use tt::{
buffer::{Cursor, TokenBuffer},
iter::TtIter,
+ token_to_literal,
};
use crate::to_parser_input::to_parser_input;
@@ -400,56 +401,6 @@ where
}
}
-pub fn token_to_literal<S>(text: SmolStr, span: S) -> tt::Literal<S>
-where
- S: Copy,
-{
- use rustc_lexer::LiteralKind;
-
- let token = rustc_lexer::tokenize(&text).next_tuple();
- let Some((rustc_lexer::Token {
- kind: rustc_lexer::TokenKind::Literal { kind, suffix_start },
- ..
- },)) = token
- else {
- return tt::Literal { span, text, kind: tt::LitKind::Err(()), suffix: None };
- };
-
- let (kind, start_offset, end_offset) = match kind {
- LiteralKind::Int { .. } => (tt::LitKind::Integer, 0, 0),
- LiteralKind::Float { .. } => (tt::LitKind::Float, 0, 0),
- LiteralKind::Char { terminated } => (tt::LitKind::Char, 1, terminated as usize),
- LiteralKind::Byte { terminated } => (tt::LitKind::Byte, 2, terminated as usize),
- LiteralKind::Str { terminated } => (tt::LitKind::Str, 1, terminated as usize),
- LiteralKind::ByteStr { terminated } => (tt::LitKind::ByteStr, 2, terminated as usize),
- LiteralKind::CStr { terminated } => (tt::LitKind::CStr, 2, terminated as usize),
- LiteralKind::RawStr { n_hashes } => (
- tt::LitKind::StrRaw(n_hashes.unwrap_or_default()),
- 2 + n_hashes.unwrap_or_default() as usize,
- 1 + n_hashes.unwrap_or_default() as usize,
- ),
- LiteralKind::RawByteStr { n_hashes } => (
- tt::LitKind::ByteStrRaw(n_hashes.unwrap_or_default()),
- 3 + n_hashes.unwrap_or_default() as usize,
- 1 + n_hashes.unwrap_or_default() as usize,
- ),
- LiteralKind::RawCStr { n_hashes } => (
- tt::LitKind::CStrRaw(n_hashes.unwrap_or_default()),
- 3 + n_hashes.unwrap_or_default() as usize,
- 1 + n_hashes.unwrap_or_default() as usize,
- ),
- };
-
- let (lit, suffix) = text.split_at(suffix_start as usize);
- let lit = &lit[start_offset..lit.len() - end_offset];
- let suffix = match suffix {
- "" | "_" => None,
- suffix => Some(Box::new(suffix.into())),
- };
-
- tt::Literal { span, text: lit.into(), kind, suffix }
-}
-
fn is_single_token_op(kind: SyntaxKind) -> bool {
matches!(
kind,
diff --git a/crates/proc-macro-api/Cargo.toml b/crates/proc-macro-api/Cargo.toml
index 7f633d91ec..889eefa8b5 100644
--- a/crates/proc-macro-api/Cargo.toml
+++ b/crates/proc-macro-api/Cargo.toml
@@ -28,6 +28,8 @@ span.workspace = true
# InternIds for the syntax context
base-db.workspace = true
la-arena.workspace = true
+# only here to parse via token_to_literal
+mbe.workspace = true
[lints]
workspace = true
diff --git a/crates/proc-macro-api/src/msg.rs b/crates/proc-macro-api/src/msg.rs
index b5f3d0c3aa..6583504817 100644
--- a/crates/proc-macro-api/src/msg.rs
+++ b/crates/proc-macro-api/src/msg.rs
@@ -197,7 +197,7 @@ mod tests {
.into(),
),
TokenTree::Leaf(Leaf::Literal(Literal {
- text: "\"Foo\"".into(),
+ text: "Foo".into(),
span: Span {
range: TextRange::at(TextSize::new(10), TextSize::of("\"Foo\"")),
anchor,
@@ -263,32 +263,35 @@ mod tests {
#[test]
fn test_proc_macro_rpc_works() {
let tt = fixture_token_tree();
- let mut span_data_table = Default::default();
- let task = ExpandMacro {
- data: ExpandMacroData {
- macro_body: FlatTree::new(&tt, CURRENT_API_VERSION, &mut span_data_table),
- macro_name: Default::default(),
- attributes: None,
- has_global_spans: ExpnGlobals {
- serialize: true,
- def_site: 0,
- call_site: 0,
- mixed_site: 0,
+ for v in RUST_ANALYZER_SPAN_SUPPORT..=CURRENT_API_VERSION {
+ let mut span_data_table = Default::default();
+ let task = ExpandMacro {
+ data: ExpandMacroData {
+ macro_body: FlatTree::new(&tt, v, &mut span_data_table),
+ macro_name: Default::default(),
+ attributes: None,
+ has_global_spans: ExpnGlobals {
+ serialize: true,
+ def_site: 0,
+ call_site: 0,
+ mixed_site: 0,
+ },
+ span_data_table: Vec::new(),
},
- span_data_table: Vec::new(),
- },
- lib: Utf8PathBuf::from_path_buf(std::env::current_dir().unwrap()).unwrap(),
- env: Default::default(),
- current_dir: Default::default(),
- };
+ lib: Utf8PathBuf::from_path_buf(std::env::current_dir().unwrap()).unwrap(),
+ env: Default::default(),
+ current_dir: Default::default(),
+ };
- let json = serde_json::to_string(&task).unwrap();
- // println!("{}", json);
- let back: ExpandMacro = serde_json::from_str(&json).unwrap();
+ let json = serde_json::to_string(&task).unwrap();
+ // println!("{}", json);
+ let back: ExpandMacro = serde_json::from_str(&json).unwrap();
- assert_eq!(
- tt,
- back.data.macro_body.to_subtree_resolved(CURRENT_API_VERSION, &span_data_table)
- );
+ assert_eq!(
+ tt,
+ back.data.macro_body.to_subtree_resolved(v, &span_data_table),
+ "version: {v}"
+ );
+ }
}
}
diff --git a/crates/proc-macro-api/src/msg/flat.rs b/crates/proc-macro-api/src/msg/flat.rs
index 7f5afdb727..3d962e99d9 100644
--- a/crates/proc-macro-api/src/msg/flat.rs
+++ b/crates/proc-macro-api/src/msg/flat.rs
@@ -141,6 +141,7 @@ impl FlatTree {
ident: Vec::new(),
token_tree: Vec::new(),
text: Vec::new(),
+ version,
};
w.write(subtree);
@@ -178,6 +179,7 @@ impl FlatTree {
ident: Vec::new(),
token_tree: Vec::new(),
text: Vec::new(),
+ version,
};
w.write(subtree);
@@ -228,6 +230,7 @@ impl FlatTree {
token_tree: self.token_tree,
text: self.text,
span_data_table,
+ version,
}
.read()
}
@@ -253,6 +256,7 @@ impl FlatTree {
token_tree: self.token_tree,
text: self.text,
span_data_table: &(),
+ version,
}
.read()
}
@@ -386,8 +390,9 @@ impl InternableSpan for Span {
struct Writer<'a, 'span, S: InternableSpan> {
work: VecDeque<(usize, &'a tt::Subtree<S>)>,
- string_table: FxHashMap<&'a str, u32>,
+ string_table: FxHashMap<std::borrow::Cow<'a, str>, u32>,
span_data_table: &'span mut S::Table,
+ version: u32,
subtree: Vec<SubtreeRepr>,
literal: Vec<LiteralRepr>,
@@ -425,9 +430,15 @@ impl<'a, 'span, S: InternableSpan> Writer<'a, 'span, S> {
tt::TokenTree::Leaf(leaf) => match leaf {
tt::Leaf::Literal(lit) => {
let idx = self.literal.len() as u32;
- let text = self.intern(&lit.text);
let id = self.token_id_of(lit.span);
- let suffix = lit.suffix.as_ref().map(|s| self.intern(s)).unwrap_or(!0);
+ let (text, suffix) = if self.version >= EXTENDED_LEAF_DATA {
+ (
+ self.intern(&lit.text),
+ lit.suffix.as_ref().map(|s| self.intern(s)).unwrap_or(!0),
+ )
+ } else {
+ (self.intern_owned(format!("{lit}")), !0)
+ };
self.literal.push(LiteralRepr {
id,
text,
@@ -456,13 +467,15 @@ impl<'a, 'span, S: InternableSpan> Writer<'a, 'span, S> {
}
tt::Leaf::Ident(ident) => {
let idx = self.ident.len() as u32;
- let text = self.intern(&ident.text);
let id = self.token_id_of(ident.span);
- self.ident.push(IdentRepr {
- id,
- text,
- is_raw: ident.is_raw == tt::IdentIsRaw::Yes,
- });
+ let text = if self.version >= EXTENDED_LEAF_DATA {
+ self.intern(&ident.text)
+ } else if ident.is_raw.yes() {
+ self.intern_owned(format!("r#{}", ident.text,))
+ } else {
+ self.intern(&ident.text)
+ };
+ self.ident.push(IdentRepr { id, text, is_raw: ident.is_raw.yes() });
idx << 2 | 0b11
}
},
@@ -484,15 +497,25 @@ impl<'a, 'span, S: InternableSpan> Writer<'a, 'span, S> {
pub(crate) fn intern(&mut self, text: &'a str) -> u32 {
let table = &mut self.text;
- *self.string_table.entry(text).or_insert_with(|| {
+ *self.string_table.entry(text.into()).or_insert_with(|| {
let idx = table.len();
table.push(text.to_owned());
idx as u32
})
}
+
+ pub(crate) fn intern_owned(&mut self, text: String) -> u32 {
+ let table = &mut self.text;
+ *self.string_table.entry(text.clone().into()).or_insert_with(|| {
+ let idx = table.len();
+ table.push(text);
+ idx as u32
+ })
+ }
}
struct Reader<'span, S: InternableSpan> {
+ version: u32,
subtree: Vec<SubtreeRepr>,
literal: Vec<LiteralRepr>,
punct: Vec<PunctRepr>,
@@ -528,30 +551,36 @@ impl<'span, S: InternableSpan> Reader<'span, S> {
0b01 => {
use tt::LitKind::*;
let repr = &self.literal[idx];
- tt::Leaf::Literal(tt::Literal {
- text: self.text[repr.text as usize].as_str().into(),
- span: read_span(repr.id),
- kind: match u16::to_le_bytes(repr.kind) {
- [0, _] => Err(()),
- [1, _] => Byte,
- [2, _] => Char,
- [3, _] => Integer,
- [4, _] => Float,
- [5, _] => Str,
- [6, r] => StrRaw(r),
- [7, _] => ByteStr,
- [8, r] => ByteStrRaw(r),
- [9, _] => CStr,
- [10, r] => CStrRaw(r),
- _ => unreachable!(),
- },
- suffix: if repr.suffix != !0 {
- Some(Box::new(
- self.text[repr.suffix as usize].as_str().into(),
- ))
- } else {
- None
- },
+ let text = self.text[repr.text as usize].as_str();
+ let span = read_span(repr.id);
+ tt::Leaf::Literal(if self.version >= EXTENDED_LEAF_DATA {
+ tt::Literal {
+ text: text.into(),
+ span,
+ kind: match u16::to_le_bytes(repr.kind) {
+ [0, _] => Err(()),
+ [1, _] => Byte,
+ [2, _] => Char,
+ [3, _] => Integer,
+ [4, _] => Float,
+ [5, _] => Str,
+ [6, r] => StrRaw(r),
+ [7, _] => ByteStr,
+ [8, r] => ByteStrRaw(r),
+ [9, _] => CStr,
+ [10, r] => CStrRaw(r),
+ _ => unreachable!(),
+ },
+ suffix: if repr.suffix != !0 {
+ Some(Box::new(
+ self.text[repr.suffix as usize].as_str().into(),
+ ))
+ } else {
+ None
+ },
+ }
+ } else {
+ tt::token_to_literal(text.into(), span)
})
.into()
}
@@ -566,14 +595,23 @@ impl<'span, S: InternableSpan> Reader<'span, S> {
}
0b11 => {
let repr = &self.ident[idx];
+ let text = self.text[repr.text as usize].as_str();
+ let (is_raw, text) = if self.version >= EXTENDED_LEAF_DATA {
+ (
+ if repr.is_raw {
+ tt::IdentIsRaw::Yes
+ } else {
+ tt::IdentIsRaw::No
+ },
+ text,
+ )
+ } else {
+ tt::IdentIsRaw::split_from_symbol(text)
+ };
tt::Leaf::Ident(tt::Ident {
- text: self.text[repr.text as usize].as_str().into(),
+ text: text.into(),
span: read_span(repr.id),
- is_raw: if repr.is_raw {
- tt::IdentIsRaw::Yes
- } else {
- tt::IdentIsRaw::No
- },
+ is_raw,
})
.into()
}
diff --git a/crates/proc-macro-srv/Cargo.toml b/crates/proc-macro-srv/Cargo.toml
index 735f781c43..065701c05c 100644
--- a/crates/proc-macro-srv/Cargo.toml
+++ b/crates/proc-macro-srv/Cargo.toml
@@ -34,7 +34,7 @@ proc-macro-test.path = "./proc-macro-test"
[features]
sysroot-abi = []
-in-rust-tree = ["mbe/in-rust-tree", "sysroot-abi"]
+in-rust-tree = ["mbe/in-rust-tree", "tt/in-rust-tree","sysroot-abi"]
[lints]
workspace = true
diff --git a/crates/tt/Cargo.toml b/crates/tt/Cargo.toml
index 1311e2ddf8..1900635b99 100644
--- a/crates/tt/Cargo.toml
+++ b/crates/tt/Cargo.toml
@@ -17,6 +17,10 @@ smol_str.workspace = true
text-size.workspace = true
stdx.workspace = true
+ra-ap-rustc_lexer.workspace = true
+
+[features]
+in-rust-tree = []
[lints]
workspace = true
diff --git a/crates/tt/src/lib.rs b/crates/tt/src/lib.rs
index 24fd0abada..c328b3f8a3 100644
--- a/crates/tt/src/lib.rs
+++ b/crates/tt/src/lib.rs
@@ -2,14 +2,21 @@
//! input and output) of macros. It closely mirrors `proc_macro` crate's
//! `TokenTree`.
+#![cfg_attr(feature = "in-rust-tree", feature(rustc_private))]
+
+#[cfg(not(feature = "in-rust-tree"))]
+extern crate ra_ap_rustc_lexer as rustc_lexer;
+#[cfg(feature = "in-rust-tree")]
+extern crate rustc_lexer;
+
pub mod buffer;
pub mod iter;
use std::fmt;
-use stdx::impl_from;
+use stdx::{impl_from, itertools::Itertools as _};
-pub use smol_str::SmolStr;
+pub use smol_str::{format_smolstr, SmolStr};
pub use text_size::{TextRange, TextSize};
#[derive(Clone, PartialEq, Debug)]
@@ -196,6 +203,56 @@ pub struct Literal<S> {
pub suffix: Option<Box<SmolStr>>,
}
+pub fn token_to_literal<S>(text: SmolStr, span: S) -> Literal<S>
+where
+ S: Copy,
+{
+ use rustc_lexer::LiteralKind;
+
+ let token = rustc_lexer::tokenize(&text).next_tuple();
+ let Some((rustc_lexer::Token {
+ kind: rustc_lexer::TokenKind::Literal { kind, suffix_start },
+ ..
+ },)) = token
+ else {
+ return Literal { span, text, kind: LitKind::Err(()), suffix: None };
+ };
+
+ let (kind, start_offset, end_offset) = match kind {
+ LiteralKind::Int { .. } => (LitKind::Integer, 0, 0),
+ LiteralKind::Float { .. } => (LitKind::Float, 0, 0),
+ LiteralKind::Char { terminated } => (LitKind::Char, 1, terminated as usize),
+ LiteralKind::Byte { terminated } => (LitKind::Byte, 2, terminated as usize),
+ LiteralKind::Str { terminated } => (LitKind::Str, 1, terminated as usize),
+ LiteralKind::ByteStr { terminated } => (LitKind::ByteStr, 2, terminated as usize),
+ LiteralKind::CStr { terminated } => (LitKind::CStr, 2, terminated as usize),
+ LiteralKind::RawStr { n_hashes } => (
+ LitKind::StrRaw(n_hashes.unwrap_or_default()),
+ 2 + n_hashes.unwrap_or_default() as usize,
+ 1 + n_hashes.unwrap_or_default() as usize,
+ ),
+ LiteralKind::RawByteStr { n_hashes } => (
+ LitKind::ByteStrRaw(n_hashes.unwrap_or_default()),
+ 3 + n_hashes.unwrap_or_default() as usize,
+ 1 + n_hashes.unwrap_or_default() as usize,
+ ),
+ LiteralKind::RawCStr { n_hashes } => (
+ LitKind::CStrRaw(n_hashes.unwrap_or_default()),
+ 3 + n_hashes.unwrap_or_default() as usize,
+ 1 + n_hashes.unwrap_or_default() as usize,
+ ),
+ };
+
+ let (lit, suffix) = text.split_at(suffix_start as usize);
+ let lit = &lit[start_offset..lit.len() - end_offset];
+ let suffix = match suffix {
+ "" | "_" => None,
+ suffix => Some(Box::new(suffix.into())),
+ };
+
+ Literal { span, text: lit.into(), kind, suffix }
+}
+
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Punct<S> {
pub char: char,