Unnamed repository; edit this file 'description' to name the repository.
Merge pull request #22267 from ChayimFriedman2/lookup-table
perf: Improve performance of integer-based symbols
Chayim Refael Friedman 14 days ago
parent 868980d · parent 255249c · commit f7c0457
-rw-r--r-- Cargo.lock | 1
-rw-r--r-- Cargo.toml | 2
-rw-r--r-- crates/hir-expand/src/builtin/quote.rs | 6
-rw-r--r-- crates/hir-expand/src/name.rs | 56
-rw-r--r-- crates/ide-db/src/prime_caches.rs | 4
-rw-r--r-- crates/ide/src/navigation_target.rs | 2
-rw-r--r-- crates/intern/Cargo.toml | 1
-rw-r--r-- crates/intern/src/symbol.rs | 32
-rw-r--r-- crates/intern/src/symbol/symbols.rs | 119
-rw-r--r-- crates/mbe/src/expander/transcriber.rs | 6
10 files changed, 149 insertions, 80 deletions
diff --git a/Cargo.lock b/Cargo.lock
index f47bff58bd..be9a8c4915 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1231,6 +1231,7 @@ dependencies = [
name = "intern"
version = "0.0.0"
dependencies = [
+ "arrayvec",
"dashmap",
"hashbrown 0.14.5",
"rayon",
diff --git a/Cargo.toml b/Cargo.toml
index 4a93c26ff7..45fc727b13 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,7 +4,7 @@ exclude = ["crates/proc-macro-srv/proc-macro-test/imp"]
resolver = "2"
[workspace.package]
-rust-version = "1.91"
+rust-version = "1.95"
edition = "2024"
license = "MIT OR Apache-2.0"
authors = ["rust-analyzer team"]
diff --git a/crates/hir-expand/src/builtin/quote.rs b/crates/hir-expand/src/builtin/quote.rs
index 51c4e22516..d84756377f 100644
--- a/crates/hir-expand/src/builtin/quote.rs
+++ b/crates/hir-expand/src/builtin/quote.rs
@@ -199,9 +199,9 @@ impl<T: ToTokenTree + Clone> ToTokenTree for &T {
}
impl_to_to_tokentrees! {
- span: u32 => self { crate::tt::Literal{text_and_suffix: Symbol::integer(self as _), span, kind: tt::LitKind::Integer, suffix_len: 0 } };
- span: usize => self { crate::tt::Literal{text_and_suffix: Symbol::integer(self as _), span, kind: tt::LitKind::Integer, suffix_len: 0 } };
- span: i32 => self { crate::tt::Literal{text_and_suffix: Symbol::integer(self as _), span, kind: tt::LitKind::Integer, suffix_len: 0 } };
+ span: u32 => self { crate::tt::Literal{text_and_suffix: sym::Integer::get(self as _), span, kind: tt::LitKind::Integer, suffix_len: 0 } };
+ span: usize => self { crate::tt::Literal{text_and_suffix: sym::Integer::get(self as _), span, kind: tt::LitKind::Integer, suffix_len: 0 } };
+ span: i32 => self { crate::tt::Literal{text_and_suffix: sym::Integer::get(self as _), span, kind: tt::LitKind::Integer, suffix_len: 0 } };
span: bool => self { crate::tt::Ident{sym: if self { sym::true_ } else { sym::false_ }, span, is_raw: tt::IdentIsRaw::No } };
_span: crate::tt::Leaf => self { self };
_span: crate::tt::Literal => self { self };
diff --git a/crates/hir-expand/src/name.rs b/crates/hir-expand/src/name.rs
index 0408a6943d..3ddc305f95 100644
--- a/crates/hir-expand/src/name.rs
+++ b/crates/hir-expand/src/name.rs
@@ -33,12 +33,14 @@ impl fmt::Debug for Name {
}
impl Ord for Name {
+ #[inline]
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.symbol.as_str().cmp(other.symbol.as_str())
}
}
impl PartialOrd for Name {
+ #[inline]
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
@@ -46,74 +48,62 @@ impl PartialOrd for Name {
// No need to strip `r#`, all comparisons are done against well-known symbols.
impl PartialEq<Symbol> for Name {
+ #[inline]
fn eq(&self, sym: &Symbol) -> bool {
self.symbol == *sym
}
}
impl PartialEq<&Symbol> for Name {
+ #[inline]
fn eq(&self, &sym: &&Symbol) -> bool {
self.symbol == *sym
}
}
impl PartialEq<Name> for Symbol {
+ #[inline]
fn eq(&self, name: &Name) -> bool {
*self == name.symbol
}
}
impl PartialEq<Name> for &Symbol {
+ #[inline]
fn eq(&self, name: &Name) -> bool {
**self == name.symbol
}
}
impl Name {
+ #[inline]
fn new_text(text: &str) -> Name {
Name { symbol: Symbol::intern(text), ctx: () }
}
+ #[inline]
pub fn new(text: &str, mut ctx: SyntaxContext) -> Name {
// For comparisons etc. we remove the edition, because sometimes we search for some `Name`
// and we don't know which edition it came from.
// Can't do that for all `SyntaxContextId`s because it breaks Salsa.
ctx.remove_root_edition();
_ = ctx;
- match text.strip_prefix("r#") {
- Some(text) => Self::new_text(text),
- None => Self::new_text(text),
- }
+ let text = text.strip_prefix("r#").unwrap_or(text);
+ Self::new_text(text)
}
+ #[inline]
pub fn new_root(text: &str) -> Name {
// The edition doesn't matter for hygiene.
Self::new(text, SyntaxContext::root(Edition::Edition2015))
}
+ #[inline]
pub fn new_tuple_field(idx: usize) -> Name {
- let symbol = match idx {
- 0 => sym::INTEGER_0,
- 1 => sym::INTEGER_1,
- 2 => sym::INTEGER_2,
- 3 => sym::INTEGER_3,
- 4 => sym::INTEGER_4,
- 5 => sym::INTEGER_5,
- 6 => sym::INTEGER_6,
- 7 => sym::INTEGER_7,
- 8 => sym::INTEGER_8,
- 9 => sym::INTEGER_9,
- 10 => sym::INTEGER_10,
- 11 => sym::INTEGER_11,
- 12 => sym::INTEGER_12,
- 13 => sym::INTEGER_13,
- 14 => sym::INTEGER_14,
- 15 => sym::INTEGER_15,
- _ => Symbol::intern(&idx.to_string()),
- };
- Name { symbol, ctx: () }
+ Name::new_symbol_root(sym::Integer::get(idx))
}
+ #[inline]
pub fn new_lifetime(lt: &str) -> Name {
match lt.strip_prefix("'r#") {
Some(lt) => Self::new_text(&format_smolstr!("'{lt}")),
@@ -121,6 +111,7 @@ impl Name {
}
}
+ #[inline]
pub fn new_symbol(symbol: Symbol, ctx: SyntaxContext) -> Self {
debug_assert!(!symbol.as_str().starts_with("r#"));
_ = ctx;
@@ -128,6 +119,7 @@ impl Name {
}
// FIXME: This needs to go once we have hygiene
+ #[inline]
pub fn new_symbol_root(sym: Symbol) -> Self {
Self::new_symbol(sym, SyntaxContext::root(Edition::Edition2015))
}
@@ -141,6 +133,7 @@ impl Name {
/// Ideally, we want a `gensym` semantics for missing names -- each missing
/// name is equal only to itself. It's not clear how to implement this in
/// salsa though, so we punt on that bit for a moment.
+ #[inline]
pub const fn missing() -> Name {
Name { symbol: sym::MISSING_NAME, ctx: () }
}
@@ -150,23 +143,27 @@ impl Name {
///
/// Use this method instead of comparing with `Self::missing()` as missing names
/// (ideally should) have a `gensym` semantics.
+ #[inline]
pub fn is_missing(&self) -> bool {
- self == &Name::missing()
+ self.symbol == sym::MISSING_NAME
}
/// Generates a new name that attempts to be unique. Should only be used when body lowering and
/// creating desugared locals and labels. The caller is responsible for picking an index
/// that is stable across re-executions
+ #[inline]
pub fn generate_new_name(idx: usize) -> Name {
- Name::new_text(&format!("<ra@gennew>{idx}"))
+ Name::new_symbol_root(sym::RaGeneratedName::get(idx))
}
/// Returns the tuple index this name represents if it is a tuple field.
+ #[inline]
pub fn as_tuple_index(&self) -> Option<usize> {
- self.symbol.as_str().parse().ok()
+ sym::Integer::as_uint(&self.symbol)
}
/// Whether this name needs to be escaped in the given edition via `r#`.
+ #[inline]
pub fn needs_escape(&self, edition: Edition) -> bool {
is_raw_identifier(self.symbol.as_str(), edition)
}
@@ -175,10 +172,12 @@ impl Name {
///
/// Do not use this for user-facing text, use `display` instead to handle editions properly.
// FIXME: This should take a database argument to hide the interning
+ #[inline]
pub fn as_str(&self) -> &str {
self.symbol.as_str()
}
+ #[inline]
pub fn display<'a>(
&'a self,
db: &dyn crate::db::ExpandDatabase,
@@ -190,14 +189,17 @@ impl Name {
// FIXME: Remove this in favor of `display`, see fixme on `as_str`
#[doc(hidden)]
+ #[inline]
pub fn display_no_db(&self, edition: Edition) -> impl fmt::Display + '_ {
Display { name: self, edition }
}
+ #[inline]
pub fn symbol(&self) -> &Symbol {
&self.symbol
}
+ #[inline]
pub fn is_generated(&self) -> bool {
self.as_str().starts_with("<ra@gennew>")
}
diff --git a/crates/ide-db/src/prime_caches.rs b/crates/ide-db/src/prime_caches.rs
index 12a48d65ac..fb7edb1acd 100644
--- a/crates/ide-db/src/prime_caches.rs
+++ b/crates/ide-db/src/prime_caches.rs
@@ -5,7 +5,7 @@
use std::panic::AssertUnwindSafe;
use base_db::all_crates;
-use hir::{Symbol, import_map::ImportMap};
+use hir::{Symbol, import_map::ImportMap, sym};
use rustc_hash::FxHashMap;
use salsa::{Cancelled, Database};
@@ -315,5 +315,5 @@ fn crate_name(db: &RootDatabase, krate: Crate) -> Symbol {
.display_name
.as_deref()
.cloned()
- .unwrap_or_else(|| Symbol::integer(salsa::plumbing::AsId::as_id(&krate).index() as usize))
+ .unwrap_or_else(|| sym::Integer::get(salsa::plumbing::AsId::as_id(&krate).index() as usize))
}
diff --git a/crates/ide/src/navigation_target.rs b/crates/ide/src/navigation_target.rs
index 99f8634bcb..03af25a9c0 100644
--- a/crates/ide/src/navigation_target.rs
+++ b/crates/ide/src/navigation_target.rs
@@ -580,7 +580,7 @@ impl TryToNav for hir::Field {
|(FileRange { file_id, range: full_range }, focus_range)| {
NavigationTarget::from_syntax(
file_id,
- Symbol::integer(self.index()),
+ sym::Integer::get(self.index()),
focus_range,
full_range,
SymbolKind::Field,
diff --git a/crates/intern/Cargo.toml b/crates/intern/Cargo.toml
index 39320ebd1c..1e6e8ff32f 100644
--- a/crates/intern/Cargo.toml
+++ b/crates/intern/Cargo.toml
@@ -19,6 +19,7 @@ hashbrown.workspace = true
rustc-hash.workspace = true
triomphe.workspace = true
rayon.workspace = true
+arrayvec.workspace = true
[lints]
workspace = true
diff --git a/crates/intern/src/symbol.rs b/crates/intern/src/symbol.rs
index 8b2d6e8717..72d32d1017 100644
--- a/crates/intern/src/symbol.rs
+++ b/crates/intern/src/symbol.rs
@@ -73,10 +73,9 @@ impl TaggedArcPtr {
///
/// You can only drop the `Arc` if the instance is dropped.
#[inline]
- pub(crate) unsafe fn try_as_arc_owned(self) -> Option<ManuallyDrop<Arc<Box<str>>>> {
+ unsafe fn try_as_arc_owned(self) -> Option<ManuallyDrop<Arc<Box<str>>>> {
// Unpack the tag from the alignment niche
- let tag = self.packed.as_ptr().addr() & Self::BOOL_BITS;
- if tag != 0 {
+ if self.is_arc() {
// Safety: We checked that the tag is non-zero -> true, so we are pointing to the data offset of an `Arc`
Some(ManuallyDrop::new(unsafe {
Arc::from_raw(self.pointer().as_ptr().cast::<Box<str>>())
@@ -87,6 +86,11 @@ impl TaggedArcPtr {
}
#[inline]
+ fn is_arc(&self) -> bool {
+ (self.packed.as_ptr().addr() & Self::BOOL_BITS) != 0
+ }
+
+ #[inline]
fn pack_arc(ptr: NonNull<*const str>) -> NonNull<*const str> {
let packed_tag = true as usize;
@@ -161,28 +165,6 @@ impl Symbol {
unsafe { bucket.as_ref().0.clone() }
}
- pub fn integer(i: usize) -> Self {
- match i {
- 0 => symbols::INTEGER_0,
- 1 => symbols::INTEGER_1,
- 2 => symbols::INTEGER_2,
- 3 => symbols::INTEGER_3,
- 4 => symbols::INTEGER_4,
- 5 => symbols::INTEGER_5,
- 6 => symbols::INTEGER_6,
- 7 => symbols::INTEGER_7,
- 8 => symbols::INTEGER_8,
- 9 => symbols::INTEGER_9,
- 10 => symbols::INTEGER_10,
- 11 => symbols::INTEGER_11,
- 12 => symbols::INTEGER_12,
- 13 => symbols::INTEGER_13,
- 14 => symbols::INTEGER_14,
- 15 => symbols::INTEGER_15,
- i => Symbol::intern(&format!("{i}")),
- }
- }
-
pub fn empty() -> Self {
symbols::__empty
}
diff --git a/crates/intern/src/symbol/symbols.rs b/crates/intern/src/symbol/symbols.rs
index db3c3c5200..ac6daaf006 100644
--- a/crates/intern/src/symbol/symbols.rs
+++ b/crates/intern/src/symbol/symbols.rs
@@ -1,15 +1,78 @@
//! Module defining all known symbols required by the rest of rust-analyzer.
#![allow(non_upper_case_globals)]
-use std::hash::{BuildHasher, BuildHasherDefault};
+use std::{
+ hash::{BuildHasher, BuildHasherDefault},
+ ptr::NonNull,
+};
+use arrayvec::ArrayString;
use dashmap::{DashMap, SharedValue};
use rustc_hash::FxHasher;
use crate::{Symbol, symbol::TaggedArcPtr};
+macro_rules! last {
+ ( $($elems:literal,)+ ) => {
+ *[ $($elems,)* ].last().unwrap()
+ };
+}
+
+impl Integer {
+ #[inline]
+ pub fn as_uint(sym: &Symbol) -> Option<usize> {
+ if !sym.repr.is_arc() {
+ let elem_ref = sym.repr.pointer();
+ // SAFETY: The types have the same layout.
+ let elem_ref = unsafe { std::mem::transmute::<NonNull<*const str>, &&str>(elem_ref) };
+ Self::LIST.element_offset(elem_ref)
+ } else {
+ Self::as_uint_cold(sym)
+ }
+ }
+
+ #[cold]
+ fn as_uint_cold(sym: &Symbol) -> Option<usize> {
+ sym.as_str().parse().ok()
+ }
+}
+
macro_rules! define_symbols {
- (@WITH_NAME: $($alias:ident = $value:literal,)* @PLAIN: $($name:ident,)*) => {
+ (
+ @LISTS: $($list_type_name:ident = $list_prefix:literal + [ $($list_idx:literal,)+ ],)*
+ @WITH_NAME: $($alias:ident = $value:literal,)*
+ @PLAIN: $($name:ident,)*
+ ) => {
+ $(
+ pub enum $list_type_name {}
+ impl $list_type_name {
+ // Ensure we covered all numbers.
+ const LIST: &[&str; last!($($list_idx,)+) + 1] = {
+ static LIST: [&str; last!($($list_idx,)+) + 1] = [ $( concat!($list_prefix, $list_idx), )* ];
+ &LIST
+ };
+
+ #[cold]
+ #[inline(never)]
+ fn create(idx: usize) -> Symbol {
+ use std::fmt::Write;
+ const MAX_LEN: usize = $list_prefix.len() + u64::MAX.ilog10() as usize + 1;
+ let mut s = ArrayString::<MAX_LEN>::new();
+ s.push_str($list_prefix);
+ _ = write!(s, "{idx}");
+ Symbol::intern(&s)
+ }
+
+ #[inline]
+ pub fn get(idx: usize) -> Symbol {
+ match Self::LIST.get(idx) {
+ Option::Some(s) => Symbol { repr: TaggedArcPtr::non_arc(s) },
+ Option::None => Self::create(idx),
+ }
+ }
+ }
+ )*
+
// The strings should be in `static`s so that symbol equality holds.
$(
pub const $name: Symbol = {
@@ -32,6 +95,14 @@ macro_rules! define_symbols {
let hash_one = |it_: &str| hasher_.hash_one(it_);
{
$(
+ for s in $list_type_name::LIST {
+ let hash_ = hash_one(s);
+ let shard_idx_ = dashmap_.determine_shard(hash_ as usize);
+ let symbol = Symbol { repr: TaggedArcPtr::non_arc(s) };
+ dashmap_.shards_mut()[shard_idx_].get_mut().insert(hash_, (symbol, SharedValue::new(())), |(x, _)| hash_one(x.as_str()));
+ }
+ )*
+ $(
let s = stringify!($name);
let hash_ = hash_one(s);
let shard_idx_ = dashmap_.determine_shard(hash_ as usize);
@@ -49,25 +120,37 @@ macro_rules! define_symbols {
};
}
define_symbols! {
+ @LISTS:
+ Integer = "" + [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
+ 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75,
+ 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
+ 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
+ 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150,
+ 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200,
+ 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225,
+ 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250,
+ 251, 252, 253, 254, 255,
+ ],
+ RaGeneratedName = "<ra@gennew>" + [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
+ 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75,
+ 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
+ 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
+ 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150,
+ 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200,
+ 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225,
+ 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250,
+ 251, 252, 253, 254, 255,
+ ],
+
@WITH_NAME:
dotdotdot = "...",
- INTEGER_0 = "0",
- INTEGER_1 = "1",
- INTEGER_2 = "2",
- INTEGER_3 = "3",
- INTEGER_4 = "4",
- INTEGER_5 = "5",
- INTEGER_6 = "6",
- INTEGER_7 = "7",
- INTEGER_8 = "8",
- INTEGER_9 = "9",
- INTEGER_10 = "10",
- INTEGER_11 = "11",
- INTEGER_12 = "12",
- INTEGER_13 = "13",
- INTEGER_14 = "14",
- INTEGER_15 = "15",
__empty = "",
unsafe_ = "unsafe",
in_ = "in",
diff --git a/crates/mbe/src/expander/transcriber.rs b/crates/mbe/src/expander/transcriber.rs
index e8e7928c26..e135291d89 100644
--- a/crates/mbe/src/expander/transcriber.rs
+++ b/crates/mbe/src/expander/transcriber.rs
@@ -222,7 +222,7 @@ fn expand_subtree(
let index =
ctx.nesting.get(ctx.nesting.len() - 1 - depth).map_or(0, |nest| nest.idx);
builder.push(tt::Leaf::Literal(tt::Literal {
- text_and_suffix: Symbol::integer(index),
+ text_and_suffix: sym::Integer::get(index),
span: ctx.call_site,
kind: tt::LitKind::Integer,
suffix_len: 0,
@@ -234,7 +234,7 @@ fn expand_subtree(
0
});
builder.push(tt::Leaf::Literal(tt::Literal {
- text_and_suffix: Symbol::integer(length),
+ text_and_suffix: sym::Integer::get(length),
span: ctx.call_site,
kind: tt::LitKind::Integer,
suffix_len: 0,
@@ -278,7 +278,7 @@ fn expand_subtree(
let res = count(binding, 0, depth.unwrap_or(0));
builder.push(tt::Leaf::Literal(tt::Literal {
- text_and_suffix: Symbol::integer(res),
+ text_and_suffix: sym::Integer::get(res),
span: ctx.call_site,
suffix_len: 0,
kind: tt::LitKind::Integer,