Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'crates/span/src/ast_id.rs')
-rw-r--r--crates/span/src/ast_id.rs923
1 files changed, 779 insertions, 144 deletions
diff --git a/crates/span/src/ast_id.rs b/crates/span/src/ast_id.rs
index 228fba1fa0..a9288ecd6f 100644
--- a/crates/span/src/ast_id.rs
+++ b/crates/span/src/ast_id.rs
@@ -4,137 +4,550 @@
//! Specifically, it enumerates all items in a file and uses position of a an
//! item as an ID. That way, id's don't change unless the set of items itself
//! changes.
+//!
+//! These IDs are tricky. If one of them invalidates, its interned ID invalidates,
+//! and this can cause *a lot* to be recomputed. For example, if you invalidate the ID
+//! of a struct, and that struct has an impl (any impl!) this will cause the `Self`
+//! type of the impl to invalidate, which will cause the all impls queries to be
+//! invalidated, which will cause every trait solve query in this crate *and* all
+//! transitive reverse dependencies to be invalidated, which is pretty much the worst
+//! thing that can happen incrementality wise.
+//!
+//! So we want these IDs to stay as stable as possible. For top-level items, we store
+//! their kind and name, which should be unique, but since they can still not be, we
+//! also store an index disambiguator. For nested items, we also store the ID of their
+//! parent. For macro calls, we store the macro name and an index. There aren't usually
+//! a lot of macro calls in item position, and invalidation in bodies is not much of
+//! a problem, so this should be enough.
use std::{
any::type_name,
fmt,
- hash::{BuildHasher, BuildHasherDefault, Hash, Hasher},
+ hash::{BuildHasher, Hash, Hasher},
marker::PhantomData,
};
use la_arena::{Arena, Idx, RawIdx};
-use rustc_hash::FxHasher;
-use syntax::{AstNode, AstPtr, SyntaxNode, SyntaxNodePtr, ast};
+use rustc_hash::{FxBuildHasher, FxHashMap};
+use syntax::{
+ AstNode, AstPtr, SyntaxKind, SyntaxNode, SyntaxNodePtr,
+ ast::{self, HasName},
+ match_ast,
+};
+
+// The first index is always the root node's AstId
+/// The root ast id always points to the encompassing file, using this in spans is discouraged as
+/// any range relative to it will be effectively absolute, ruining the entire point of anchored
+/// relative text ranges.
+pub const ROOT_ERASED_FILE_AST_ID: ErasedFileAstId =
+ ErasedFileAstId(pack_hash_index_and_kind(0, 0, ErasedFileAstIdKind::Root as u32));
+
+/// ErasedFileAstId used as the span for syntax node fixups. Any Span containing this file id is to be
+/// considered fake.
+pub const FIXUP_ERASED_FILE_AST_ID_MARKER: ErasedFileAstId =
+ ErasedFileAstId(pack_hash_index_and_kind(0, 0, ErasedFileAstIdKind::Fixup as u32));
-/// See crates\hir-expand\src\ast_id_map.rs
/// This is a type erased FileAstId.
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct ErasedFileAstId(u32);
+impl fmt::Debug for ErasedFileAstId {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let kind = self.kind();
+ macro_rules! kind {
+ ($($kind:ident),* $(,)?) => {
+ if false {
+ // Ensure we covered all variants.
+ match ErasedFileAstIdKind::Root {
+ $( ErasedFileAstIdKind::$kind => {} )*
+ }
+ unreachable!()
+ }
+ $( else if kind == ErasedFileAstIdKind::$kind as u32 {
+ stringify!($kind)
+ } )*
+ else {
+ "Unknown"
+ }
+ };
+ }
+ let kind = kind!(
+ Root,
+ Enum,
+ Struct,
+ Union,
+ ExternCrate,
+ MacroDef,
+ MacroRules,
+ Module,
+ Static,
+ Trait,
+ TraitAlias,
+ Variant,
+ Const,
+ Fn,
+ MacroCall,
+ TypeAlias,
+ ExternBlock,
+ Use,
+ Impl,
+ BlockExpr,
+ AsmExpr,
+ Fixup,
+ );
+ if f.alternate() {
+ write!(f, "{kind}[{:04X}, {}]", self.hash_value(), self.index())
+ } else {
+ f.debug_struct("ErasedFileAstId")
+ .field("kind", &format_args!("{kind}"))
+ .field("index", &self.index())
+ .field("hash", &format_args!("{:04X}", self.hash_value()))
+ .finish()
+ }
+ }
+}
+
+#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
+#[repr(u8)]
+enum ErasedFileAstIdKind {
+ /// This needs to not change because it's depended upon by the proc macro server.
+ Fixup = 0,
+ // The following are associated with `ErasedHasNameFileAstId`.
+ Enum,
+ Struct,
+ Union,
+ ExternCrate,
+ MacroDef,
+ MacroRules,
+ Module,
+ Static,
+ Trait,
+ TraitAlias,
+ // Until here associated with `ErasedHasNameFileAstId`.
+ // The following are associated with `ErasedAssocItemFileAstId`.
+ Variant,
+ Const,
+ Fn,
+ MacroCall,
+ TypeAlias,
+ // Until here associated with `ErasedAssocItemFileAstId`.
+ // Extern blocks don't really have any identifying property unfortunately.
+ ExternBlock,
+ // FIXME: If we store the final `UseTree` instead of the top-level `Use`, we can store its name,
+ // and be way more granular for incrementality, at the expense of increased memory usage.
+ // Use IDs aren't used a lot. The main thing that stores them is the def map. So everything that
+ // uses the def map will be invalidated. That includes infers, and so is pretty bad, but our
+ // def map incrementality story is pretty bad anyway and needs to be improved (see
+ // https://rust-lang.zulipchat.com/#narrow/channel/185405-t-compiler.2Frust-analyzer/topic/.60infer.60.20queries.20and.20splitting.20.60DefMap.60).
+ // So I left this as-is for now, as the def map improvement should also mitigate this.
+ Use,
+ /// Associated with [`ImplFileAstId`].
+ Impl,
+ /// Associated with [`BlockExprFileAstId`].
+ BlockExpr,
+ // `global_asm!()` is an item, so we need to give it an `AstId`. So we give to all inline asm
+ // because incrementality is not a problem, they will always be the only item in the macro file,
+ // and memory usage also not because they're rare.
+ AsmExpr,
+ /// Keep this last.
+ Root,
+}
+
+// First hash, then index, then kind.
+const HASH_BITS: u32 = 16;
+const INDEX_BITS: u32 = 11;
+const KIND_BITS: u32 = 5;
+const _: () = assert!(ErasedFileAstIdKind::Fixup as u32 <= ((1 << KIND_BITS) - 1));
+const _: () = assert!(HASH_BITS + INDEX_BITS + KIND_BITS == u32::BITS);
+
+#[inline]
+const fn u16_hash(hash: u64) -> u16 {
+ // We do basically the same as `FxHasher`. We don't use rustc-hash and truncate because the
+ // higher bits have more entropy, but unlike rustc-hash we don't rotate because it rotates
+ // for hashmaps that just use the low bits, but we compare all bits.
+ const K: u16 = 0xecc5;
+ let (part1, part2, part3, part4) =
+ (hash as u16, (hash >> 16) as u16, (hash >> 32) as u16, (hash >> 48) as u16);
+ part1
+ .wrapping_add(part2)
+ .wrapping_mul(K)
+ .wrapping_add(part3)
+ .wrapping_mul(K)
+ .wrapping_add(part4)
+ .wrapping_mul(K)
+}
+
+#[inline]
+const fn pack_hash_index_and_kind(hash: u16, index: u32, kind: u32) -> u32 {
+ (hash as u32) | (index << HASH_BITS) | (kind << (HASH_BITS + INDEX_BITS))
+}
+
impl ErasedFileAstId {
- pub const fn into_raw(self) -> u32 {
- self.0
+ #[inline]
+ fn hash_value(self) -> u16 {
+ self.0 as u16
}
- pub const fn from_raw(u32: u32) -> Self {
- Self(u32)
+
+ #[inline]
+ fn index(self) -> u32 {
+ (self.0 << KIND_BITS) >> (HASH_BITS + KIND_BITS)
}
-}
-impl fmt::Display for ErasedFileAstId {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- self.0.fmt(f)
+ #[inline]
+ fn kind(self) -> u32 {
+ self.0 >> (HASH_BITS + INDEX_BITS)
}
-}
-impl fmt::Debug for ErasedFileAstId {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- self.0.fmt(f)
+
+ fn ast_id_for(
+ node: &SyntaxNode,
+ index_map: &mut ErasedAstIdNextIndexMap,
+ parent: Option<&ErasedFileAstId>,
+ ) -> Option<ErasedFileAstId> {
+ // Blocks are deliberately not here - we only want to allocate a block if it contains items.
+ has_name_ast_id(node, index_map)
+ .or_else(|| assoc_item_ast_id(node, index_map, parent))
+ .or_else(|| extern_block_ast_id(node, index_map))
+ .or_else(|| use_ast_id(node, index_map))
+ .or_else(|| impl_ast_id(node, index_map))
+ .or_else(|| asm_expr_ast_id(node, index_map))
+ }
+
+ fn should_alloc(node: &SyntaxNode) -> bool {
+ let kind = node.kind();
+ should_alloc_has_name(kind)
+ || should_alloc_assoc_item(kind)
+ || ast::ExternBlock::can_cast(kind)
+ || ast::Use::can_cast(kind)
+ || ast::Impl::can_cast(kind)
+ || ast::AsmExpr::can_cast(kind)
+ }
+
+ #[inline]
+ pub fn into_raw(self) -> u32 {
+ self.0
+ }
+
+ #[inline]
+ pub const fn from_raw(v: u32) -> Self {
+ Self(v)
}
}
+pub trait AstIdNode: AstNode {}
+
/// `AstId` points to an AST node in a specific file.
-pub struct FileAstId<N: AstIdNode> {
+pub struct FileAstId<N> {
raw: ErasedFileAstId,
- covariant: PhantomData<fn() -> N>,
+ _marker: PhantomData<fn() -> N>,
}
-impl<N: AstIdNode> Clone for FileAstId<N> {
+/// Traits are manually implemented because `derive` adds redundant bounds.
+impl<N> Clone for FileAstId<N> {
+ #[inline]
fn clone(&self) -> FileAstId<N> {
*self
}
}
-impl<N: AstIdNode> Copy for FileAstId<N> {}
+impl<N> Copy for FileAstId<N> {}
-impl<N: AstIdNode> PartialEq for FileAstId<N> {
+impl<N> PartialEq for FileAstId<N> {
fn eq(&self, other: &Self) -> bool {
self.raw == other.raw
}
}
-impl<N: AstIdNode> Eq for FileAstId<N> {}
-impl<N: AstIdNode> Hash for FileAstId<N> {
+impl<N> Eq for FileAstId<N> {}
+impl<N> Hash for FileAstId<N> {
fn hash<H: Hasher>(&self, hasher: &mut H) {
self.raw.hash(hasher);
}
}
-impl<N: AstIdNode> fmt::Debug for FileAstId<N> {
+impl<N> fmt::Debug for FileAstId<N> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- write!(f, "FileAstId::<{}>({})", type_name::<N>(), self.raw)
+ write!(f, "FileAstId::<{}>({:?})", type_name::<N>(), self.raw)
}
}
-impl<N: AstIdNode> FileAstId<N> {
+impl<N> FileAstId<N> {
// Can't make this a From implementation because of coherence
+ #[inline]
pub fn upcast<M: AstIdNode>(self) -> FileAstId<M>
where
N: Into<M>,
{
- FileAstId { raw: self.raw, covariant: PhantomData }
+ FileAstId { raw: self.raw, _marker: PhantomData }
}
+ #[inline]
pub fn erase(self) -> ErasedFileAstId {
self.raw
}
}
-pub trait AstIdNode: AstNode {}
-macro_rules! register_ast_id_node {
- (impl AstIdNode for $($ident:ident),+ ) => {
+#[derive(Hash)]
+struct ErasedHasNameFileAstId<'a> {
+ name: &'a str,
+}
+
+/// This holds the ast ID for variants too (they're a kind of assoc item).
+#[derive(Hash)]
+struct ErasedAssocItemFileAstId<'a> {
+ /// Subtle: items in `extern` blocks **do not** store the ID of the extern block here.
+ /// Instead this is left empty. The reason is that `ExternBlockFileAstId` is pretty unstable
+ /// (it contains only an index), and extern blocks don't introduce a new scope, so storing
+ /// the extern block ID will do more harm to incrementality than help.
+ parent: Option<ErasedFileAstId>,
+ properties: ErasedHasNameFileAstId<'a>,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+struct ImplFileAstId<'a> {
+ /// This can be `None` if the `Self` type is not a named type, or if it is inside a macro call.
+ self_ty_name: Option<&'a str>,
+ /// This can be `None` if this is an inherent impl, or if the trait name is inside a macro call.
+ trait_name: Option<&'a str>,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+struct BlockExprFileAstId {
+ parent: Option<ErasedFileAstId>,
+}
+
+impl AstIdNode for ast::ExternBlock {}
+
+fn extern_block_ast_id(
+ node: &SyntaxNode,
+ index_map: &mut ErasedAstIdNextIndexMap,
+) -> Option<ErasedFileAstId> {
+ if ast::ExternBlock::can_cast(node.kind()) {
+ Some(index_map.new_id(ErasedFileAstIdKind::ExternBlock, ()))
+ } else {
+ None
+ }
+}
+
+impl AstIdNode for ast::Use {}
+
+fn use_ast_id(
+ node: &SyntaxNode,
+ index_map: &mut ErasedAstIdNextIndexMap,
+) -> Option<ErasedFileAstId> {
+ if ast::Use::can_cast(node.kind()) {
+ Some(index_map.new_id(ErasedFileAstIdKind::Use, ()))
+ } else {
+ None
+ }
+}
+
+impl AstIdNode for ast::AsmExpr {}
+
+fn asm_expr_ast_id(
+ node: &SyntaxNode,
+ index_map: &mut ErasedAstIdNextIndexMap,
+) -> Option<ErasedFileAstId> {
+ if ast::AsmExpr::can_cast(node.kind()) {
+ Some(index_map.new_id(ErasedFileAstIdKind::AsmExpr, ()))
+ } else {
+ None
+ }
+}
+
+impl AstIdNode for ast::Impl {}
+
+fn impl_ast_id(
+ node: &SyntaxNode,
+ index_map: &mut ErasedAstIdNextIndexMap,
+) -> Option<ErasedFileAstId> {
+ if let Some(node) = ast::Impl::cast(node.clone()) {
+ let type_as_name = |ty: Option<ast::Type>| match ty? {
+ ast::Type::PathType(it) => Some(it.path()?.segment()?.name_ref()?),
+ _ => None,
+ };
+ let self_ty_name = type_as_name(node.self_ty());
+ let trait_name = type_as_name(node.trait_());
+ let data = ImplFileAstId {
+ self_ty_name: self_ty_name.as_ref().map(|it| it.text_non_mutable()),
+ trait_name: trait_name.as_ref().map(|it| it.text_non_mutable()),
+ };
+ Some(index_map.new_id(ErasedFileAstIdKind::Impl, data))
+ } else {
+ None
+ }
+}
+
+// Blocks aren't `AstIdNode`s deliberately, because unlike other nodes, not all blocks get their own
+// ast id, only if they have items. To account for that we have a different, fallible, API for blocks.
+// impl !AstIdNode for ast::BlockExpr {}
+
+fn block_expr_ast_id(
+ node: &SyntaxNode,
+ index_map: &mut ErasedAstIdNextIndexMap,
+ parent: Option<&ErasedFileAstId>,
+) -> Option<ErasedFileAstId> {
+ if ast::BlockExpr::can_cast(node.kind()) {
+ Some(
+ index_map.new_id(
+ ErasedFileAstIdKind::BlockExpr,
+ BlockExprFileAstId { parent: parent.copied() },
+ ),
+ )
+ } else {
+ None
+ }
+}
+
+#[derive(Default)]
+struct ErasedAstIdNextIndexMap(FxHashMap<(ErasedFileAstIdKind, u16), u32>);
+
+impl ErasedAstIdNextIndexMap {
+ #[inline]
+ fn new_id(&mut self, kind: ErasedFileAstIdKind, data: impl Hash) -> ErasedFileAstId {
+ let hash = FxBuildHasher.hash_one(&data);
+ let initial_hash = u16_hash(hash);
+ // Even though 2^INDEX_BITS=2048 items with the same hash seems like a lot,
+ // it could happen with macro calls or `use`s in macro-generated files. So we want
+ // to handle it gracefully. We just increment the hash.
+ let mut hash = initial_hash;
+ let index = loop {
+ match self.0.entry((kind, hash)) {
+ std::collections::hash_map::Entry::Occupied(mut entry) => {
+ let i = entry.get_mut();
+ if *i < ((1 << INDEX_BITS) - 1) {
+ *i += 1;
+ break *i;
+ }
+ }
+ std::collections::hash_map::Entry::Vacant(entry) => {
+ entry.insert(0);
+ break 0;
+ }
+ }
+ hash = hash.wrapping_add(1);
+ if hash == initial_hash {
+ // That's 2^27=134,217,728 items!
+ panic!("you have way too many items in the same file!");
+ }
+ };
+ let kind = kind as u32;
+ ErasedFileAstId(pack_hash_index_and_kind(hash, index, kind))
+ }
+}
+
+macro_rules! register_enum_ast_id {
+ (impl $AstIdNode:ident for $($ident:ident),+ ) => {
+ $(
+ impl $AstIdNode for ast::$ident {}
+ )+
+ };
+}
+register_enum_ast_id! {
+ impl AstIdNode for
+ Item, AnyHasGenericParams, Adt, Macro,
+ AssocItem
+}
+
+macro_rules! register_has_name_ast_id {
+ (impl $AstIdNode:ident for $($ident:ident = $name_method:ident),+ ) => {
$(
- impl AstIdNode for ast::$ident {}
+ impl $AstIdNode for ast::$ident {}
)+
- fn should_alloc_id(kind: syntax::SyntaxKind) -> bool {
- $(
- ast::$ident::can_cast(kind)
- )||+
+
+ fn has_name_ast_id(node: &SyntaxNode, index_map: &mut ErasedAstIdNextIndexMap) -> Option<ErasedFileAstId> {
+ match_ast! {
+ match node {
+ $(
+ ast::$ident(node) => {
+ let name = node.$name_method();
+ let name = name.as_ref().map_or("", |it| it.text_non_mutable());
+ let result = ErasedHasNameFileAstId {
+ name,
+ };
+ Some(index_map.new_id(ErasedFileAstIdKind::$ident, result))
+ },
+ )*
+ _ => None,
+ }
+ }
+ }
+
+ fn should_alloc_has_name(kind: SyntaxKind) -> bool {
+ false $( || ast::$ident::can_cast(kind) )*
}
};
}
-register_ast_id_node! {
+register_has_name_ast_id! {
impl AstIdNode for
- Item, AnyHasGenericParams,
- Adt,
- Enum,
- Variant,
- Struct,
- Union,
- AssocItem,
- Const,
- Fn,
- MacroCall,
- TypeAlias,
- ExternBlock,
- ExternCrate,
- Impl,
- Macro,
- MacroDef,
- MacroRules,
- Module,
- Static,
- Trait,
- TraitAlias,
- Use,
- BlockExpr, ConstArg
+ Enum = name,
+ Struct = name,
+ Union = name,
+ ExternCrate = name_ref,
+ MacroDef = name,
+ MacroRules = name,
+ Module = name,
+ Static = name,
+ Trait = name,
+ TraitAlias = name
+}
+
+macro_rules! register_assoc_item_ast_id {
+ (impl $AstIdNode:ident for $($ident:ident = $name_callback:expr),+ ) => {
+ $(
+ impl $AstIdNode for ast::$ident {}
+ )+
+
+ fn assoc_item_ast_id(
+ node: &SyntaxNode,
+ index_map: &mut ErasedAstIdNextIndexMap,
+ parent: Option<&ErasedFileAstId>,
+ ) -> Option<ErasedFileAstId> {
+ match_ast! {
+ match node {
+ $(
+ ast::$ident(node) => {
+ let name = $name_callback(node);
+ let name = name.as_ref().map_or("", |it| it.text_non_mutable());
+ let properties = ErasedHasNameFileAstId {
+ name,
+ };
+ let result = ErasedAssocItemFileAstId {
+ parent: parent.copied(),
+ properties,
+ };
+ Some(index_map.new_id(ErasedFileAstIdKind::$ident, result))
+ },
+ )*
+ _ => None,
+ }
+ }
+ }
+
+ fn should_alloc_assoc_item(kind: SyntaxKind) -> bool {
+ false $( || ast::$ident::can_cast(kind) )*
+ }
+ };
+}
+register_assoc_item_ast_id! {
+ impl AstIdNode for
+ Variant = |it: ast::Variant| it.name(),
+ Const = |it: ast::Const| it.name(),
+ Fn = |it: ast::Fn| it.name(),
+ MacroCall = |it: ast::MacroCall| it.path().and_then(|path| path.segment()?.name_ref()),
+ TypeAlias = |it: ast::TypeAlias| it.name()
}
/// Maps items' `SyntaxNode`s to `ErasedFileAstId`s and back.
#[derive(Default)]
pub struct AstIdMap {
- /// Maps stable id to unstable ptr.
- arena: Arena<SyntaxNodePtr>,
- /// Reverse: map ptr to id.
- map: hashbrown::HashTable<Idx<SyntaxNodePtr>>,
+ /// An arena of the ptrs and their associated ID.
+ arena: Arena<(SyntaxNodePtr, ErasedFileAstId)>,
+ /// Map ptr to id.
+ ptr_map: hashbrown::HashTable<ArenaId>,
+ /// Map id to ptr.
+ id_map: hashbrown::HashTable<ArenaId>,
}
+type ArenaId = Idx<(SyntaxNodePtr, ErasedFileAstId)>;
+
impl fmt::Debug for AstIdMap {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("AstIdMap").field("arena", &self.arena).finish()
@@ -148,31 +561,116 @@ impl PartialEq for AstIdMap {
}
impl Eq for AstIdMap {}
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum ContainsItems {
+ Yes,
+ No,
+}
+
impl AstIdMap {
pub fn from_source(node: &SyntaxNode) -> AstIdMap {
assert!(node.parent().is_none());
let mut res = AstIdMap::default();
+ let mut index_map = ErasedAstIdNextIndexMap::default();
+
+ // Ensure we allocate the root.
+ res.arena.alloc((SyntaxNodePtr::new(node), ROOT_ERASED_FILE_AST_ID));
- // make sure to allocate the root node
- if !should_alloc_id(node.kind()) {
- res.alloc(node);
- }
// By walking the tree in breadth-first order we make sure that parents
// get lower ids then children. That is, adding a new child does not
// change parent's id. This means that, say, adding a new function to a
// trait does not change ids of top-level items, which helps caching.
- bdfs(node, |it| {
- if should_alloc_id(it.kind()) {
- res.alloc(&it);
- TreeOrder::BreadthFirst
- } else {
- TreeOrder::DepthFirst
+
+ // This contains the stack of the `BlockExpr`s we are under. We do this
+ // so we only allocate `BlockExpr`s if they contain items.
+ // The general idea is: when we enter a block we push `(block, false)` here.
+ // Items inside the block are attributed to the block's container, not the block.
+ // For the first item we find inside a block, we make this `(block, true)`
+ // and create an ast id for the block. When exiting the block we pop it,
+ // whether or not we created an ast id for it.
+ // It may seem that with this setup we will generate an ID for blocks that
+ // have no items directly but have items inside other items inside them.
+ // This is true, but it doesn't matter, because such blocks can't exist.
+ // After all, the block will then contain the *outer* item, so we allocate
+ // an ID for it anyway.
+ let mut blocks = Vec::new();
+ let mut curr_layer = vec![(node.clone(), None)];
+ let mut next_layer = vec![];
+ while !curr_layer.is_empty() {
+ curr_layer.drain(..).for_each(|(node, parent_idx)| {
+ let mut preorder = node.preorder();
+ while let Some(event) = preorder.next() {
+ match event {
+ syntax::WalkEvent::Enter(node) => {
+ if ast::BlockExpr::can_cast(node.kind()) {
+ blocks.push((node, ContainsItems::No));
+ } else if ErasedFileAstId::should_alloc(&node) {
+ // Allocate blocks on-demand, only if they have items.
+ // We don't associate items with blocks, only with items, since block IDs can be quite unstable.
+ // FIXME: Is this the correct thing to do? Macro calls might actually be more incremental if
+ // associated with blocks (not sure). Either way it's not a big deal.
+ if let Some((
+ last_block_node,
+ already_allocated @ ContainsItems::No,
+ )) = blocks.last_mut()
+ {
+ let block_ast_id = block_expr_ast_id(
+ last_block_node,
+ &mut index_map,
+ parent_of(parent_idx, &res),
+ )
+ .expect("not a BlockExpr");
+ res.arena
+ .alloc((SyntaxNodePtr::new(last_block_node), block_ast_id));
+ *already_allocated = ContainsItems::Yes;
+ }
+
+ let parent = parent_of(parent_idx, &res);
+ let ast_id =
+ ErasedFileAstId::ast_id_for(&node, &mut index_map, parent)
+ .expect("this node should have an ast id");
+ let idx = res.arena.alloc((SyntaxNodePtr::new(&node), ast_id));
+
+ next_layer.extend(node.children().map(|child| (child, Some(idx))));
+ preorder.skip_subtree();
+ }
+ }
+ syntax::WalkEvent::Leave(node) => {
+ if ast::BlockExpr::can_cast(node.kind()) {
+ assert_eq!(
+ blocks.pop().map(|it| it.0),
+ Some(node),
+ "left a BlockExpr we never entered"
+ );
+ }
+ }
+ }
+ }
+ });
+ std::mem::swap(&mut curr_layer, &mut next_layer);
+ assert!(blocks.is_empty(), "didn't leave all BlockExprs");
+ }
+
+ res.ptr_map = hashbrown::HashTable::with_capacity(res.arena.len());
+ res.id_map = hashbrown::HashTable::with_capacity(res.arena.len());
+ for (idx, (ptr, ast_id)) in res.arena.iter() {
+ let ptr_hash = hash_ptr(ptr);
+ let ast_id_hash = hash_ast_id(ast_id);
+ match res.ptr_map.entry(
+ ptr_hash,
+ |idx2| *idx2 == idx,
+ |&idx| hash_ptr(&res.arena[idx].0),
+ ) {
+ hashbrown::hash_table::Entry::Occupied(_) => unreachable!(),
+ hashbrown::hash_table::Entry::Vacant(entry) => {
+ entry.insert(idx);
+ }
}
- });
- res.map = hashbrown::HashTable::with_capacity(res.arena.len());
- for (idx, ptr) in res.arena.iter() {
- let hash = hash_ptr(ptr);
- match res.map.entry(hash, |&idx2| idx2 == idx, |&idx| hash_ptr(&res.arena[idx])) {
+ match res.id_map.entry(
+ ast_id_hash,
+ |idx2| *idx2 == idx,
+ |&idx| hash_ast_id(&res.arena[idx].1),
+ ) {
hashbrown::hash_table::Entry::Occupied(_) => unreachable!(),
hashbrown::hash_table::Entry::Vacant(entry) => {
entry.insert(idx);
@@ -180,98 +678,235 @@ impl AstIdMap {
}
}
res.arena.shrink_to_fit();
- res
+ return res;
+
+ fn parent_of(parent_idx: Option<ArenaId>, res: &AstIdMap) -> Option<&ErasedFileAstId> {
+ let mut parent = parent_idx.map(|parent_idx| &res.arena[parent_idx].1);
+ if parent.is_some_and(|parent| parent.kind() == ErasedFileAstIdKind::ExternBlock as u32)
+ {
+ // See the comment on `ErasedAssocItemFileAstId` for why is this.
+ // FIXME: Technically there could be an extern block inside another item, e.g.:
+ // ```
+ // fn foo() {
+ // extern "C" {
+ // fn bar();
+ // }
+ // }
+ // ```
+ // Here we want to make `foo()` the parent of `bar()`, but we make it `None`.
+ // Shouldn't be a big deal though.
+ parent = None;
+ }
+ parent
+ }
}
/// The [`AstId`] of the root node
pub fn root(&self) -> SyntaxNodePtr {
- self.arena[Idx::from_raw(RawIdx::from_u32(0))]
+ self.arena[Idx::from_raw(RawIdx::from_u32(0))].0
}
pub fn ast_id<N: AstIdNode>(&self, item: &N) -> FileAstId<N> {
- let raw = self.erased_ast_id(item.syntax());
- FileAstId { raw, covariant: PhantomData }
+ self.ast_id_for_ptr(AstPtr::new(item))
+ }
+
+ /// Blocks may not be allocated (if they have no items), so they have a different API.
+ pub fn ast_id_for_block(&self, block: &ast::BlockExpr) -> Option<FileAstId<ast::BlockExpr>> {
+ self.ast_id_for_ptr_for_block(AstPtr::new(block))
}
pub fn ast_id_for_ptr<N: AstIdNode>(&self, ptr: AstPtr<N>) -> FileAstId<N> {
let ptr = ptr.syntax_node_ptr();
- let hash = hash_ptr(&ptr);
- match self.map.find(hash, |&idx| self.arena[idx] == ptr) {
- Some(&raw) => FileAstId {
- raw: ErasedFileAstId(raw.into_raw().into_u32()),
- covariant: PhantomData,
- },
- None => panic!(
- "Can't find {:?} in AstIdMap:\n{:?}",
+ FileAstId { raw: self.erased_ast_id(ptr), _marker: PhantomData }
+ }
+
+ /// Blocks may not be allocated (if they have no items), so they have a different API.
+ pub fn ast_id_for_ptr_for_block(
+ &self,
+ ptr: AstPtr<ast::BlockExpr>,
+ ) -> Option<FileAstId<ast::BlockExpr>> {
+ let ptr = ptr.syntax_node_ptr();
+ self.try_erased_ast_id(ptr).map(|raw| FileAstId { raw, _marker: PhantomData })
+ }
+
+ fn erased_ast_id(&self, ptr: SyntaxNodePtr) -> ErasedFileAstId {
+ self.try_erased_ast_id(ptr).unwrap_or_else(|| {
+ panic!(
+ "Can't find SyntaxNodePtr {:?} in AstIdMap:\n{:?}",
ptr,
self.arena.iter().map(|(_id, i)| i).collect::<Vec<_>>(),
- ),
- }
+ )
+ })
}
- pub fn get<N: AstIdNode>(&self, id: FileAstId<N>) -> AstPtr<N> {
- AstPtr::try_from_raw(self.arena[Idx::from_raw(RawIdx::from_u32(id.raw.into_raw()))])
- .unwrap()
+ fn try_erased_ast_id(&self, ptr: SyntaxNodePtr) -> Option<ErasedFileAstId> {
+ let hash = hash_ptr(&ptr);
+ let idx = *self.ptr_map.find(hash, |&idx| self.arena[idx].0 == ptr)?;
+ Some(self.arena[idx].1)
}
- pub fn get_erased(&self, id: ErasedFileAstId) -> SyntaxNodePtr {
- self.arena[Idx::from_raw(RawIdx::from_u32(id.into_raw()))]
+ // Don't bound on `AstIdNode` here, because `BlockExpr`s are also valid here (`ast::BlockExpr`
+ // doesn't always have a matching `FileAstId`, but a `FileAstId<ast::BlockExpr>` always has
+ // a matching node).
+ pub fn get<N: AstNode>(&self, id: FileAstId<N>) -> AstPtr<N> {
+ let ptr = self.get_erased(id.raw);
+ AstPtr::try_from_raw(ptr)
+ .unwrap_or_else(|| panic!("AstIdMap node mismatch with node `{ptr:?}`"))
}
- fn erased_ast_id(&self, item: &SyntaxNode) -> ErasedFileAstId {
- let ptr = SyntaxNodePtr::new(item);
- let hash = hash_ptr(&ptr);
- match self.map.find(hash, |&idx| self.arena[idx] == ptr) {
- Some(&idx) => ErasedFileAstId(idx.into_raw().into_u32()),
+ pub fn get_erased(&self, id: ErasedFileAstId) -> SyntaxNodePtr {
+ let hash = hash_ast_id(&id);
+ match self.id_map.find(hash, |&idx| self.arena[idx].1 == id) {
+ Some(&idx) => self.arena[idx].0,
None => panic!(
- "Can't find {:?} in AstIdMap:\n{:?}\n source text: {}",
- item,
+ "Can't find ast id {:?} in AstIdMap:\n{:?}",
+ id,
self.arena.iter().map(|(_id, i)| i).collect::<Vec<_>>(),
- item
),
}
}
-
- fn alloc(&mut self, item: &SyntaxNode) -> ErasedFileAstId {
- ErasedFileAstId(self.arena.alloc(SyntaxNodePtr::new(item)).into_raw().into_u32())
- }
}
+#[inline]
fn hash_ptr(ptr: &SyntaxNodePtr) -> u64 {
- BuildHasherDefault::<FxHasher>::default().hash_one(ptr)
-}
-
-#[derive(Copy, Clone, PartialEq, Eq)]
-enum TreeOrder {
- BreadthFirst,
- DepthFirst,
-}
-
-/// Walks the subtree in bdfs order, calling `f` for each node. What is bdfs
-/// order? It is a mix of breadth-first and depth first orders. Nodes for which
-/// `f` returns [`TreeOrder::BreadthFirst`] are visited breadth-first, all the other nodes are explored
-/// [`TreeOrder::DepthFirst`].
-///
-/// In other words, the size of the bfs queue is bound by the number of "true"
-/// nodes.
-fn bdfs(node: &SyntaxNode, mut f: impl FnMut(SyntaxNode) -> TreeOrder) {
- let mut curr_layer = vec![node.clone()];
- let mut next_layer = vec![];
- while !curr_layer.is_empty() {
- curr_layer.drain(..).for_each(|node| {
- let mut preorder = node.preorder();
- while let Some(event) = preorder.next() {
- match event {
- syntax::WalkEvent::Enter(node) => {
- if f(node.clone()) == TreeOrder::BreadthFirst {
- next_layer.extend(node.children());
- preorder.skip_subtree();
- }
- }
- syntax::WalkEvent::Leave(_) => {}
- }
+ FxBuildHasher.hash_one(ptr)
+}
+
+#[inline]
+fn hash_ast_id(ptr: &ErasedFileAstId) -> u64 {
+ FxBuildHasher.hash_one(ptr)
+}
+
+#[cfg(test)]
+mod tests {
+ use syntax::{AstNode, Edition, SourceFile, SyntaxKind, SyntaxNodePtr, WalkEvent, ast};
+
+ use crate::AstIdMap;
+
+ #[test]
+ fn check_all_nodes() {
+ let syntax = SourceFile::parse(
+ r#"
+extern crate foo;
+fn foo() {
+ union U {}
+}
+struct S;
+macro_rules! m {}
+macro m2() {}
+trait Trait {}
+impl Trait for S {}
+impl S {}
+impl m!() {}
+impl m2!() for m!() {}
+type T = i32;
+enum E {
+ V1(),
+ V2 {},
+ V3,
+}
+struct S; // duplicate
+extern "C" {
+ static S: i32;
+}
+static mut S: i32 = 0;
+const FOO: i32 = 0;
+ "#,
+ Edition::CURRENT,
+ )
+ .syntax_node();
+ let ast_id_map = AstIdMap::from_source(&syntax);
+ for node in syntax.preorder() {
+ let WalkEvent::Enter(node) = node else { continue };
+ if !matches!(
+ node.kind(),
+ SyntaxKind::EXTERN_CRATE
+ | SyntaxKind::FN
+ | SyntaxKind::UNION
+ | SyntaxKind::STRUCT
+ | SyntaxKind::MACRO_RULES
+ | SyntaxKind::MACRO_DEF
+ | SyntaxKind::MACRO_CALL
+ | SyntaxKind::TRAIT
+ | SyntaxKind::IMPL
+ | SyntaxKind::TYPE_ALIAS
+ | SyntaxKind::ENUM
+ | SyntaxKind::VARIANT
+ | SyntaxKind::EXTERN_BLOCK
+ | SyntaxKind::STATIC
+ | SyntaxKind::CONST
+ ) {
+ continue;
}
- });
- std::mem::swap(&mut curr_layer, &mut next_layer);
+ let ptr = SyntaxNodePtr::new(&node);
+ let ast_id = ast_id_map.erased_ast_id(ptr);
+ let turn_back = ast_id_map.get_erased(ast_id);
+ assert_eq!(ptr, turn_back);
+ }
+ }
+
+ #[test]
+ fn different_names_get_different_hashes() {
+ let syntax = SourceFile::parse(
+ r#"
+fn foo() {}
+fn bar() {}
+ "#,
+ Edition::CURRENT,
+ )
+ .syntax_node();
+ let ast_id_map = AstIdMap::from_source(&syntax);
+ let fns = syntax.descendants().filter_map(ast::Fn::cast).collect::<Vec<_>>();
+ let [foo_fn, bar_fn] = fns.as_slice() else {
+ panic!("not exactly 2 functions");
+ };
+ let foo_fn_id = ast_id_map.ast_id(foo_fn);
+ let bar_fn_id = ast_id_map.ast_id(bar_fn);
+ assert_ne!(foo_fn_id.raw.hash_value(), bar_fn_id.raw.hash_value(), "hashes are equal");
+ }
+
+ #[test]
+ fn different_parents_get_different_hashes() {
+ let syntax = SourceFile::parse(
+ r#"
+fn foo() {
+ m!();
+}
+fn bar() {
+ m!();
+}
+ "#,
+ Edition::CURRENT,
+ )
+ .syntax_node();
+ let ast_id_map = AstIdMap::from_source(&syntax);
+ let macro_calls = syntax.descendants().filter_map(ast::MacroCall::cast).collect::<Vec<_>>();
+ let [macro_call_foo, macro_call_bar] = macro_calls.as_slice() else {
+ panic!("not exactly 2 macro calls");
+ };
+ let macro_call_foo_id = ast_id_map.ast_id(macro_call_foo);
+ let macro_call_bar_id = ast_id_map.ast_id(macro_call_bar);
+ assert_ne!(
+ macro_call_foo_id.raw.hash_value(),
+ macro_call_bar_id.raw.hash_value(),
+ "hashes are equal"
+ );
+ }
+
+ #[test]
+ fn blocks_with_no_items_have_no_id() {
+ let syntax = SourceFile::parse(
+ r#"
+fn foo() {
+ let foo = 1;
+ bar(foo);
+}
+ "#,
+ Edition::CURRENT,
+ )
+ .syntax_node();
+ let ast_id_map = AstIdMap::from_source(&syntax);
+ let block = syntax.descendants().find_map(ast::BlockExpr::cast).expect("no block");
+ assert!(ast_id_map.ast_id_for_block(&block).is_none());
}
}