rust-analyzer

Unnamed repository; edit this file 'description' to name the repository.

master 2Branches 0Tags

Clone

HTTPS

SSH

Open with VS Code

Diffstat (limited to 'crates/span/src/ast_id.rs')

-rw-r--r--

crates/span/src/ast_id.rs

923

1 files changed, 779 insertions, 144 deletions

diff --git a/crates/span/src/ast_id.rs b/crates/span/src/ast_id.rs
index 228fba1fa0..a9288ecd6f 100644
--- a/crates/span/src/ast_id.rs
+++ b/crates/span/src/ast_id.rs

@@ -4,137 +4,550 @@

//! Specifically, it enumerates all items in a file and uses position of a an

//! item as an ID. That way, id's don't change unless the set of items itself

//! changes.

+//!

+//! These IDs are tricky. If one of them invalidates, its interned ID invalidates,

+//! and this can cause *a lot* to be recomputed. For example, if you invalidate the ID

+//! of a struct, and that struct has an impl (any impl!) this will cause the `Self`

+//! type of the impl to invalidate, which will cause the all impls queries to be

+//! invalidated, which will cause every trait solve query in this crate *and* all

+//! transitive reverse dependencies to be invalidated, which is pretty much the worst

+//! thing that can happen incrementality wise.

+//!

+//! So we want these IDs to stay as stable as possible. For top-level items, we store

+//! their kind and name, which should be unique, but since they can still not be, we

+//! also store an index disambiguator. For nested items, we also store the ID of their

+//! parent. For macro calls, we store the macro name and an index. There aren't usually

+//! a lot of macro calls in item position, and invalidation in bodies is not much of

+//! a problem, so this should be enough.

use std::{

any::type_name,

fmt,

- hash::{BuildHasher, BuildHasherDefault, Hash, Hasher},

+ hash::{BuildHasher, Hash, Hasher},

marker::PhantomData,

};

use la_arena::{Arena, Idx, RawIdx};

-use rustc_hash::FxHasher;

-use syntax::{AstNode, AstPtr, SyntaxNode, SyntaxNodePtr, ast};

+use rustc_hash::{FxBuildHasher, FxHashMap};

+use syntax::{

+ AstNode, AstPtr, SyntaxKind, SyntaxNode, SyntaxNodePtr,

+ ast::{self, HasName},

+ match_ast,

+};

+// The first index is always the root node's AstId

+/// The root ast id always points to the encompassing file, using this in spans is discouraged as

+/// any range relative to it will be effectively absolute, ruining the entire point of anchored

+/// relative text ranges.

+pub const ROOT_ERASED_FILE_AST_ID: ErasedFileAstId =

+ ErasedFileAstId(pack_hash_index_and_kind(0, 0, ErasedFileAstIdKind::Root as u32));

+/// ErasedFileAstId used as the span for syntax node fixups. Any Span containing this file id is to be

+/// considered fake.

+pub const FIXUP_ERASED_FILE_AST_ID_MARKER: ErasedFileAstId =

+ ErasedFileAstId(pack_hash_index_and_kind(0, 0, ErasedFileAstIdKind::Fixup as u32));

-/// See crates\hir-expand\src\ast_id_map.rs

/// This is a type erased FileAstId.

-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]

+#[derive(Clone, Copy, PartialEq, Eq, Hash)]

pub struct ErasedFileAstId(u32);

+impl fmt::Debug for ErasedFileAstId {

+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {

+ let kind = self.kind();

+ macro_rules! kind {

+ ($($kind:ident),* $(,)?) => {

+ if false {

+ // Ensure we covered all variants.

+ match ErasedFileAstIdKind::Root {

+ $( ErasedFileAstIdKind::$kind => {} )*

+ }

+ unreachable!()

+ }

+ $( else if kind == ErasedFileAstIdKind::$kind as u32 {

+ stringify!($kind)

+ } )*

+ else {

+ "Unknown"

+ }

+ };

+ }

+ let kind = kind!(

+ Root,

+ Enum,

+ Struct,

+ Union,

+ ExternCrate,

+ MacroDef,

+ MacroRules,

+ Module,

+ Static,

+ Trait,

+ TraitAlias,

+ Variant,

+ Const,

+ Fn,

+ MacroCall,

+ TypeAlias,

+ ExternBlock,

+ Use,

+ Impl,

+ BlockExpr,

+ AsmExpr,

+ Fixup,

+ );

+ if f.alternate() {

+ write!(f, "{kind}[{:04X}, {}]", self.hash_value(), self.index())

+ } else {

+ f.debug_struct("ErasedFileAstId")

+ .field("kind", &format_args!("{kind}"))

+ .field("index", &self.index())

+ .field("hash", &format_args!("{:04X}", self.hash_value()))

+ .finish()

+ }

+#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]

+#[repr(u8)]

+enum ErasedFileAstIdKind {

+ /// This needs to not change because it's depended upon by the proc macro server.

+ Fixup = 0,

+ // The following are associated with `ErasedHasNameFileAstId`.

+ Enum,

+ Struct,

+ Union,

+ ExternCrate,

+ MacroDef,

+ MacroRules,

+ Module,

+ Static,

+ Trait,

+ TraitAlias,

+ // Until here associated with `ErasedHasNameFileAstId`.

+ // The following are associated with `ErasedAssocItemFileAstId`.

+ Variant,

+ Const,

+ Fn,

+ MacroCall,

+ TypeAlias,

+ // Until here associated with `ErasedAssocItemFileAstId`.

+ // Extern blocks don't really have any identifying property unfortunately.

+ ExternBlock,

+ // FIXME: If we store the final `UseTree` instead of the top-level `Use`, we can store its name,

+ // and be way more granular for incrementality, at the expense of increased memory usage.

+ // Use IDs aren't used a lot. The main thing that stores them is the def map. So everything that

+ // uses the def map will be invalidated. That includes infers, and so is pretty bad, but our

+ // def map incrementality story is pretty bad anyway and needs to be improved (see

+ // https://rust-lang.zulipchat.com/#narrow/channel/185405-t-compiler.2Frust-analyzer/topic/.60infer.60.20queries.20and.20splitting.20.60DefMap.60).

+ // So I left this as-is for now, as the def map improvement should also mitigate this.

+ Use,

+ /// Associated with [`ImplFileAstId`].

+ Impl,

+ /// Associated with [`BlockExprFileAstId`].

+ BlockExpr,

+ // `global_asm!()` is an item, so we need to give it an `AstId`. So we give to all inline asm

+ // because incrementality is not a problem, they will always be the only item in the macro file,

+ // and memory usage also not because they're rare.

+ AsmExpr,

+ /// Keep this last.

+ Root,

+// First hash, then index, then kind.

+const HASH_BITS: u32 = 16;

+const INDEX_BITS: u32 = 11;

+const KIND_BITS: u32 = 5;

+const _: () = assert!(ErasedFileAstIdKind::Fixup as u32 <= ((1 << KIND_BITS) - 1));

+const _: () = assert!(HASH_BITS + INDEX_BITS + KIND_BITS == u32::BITS);

+#[inline]

+const fn u16_hash(hash: u64) -> u16 {

+ // We do basically the same as `FxHasher`. We don't use rustc-hash and truncate because the

+ // higher bits have more entropy, but unlike rustc-hash we don't rotate because it rotates

+ // for hashmaps that just use the low bits, but we compare all bits.

+ const K: u16 = 0xecc5;

+ let (part1, part2, part3, part4) =

+ (hash as u16, (hash >> 16) as u16, (hash >> 32) as u16, (hash >> 48) as u16);

+ part1

+ .wrapping_add(part2)

+ .wrapping_mul(K)

+ .wrapping_add(part3)

+ .wrapping_mul(K)

+ .wrapping_add(part4)

+ .wrapping_mul(K)

+#[inline]

+const fn pack_hash_index_and_kind(hash: u16, index: u32, kind: u32) -> u32 {

+ (hash as u32) | (index << HASH_BITS) | (kind << (HASH_BITS + INDEX_BITS))

impl ErasedFileAstId {

- pub const fn into_raw(self) -> u32 {

- self.0

+ #[inline]

+ fn hash_value(self) -> u16 {

+ self.0 as u16

}

- pub const fn from_raw(u32: u32) -> Self {

- Self(u32)

+ #[inline]

+ fn index(self) -> u32 {

+ (self.0 << KIND_BITS) >> (HASH_BITS + KIND_BITS)

}

-impl fmt::Display for ErasedFileAstId {

- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {

- self.0.fmt(f)

+ #[inline]

+ fn kind(self) -> u32 {

+ self.0 >> (HASH_BITS + INDEX_BITS)

}

-impl fmt::Debug for ErasedFileAstId {

- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {

- self.0.fmt(f)

+ fn ast_id_for(

+ node: &SyntaxNode,

+ index_map: &mut ErasedAstIdNextIndexMap,

+ parent: Option<&ErasedFileAstId>,

+ ) -> Option<ErasedFileAstId> {

+ // Blocks are deliberately not here - we only want to allocate a block if it contains items.

+ has_name_ast_id(node, index_map)

+ .or_else(|| assoc_item_ast_id(node, index_map, parent))

+ .or_else(|| extern_block_ast_id(node, index_map))

+ .or_else(|| use_ast_id(node, index_map))

+ .or_else(|| impl_ast_id(node, index_map))

+ .or_else(|| asm_expr_ast_id(node, index_map))

+ }

+ fn should_alloc(node: &SyntaxNode) -> bool {

+ let kind = node.kind();

+ should_alloc_has_name(kind)

+ || should_alloc_assoc_item(kind)

+ || ast::ExternBlock::can_cast(kind)

+ || ast::Use::can_cast(kind)

+ || ast::Impl::can_cast(kind)

+ || ast::AsmExpr::can_cast(kind)

+ }

+ #[inline]

+ pub fn into_raw(self) -> u32 {

+ self.0

+ }

+ #[inline]

+ pub const fn from_raw(v: u32) -> Self {

+ Self(v)

}

+pub trait AstIdNode: AstNode {}

/// `AstId` points to an AST node in a specific file.

-pub struct FileAstId<N: AstIdNode> {

+pub struct FileAstId<N> {

raw: ErasedFileAstId,

- covariant: PhantomData<fn() -> N>,

+ _marker: PhantomData<fn() -> N>,

}

-impl<N: AstIdNode> Clone for FileAstId<N> {

+/// Traits are manually implemented because `derive` adds redundant bounds.

+impl<N> Clone for FileAstId<N> {

+ #[inline]

fn clone(&self) -> FileAstId<N> {

*self

}

-impl<N: AstIdNode> Copy for FileAstId<N> {}

+impl<N> Copy for FileAstId<N> {}

-impl<N: AstIdNode> PartialEq for FileAstId<N> {

+impl<N> PartialEq for FileAstId<N> {

fn eq(&self, other: &Self) -> bool {

self.raw == other.raw

}

-impl<N: AstIdNode> Eq for FileAstId<N> {}

-impl<N: AstIdNode> Hash for FileAstId<N> {

+impl<N> Eq for FileAstId<N> {}

+impl<N> Hash for FileAstId<N> {

fn hash<H: Hasher>(&self, hasher: &mut H) {

self.raw.hash(hasher);

}

-impl<N: AstIdNode> fmt::Debug for FileAstId<N> {

+impl<N> fmt::Debug for FileAstId<N> {

fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {

- write!(f, "FileAstId::<{}>({})", type_name::<N>(), self.raw)

+ write!(f, "FileAstId::<{}>({:?})", type_name::<N>(), self.raw)

}

-impl<N: AstIdNode> FileAstId<N> {

+impl<N> FileAstId<N> {

// Can't make this a From implementation because of coherence

+ #[inline]

pub fn upcast<M: AstIdNode>(self) -> FileAstId<M>

where

N: Into<M>,

{

- FileAstId { raw: self.raw, covariant: PhantomData }

+ FileAstId { raw: self.raw, _marker: PhantomData }

}

+ #[inline]

pub fn erase(self) -> ErasedFileAstId {

self.raw

}

-pub trait AstIdNode: AstNode {}

-macro_rules! register_ast_id_node {

- (impl AstIdNode for $($ident:ident),+ ) => {

+#[derive(Hash)]

+struct ErasedHasNameFileAstId<'a> {

+ name: &'a str,

+/// This holds the ast ID for variants too (they're a kind of assoc item).

+#[derive(Hash)]

+struct ErasedAssocItemFileAstId<'a> {

+ /// Subtle: items in `extern` blocks **do not** store the ID of the extern block here.

+ /// Instead this is left empty. The reason is that `ExternBlockFileAstId` is pretty unstable

+ /// (it contains only an index), and extern blocks don't introduce a new scope, so storing

+ /// the extern block ID will do more harm to incrementality than help.

+ parent: Option<ErasedFileAstId>,

+ properties: ErasedHasNameFileAstId<'a>,

+#[derive(Debug, Clone, PartialEq, Eq, Hash)]

+struct ImplFileAstId<'a> {

+ /// This can be `None` if the `Self` type is not a named type, or if it is inside a macro call.

+ self_ty_name: Option<&'a str>,

+ /// This can be `None` if this is an inherent impl, or if the trait name is inside a macro call.

+ trait_name: Option<&'a str>,

+#[derive(Debug, Clone, PartialEq, Eq, Hash)]

+struct BlockExprFileAstId {

+ parent: Option<ErasedFileAstId>,

+impl AstIdNode for ast::ExternBlock {}

+fn extern_block_ast_id(

+ node: &SyntaxNode,

+ index_map: &mut ErasedAstIdNextIndexMap,

+) -> Option<ErasedFileAstId> {

+ if ast::ExternBlock::can_cast(node.kind()) {

+ Some(index_map.new_id(ErasedFileAstIdKind::ExternBlock, ()))

+ } else {

+ None

+ }

+impl AstIdNode for ast::Use {}

+fn use_ast_id(

+ node: &SyntaxNode,

+ index_map: &mut ErasedAstIdNextIndexMap,

+) -> Option<ErasedFileAstId> {

+ if ast::Use::can_cast(node.kind()) {

+ Some(index_map.new_id(ErasedFileAstIdKind::Use, ()))

+ } else {

+ None

+ }

+impl AstIdNode for ast::AsmExpr {}

+fn asm_expr_ast_id(

+ node: &SyntaxNode,

+ index_map: &mut ErasedAstIdNextIndexMap,

+) -> Option<ErasedFileAstId> {

+ if ast::AsmExpr::can_cast(node.kind()) {

+ Some(index_map.new_id(ErasedFileAstIdKind::AsmExpr, ()))

+ } else {

+ None

+ }

+impl AstIdNode for ast::Impl {}

+fn impl_ast_id(

+ node: &SyntaxNode,

+ index_map: &mut ErasedAstIdNextIndexMap,

+) -> Option<ErasedFileAstId> {

+ if let Some(node) = ast::Impl::cast(node.clone()) {

+ let type_as_name = |ty: Option<ast::Type>| match ty? {

+ ast::Type::PathType(it) => Some(it.path()?.segment()?.name_ref()?),

+ _ => None,

+ };

+ let self_ty_name = type_as_name(node.self_ty());

+ let trait_name = type_as_name(node.trait_());

+ let data = ImplFileAstId {

+ self_ty_name: self_ty_name.as_ref().map(|it| it.text_non_mutable()),

+ trait_name: trait_name.as_ref().map(|it| it.text_non_mutable()),

+ };

+ Some(index_map.new_id(ErasedFileAstIdKind::Impl, data))

+ } else {

+ None

+ }

+// Blocks aren't `AstIdNode`s deliberately, because unlike other nodes, not all blocks get their own

+// ast id, only if they have items. To account for that we have a different, fallible, API for blocks.

+// impl !AstIdNode for ast::BlockExpr {}

+fn block_expr_ast_id(

+ node: &SyntaxNode,

+ index_map: &mut ErasedAstIdNextIndexMap,

+ parent: Option<&ErasedFileAstId>,

+) -> Option<ErasedFileAstId> {

+ if ast::BlockExpr::can_cast(node.kind()) {

+ Some(

+ index_map.new_id(

+ ErasedFileAstIdKind::BlockExpr,

+ BlockExprFileAstId { parent: parent.copied() },

+ ),

+ )

+ } else {

+ None

+ }

+#[derive(Default)]

+struct ErasedAstIdNextIndexMap(FxHashMap<(ErasedFileAstIdKind, u16), u32>);

+impl ErasedAstIdNextIndexMap {

+ #[inline]

+ fn new_id(&mut self, kind: ErasedFileAstIdKind, data: impl Hash) -> ErasedFileAstId {

+ let hash = FxBuildHasher.hash_one(&data);

+ let initial_hash = u16_hash(hash);

+ // Even though 2^INDEX_BITS=2048 items with the same hash seems like a lot,

+ // it could happen with macro calls or `use`s in macro-generated files. So we want

+ // to handle it gracefully. We just increment the hash.

+ let mut hash = initial_hash;

+ let index = loop {

+ match self.0.entry((kind, hash)) {

+ std::collections::hash_map::Entry::Occupied(mut entry) => {

+ let i = entry.get_mut();

+ if *i < ((1 << INDEX_BITS) - 1) {

+ *i += 1;

+ break *i;

+ }

+ std::collections::hash_map::Entry::Vacant(entry) => {

+ entry.insert(0);

+ break 0;

+ }

+ hash = hash.wrapping_add(1);

+ if hash == initial_hash {

+ // That's 2^27=134,217,728 items!

+ panic!("you have way too many items in the same file!");

+ }

+ };

+ let kind = kind as u32;

+ ErasedFileAstId(pack_hash_index_and_kind(hash, index, kind))

+ }

+macro_rules! register_enum_ast_id {

+ (impl $AstIdNode:ident for $($ident:ident),+ ) => {

+ $(

+ impl $AstIdNode for ast::$ident {}

+ )+

+ };

+register_enum_ast_id! {

+ impl AstIdNode for

+ Item, AnyHasGenericParams, Adt, Macro,

+ AssocItem

+macro_rules! register_has_name_ast_id {

+ (impl $AstIdNode:ident for $($ident:ident = $name_method:ident),+ ) => {

- impl AstIdNode for ast::$ident {}

+ impl $AstIdNode for ast::$ident {}

- fn should_alloc_id(kind: syntax::SyntaxKind) -> bool {

- $(

- ast::$ident::can_cast(kind)

- )||+

+ fn has_name_ast_id(node: &SyntaxNode, index_map: &mut ErasedAstIdNextIndexMap) -> Option<ErasedFileAstId> {

+ match_ast! {

+ match node {

+ $(

+ ast::$ident(node) => {

+ let name = node.$name_method();

+ let name = name.as_ref().map_or("", |it| it.text_non_mutable());

+ let result = ErasedHasNameFileAstId {

+ name,

+ };

+ Some(index_map.new_id(ErasedFileAstIdKind::$ident, result))

+ },

+ )*

+ _ => None,

+ }

+ fn should_alloc_has_name(kind: SyntaxKind) -> bool {

+ false $( || ast::$ident::can_cast(kind) )*

}

};

}

-register_ast_id_node! {

+register_has_name_ast_id! {

impl AstIdNode for

- Item, AnyHasGenericParams,

- Adt,

- Enum,

- Variant,

- Struct,

- Union,

- AssocItem,

- Const,

- Fn,

- MacroCall,

- TypeAlias,

- ExternBlock,

- ExternCrate,

- Impl,

- Macro,

- MacroDef,

- MacroRules,

- Module,

- Static,

- Trait,

- TraitAlias,

- Use,

- BlockExpr, ConstArg

+ Enum = name,

+ Struct = name,

+ Union = name,

+ ExternCrate = name_ref,

+ MacroDef = name,

+ MacroRules = name,

+ Module = name,

+ Static = name,

+ Trait = name,

+ TraitAlias = name

+macro_rules! register_assoc_item_ast_id {

+ (impl $AstIdNode:ident for $($ident:ident = $name_callback:expr),+ ) => {

+ $(

+ impl $AstIdNode for ast::$ident {}

+ )+

+ fn assoc_item_ast_id(

+ node: &SyntaxNode,

+ index_map: &mut ErasedAstIdNextIndexMap,

+ parent: Option<&ErasedFileAstId>,

+ ) -> Option<ErasedFileAstId> {

+ match_ast! {

+ match node {

+ $(

+ ast::$ident(node) => {

+ let name = $name_callback(node);

+ let name = name.as_ref().map_or("", |it| it.text_non_mutable());

+ let properties = ErasedHasNameFileAstId {

+ name,

+ };

+ let result = ErasedAssocItemFileAstId {

+ parent: parent.copied(),

+ properties,

+ };

+ Some(index_map.new_id(ErasedFileAstIdKind::$ident, result))

+ },

+ )*

+ _ => None,

+ }

+ fn should_alloc_assoc_item(kind: SyntaxKind) -> bool {

+ false $( || ast::$ident::can_cast(kind) )*

+ }

+ };

+register_assoc_item_ast_id! {

+ impl AstIdNode for

+ Variant = |it: ast::Variant| it.name(),

+ Const = |it: ast::Const| it.name(),

+ Fn = |it: ast::Fn| it.name(),

+ MacroCall = |it: ast::MacroCall| it.path().and_then(|path| path.segment()?.name_ref()),

+ TypeAlias = |it: ast::TypeAlias| it.name()

}

/// Maps items' `SyntaxNode`s to `ErasedFileAstId`s and back.

#[derive(Default)]

pub struct AstIdMap {

- /// Maps stable id to unstable ptr.

- arena: Arena<SyntaxNodePtr>,

- /// Reverse: map ptr to id.

- map: hashbrown::HashTable<Idx<SyntaxNodePtr>>,

+ /// An arena of the ptrs and their associated ID.

+ arena: Arena<(SyntaxNodePtr, ErasedFileAstId)>,

+ /// Map ptr to id.

+ ptr_map: hashbrown::HashTable<ArenaId>,

+ /// Map id to ptr.

+ id_map: hashbrown::HashTable<ArenaId>,

}

+type ArenaId = Idx<(SyntaxNodePtr, ErasedFileAstId)>;

impl fmt::Debug for AstIdMap {

fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {

f.debug_struct("AstIdMap").field("arena", &self.arena).finish()

@@ -148,31 +561,116 @@ impl PartialEq for AstIdMap {

}

impl Eq for AstIdMap {}

+#[derive(Debug, Clone, Copy, PartialEq, Eq)]

+enum ContainsItems {

+ Yes,

+ No,

impl AstIdMap {

pub fn from_source(node: &SyntaxNode) -> AstIdMap {

assert!(node.parent().is_none());

let mut res = AstIdMap::default();

+ let mut index_map = ErasedAstIdNextIndexMap::default();

+ // Ensure we allocate the root.

+ res.arena.alloc((SyntaxNodePtr::new(node), ROOT_ERASED_FILE_AST_ID));

- // make sure to allocate the root node

- if !should_alloc_id(node.kind()) {

- res.alloc(node);

- }

// By walking the tree in breadth-first order we make sure that parents

// get lower ids then children. That is, adding a new child does not

// change parent's id. This means that, say, adding a new function to a

// trait does not change ids of top-level items, which helps caching.

- bdfs(node, |it| {

- if should_alloc_id(it.kind()) {

- res.alloc(&it);

- TreeOrder::BreadthFirst

- } else {

- TreeOrder::DepthFirst

+ // This contains the stack of the `BlockExpr`s we are under. We do this

+ // so we only allocate `BlockExpr`s if they contain items.

+ // The general idea is: when we enter a block we push `(block, false)` here.

+ // Items inside the block are attributed to the block's container, not the block.

+ // For the first item we find inside a block, we make this `(block, true)`

+ // and create an ast id for the block. When exiting the block we pop it,

+ // whether or not we created an ast id for it.

+ // It may seem that with this setup we will generate an ID for blocks that

+ // have no items directly but have items inside other items inside them.

+ // This is true, but it doesn't matter, because such blocks can't exist.

+ // After all, the block will then contain the *outer* item, so we allocate

+ // an ID for it anyway.

+ let mut blocks = Vec::new();

+ let mut curr_layer = vec![(node.clone(), None)];

+ let mut next_layer = vec![];

+ while !curr_layer.is_empty() {

+ curr_layer.drain(..).for_each(|(node, parent_idx)| {

+ let mut preorder = node.preorder();

+ while let Some(event) = preorder.next() {

+ match event {

+ syntax::WalkEvent::Enter(node) => {

+ if ast::BlockExpr::can_cast(node.kind()) {

+ blocks.push((node, ContainsItems::No));

+ } else if ErasedFileAstId::should_alloc(&node) {

+ // Allocate blocks on-demand, only if they have items.

+ // We don't associate items with blocks, only with items, since block IDs can be quite unstable.

+ // FIXME: Is this the correct thing to do? Macro calls might actually be more incremental if

+ // associated with blocks (not sure). Either way it's not a big deal.

+ if let Some((

+ last_block_node,

+ already_allocated @ ContainsItems::No,

+ )) = blocks.last_mut()

+ {

+ let block_ast_id = block_expr_ast_id(

+ last_block_node,

+ &mut index_map,

+ parent_of(parent_idx, &res),

+ )

+ .expect("not a BlockExpr");

+ res.arena

+ .alloc((SyntaxNodePtr::new(last_block_node), block_ast_id));

+ *already_allocated = ContainsItems::Yes;

+ }

+ let parent = parent_of(parent_idx, &res);

+ let ast_id =

+ ErasedFileAstId::ast_id_for(&node, &mut index_map, parent)

+ .expect("this node should have an ast id");

+ let idx = res.arena.alloc((SyntaxNodePtr::new(&node), ast_id));

+ next_layer.extend(node.children().map(|child| (child, Some(idx))));

+ preorder.skip_subtree();

+ }

+ syntax::WalkEvent::Leave(node) => {

+ if ast::BlockExpr::can_cast(node.kind()) {

+ assert_eq!(

+ blocks.pop().map(|it| it.0),

+ Some(node),

+ "left a BlockExpr we never entered"

+ );

+ }

+ });

+ std::mem::swap(&mut curr_layer, &mut next_layer);

+ assert!(blocks.is_empty(), "didn't leave all BlockExprs");

+ }

+ res.ptr_map = hashbrown::HashTable::with_capacity(res.arena.len());

+ res.id_map = hashbrown::HashTable::with_capacity(res.arena.len());

+ for (idx, (ptr, ast_id)) in res.arena.iter() {

+ let ptr_hash = hash_ptr(ptr);

+ let ast_id_hash = hash_ast_id(ast_id);

+ match res.ptr_map.entry(

+ ptr_hash,

+ |idx2| *idx2 == idx,

+ |&idx| hash_ptr(&res.arena[idx].0),

+ ) {

+ hashbrown::hash_table::Entry::Occupied(_) => unreachable!(),

+ hashbrown::hash_table::Entry::Vacant(entry) => {

+ entry.insert(idx);

+ }

}

- });

- res.map = hashbrown::HashTable::with_capacity(res.arena.len());

- for (idx, ptr) in res.arena.iter() {

- let hash = hash_ptr(ptr);

- match res.map.entry(hash, |&idx2| idx2 == idx, |&idx| hash_ptr(&res.arena[idx])) {

+ match res.id_map.entry(

+ ast_id_hash,

+ |idx2| *idx2 == idx,

+ |&idx| hash_ast_id(&res.arena[idx].1),

+ ) {

hashbrown::hash_table::Entry::Occupied(_) => unreachable!(),

hashbrown::hash_table::Entry::Vacant(entry) => {

entry.insert(idx);

@@ -180,98 +678,235 @@ impl AstIdMap {

}

res.arena.shrink_to_fit();

- res

+ return res;

+ fn parent_of(parent_idx: Option<ArenaId>, res: &AstIdMap) -> Option<&ErasedFileAstId> {

+ let mut parent = parent_idx.map(|parent_idx| &res.arena[parent_idx].1);

+ if parent.is_some_and(|parent| parent.kind() == ErasedFileAstIdKind::ExternBlock as u32)

+ {

+ // See the comment on `ErasedAssocItemFileAstId` for why is this.

+ // FIXME: Technically there could be an extern block inside another item, e.g.:

+ // ```

+ // fn foo() {

+ // extern "C" {

+ // fn bar();

+ // }

+ // ```

+ // Here we want to make `foo()` the parent of `bar()`, but we make it `None`.

+ // Shouldn't be a big deal though.

+ parent = None;

+ }

+ parent

+ }

}

/// The [`AstId`] of the root node

pub fn root(&self) -> SyntaxNodePtr {

- self.arena[Idx::from_raw(RawIdx::from_u32(0))]

+ self.arena[Idx::from_raw(RawIdx::from_u32(0))].0

}

pub fn ast_id<N: AstIdNode>(&self, item: &N) -> FileAstId<N> {

- let raw = self.erased_ast_id(item.syntax());

- FileAstId { raw, covariant: PhantomData }

+ self.ast_id_for_ptr(AstPtr::new(item))

+ }

+ /// Blocks may not be allocated (if they have no items), so they have a different API.

+ pub fn ast_id_for_block(&self, block: &ast::BlockExpr) -> Option<FileAstId<ast::BlockExpr>> {

+ self.ast_id_for_ptr_for_block(AstPtr::new(block))

}

pub fn ast_id_for_ptr<N: AstIdNode>(&self, ptr: AstPtr<N>) -> FileAstId<N> {

let ptr = ptr.syntax_node_ptr();

- let hash = hash_ptr(&ptr);

- match self.map.find(hash, |&idx| self.arena[idx] == ptr) {

- Some(&raw) => FileAstId {

- raw: ErasedFileAstId(raw.into_raw().into_u32()),

- covariant: PhantomData,

- },

- None => panic!(

- "Can't find {:?} in AstIdMap:\n{:?}",

+ FileAstId { raw: self.erased_ast_id(ptr), _marker: PhantomData }

+ }

+ /// Blocks may not be allocated (if they have no items), so they have a different API.

+ pub fn ast_id_for_ptr_for_block(

+ &self,

+ ptr: AstPtr<ast::BlockExpr>,

+ ) -> Option<FileAstId<ast::BlockExpr>> {

+ let ptr = ptr.syntax_node_ptr();

+ self.try_erased_ast_id(ptr).map(|raw| FileAstId { raw, _marker: PhantomData })

+ }

+ fn erased_ast_id(&self, ptr: SyntaxNodePtr) -> ErasedFileAstId {

+ self.try_erased_ast_id(ptr).unwrap_or_else(|| {

+ panic!(

+ "Can't find SyntaxNodePtr {:?} in AstIdMap:\n{:?}",

ptr,

self.arena.iter().map(|(_id, i)| i).collect::<Vec<_>>(),

- ),

- }

+ )

+ })

}

- pub fn get<N: AstIdNode>(&self, id: FileAstId<N>) -> AstPtr<N> {

- AstPtr::try_from_raw(self.arena[Idx::from_raw(RawIdx::from_u32(id.raw.into_raw()))])

- .unwrap()

+ fn try_erased_ast_id(&self, ptr: SyntaxNodePtr) -> Option<ErasedFileAstId> {

+ let hash = hash_ptr(&ptr);

+ let idx = *self.ptr_map.find(hash, |&idx| self.arena[idx].0 == ptr)?;

+ Some(self.arena[idx].1)

}

- pub fn get_erased(&self, id: ErasedFileAstId) -> SyntaxNodePtr {

- self.arena[Idx::from_raw(RawIdx::from_u32(id.into_raw()))]

+ // Don't bound on `AstIdNode` here, because `BlockExpr`s are also valid here (`ast::BlockExpr`

+ // doesn't always have a matching `FileAstId`, but a `FileAstId<ast::BlockExpr>` always has

+ // a matching node).

+ pub fn get<N: AstNode>(&self, id: FileAstId<N>) -> AstPtr<N> {

+ let ptr = self.get_erased(id.raw);

+ AstPtr::try_from_raw(ptr)

+ .unwrap_or_else(|| panic!("AstIdMap node mismatch with node `{ptr:?}`"))

}

- fn erased_ast_id(&self, item: &SyntaxNode) -> ErasedFileAstId {

- let ptr = SyntaxNodePtr::new(item);

- let hash = hash_ptr(&ptr);

- match self.map.find(hash, |&idx| self.arena[idx] == ptr) {

- Some(&idx) => ErasedFileAstId(idx.into_raw().into_u32()),

+ pub fn get_erased(&self, id: ErasedFileAstId) -> SyntaxNodePtr {

+ let hash = hash_ast_id(&id);

+ match self.id_map.find(hash, |&idx| self.arena[idx].1 == id) {

+ Some(&idx) => self.arena[idx].0,

None => panic!(

- "Can't find {:?} in AstIdMap:\n{:?}\n source text: {}",

- item,

+ "Can't find ast id {:?} in AstIdMap:\n{:?}",

+ id,

self.arena.iter().map(|(_id, i)| i).collect::<Vec<_>>(),

- item

}

- fn alloc(&mut self, item: &SyntaxNode) -> ErasedFileAstId {

- ErasedFileAstId(self.arena.alloc(SyntaxNodePtr::new(item)).into_raw().into_u32())

- }

}

+#[inline]

fn hash_ptr(ptr: &SyntaxNodePtr) -> u64 {

- BuildHasherDefault::<FxHasher>::default().hash_one(ptr)

-#[derive(Copy, Clone, PartialEq, Eq)]

-enum TreeOrder {

- BreadthFirst,

- DepthFirst,

-/// Walks the subtree in bdfs order, calling `f` for each node. What is bdfs

-/// order? It is a mix of breadth-first and depth first orders. Nodes for which

-/// `f` returns [`TreeOrder::BreadthFirst`] are visited breadth-first, all the other nodes are explored

-/// [`TreeOrder::DepthFirst`].

-///

-/// In other words, the size of the bfs queue is bound by the number of "true"

-/// nodes.

-fn bdfs(node: &SyntaxNode, mut f: impl FnMut(SyntaxNode) -> TreeOrder) {

- let mut curr_layer = vec![node.clone()];

- let mut next_layer = vec![];

- while !curr_layer.is_empty() {

- curr_layer.drain(..).for_each(|node| {

- let mut preorder = node.preorder();

- while let Some(event) = preorder.next() {

- match event {

- syntax::WalkEvent::Enter(node) => {

- if f(node.clone()) == TreeOrder::BreadthFirst {

- next_layer.extend(node.children());

- preorder.skip_subtree();

- }

- syntax::WalkEvent::Leave(_) => {}

- }

+ FxBuildHasher.hash_one(ptr)

+#[inline]

+fn hash_ast_id(ptr: &ErasedFileAstId) -> u64 {

+ FxBuildHasher.hash_one(ptr)

+#[cfg(test)]

+mod tests {

+ use syntax::{AstNode, Edition, SourceFile, SyntaxKind, SyntaxNodePtr, WalkEvent, ast};

+ use crate::AstIdMap;

+ #[test]

+ fn check_all_nodes() {

+ let syntax = SourceFile::parse(

+ r#"

+extern crate foo;

+fn foo() {

+ union U {}

+struct S;

+macro_rules! m {}

+macro m2() {}

+trait Trait {}

+impl Trait for S {}

+impl S {}

+impl m!() {}

+impl m2!() for m!() {}

+type T = i32;

+enum E {

+ V1(),

+ V2 {},

+ V3,

+struct S; // duplicate

+extern "C" {

+ static S: i32;

+static mut S: i32 = 0;

+const FOO: i32 = 0;

+ "#,

+ Edition::CURRENT,

+ )

+ .syntax_node();

+ let ast_id_map = AstIdMap::from_source(&syntax);

+ for node in syntax.preorder() {

+ let WalkEvent::Enter(node) = node else { continue };

+ if !matches!(

+ node.kind(),

+ SyntaxKind::EXTERN_CRATE

+ | SyntaxKind::FN

+ | SyntaxKind::UNION

+ | SyntaxKind::STRUCT

+ | SyntaxKind::MACRO_RULES

+ | SyntaxKind::MACRO_DEF

+ | SyntaxKind::MACRO_CALL

+ | SyntaxKind::TRAIT

+ | SyntaxKind::IMPL

+ | SyntaxKind::TYPE_ALIAS

+ | SyntaxKind::ENUM

+ | SyntaxKind::VARIANT

+ | SyntaxKind::EXTERN_BLOCK

+ | SyntaxKind::STATIC

+ | SyntaxKind::CONST

+ ) {

+ continue;

}

- });

- std::mem::swap(&mut curr_layer, &mut next_layer);

+ let ptr = SyntaxNodePtr::new(&node);

+ let ast_id = ast_id_map.erased_ast_id(ptr);

+ let turn_back = ast_id_map.get_erased(ast_id);

+ assert_eq!(ptr, turn_back);

+ }

+ #[test]

+ fn different_names_get_different_hashes() {

+ let syntax = SourceFile::parse(

+ r#"

+fn foo() {}

+fn bar() {}

+ "#,

+ Edition::CURRENT,

+ )

+ .syntax_node();

+ let ast_id_map = AstIdMap::from_source(&syntax);

+ let fns = syntax.descendants().filter_map(ast::Fn::cast).collect::<Vec<_>>();

+ let [foo_fn, bar_fn] = fns.as_slice() else {

+ panic!("not exactly 2 functions");

+ };

+ let foo_fn_id = ast_id_map.ast_id(foo_fn);

+ let bar_fn_id = ast_id_map.ast_id(bar_fn);

+ assert_ne!(foo_fn_id.raw.hash_value(), bar_fn_id.raw.hash_value(), "hashes are equal");

+ }

+ #[test]

+ fn different_parents_get_different_hashes() {

+ let syntax = SourceFile::parse(

+ r#"

+fn foo() {

+ m!();

+fn bar() {

+ m!();

+ "#,

+ Edition::CURRENT,

+ )

+ .syntax_node();

+ let ast_id_map = AstIdMap::from_source(&syntax);

+ let macro_calls = syntax.descendants().filter_map(ast::MacroCall::cast).collect::<Vec<_>>();

+ let [macro_call_foo, macro_call_bar] = macro_calls.as_slice() else {

+ panic!("not exactly 2 macro calls");

+ };

+ let macro_call_foo_id = ast_id_map.ast_id(macro_call_foo);

+ let macro_call_bar_id = ast_id_map.ast_id(macro_call_bar);

+ assert_ne!(

+ macro_call_foo_id.raw.hash_value(),

+ macro_call_bar_id.raw.hash_value(),

+ "hashes are equal"

+ );

+ }

+ #[test]

+ fn blocks_with_no_items_have_no_id() {

+ let syntax = SourceFile::parse(

+ r#"

+fn foo() {

+ let foo = 1;

+ bar(foo);

+ "#,

+ Edition::CURRENT,

+ )

+ .syntax_node();

+ let ast_id_map = AstIdMap::from_source(&syntax);

+ let block = syntax.descendants().find_map(ast::BlockExpr::cast).expect("no block");

+ assert!(ast_id_map.ast_id_for_block(&block).is_none());

}