rust-analyzer

Unnamed repository; edit this file 'description' to name the repository.

master 2Branches 0Tags

Clone

HTTPS

SSH

Open with VS Code

Diffstat (limited to 'crates/ide-db/src/search.rs')

-rw-r--r--

crates/ide-db/src/search.rs

514

1 files changed, 443 insertions, 71 deletions

diff --git a/crates/ide-db/src/search.rs b/crates/ide-db/src/search.rs
index 9e01a6d440..12ce5a403f 100644
--- a/crates/ide-db/src/search.rs
+++ b/crates/ide-db/src/search.rs

@@ -5,18 +5,23 @@

//! name resolution.

use std::mem;

+use std::{cell::LazyCell, cmp::Reverse};

use base_db::{salsa::Database, SourceDatabase, SourceRootDatabase};

use hir::{

- sym, AsAssocItem, DefWithBody, DescendPreference, FileRange, HasAttrs, HasSource, HirFileIdExt,

- InFile, InRealFile, ModuleSource, PathResolution, Semantics, Visibility,

+ sym, Adt, AsAssocItem, DefWithBody, FileRange, FileRangeWrapper, HasAttrs, HasContainer,

+ HasSource, HirFileIdExt, InFile, InFileWrapper, InRealFile, ItemContainer, ModuleSource,

+ PathResolution, Semantics, Visibility,

};

use memchr::memmem::Finder;

-use once_cell::unsync::Lazy;

use parser::SyntaxKind;

-use rustc_hash::FxHashMap;

+use rustc_hash::{FxHashMap, FxHashSet};

use span::EditionedFileId;

-use syntax::{ast, match_ast, AstNode, AstToken, SyntaxElement, TextRange, TextSize, ToSmolStr};

+use syntax::{

+ ast::{self, HasName},

+ match_ast, AstNode, AstToken, SmolStr, SyntaxElement, SyntaxNode, TextRange, TextSize,

+ ToSmolStr,

+};

use triomphe::Arc;

use crate::{

@@ -442,6 +447,411 @@ impl<'a> FindUsages<'a> {

res

}

+ fn scope_files<'b>(

+ db: &'b RootDatabase,

+ scope: &'b SearchScope,

+ ) -> impl Iterator<Item = (Arc<str>, EditionedFileId, TextRange)> + 'b {

+ scope.entries.iter().map(|(&file_id, &search_range)| {

+ let text = db.file_text(file_id.file_id());

+ let search_range =

+ search_range.unwrap_or_else(|| TextRange::up_to(TextSize::of(&*text)));

+ (text, file_id, search_range)

+ })

+ }

+ fn match_indices<'b>(

+ text: &'b str,

+ finder: &'b Finder<'b>,

+ search_range: TextRange,

+ ) -> impl Iterator<Item = TextSize> + 'b {

+ finder.find_iter(text.as_bytes()).filter_map(move |idx| {

+ let offset: TextSize = idx.try_into().unwrap();

+ if !search_range.contains_inclusive(offset) {

+ return None;

+ }

+ // If this is not a word boundary, that means this is only part of an identifier,

+ // so it can't be what we're looking for.

+ // This speeds up short identifiers significantly.

+ if text[..idx]

+ .chars()

+ .next_back()

+ .is_some_and(|ch| matches!(ch, 'A'..='Z' | 'a'..='z' | '_'))

+ || text[idx + finder.needle().len()..]

+ .chars()

+ .next()

+ .is_some_and(|ch| matches!(ch, 'A'..='Z' | 'a'..='z' | '_' | '0'..='9'))

+ {

+ return None;

+ }

+ Some(offset)

+ })

+ }

+ fn find_nodes<'b>(

+ sema: &'b Semantics<'_, RootDatabase>,

+ name: &str,

+ node: &syntax::SyntaxNode,

+ offset: TextSize,

+ ) -> impl Iterator<Item = SyntaxNode> + 'b {

+ node.token_at_offset(offset)

+ .find(|it| {

+ // `name` is stripped of raw ident prefix. See the comment on name retrieval below.

+ it.text().trim_start_matches("r#") == name

+ })

+ .into_iter()

+ .flat_map(move |token| {

+ sema.descend_into_macros_exact_if_in_macro(token)

+ .into_iter()

+ .filter_map(|it| it.parent())

+ })

+ }

+ /// Performs a special fast search for associated functions. This is mainly intended

+ /// to speed up `new()` which can take a long time.

+ ///

+ /// The trick is instead of searching for `func_name` search for `TypeThatContainsContainerName::func_name`.

+ /// We cannot search exactly that (not even in tokens), because `ContainerName` may be aliased.

+ /// Instead, we perform a textual search for `ContainerName`. Then, we look for all cases where

+ /// `ContainerName` may be aliased (that includes `use ContainerName as Xyz` and

+ /// `type Xyz = ContainerName`). We collect a list of all possible aliases of `ContainerName`.

+ /// The list can have false positives (because there may be multiple types named `ContainerName`),

+ /// but it cannot have false negatives. Then, we look for `TypeThatContainsContainerNameOrAnyAlias::func_name`.

+ /// Those that will be found are of high chance to be actual hits (of course, we will need to verify

+ /// that).

+ ///

+ /// Returns true if completed the search.

+ // FIXME: Extend this to other cases, such as associated types/consts/enum variants (note those can be `use`d).

+ fn short_associated_function_fast_search(

+ &self,

+ sink: &mut dyn FnMut(EditionedFileId, FileReference) -> bool,

+ search_scope: &SearchScope,

+ name: &str,

+ ) -> bool {

+ if self.scope.is_some() {

+ return false;

+ }

+ let _p = tracing::info_span!("short_associated_function_fast_search").entered();

+ let container = (|| {

+ let Definition::Function(function) = self.def else {

+ return None;

+ };

+ if function.has_self_param(self.sema.db) {

+ return None;

+ }

+ match function.container(self.sema.db) {

+ // Only freestanding `impl`s qualify; methods from trait

+ // can be called from within subtraits and bounds.

+ ItemContainer::Impl(impl_) => {

+ let has_trait = impl_.trait_(self.sema.db).is_some();

+ if has_trait {

+ return None;

+ }

+ let adt = impl_.self_ty(self.sema.db).as_adt()?;

+ Some(adt)

+ }

+ _ => None,

+ }

+ })();

+ let Some(container) = container else {

+ return false;

+ };

+ fn has_any_name(node: &SyntaxNode, mut predicate: impl FnMut(&str) -> bool) -> bool {

+ node.descendants().any(|node| {

+ match_ast! {

+ match node {

+ ast::Name(it) => predicate(it.text().trim_start_matches("r#")),

+ ast::NameRef(it) => predicate(it.text().trim_start_matches("r#")),

+ _ => false

+ }

+ })

+ }

+ // This is a fixpoint algorithm with O(number of aliases), but most types have no or few aliases,

+ // so this should stay fast.

+ //

+ /// Returns `(aliases, ranges_where_Self_can_refer_to_our_type)`.

+ fn collect_possible_aliases(

+ sema: &Semantics<'_, RootDatabase>,

+ container: Adt,

+ ) -> Option<(FxHashSet<SmolStr>, Vec<FileRangeWrapper<EditionedFileId>>)> {

+ fn insert_type_alias(

+ db: &RootDatabase,

+ to_process: &mut Vec<(SmolStr, SearchScope)>,

+ alias_name: &str,

+ def: Definition,

+ ) {

+ let alias = alias_name.trim_start_matches("r#").to_smolstr();

+ tracing::debug!("found alias: {alias}");

+ to_process.push((alias, def.search_scope(db)));

+ }

+ let _p = tracing::info_span!("collect_possible_aliases").entered();

+ let db = sema.db;

+ let container_name = container.name(db).unescaped().display(db).to_smolstr();

+ let search_scope = Definition::from(container).search_scope(db);

+ let mut seen = FxHashSet::default();

+ let mut completed = FxHashSet::default();

+ let mut to_process = vec![(container_name, search_scope)];

+ let mut is_possibly_self = Vec::new();

+ let mut total_files_searched = 0;

+ while let Some((current_to_process, current_to_process_search_scope)) = to_process.pop()

+ {

+ let is_alias = |alias: &ast::TypeAlias| {

+ let def = sema.to_def(alias)?;

+ let ty = def.ty(db);

+ let is_alias = ty.as_adt()? == container;

+ is_alias.then_some(def)

+ };

+ let finder = Finder::new(current_to_process.as_bytes());

+ for (file_text, file_id, search_range) in

+ FindUsages::scope_files(db, &current_to_process_search_scope)

+ {

+ let tree = LazyCell::new(move || sema.parse(file_id).syntax().clone());

+ for offset in FindUsages::match_indices(&file_text, &finder, search_range) {

+ let usages =

+ FindUsages::find_nodes(sema, &current_to_process, &tree, offset)

+ .filter(|it| {

+ matches!(it.kind(), SyntaxKind::NAME | SyntaxKind::NAME_REF)

+ });

+ for usage in usages {

+ if let Some(alias) = usage.parent().and_then(|it| {

+ let path = ast::PathSegment::cast(it)?.parent_path();

+ let use_tree = ast::UseTree::cast(path.syntax().parent()?)?;

+ use_tree.rename()?.name()

+ }) {

+ if seen.insert(InFileWrapper::new(

+ file_id,

+ alias.syntax().text_range(),

+ )) {

+ tracing::debug!("found alias: {alias}");

+ cov_mark::hit!(container_use_rename);

+ // FIXME: `use`s have no easy way to determine their search scope, but they are rare.

+ to_process.push((

+ alias.text().to_smolstr(),

+ current_to_process_search_scope.clone(),

+ ));

+ }

+ } else if let Some(alias) =

+ usage.ancestors().find_map(ast::TypeAlias::cast)

+ {

+ if let Some(name) = alias.name() {

+ if seen.insert(InFileWrapper::new(

+ file_id,

+ name.syntax().text_range(),

+ )) {

+ if let Some(def) = is_alias(&alias) {

+ cov_mark::hit!(container_type_alias);

+ insert_type_alias(

+ sema.db,

+ &mut to_process,

+ name.text().as_str(),

+ def.into(),

+ );

+ } else {

+ cov_mark::hit!(same_name_different_def_type_alias);

+ }

+ // We need to account for `Self`. It can only refer to our type inside an impl.

+ let impl_ = 'impl_: {

+ for ancestor in usage.ancestors() {

+ if let Some(parent) = ancestor.parent() {

+ if let Some(parent) = ast::Impl::cast(parent) {

+ // Only if the GENERIC_PARAM_LIST is directly under impl, otherwise it may be in the self ty.

+ if matches!(

+ ancestor.kind(),

+ SyntaxKind::ASSOC_ITEM_LIST

+ | SyntaxKind::WHERE_CLAUSE

+ | SyntaxKind::GENERIC_PARAM_LIST

+ ) {

+ break;

+ }

+ if parent

+ .trait_()

+ .is_some_and(|trait_| *trait_.syntax() == ancestor)

+ {

+ break;

+ }

+ // Otherwise, found an impl where its self ty may be our type.

+ break 'impl_ Some(parent);

+ }

+ None

+ };

+ (|| {

+ let impl_ = impl_?;

+ is_possibly_self.push(sema.original_range(impl_.syntax()));

+ let assoc_items = impl_.assoc_item_list()?;

+ let type_aliases = assoc_items

+ .syntax()

+ .descendants()

+ .filter_map(ast::TypeAlias::cast);

+ for type_alias in type_aliases {

+ let Some(ty) = type_alias.ty() else { continue };

+ let Some(name) = type_alias.name() else { continue };

+ let contains_self = ty

+ .syntax()

+ .descendants_with_tokens()

+ .any(|node| node.kind() == SyntaxKind::SELF_TYPE_KW);

+ if !contains_self {

+ continue;

+ }

+ if seen.insert(InFileWrapper::new(

+ file_id,

+ name.syntax().text_range(),

+ )) {

+ if let Some(def) = is_alias(&type_alias) {

+ cov_mark::hit!(self_type_alias);

+ insert_type_alias(

+ sema.db,

+ &mut to_process,

+ name.text().as_str(),

+ def.into(),

+ );

+ } else {

+ cov_mark::hit!(same_name_different_def_type_alias);

+ }

+ Some(())

+ })();

+ }

+ completed.insert(current_to_process);

+ total_files_searched += current_to_process_search_scope.entries.len();

+ // FIXME: Maybe this needs to be relative to the project size, or at least to the initial search scope?

+ if total_files_searched > 20_000 && completed.len() > 100 {

+ // This case is extremely unlikely (even searching for `Vec::new()` on rust-analyzer does not enter

+ // here - it searches less than 10,000 files, and it does so in five seconds), but if we get here,

+ // we at a risk of entering an almost-infinite loop of growing the aliases list. So just stop and

+ // let normal search handle this case.

+ tracing::info!(aliases_count = %completed.len(), "too much aliases; leaving fast path");

+ return None;

+ }

+ // Impls can contain each other, so we need to deduplicate their ranges.

+ is_possibly_self.sort_unstable_by_key(|position| {

+ (position.file_id, position.range.start(), Reverse(position.range.end()))

+ });

+ is_possibly_self.dedup_by(|pos2, pos1| {

+ pos1.file_id == pos2.file_id

+ && pos1.range.start() <= pos2.range.start()

+ && pos1.range.end() >= pos2.range.end()

+ });

+ tracing::info!(aliases_count = %completed.len(), "aliases search completed");

+ Some((completed, is_possibly_self))

+ }

+ fn search(

+ this: &FindUsages<'_>,

+ finder: &Finder<'_>,

+ name: &str,

+ files: impl Iterator<Item = (Arc<str>, EditionedFileId, TextRange)>,

+ mut container_predicate: impl FnMut(

+ &SyntaxNode,

+ InFileWrapper<EditionedFileId, TextRange>,

+ ) -> bool,

+ sink: &mut dyn FnMut(EditionedFileId, FileReference) -> bool,

+ ) {

+ for (file_text, file_id, search_range) in files {

+ let tree = LazyCell::new(move || this.sema.parse(file_id).syntax().clone());

+ for offset in FindUsages::match_indices(&file_text, finder, search_range) {

+ let usages = FindUsages::find_nodes(this.sema, name, &tree, offset)

+ .filter_map(ast::NameRef::cast);

+ for usage in usages {

+ let found_usage = usage

+ .syntax()

+ .parent()

+ .and_then(ast::PathSegment::cast)

+ .map(|path_segment| {

+ container_predicate(

+ path_segment.parent_path().syntax(),

+ InFileWrapper::new(file_id, usage.syntax().text_range()),

+ )

+ })

+ .unwrap_or(false);

+ if found_usage {

+ this.found_name_ref(&usage, sink);

+ }

+ let Some((container_possible_aliases, is_possibly_self)) =

+ collect_possible_aliases(self.sema, container)

+ else {

+ return false;

+ };

+ cov_mark::hit!(short_associated_function_fast_search);

+ // FIXME: If Rust ever gains the ability to `use Struct::method` we'll also need to account for free

+ // functions.

+ let finder = Finder::new(name.as_bytes());

+ // The search for `Self` may return duplicate results with `ContainerName`, so deduplicate them.

+ let mut self_positions = FxHashSet::default();

+ tracing::info_span!("Self_search").in_scope(|| {

+ search(

+ self,

+ &finder,

+ name,

+ is_possibly_self.into_iter().map(|position| {

+ (

+ self.sema.db.file_text(position.file_id.file_id()),

+ position.file_id,

+ position.range,

+ )

+ }),

+ |path, name_position| {

+ let has_self = path

+ .descendants_with_tokens()

+ .any(|node| node.kind() == SyntaxKind::SELF_TYPE_KW);

+ if has_self {

+ self_positions.insert(name_position);

+ }

+ has_self

+ },

+ sink,

+ )

+ });

+ tracing::info_span!("aliases_search").in_scope(|| {

+ search(

+ self,

+ &finder,

+ name,

+ FindUsages::scope_files(self.sema.db, search_scope),

+ |path, name_position| {

+ has_any_name(path, |name| container_possible_aliases.contains(name))

+ && !self_positions.contains(&name_position)

+ },

+ sink,

+ )

+ });

+ true

+ }

pub fn search(&self, sink: &mut dyn FnMut(EditionedFileId, FileReference) -> bool) {

let _p = tracing::info_span!("FindUsages:search").entered();

let sema = self.sema;

@@ -488,65 +898,23 @@ impl<'a> FindUsages<'a> {

Some(s) => s.as_str(),

None => return,

};

- let finder = &Finder::new(name);

- let include_self_kw_refs =

- self.include_self_kw_refs.as_ref().map(|ty| (ty, Finder::new("Self")));

- // for<'a> |text: &'a str, name: &'a str, search_range: TextRange| -> impl Iterator<Item = TextSize> + 'a { ... }

- fn match_indices<'a>(

- text: &'a str,

- finder: &'a Finder<'a>,

- search_range: TextRange,

- ) -> impl Iterator<Item = TextSize> + 'a {

- finder.find_iter(text.as_bytes()).filter_map(move |idx| {

- let offset: TextSize = idx.try_into().unwrap();

- if !search_range.contains_inclusive(offset) {

- return None;

- }

- Some(offset)

- })

+ // FIXME: This should probably depend on the number of the results (specifically, the number of false results).

+ if name.len() <= 7 && self.short_associated_function_fast_search(sink, &search_scope, name)

+ {

+ return;

}

- // for<'a> |scope: &'a SearchScope| -> impl Iterator<Item = (Arc<String>, EditionedFileId, TextRange)> + 'a { ... }

- fn scope_files<'a>(

- sema: &'a Semantics<'_, RootDatabase>,

- scope: &'a SearchScope,

- ) -> impl Iterator<Item = (Arc<str>, EditionedFileId, TextRange)> + 'a {

- scope.entries.iter().map(|(&file_id, &search_range)| {

- let text = sema.db.file_text(file_id.file_id());

- let search_range =

- search_range.unwrap_or_else(|| TextRange::up_to(TextSize::of(&*text)));

- (text, file_id, search_range)

- })

- }

- let find_nodes = move |name: &str, node: &syntax::SyntaxNode, offset: TextSize| {

- node.token_at_offset(offset)

- .find(|it| {

- // `name` is stripped of raw ident prefix. See the comment on name retrieval above.

- it.text().trim_start_matches("r#") == name

- })

- .into_iter()

- .flat_map(move |token| {

- // FIXME: There should be optimization potential here

- // Currently we try to descend everything we find which

- // means we call `Semantics::descend_into_macros` on

- // every textual hit. That function is notoriously

- // expensive even for things that do not get down mapped

- // into macros.

- sema.descend_into_macros(DescendPreference::None, token)

- .into_iter()

- .filter_map(|it| it.parent())

- })

- };

+ let finder = &Finder::new(name);

+ let include_self_kw_refs =

+ self.include_self_kw_refs.as_ref().map(|ty| (ty, Finder::new("Self")));

- for (text, file_id, search_range) in scope_files(sema, &search_scope) {

+ for (text, file_id, search_range) in Self::scope_files(sema.db, &search_scope) {

self.sema.db.unwind_if_cancelled();

- let tree = Lazy::new(move || sema.parse(file_id).syntax().clone());

+ let tree = LazyCell::new(move || sema.parse(file_id).syntax().clone());

// Search for occurrences of the items name

- for offset in match_indices(&text, finder, search_range) {

+ for offset in Self::match_indices(&text, finder, search_range) {

tree.token_at_offset(offset).for_each(|token| {

let Some(str_token) = ast::String::cast(token.clone()) else { return };

if let Some((range, nameres)) =

@@ -556,7 +924,9 @@ impl<'a> FindUsages<'a> {

}

});

- for name in find_nodes(name, &tree, offset).filter_map(ast::NameLike::cast) {

+ for name in

+ Self::find_nodes(sema, name, &tree, offset).filter_map(ast::NameLike::cast)

+ {

if match name {

ast::NameLike::NameRef(name_ref) => self.found_name_ref(&name_ref, sink),

ast::NameLike::Name(name) => self.found_name(&name, sink),

@@ -568,8 +938,9 @@ impl<'a> FindUsages<'a> {

}

// Search for occurrences of the `Self` referring to our type

if let Some((self_ty, finder)) = &include_self_kw_refs {

- for offset in match_indices(&text, finder, search_range) {

- for name_ref in find_nodes("Self", &tree, offset).filter_map(ast::NameRef::cast)

+ for offset in Self::match_indices(&text, finder, search_range) {

+ for name_ref in

+ Self::find_nodes(sema, "Self", &tree, offset).filter_map(ast::NameRef::cast)

{

if self.found_self_ty_name_ref(self_ty, &name_ref, sink) {

return;

@@ -587,13 +958,13 @@ impl<'a> FindUsages<'a> {

let is_crate_root = module.is_crate_root().then(|| Finder::new("crate"));

let finder = &Finder::new("super");

- for (text, file_id, search_range) in scope_files(sema, &scope) {

+ for (text, file_id, search_range) in Self::scope_files(sema.db, &scope) {

self.sema.db.unwind_if_cancelled();

- let tree = Lazy::new(move || sema.parse(file_id).syntax().clone());

+ let tree = LazyCell::new(move || sema.parse(file_id).syntax().clone());

- for offset in match_indices(&text, finder, search_range) {

- for name_ref in

- find_nodes("super", &tree, offset).filter_map(ast::NameRef::cast)

+ for offset in Self::match_indices(&text, finder, search_range) {

+ for name_ref in Self::find_nodes(sema, "super", &tree, offset)

+ .filter_map(ast::NameRef::cast)

{

if self.found_name_ref(&name_ref, sink) {

return;

@@ -601,9 +972,9 @@ impl<'a> FindUsages<'a> {

}

if let Some(finder) = &is_crate_root {

- for offset in match_indices(&text, finder, search_range) {

- for name_ref in

- find_nodes("crate", &tree, offset).filter_map(ast::NameRef::cast)

+ for offset in Self::match_indices(&text, finder, search_range) {

+ for name_ref in Self::find_nodes(sema, "crate", &tree, offset)

+ .filter_map(ast::NameRef::cast)

{

if self.found_name_ref(&name_ref, sink) {

return;

@@ -641,11 +1012,12 @@ impl<'a> FindUsages<'a> {

let search_range =

search_range.unwrap_or_else(|| TextRange::up_to(TextSize::of(&*text)));

- let tree = Lazy::new(|| sema.parse(file_id).syntax().clone());

+ let tree = LazyCell::new(|| sema.parse(file_id).syntax().clone());

let finder = &Finder::new("self");

- for offset in match_indices(&text, finder, search_range) {

- for name_ref in find_nodes("self", &tree, offset).filter_map(ast::NameRef::cast)

+ for offset in Self::match_indices(&text, finder, search_range) {

+ for name_ref in

+ Self::find_nodes(sema, "self", &tree, offset).filter_map(ast::NameRef::cast)

{

if self.found_self_module_name_ref(&name_ref, sink) {

return;