Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'crates/hir-expand/src/db.rs')
| -rw-r--r-- | crates/hir-expand/src/db.rs | 509 |
1 files changed, 509 insertions, 0 deletions
diff --git a/crates/hir-expand/src/db.rs b/crates/hir-expand/src/db.rs new file mode 100644 index 0000000000..8c08fdcd2e --- /dev/null +++ b/crates/hir-expand/src/db.rs @@ -0,0 +1,509 @@ +//! Defines database & queries for macro expansion. + +use std::sync::Arc; + +use base_db::{salsa, SourceDatabase}; +use either::Either; +use limit::Limit; +use mbe::syntax_node_to_token_tree; +use rustc_hash::FxHashSet; +use syntax::{ + ast::{self, HasAttrs, HasDocComments}, + AstNode, GreenNode, Parse, SyntaxNode, SyntaxToken, T, +}; + +use crate::{ + ast_id_map::AstIdMap, builtin_attr_macro::pseudo_derive_attr_expansion, fixup, + hygiene::HygieneFrame, BuiltinAttrExpander, BuiltinDeriveExpander, BuiltinFnLikeExpander, + ExpandError, ExpandResult, ExpandTo, HirFileId, HirFileIdRepr, MacroCallId, MacroCallKind, + MacroCallLoc, MacroDefId, MacroDefKind, MacroFile, ProcMacroExpander, +}; + +/// Total limit on the number of tokens produced by any macro invocation. +/// +/// If an invocation produces more tokens than this limit, it will not be stored in the database and +/// an error will be emitted. +/// +/// Actual max for `analysis-stats .` at some point: 30672. +static TOKEN_LIMIT: Limit = Limit::new(524_288); + +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum TokenExpander { + /// Old-style `macro_rules` or the new macros 2.0 + DeclarativeMacro { mac: mbe::DeclarativeMacro, def_site_token_map: mbe::TokenMap }, + /// Stuff like `line!` and `file!`. + Builtin(BuiltinFnLikeExpander), + /// `global_allocator` and such. + BuiltinAttr(BuiltinAttrExpander), + /// `derive(Copy)` and such. + BuiltinDerive(BuiltinDeriveExpander), + /// The thing we love the most here in rust-analyzer -- procedural macros. + ProcMacro(ProcMacroExpander), +} + +impl TokenExpander { + fn expand( + &self, + db: &dyn AstDatabase, + id: MacroCallId, + tt: &tt::Subtree, + ) -> ExpandResult<tt::Subtree> { + match self { + TokenExpander::DeclarativeMacro { mac, .. } => mac.expand(tt).map_err(Into::into), + TokenExpander::Builtin(it) => it.expand(db, id, tt).map_err(Into::into), + TokenExpander::BuiltinAttr(it) => it.expand(db, id, tt), + TokenExpander::BuiltinDerive(it) => it.expand(db, id, tt), + TokenExpander::ProcMacro(_) => { + // We store the result in salsa db to prevent non-deterministic behavior in + // some proc-macro implementation + // See #4315 for details + db.expand_proc_macro(id) + } + } + } + + pub(crate) fn map_id_down(&self, id: tt::TokenId) -> tt::TokenId { + match self { + TokenExpander::DeclarativeMacro { mac, .. } => mac.map_id_down(id), + TokenExpander::Builtin(..) + | TokenExpander::BuiltinAttr(..) + | TokenExpander::BuiltinDerive(..) + | TokenExpander::ProcMacro(..) => id, + } + } + + pub(crate) fn map_id_up(&self, id: tt::TokenId) -> (tt::TokenId, mbe::Origin) { + match self { + TokenExpander::DeclarativeMacro { mac, .. } => mac.map_id_up(id), + TokenExpander::Builtin(..) + | TokenExpander::BuiltinAttr(..) + | TokenExpander::BuiltinDerive(..) + | TokenExpander::ProcMacro(..) => (id, mbe::Origin::Call), + } + } +} + +// FIXME: rename to ExpandDatabase +#[salsa::query_group(AstDatabaseStorage)] +pub trait AstDatabase: SourceDatabase { + fn ast_id_map(&self, file_id: HirFileId) -> Arc<AstIdMap>; + + /// Main public API -- parses a hir file, not caring whether it's a real + /// file or a macro expansion. + #[salsa::transparent] + fn parse_or_expand(&self, file_id: HirFileId) -> Option<SyntaxNode>; + /// Implementation for the macro case. + fn parse_macro_expansion( + &self, + macro_file: MacroFile, + ) -> ExpandResult<Option<(Parse<SyntaxNode>, Arc<mbe::TokenMap>)>>; + + /// Macro ids. That's probably the tricksiest bit in rust-analyzer, and the + /// reason why we use salsa at all. + /// + /// We encode macro definitions into ids of macro calls, this what allows us + /// to be incremental. + #[salsa::interned] + fn intern_macro_call(&self, macro_call: MacroCallLoc) -> MacroCallId; + + /// Lowers syntactic macro call to a token tree representation. + #[salsa::transparent] + fn macro_arg( + &self, + id: MacroCallId, + ) -> Option<Arc<(tt::Subtree, mbe::TokenMap, fixup::SyntaxFixupUndoInfo)>>; + /// Extracts syntax node, corresponding to a macro call. That's a firewall + /// query, only typing in the macro call itself changes the returned + /// subtree. + fn macro_arg_text(&self, id: MacroCallId) -> Option<GreenNode>; + /// Gets the expander for this macro. This compiles declarative macros, and + /// just fetches procedural ones. + fn macro_def(&self, id: MacroDefId) -> Result<Arc<TokenExpander>, mbe::ParseError>; + + /// Expand macro call to a token tree. This query is LRUed (we keep 128 or so results in memory) + fn macro_expand(&self, macro_call: MacroCallId) -> ExpandResult<Option<Arc<tt::Subtree>>>; + /// Special case of the previous query for procedural macros. We can't LRU + /// proc macros, since they are not deterministic in general, and + /// non-determinism breaks salsa in a very, very, very bad way. @edwin0cheng + /// heroically debugged this once! + fn expand_proc_macro(&self, call: MacroCallId) -> ExpandResult<tt::Subtree>; + /// Firewall query that returns the error from the `macro_expand` query. + fn macro_expand_error(&self, macro_call: MacroCallId) -> Option<ExpandError>; + + fn hygiene_frame(&self, file_id: HirFileId) -> Arc<HygieneFrame>; +} + +/// This expands the given macro call, but with different arguments. This is +/// used for completion, where we want to see what 'would happen' if we insert a +/// token. The `token_to_map` mapped down into the expansion, with the mapped +/// token returned. +pub fn expand_speculative( + db: &dyn AstDatabase, + actual_macro_call: MacroCallId, + speculative_args: &SyntaxNode, + token_to_map: SyntaxToken, +) -> Option<(SyntaxNode, SyntaxToken)> { + let loc = db.lookup_intern_macro_call(actual_macro_call); + let macro_def = db.macro_def(loc.def).ok()?; + let token_range = token_to_map.text_range(); + + // Build the subtree and token mapping for the speculative args + let censor = censor_for_macro_input(&loc, speculative_args); + let mut fixups = fixup::fixup_syntax(speculative_args); + fixups.replace.extend(censor.into_iter().map(|node| (node, Vec::new()))); + let (mut tt, spec_args_tmap, _) = mbe::syntax_node_to_token_tree_with_modifications( + speculative_args, + fixups.token_map, + fixups.next_id, + fixups.replace, + fixups.append, + ); + + let (attr_arg, token_id) = match loc.kind { + MacroCallKind::Attr { invoc_attr_index, is_derive, .. } => { + let attr = if is_derive { + // for pseudo-derive expansion we actually pass the attribute itself only + ast::Attr::cast(speculative_args.clone()) + } else { + // Attributes may have an input token tree, build the subtree and map for this as well + // then try finding a token id for our token if it is inside this input subtree. + let item = ast::Item::cast(speculative_args.clone())?; + item.doc_comments_and_attrs().nth(invoc_attr_index as usize).and_then(Either::left) + }?; + match attr.token_tree() { + Some(token_tree) => { + let (mut tree, map) = syntax_node_to_token_tree(attr.token_tree()?.syntax()); + tree.delimiter = None; + + let shift = mbe::Shift::new(&tt); + shift.shift_all(&mut tree); + + let token_id = if token_tree.syntax().text_range().contains_range(token_range) { + let attr_input_start = + token_tree.left_delimiter_token()?.text_range().start(); + let range = token_range.checked_sub(attr_input_start)?; + let token_id = shift.shift(map.token_by_range(range)?); + Some(token_id) + } else { + None + }; + (Some(tree), token_id) + } + _ => (None, None), + } + } + _ => (None, None), + }; + let token_id = match token_id { + Some(token_id) => token_id, + // token wasn't inside an attribute input so it has to be in the general macro input + None => { + let range = token_range.checked_sub(speculative_args.text_range().start())?; + let token_id = spec_args_tmap.token_by_range(range)?; + macro_def.map_id_down(token_id) + } + }; + + // Do the actual expansion, we need to directly expand the proc macro due to the attribute args + // Otherwise the expand query will fetch the non speculative attribute args and pass those instead. + let mut speculative_expansion = match loc.def.kind { + MacroDefKind::ProcMacro(expander, ..) => { + tt.delimiter = None; + expander.expand(db, loc.krate, &tt, attr_arg.as_ref()) + } + MacroDefKind::BuiltInAttr(BuiltinAttrExpander::Derive, _) => { + pseudo_derive_attr_expansion(&tt, attr_arg.as_ref()?) + } + _ => macro_def.expand(db, actual_macro_call, &tt), + }; + + let expand_to = macro_expand_to(db, actual_macro_call); + fixup::reverse_fixups(&mut speculative_expansion.value, &spec_args_tmap, &fixups.undo_info); + let (node, rev_tmap) = token_tree_to_syntax_node(&speculative_expansion.value, expand_to); + + let range = rev_tmap.first_range_by_token(token_id, token_to_map.kind())?; + let token = node.syntax_node().covering_element(range).into_token()?; + Some((node.syntax_node(), token)) +} + +fn ast_id_map(db: &dyn AstDatabase, file_id: HirFileId) -> Arc<AstIdMap> { + let map = db.parse_or_expand(file_id).map(|it| AstIdMap::from_source(&it)).unwrap_or_default(); + Arc::new(map) +} + +fn parse_or_expand(db: &dyn AstDatabase, file_id: HirFileId) -> Option<SyntaxNode> { + match file_id.0 { + HirFileIdRepr::FileId(file_id) => Some(db.parse(file_id).tree().syntax().clone()), + HirFileIdRepr::MacroFile(macro_file) => { + // FIXME: Note how we convert from `Parse` to `SyntaxNode` here, + // forgetting about parse errors. + db.parse_macro_expansion(macro_file).value.map(|(it, _)| it.syntax_node()) + } + } +} + +fn parse_macro_expansion( + db: &dyn AstDatabase, + macro_file: MacroFile, +) -> ExpandResult<Option<(Parse<SyntaxNode>, Arc<mbe::TokenMap>)>> { + let _p = profile::span("parse_macro_expansion"); + let result = db.macro_expand(macro_file.macro_call_id); + + if let Some(err) = &result.err { + // Note: + // The final goal we would like to make all parse_macro success, + // such that the following log will not call anyway. + let loc: MacroCallLoc = db.lookup_intern_macro_call(macro_file.macro_call_id); + let node = loc.kind.to_node(db); + + // collect parent information for warning log + let parents = + std::iter::successors(loc.kind.file_id().call_node(db), |it| it.file_id.call_node(db)) + .map(|n| format!("{:#}", n.value)) + .collect::<Vec<_>>() + .join("\n"); + + tracing::debug!( + "fail on macro_parse: (reason: {:?} macro_call: {:#}) parents: {}", + err, + node.value, + parents + ); + } + let tt = match result.value { + Some(tt) => tt, + None => return ExpandResult { value: None, err: result.err }, + }; + + let expand_to = macro_expand_to(db, macro_file.macro_call_id); + + tracing::debug!("expanded = {}", tt.as_debug_string()); + tracing::debug!("kind = {:?}", expand_to); + + let (parse, rev_token_map) = token_tree_to_syntax_node(&tt, expand_to); + + ExpandResult { value: Some((parse, Arc::new(rev_token_map))), err: result.err } +} + +fn macro_arg( + db: &dyn AstDatabase, + id: MacroCallId, +) -> Option<Arc<(tt::Subtree, mbe::TokenMap, fixup::SyntaxFixupUndoInfo)>> { + let arg = db.macro_arg_text(id)?; + let loc = db.lookup_intern_macro_call(id); + + let node = SyntaxNode::new_root(arg); + let censor = censor_for_macro_input(&loc, &node); + let mut fixups = fixup::fixup_syntax(&node); + fixups.replace.extend(censor.into_iter().map(|node| (node, Vec::new()))); + let (mut tt, tmap, _) = mbe::syntax_node_to_token_tree_with_modifications( + &node, + fixups.token_map, + fixups.next_id, + fixups.replace, + fixups.append, + ); + + if loc.def.is_proc_macro() { + // proc macros expect their inputs without parentheses, MBEs expect it with them included + tt.delimiter = None; + } + + Some(Arc::new((tt, tmap, fixups.undo_info))) +} + +fn censor_for_macro_input(loc: &MacroCallLoc, node: &SyntaxNode) -> FxHashSet<SyntaxNode> { + (|| { + let censor = match loc.kind { + MacroCallKind::FnLike { .. } => return None, + MacroCallKind::Derive { derive_attr_index, .. } => { + cov_mark::hit!(derive_censoring); + ast::Item::cast(node.clone())? + .attrs() + .take(derive_attr_index as usize + 1) + // FIXME + .filter(|attr| attr.simple_name().as_deref() == Some("derive")) + .map(|it| it.syntax().clone()) + .collect() + } + MacroCallKind::Attr { is_derive: true, .. } => return None, + MacroCallKind::Attr { invoc_attr_index, .. } => { + cov_mark::hit!(attribute_macro_attr_censoring); + ast::Item::cast(node.clone())? + .doc_comments_and_attrs() + .nth(invoc_attr_index as usize) + .and_then(Either::left) + .map(|attr| attr.syntax().clone()) + .into_iter() + .collect() + } + }; + Some(censor) + })() + .unwrap_or_default() +} + +fn macro_arg_text(db: &dyn AstDatabase, id: MacroCallId) -> Option<GreenNode> { + let loc = db.lookup_intern_macro_call(id); + let arg = loc.kind.arg(db)?; + if matches!(loc.kind, MacroCallKind::FnLike { .. }) { + let first = arg.first_child_or_token().map_or(T![.], |it| it.kind()); + let last = arg.last_child_or_token().map_or(T![.], |it| it.kind()); + let well_formed_tt = + matches!((first, last), (T!['('], T![')']) | (T!['['], T![']']) | (T!['{'], T!['}'])); + if !well_formed_tt { + // Don't expand malformed (unbalanced) macro invocations. This is + // less than ideal, but trying to expand unbalanced macro calls + // sometimes produces pathological, deeply nested code which breaks + // all kinds of things. + // + // Some day, we'll have explicit recursion counters for all + // recursive things, at which point this code might be removed. + cov_mark::hit!(issue9358_bad_macro_stack_overflow); + return None; + } + } + Some(arg.green().into()) +} + +fn macro_def(db: &dyn AstDatabase, id: MacroDefId) -> Result<Arc<TokenExpander>, mbe::ParseError> { + match id.kind { + MacroDefKind::Declarative(ast_id) => { + let (mac, def_site_token_map) = match ast_id.to_node(db) { + ast::Macro::MacroRules(macro_rules) => { + let arg = macro_rules + .token_tree() + .ok_or_else(|| mbe::ParseError::Expected("expected a token tree".into()))?; + let (tt, def_site_token_map) = mbe::syntax_node_to_token_tree(arg.syntax()); + let mac = mbe::DeclarativeMacro::parse_macro_rules(&tt)?; + (mac, def_site_token_map) + } + ast::Macro::MacroDef(macro_def) => { + let arg = macro_def + .body() + .ok_or_else(|| mbe::ParseError::Expected("expected a token tree".into()))?; + let (tt, def_site_token_map) = mbe::syntax_node_to_token_tree(arg.syntax()); + let mac = mbe::DeclarativeMacro::parse_macro2(&tt)?; + (mac, def_site_token_map) + } + }; + Ok(Arc::new(TokenExpander::DeclarativeMacro { mac, def_site_token_map })) + } + MacroDefKind::BuiltIn(expander, _) => Ok(Arc::new(TokenExpander::Builtin(expander))), + MacroDefKind::BuiltInAttr(expander, _) => { + Ok(Arc::new(TokenExpander::BuiltinAttr(expander))) + } + MacroDefKind::BuiltInDerive(expander, _) => { + Ok(Arc::new(TokenExpander::BuiltinDerive(expander))) + } + MacroDefKind::BuiltInEager(..) => { + // FIXME: Return a random error here just to make the types align. + // This obviously should do something real instead. + Err(mbe::ParseError::UnexpectedToken("unexpected eager macro".into())) + } + MacroDefKind::ProcMacro(expander, ..) => Ok(Arc::new(TokenExpander::ProcMacro(expander))), + } +} + +fn macro_expand(db: &dyn AstDatabase, id: MacroCallId) -> ExpandResult<Option<Arc<tt::Subtree>>> { + let _p = profile::span("macro_expand"); + let loc: MacroCallLoc = db.lookup_intern_macro_call(id); + if let Some(eager) = &loc.eager { + return ExpandResult { + value: Some(eager.arg_or_expansion.clone()), + // FIXME: There could be errors here! + err: None, + }; + } + + let macro_arg = match db.macro_arg(id) { + Some(it) => it, + None => { + return ExpandResult::only_err(ExpandError::Other( + "Failed to lower macro args to token tree".into(), + )) + } + }; + + let expander = match db.macro_def(loc.def) { + Ok(it) => it, + // FIXME: This is weird -- we effectively report macro *definition* + // errors lazily, when we try to expand the macro. Instead, they should + // be reported at the definition site (when we construct a def map). + Err(err) => { + return ExpandResult::only_err(ExpandError::Other( + format!("invalid macro definition: {}", err).into(), + )) + } + }; + let ExpandResult { value: mut tt, err } = expander.expand(db, id, ¯o_arg.0); + // Set a hard limit for the expanded tt + let count = tt.count(); + if TOKEN_LIMIT.check(count).is_err() { + return ExpandResult::only_err(ExpandError::Other( + format!( + "macro invocation exceeds token limit: produced {} tokens, limit is {}", + count, + TOKEN_LIMIT.inner(), + ) + .into(), + )); + } + + fixup::reverse_fixups(&mut tt, ¯o_arg.1, ¯o_arg.2); + + ExpandResult { value: Some(Arc::new(tt)), err } +} + +fn macro_expand_error(db: &dyn AstDatabase, macro_call: MacroCallId) -> Option<ExpandError> { + db.macro_expand(macro_call).err +} + +fn expand_proc_macro(db: &dyn AstDatabase, id: MacroCallId) -> ExpandResult<tt::Subtree> { + let loc: MacroCallLoc = db.lookup_intern_macro_call(id); + let macro_arg = match db.macro_arg(id) { + Some(it) => it, + None => { + return ExpandResult::only_err(ExpandError::Other("No arguments for proc-macro".into())) + } + }; + + let expander = match loc.def.kind { + MacroDefKind::ProcMacro(expander, ..) => expander, + _ => unreachable!(), + }; + + let attr_arg = match &loc.kind { + MacroCallKind::Attr { attr_args, .. } => { + let mut attr_args = attr_args.0.clone(); + mbe::Shift::new(¯o_arg.0).shift_all(&mut attr_args); + Some(attr_args) + } + _ => None, + }; + + expander.expand(db, loc.krate, ¯o_arg.0, attr_arg.as_ref()) +} + +fn hygiene_frame(db: &dyn AstDatabase, file_id: HirFileId) -> Arc<HygieneFrame> { + Arc::new(HygieneFrame::new(db, file_id)) +} + +fn macro_expand_to(db: &dyn AstDatabase, id: MacroCallId) -> ExpandTo { + let loc: MacroCallLoc = db.lookup_intern_macro_call(id); + loc.kind.expand_to() +} + +fn token_tree_to_syntax_node( + tt: &tt::Subtree, + expand_to: ExpandTo, +) -> (Parse<SyntaxNode>, mbe::TokenMap) { + let entry_point = match expand_to { + ExpandTo::Statements => mbe::TopEntryPoint::MacroStmts, + ExpandTo::Items => mbe::TopEntryPoint::MacroItems, + ExpandTo::Pattern => mbe::TopEntryPoint::Pattern, + ExpandTo::Type => mbe::TopEntryPoint::Type, + ExpandTo::Expr => mbe::TopEntryPoint::Expr, + }; + mbe::token_tree_to_syntax_node(tt, entry_point) +} |