//! Documentation extraction and source mapping. //! //! This module handles the extraction and processing of doc comments and `#[doc = "..."]` //! attributes, including macro expansion for `#[doc = macro!()]` patterns. //! It builds a concatenated string of the full docs as well as a source map //! to map it back to AST (which is needed for things like resolving links in doc comments //! and highlight injection). use std::{ convert::Infallible, ops::{ControlFlow, Range}, }; use base_db::Crate; use cfg::CfgOptions; use either::Either; use hir_expand::{ AstId, ExpandTo, HirFileId, InFile, attrs::{AstPathExt, expand_cfg_attr_with_doc_comments}, mod_path::ModPath, span_map::SpanMap, }; use span::AstIdMap; use syntax::{ AstNode, AstToken, SyntaxNode, ast::{self, AttrDocCommentIter, IsString}, }; use tt::{TextRange, TextSize}; use crate::{db::DefDatabase, macro_call_as_call_id, nameres::MacroSubNs, resolver::Resolver}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub(crate) struct DocsSourceMapLine { /// The offset in [`Docs::docs`]. string_offset: TextSize, /// The offset in the AST of the text. `None` for macro-expanded doc strings /// where we cannot provide a faithful source mapping. ast_offset: Option, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Docs { /// The concatenated string of all `#[doc = "..."]` attributes and documentation comments. docs: String, /// A sorted map from an offset in `docs` to an offset in the source code. docs_source_map: Vec, /// If the item is an outlined module (`mod foo;`), `docs_source_map` store the concatenated /// list of the outline and inline docs (outline first). Then, this field contains the [`HirFileId`] /// of the outline declaration, and the index in `docs` from which the inline docs /// begin. outline_mod: Option<(HirFileId, usize)>, inline_file: HirFileId, /// The size the prepended prefix, which does not map to real doc comments. prefix_len: TextSize, /// The offset in `docs` from which the docs are inner attributes/comments. inline_inner_docs_start: Option, /// Like `inline_inner_docs_start`, but for `outline_mod`. This can happen only when merging `Docs` /// (as outline modules don't have inner attributes). outline_inner_docs_start: Option, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum IsInnerDoc { No, Yes, } impl IsInnerDoc { #[inline] pub fn yes(self) -> bool { self == IsInnerDoc::Yes } } impl Docs { #[inline] pub fn docs(&self) -> &str { &self.docs } #[inline] pub fn into_docs(self) -> String { self.docs } pub fn find_ast_range( &self, mut string_range: TextRange, ) -> Option<(InFile, IsInnerDoc)> { if string_range.start() < self.prefix_len { return None; } string_range -= self.prefix_len; let mut file = self.inline_file; let mut inner_docs_start = self.inline_inner_docs_start; // Check whether the range is from the outline, the inline, or both. let source_map = if let Some((outline_mod_file, outline_mod_end)) = self.outline_mod { if let Some(first_inline) = self.docs_source_map.get(outline_mod_end) { if string_range.end() <= first_inline.string_offset { // The range is completely in the outline. file = outline_mod_file; inner_docs_start = self.outline_inner_docs_start; &self.docs_source_map[..outline_mod_end] } else if string_range.start() >= first_inline.string_offset { // The range is completely in the inline. &self.docs_source_map[outline_mod_end..] } else { // The range is combined from the outline and the inline - cannot map it back. return None; } } else { // There is no inline. file = outline_mod_file; inner_docs_start = self.outline_inner_docs_start; &self.docs_source_map } } else { // There is no outline. &self.docs_source_map }; let after_range = source_map.partition_point(|line| line.string_offset <= string_range.start()) - 1; let after_range = &source_map[after_range..]; let line = after_range.first()?; // Unmapped lines (from macro-expanded docs) cannot be mapped back to AST. let ast_offset = line.ast_offset?; if after_range.get(1).is_some_and(|next_line| next_line.string_offset < string_range.end()) { // The range is combined from two lines - cannot map it back. return None; } let ast_range = string_range - line.string_offset + ast_offset; let is_inner = if inner_docs_start .is_some_and(|inner_docs_start| string_range.start() >= inner_docs_start) { IsInnerDoc::Yes } else { IsInnerDoc::No }; Some((InFile::new(file, ast_range), is_inner)) } #[inline] pub fn shift_by(&mut self, offset: TextSize) { self.prefix_len += offset; } pub fn prepend_str(&mut self, s: &str) { self.prefix_len += TextSize::of(s); self.docs.insert_str(0, s); } pub fn append_str(&mut self, s: &str) { self.docs.push_str(s); } pub fn append(&mut self, other: &Docs) { let other_offset = TextSize::of(&self.docs); assert!( self.outline_mod.is_none() && other.outline_mod.is_none(), "cannot merge `Docs` that have `outline_mod` set" ); self.outline_mod = Some((self.inline_file, self.docs_source_map.len())); self.inline_file = other.inline_file; self.outline_inner_docs_start = self.inline_inner_docs_start; self.inline_inner_docs_start = other.inline_inner_docs_start.map(|it| it + other_offset); self.docs.push_str(&other.docs); self.docs_source_map.extend(other.docs_source_map.iter().map( |&DocsSourceMapLine { string_offset, ast_offset }| DocsSourceMapLine { ast_offset, string_offset: string_offset + other_offset, }, )); } fn extend_with_doc_comment(&mut self, comment: ast::Comment, indent: &mut usize) { let Some((doc, offset)) = comment.doc_comment() else { return }; self.extend_with_doc_str(doc, comment.syntax().text_range().start() + offset, indent); } fn extend_with_doc_attr(&mut self, value: ast::String, indent: &mut usize) { let Some(value_offset) = value.text_range_between_quotes() else { return }; let value_offset = value_offset.start(); let Ok(value) = value.value() else { return }; // FIXME: Handle source maps for escaped text. self.extend_with_doc_str(&value, value_offset, indent); } pub(crate) fn extend_with_doc_str( &mut self, doc: &str, offset_in_ast: TextSize, indent: &mut usize, ) { self.push_doc_lines(doc, Some(offset_in_ast), indent); } fn extend_with_unmapped_doc_str(&mut self, doc: &str, indent: &mut usize) { self.push_doc_lines(doc, None, indent); } fn push_doc_lines(&mut self, doc: &str, mut ast_offset: Option, indent: &mut usize) { for line in doc.split('\n') { self.docs_source_map .push(DocsSourceMapLine { string_offset: TextSize::of(&self.docs), ast_offset }); if let Some(ref mut offset) = ast_offset { *offset += TextSize::of(line) + TextSize::of("\n"); } let line = line.trim_end(); if let Some(line_indent) = line.chars().position(|ch| !ch.is_whitespace()) { // Empty lines are handled because `position()` returns `None` for them. *indent = std::cmp::min(*indent, line_indent); } self.docs.push_str(line); self.docs.push('\n'); } } fn remove_indent(&mut self, indent: usize, start_source_map_index: usize) { /// In case of panics, we want to avoid corrupted UTF-8 in `self.docs`, so we clear it. struct Guard<'a>(&'a mut Docs); impl Drop for Guard<'_> { fn drop(&mut self) { let Docs { docs, docs_source_map, outline_mod, inline_file: _, prefix_len: _, inline_inner_docs_start: _, outline_inner_docs_start: _, } = self.0; // Don't use `String::clear()` here because it's not guaranteed to not do UTF-8-dependent things, // and we may have temporarily broken the string's encoding. unsafe { docs.as_mut_vec() }.clear(); // This is just to avoid panics down the road. docs_source_map.clear(); *outline_mod = None; } } if self.docs.is_empty() { return; } let guard = Guard(self); let source_map = &mut guard.0.docs_source_map[start_source_map_index..]; let Some(&DocsSourceMapLine { string_offset: mut copy_into, .. }) = source_map.first() else { return; }; // We basically want to remove multiple ranges from a string. Doing this efficiently (without O(N^2) // or allocations) requires unsafe. Basically, for each line, we copy the line minus the indent into // consecutive to the previous line (which may have moved). Then at the end we truncate. let mut accumulated_offset = TextSize::new(0); for idx in 0..source_map.len() { let string_end_offset = source_map .get(idx + 1) .map_or_else(|| TextSize::of(&guard.0.docs), |next_attr| next_attr.string_offset); let line_source = &mut source_map[idx]; let line_docs = &guard.0.docs[TextRange::new(line_source.string_offset, string_end_offset)]; let line_docs_len = TextSize::of(line_docs); let indent_size = line_docs.char_indices().nth(indent).map_or_else( || TextSize::of(line_docs) - TextSize::of("\n"), |(offset, _)| TextSize::new(offset as u32), ); unsafe { guard.0.docs.as_bytes_mut() }.copy_within( Range::::from(TextRange::new( line_source.string_offset + indent_size, string_end_offset, )), copy_into.into(), ); copy_into += line_docs_len - indent_size; if let Some(inner_attrs_start) = &mut guard.0.inline_inner_docs_start && *inner_attrs_start == line_source.string_offset { *inner_attrs_start -= accumulated_offset; } // The removals in the string accumulate, but in the AST not, because it already points // to the beginning of each attribute. // Also, we need to shift the AST offset of every line, but the string offset of the first // line should not get shifted (in general, the shift for the string offset is by the // number of lines until the current one, excluding the current one). line_source.string_offset -= accumulated_offset; if let Some(ref mut ast_offset) = line_source.ast_offset { *ast_offset += indent_size; } accumulated_offset += indent_size; } // Don't use `String::truncate()` here because it's not guaranteed to not do UTF-8-dependent things, // and we may have temporarily broken the string's encoding. unsafe { guard.0.docs.as_mut_vec() }.truncate(copy_into.into()); std::mem::forget(guard); } fn remove_last_newline(&mut self) { self.docs.truncate(self.docs.len().saturating_sub(1)); } fn shrink_to_fit(&mut self) { let Docs { docs, docs_source_map, outline_mod: _, inline_file: _, prefix_len: _, inline_inner_docs_start: _, outline_inner_docs_start: _, } = self; docs.shrink_to_fit(); docs_source_map.shrink_to_fit(); } } struct DocMacroExpander<'db> { db: &'db dyn DefDatabase, krate: Crate, recursion_depth: usize, recursion_limit: usize, } struct DocExprSourceCtx<'db> { resolver: Resolver<'db>, file_id: HirFileId, ast_id_map: &'db AstIdMap, span_map: SpanMap<'db>, } fn expand_doc_expr_via_macro_pipeline<'db>( expander: &mut DocMacroExpander<'db>, source_ctx: &DocExprSourceCtx<'db>, expr: ast::Expr, ) -> Option { match expr { ast::Expr::ParenExpr(paren_expr) => { expand_doc_expr_via_macro_pipeline(expander, source_ctx, paren_expr.expr()?) } ast::Expr::Literal(literal) => match literal.kind() { ast::LiteralKind::String(string) => string.value().ok().map(Into::into), _ => None, }, ast::Expr::MacroExpr(macro_expr) => { let macro_call = macro_expr.macro_call()?; let (expr, new_source_ctx) = expand_doc_macro_call(expander, source_ctx, macro_call)?; // After expansion, the expr lives in the expansion file; use its source context. expand_doc_expr_via_macro_pipeline(expander, &new_source_ctx, expr) } _ => None, } } fn expand_doc_macro_call<'db>( expander: &mut DocMacroExpander<'db>, source_ctx: &DocExprSourceCtx<'db>, macro_call: ast::MacroCall, ) -> Option<(ast::Expr, DocExprSourceCtx<'db>)> { if expander.recursion_depth >= expander.recursion_limit { return None; } let path = macro_call.path()?; let mod_path = ModPath::from_src(expander.db, path, &mut |range| { source_ctx.span_map.span_for_range(range).ctx })?; let call_site = source_ctx.span_map.span_for_range(macro_call.syntax().text_range()); let ast_id = AstId::new(source_ctx.file_id, source_ctx.ast_id_map.ast_id(¯o_call)); let call_id = macro_call_as_call_id( expander.db, ast_id, &mod_path, call_site.ctx, ExpandTo::Expr, expander.krate, |path| { source_ctx.resolver.resolve_path_as_macro_def(expander.db, path, Some(MacroSubNs::Bang)) }, &mut |_, _| (), ) .ok()? .value?; expander.recursion_depth += 1; let parse = expander.db.parse_macro_expansion(call_id).value.0.clone(); let expr = parse.cast::().map(|parse| parse.tree())?; expander.recursion_depth -= 1; // Build a new source context for the expansion file so that any further // recursive expansion (e.g. a user macro expanding to `concat!(...)`) // correctly resolves AstIds and spans in the expansion. let expansion_file_id: HirFileId = call_id.into(); let new_source_ctx = DocExprSourceCtx { resolver: source_ctx.resolver.clone(), file_id: expansion_file_id, ast_id_map: expander.db.ast_id_map(expansion_file_id), span_map: expander.db.span_map(expansion_file_id), }; Some((expr, new_source_ctx)) } fn extend_with_attrs<'a, 'db>( result: &mut Docs, db: &'db dyn DefDatabase, krate: Crate, node: &SyntaxNode, file_id: HirFileId, expect_inner_attrs: bool, indent: &mut usize, get_cfg_options: &dyn Fn() -> &'a CfgOptions, cfg_options: &mut Option<&'a CfgOptions>, make_resolver: &dyn Fn() -> Resolver<'db>, ) { // Lazily initialised when we first encounter a `#[doc = macro!()]`. let mut expander: Option<(DocMacroExpander<'db>, DocExprSourceCtx<'db>)> = None; expand_cfg_attr_with_doc_comments::<_, Infallible>( AttrDocCommentIter::from_syntax_node(node).filter(|attr| match attr { Either::Left(attr) => attr.kind().is_inner() == expect_inner_attrs, Either::Right(comment) => comment .kind() .doc .is_some_and(|kind| (kind == ast::CommentPlacement::Inner) == expect_inner_attrs), }), || *cfg_options.get_or_insert_with(get_cfg_options), |attr| { match attr { Either::Right(doc_comment) => result.extend_with_doc_comment(doc_comment, indent), Either::Left((attr, _)) => match attr { ast::Meta::KeyValueMeta(attr) if attr.path().is1("doc") => { if let Some(value) = attr.expr() { if let ast::Expr::Literal(value) = &value && let ast::LiteralKind::String(value) = value.kind() { result.extend_with_doc_attr(value, indent); } else { let (exp, ctx) = expander.get_or_insert_with(|| { let resolver = make_resolver(); let def_map = resolver.top_level_def_map(); let recursion_limit = def_map.recursion_limit() as usize; ( DocMacroExpander { db, krate, recursion_depth: 0, recursion_limit, }, DocExprSourceCtx { resolver, file_id, ast_id_map: db.ast_id_map(file_id), span_map: db.span_map(file_id), }, ) }); if let Some(expanded) = expand_doc_expr_via_macro_pipeline(exp, ctx, value) { result.extend_with_unmapped_doc_str(&expanded, indent); } } } } _ => {} }, } ControlFlow::Continue(()) }, ); } pub(crate) fn extract_docs<'a, 'db>( db: &'db dyn DefDatabase, krate: Crate, resolver: &dyn Fn() -> Resolver<'db>, get_cfg_options: &dyn Fn() -> &'a CfgOptions, source: InFile, outer_mod_decl: Option>, inner_attrs_node: Option, ) -> Option> { let mut result = Docs { docs: String::new(), docs_source_map: Vec::new(), outline_mod: None, inline_file: source.file_id, prefix_len: TextSize::new(0), inline_inner_docs_start: None, outline_inner_docs_start: None, }; let mut cfg_options = None; if let Some(outer_mod_decl) = outer_mod_decl { let mut indent = usize::MAX; // For outer docs (the `mod foo;` declaration), use the module's own resolver. extend_with_attrs( &mut result, db, krate, outer_mod_decl.value.syntax(), outer_mod_decl.file_id, false, &mut indent, get_cfg_options, &mut cfg_options, resolver, ); result.remove_indent(indent, 0); result.outline_mod = Some((outer_mod_decl.file_id, result.docs_source_map.len())); } let inline_source_map_start = result.docs_source_map.len(); let mut indent = usize::MAX; // For inline docs, use the item's own resolver. extend_with_attrs( &mut result, db, krate, source.value.syntax(), source.file_id, false, &mut indent, get_cfg_options, &mut cfg_options, resolver, ); if let Some(inner_attrs_node) = &inner_attrs_node { result.inline_inner_docs_start = Some(TextSize::of(&result.docs)); extend_with_attrs( &mut result, db, krate, inner_attrs_node, source.file_id, true, &mut indent, get_cfg_options, &mut cfg_options, resolver, ); } result.remove_indent(indent, inline_source_map_start); result.remove_last_newline(); result.shrink_to_fit(); if result.docs.is_empty() { None } else { Some(Box::new(result)) } } #[cfg(test)] mod tests { use expect_test::expect; use hir_expand::InFile; use test_fixture::WithFixture; use tt::{TextRange, TextSize}; use crate::test_db::TestDB; use super::{Docs, IsInnerDoc}; #[test] fn docs() { let (_db, file_id) = TestDB::with_single_file(""); let mut docs = Docs { docs: String::new(), docs_source_map: Vec::new(), outline_mod: None, inline_file: file_id.into(), prefix_len: TextSize::new(0), inline_inner_docs_start: None, outline_inner_docs_start: None, }; let mut indent = usize::MAX; let outer = " foo\n\tbar baz"; let mut ast_offset = TextSize::new(123); for line in outer.split('\n') { docs.extend_with_doc_str(line, ast_offset, &mut indent); ast_offset += TextSize::of(line) + TextSize::of("\n"); } docs.inline_inner_docs_start = Some(TextSize::of(&docs.docs)); ast_offset += TextSize::new(123); let inner = " bar \n baz"; for line in inner.split('\n') { docs.extend_with_doc_str(line, ast_offset, &mut indent); ast_offset += TextSize::of(line) + TextSize::of("\n"); } assert_eq!(indent, 1); expect![[r#" [ DocsSourceMapLine { string_offset: 0, ast_offset: Some( 123, ), }, DocsSourceMapLine { string_offset: 5, ast_offset: Some( 128, ), }, DocsSourceMapLine { string_offset: 15, ast_offset: Some( 261, ), }, DocsSourceMapLine { string_offset: 20, ast_offset: Some( 267, ), }, ] "#]] .assert_debug_eq(&docs.docs_source_map); docs.remove_indent(indent, 0); assert_eq!(docs.inline_inner_docs_start, Some(TextSize::new(13))); assert_eq!(docs.docs, "foo\nbar baz\nbar\nbaz\n"); expect![[r#" [ DocsSourceMapLine { string_offset: 0, ast_offset: Some( 124, ), }, DocsSourceMapLine { string_offset: 4, ast_offset: Some( 129, ), }, DocsSourceMapLine { string_offset: 13, ast_offset: Some( 262, ), }, DocsSourceMapLine { string_offset: 17, ast_offset: Some( 268, ), }, ] "#]] .assert_debug_eq(&docs.docs_source_map); docs.append(&docs.clone()); docs.prepend_str("prefix---"); assert_eq!(docs.docs, "prefix---foo\nbar baz\nbar\nbaz\nfoo\nbar baz\nbar\nbaz\n"); expect![[r#" [ DocsSourceMapLine { string_offset: 0, ast_offset: Some( 124, ), }, DocsSourceMapLine { string_offset: 4, ast_offset: Some( 129, ), }, DocsSourceMapLine { string_offset: 13, ast_offset: Some( 262, ), }, DocsSourceMapLine { string_offset: 17, ast_offset: Some( 268, ), }, DocsSourceMapLine { string_offset: 21, ast_offset: Some( 124, ), }, DocsSourceMapLine { string_offset: 25, ast_offset: Some( 129, ), }, DocsSourceMapLine { string_offset: 34, ast_offset: Some( 262, ), }, DocsSourceMapLine { string_offset: 38, ast_offset: Some( 268, ), }, ] "#]] .assert_debug_eq(&docs.docs_source_map); let range = |start, end| TextRange::new(TextSize::new(start), TextSize::new(end)); let in_file = |range| InFile::new(file_id.into(), range); assert_eq!(docs.find_ast_range(range(0, 2)), None); assert_eq!(docs.find_ast_range(range(8, 10)), None); assert_eq!( docs.find_ast_range(range(9, 10)), Some((in_file(range(124, 125)), IsInnerDoc::No)) ); assert_eq!(docs.find_ast_range(range(20, 23)), None); assert_eq!( docs.find_ast_range(range(23, 25)), Some((in_file(range(263, 265)), IsInnerDoc::Yes)) ); } }