Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-core/src/syntax.rs')
| -rw-r--r-- | helix-core/src/syntax.rs | 2582 |
1 files changed, 622 insertions, 1960 deletions
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index dfc32342..e232ee69 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -1,323 +1,206 @@ pub mod config; -mod tree_cursor; - -use crate::{ - chars::char_is_line_ending, - regex::Regex, - transaction::{ChangeSet, Operation}, - RopeSlice, Tendril, -}; - -use ahash::RandomState; -use arc_swap::{ArcSwap, Guard}; -use bitflags::bitflags; -use config::{Configuration, FileType, LanguageConfiguration, LanguageServerConfiguration}; -use hashbrown::raw::RawTable; -use helix_stdx::rope::{self, RopeSliceExt}; -use slotmap::{DefaultKey as LayerId, HopSlotMap}; use std::{ borrow::Cow, - cell::RefCell, - collections::{HashMap, VecDeque}, - fmt::{self, Write}, - hash::{Hash, Hasher}, - mem::replace, + collections::HashMap, + fmt, iter, + ops::{self, RangeBounds}, path::Path, sync::Arc, + time::Duration, }; -use once_cell::sync::Lazy; - -use helix_loader::grammar::{get_language, load_runtime_file}; +use anyhow::{Context, Result}; +use arc_swap::{ArcSwap, Guard}; +use config::{Configuration, FileType, LanguageConfiguration, LanguageServerConfiguration}; +use helix_loader::grammar::get_language; +use helix_stdx::rope::RopeSliceExt as _; +use once_cell::sync::OnceCell; +use ropey::RopeSlice; +use tree_house::{ + highlighter, + query_iter::QueryIter, + tree_sitter::{Grammar, InactiveQueryCursor, InputEdit, Node, Query, RopeInput, Tree}, + Error, InjectionLanguageMarker, LanguageConfig as SyntaxConfig, Layer, +}; -pub use tree_cursor::TreeCursor; +use crate::{indent::IndentQuery, tree_sitter, ChangeSet, Language}; -#[derive(Debug)] -pub struct TextObjectQuery { - pub query: Query, -} +pub use tree_house::{ + highlighter::{Highlight, HighlightEvent}, + Error as HighlighterError, LanguageLoader, TreeCursor, TREE_SITTER_MATCH_LIMIT, +}; #[derive(Debug)] -pub enum CapturedNode<'a> { - Single(Node<'a>), - /// Guaranteed to be not empty - Grouped(Vec<Node<'a>>), +pub struct LanguageData { + config: 
Arc<LanguageConfiguration>, + syntax: OnceCell<Option<SyntaxConfig>>, + indent_query: OnceCell<Option<IndentQuery>>, + textobject_query: OnceCell<Option<TextObjectQuery>>, } -impl CapturedNode<'_> { - pub fn start_byte(&self) -> usize { - match self { - Self::Single(n) => n.start_byte(), - Self::Grouped(ns) => ns[0].start_byte(), - } - } - - pub fn end_byte(&self) -> usize { - match self { - Self::Single(n) => n.end_byte(), - Self::Grouped(ns) => ns.last().unwrap().end_byte(), +impl LanguageData { + fn new(config: LanguageConfiguration) -> Self { + Self { + config: Arc::new(config), + syntax: OnceCell::new(), + indent_query: OnceCell::new(), + textobject_query: OnceCell::new(), } } - pub fn byte_range(&self) -> std::ops::Range<usize> { - self.start_byte()..self.end_byte() - } -} - -/// The maximum number of in-progress matches a TS cursor can consider at once. -/// This is set to a constant in order to avoid performance problems for medium to large files. Set with `set_match_limit`. -/// Using such a limit means that we lose valid captures, so there is fundamentally a tradeoff here. -/// -/// -/// Old tree sitter versions used a limit of 32 by default until this limit was removed in version `0.19.5` (must now be set manually). -/// However, this causes performance issues for medium to large files. -/// In helix, this problem caused treesitter motions to take multiple seconds to complete in medium-sized rust files (3k loc). -/// -/// -/// Neovim also encountered this problem and reintroduced this limit after it was removed upstream -/// (see <https://github.com/neovim/neovim/issues/14897> and <https://github.com/neovim/neovim/pull/14915>). -/// The number used here is fundamentally a tradeoff between breaking some obscure edge cases and performance. -/// -/// -/// Neovim chose 64 for this value somewhat arbitrarily (<https://github.com/neovim/neovim/pull/18397>). -/// 64 is too low for some languages though. 
In particular, it breaks some highlighting for record fields in Erlang record definitions. -/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high. -const TREE_SITTER_MATCH_LIMIT: u32 = 256; - -impl TextObjectQuery { - /// Run the query on the given node and return sub nodes which match given - /// capture ("function.inside", "class.around", etc). - /// - /// Captures may contain multiple nodes by using quantifiers (+, *, etc), - /// and support for this is partial and could use improvement. - /// - /// ```query - /// (comment)+ @capture - /// - /// ; OR - /// ( - /// (comment)* - /// . - /// (function) - /// ) @capture - /// ``` - pub fn capture_nodes<'a>( - &'a self, - capture_name: &str, - node: Node<'a>, - slice: RopeSlice<'a>, - cursor: &'a mut QueryCursor, - ) -> Option<impl Iterator<Item = CapturedNode<'a>>> { - self.capture_nodes_any(&[capture_name], node, slice, cursor) + pub fn config(&self) -> &Arc<LanguageConfiguration> { + &self.config } - /// Find the first capture that exists out of all given `capture_names` - /// and return sub nodes that match this capture. - pub fn capture_nodes_any<'a>( - &'a self, - capture_names: &[&str], - node: Node<'a>, - slice: RopeSlice<'a>, - cursor: &'a mut QueryCursor, - ) -> Option<impl Iterator<Item = CapturedNode<'a>>> { - let capture_idx = capture_names - .iter() - .find_map(|cap| self.query.capture_index_for_name(cap))?; - - cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); + /// Loads the grammar and compiles the highlights, injections and locals for the language. + /// This function should only be used by this module or the xtask crate. + pub fn compile_syntax_config( + config: &LanguageConfiguration, + loader: &Loader, + ) -> Result<Option<SyntaxConfig>> { + let name = &config.language_id; + let parser_name = config.grammar.as_deref().unwrap_or(name); + let Some(grammar) = get_language(parser_name)? 
else { + log::info!("Skipping syntax config for '{name}' because the parser's shared library does not exist"); + return Ok(None); + }; + let highlight_query_text = read_query(name, "highlights.scm"); + let injection_query_text = read_query(name, "injections.scm"); + let local_query_text = read_query(name, "locals.scm"); + let config = SyntaxConfig::new( + grammar, + &highlight_query_text, + &injection_query_text, + &local_query_text, + ) + .with_context(|| format!("Failed to compile highlights for '{name}'"))?; - let nodes = cursor - .captures(&self.query, node, RopeProvider(slice)) - .filter_map(move |(mat, _)| { - let nodes: Vec<_> = mat - .captures - .iter() - .filter_map(|cap| (cap.index == capture_idx).then_some(cap.node)) - .collect(); - - if nodes.len() > 1 { - Some(CapturedNode::Grouped(nodes)) - } else { - nodes.into_iter().map(CapturedNode::Single).next() - } - }); + reconfigure_highlights(&config, &loader.scopes()); - Some(nodes) + Ok(Some(config)) } -} - -pub fn read_query(language: &str, filename: &str) -> String { - static INHERITS_REGEX: Lazy<Regex> = - Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap()); - - let query = load_runtime_file(language, filename).unwrap_or_default(); - - // replaces all "; inherits <language>(,<language>)*" with the queries of the given language(s) - INHERITS_REGEX - .replace_all(&query, |captures: &regex::Captures| { - captures[1] - .split(',') - .fold(String::new(), |mut output, language| { - // `write!` to a String cannot fail.
- write!(output, "\n{}\n", read_query(language, filename)).unwrap(); - output - }) - }) - .to_string() -} - -impl config::LanguageConfiguration { - fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> { - let highlights_query = read_query(&self.language_id, "highlights.scm"); - // always highlight syntax errors - // highlights_query += "\n(ERROR) @error"; - let injections_query = read_query(&self.language_id, "injections.scm"); - let locals_query = read_query(&self.language_id, "locals.scm"); - - if highlights_query.is_empty() { - None - } else { - let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id)) - .map_err(|err| { - log::error!( - "Failed to load tree-sitter parser for language {:?}: {:#}", - self.language_id, - err - ) - }) - .ok()?; - let config = HighlightConfiguration::new( - language, - &highlights_query, - &injections_query, - &locals_query, - ) - .map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?}", self.language_id, err)) - .ok()?; - - config.configure(scopes); - Some(Arc::new(config)) - } + fn syntax_config(&self, loader: &Loader) -> Option<&SyntaxConfig> { + self.syntax + .get_or_init(|| { + Self::compile_syntax_config(&self.config, loader) + .map_err(|err| { + log::error!("{err:#}"); + }) + .ok() + .flatten() + }) + .as_ref() } - pub fn reconfigure(&self, scopes: &[String]) { - if let Some(Some(config)) = self.highlight_config.get() { - config.configure(scopes); + /// Compiles the indents.scm query for a language. + /// This function should only be used by this module or the xtask crate. 
+ pub fn compile_indent_query( + grammar: Grammar, + config: &LanguageConfiguration, + ) -> Result<Option<IndentQuery>> { + let name = &config.language_id; + let text = read_query(name, "indents.scm"); + if text.is_empty() { + return Ok(None); } + let indent_query = IndentQuery::new(grammar, &text) + .with_context(|| format!("Failed to compile indents.scm query for '{name}'"))?; + Ok(Some(indent_query)) } - pub fn highlight_config(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> { - self.highlight_config - .get_or_init(|| self.initialize_highlight(scopes)) - .clone() - } - - pub fn is_highlight_initialized(&self) -> bool { - self.highlight_config.get().is_some() - } - - pub fn indent_query(&self) -> Option<&Query> { + fn indent_query(&self, loader: &Loader) -> Option<&IndentQuery> { self.indent_query - .get_or_init(|| self.load_query("indents.scm")) - .as_ref() - } - - pub fn textobject_query(&self) -> Option<&TextObjectQuery> { - self.textobject_query .get_or_init(|| { - self.load_query("textobjects.scm") - .map(|query| TextObjectQuery { query }) + let grammar = self.syntax_config(loader)?.grammar; + Self::compile_indent_query(grammar, &self.config) + .map_err(|err| { + log::error!("{err}"); + }) + .ok() + .flatten() }) .as_ref() } - pub fn scope(&self) -> &str { - &self.scope - } - - fn load_query(&self, kind: &str) -> Option<Query> { - let query_text = read_query(&self.language_id, kind); - if query_text.is_empty() { - return None; + /// Compiles the textobjects.scm query for a language. + /// This function should only be used by this module or the xtask crate. 
+ pub fn compile_textobject_query( + grammar: Grammar, + config: &LanguageConfiguration, + ) -> Result<Option<TextObjectQuery>> { + let name = &config.language_id; + let text = read_query(name, "textobjects.scm"); + if text.is_empty() { + return Ok(None); } - let lang = &self.highlight_config.get()?.as_ref()?.language; - Query::new(lang, &query_text) - .map_err(|e| { - log::error!( - "Failed to parse {} queries for {}: {}", - kind, - self.language_id, - e - ) - }) - .ok() + let query = Query::new(grammar, &text, |_, _| Ok(())) + .with_context(|| format!("Failed to compile textobjects.scm queries for '{name}'"))?; + Ok(Some(TextObjectQuery::new(query))) } -} - -#[derive(Debug)] -struct FileTypeGlob { - glob: globset::Glob, - language_id: usize, -} -impl FileTypeGlob { - fn new(glob: globset::Glob, language_id: usize) -> Self { - Self { glob, language_id } + fn textobject_query(&self, loader: &Loader) -> Option<&TextObjectQuery> { + self.textobject_query + .get_or_init(|| { + let grammar = self.syntax_config(loader)?.grammar; + Self::compile_textobject_query(grammar, &self.config) + .map_err(|err| { + log::error!("{err}"); + }) + .ok() + .flatten() + }) + .as_ref() } -} -#[derive(Debug)] -struct FileTypeGlobMatcher { - matcher: globset::GlobSet, - file_types: Vec<FileTypeGlob>, -} - -impl Default for FileTypeGlobMatcher { - fn default() -> Self { - Self { - matcher: globset::GlobSet::empty(), - file_types: Default::default(), + fn reconfigure(&self, scopes: &[String]) { + if let Some(Some(config)) = self.syntax.get() { + reconfigure_highlights(config, scopes); } } } -impl FileTypeGlobMatcher { - fn new(file_types: Vec<FileTypeGlob>) -> Result<Self, globset::Error> { - let mut builder = globset::GlobSetBuilder::new(); - for file_type in &file_types { - builder.add(file_type.glob.clone()); +fn reconfigure_highlights(config: &SyntaxConfig, recognized_names: &[String]) { + config.configure(move |capture_name| { + let capture_parts: Vec<_> = 
capture_name.split('.').collect(); + + let mut best_index = None; + let mut best_match_len = 0; + for (i, recognized_name) in recognized_names.iter().enumerate() { + let mut len = 0; + let mut matches = true; + for (i, part) in recognized_name.split('.').enumerate() { + match capture_parts.get(i) { + Some(capture_part) if *capture_part == part => len += 1, + _ => { + matches = false; + break; + } + } + } + if matches && len > best_match_len { + best_index = Some(i); + best_match_len = len; + } } - - Ok(Self { - matcher: builder.build()?, - file_types, - }) - } - - fn language_id_for_path(&self, path: &Path) -> Option<&usize> { - self.matcher - .matches(path) - .iter() - .filter_map(|idx| self.file_types.get(*idx)) - .max_by_key(|file_type| file_type.glob.glob().len()) - .map(|file_type| &file_type.language_id) - } + best_index.map(|idx| Highlight::new(idx as u32)) + }); } -// Expose loader as Lazy<> global since it's always static? +pub fn read_query(lang: &str, query_filename: &str) -> String { + tree_house::read_query(lang, |language| { + helix_loader::grammar::load_runtime_file(language, query_filename).unwrap_or_default() + }) +} #[derive(Debug, Default)] pub struct Loader { - // highlight_names ? 
- language_configs: Vec<Arc<LanguageConfiguration>>, - language_config_ids_by_extension: HashMap<String, usize>, // Vec<usize> - language_config_ids_glob_matcher: FileTypeGlobMatcher, - language_config_ids_by_shebang: HashMap<String, usize>, - + languages: Vec<LanguageData>, + languages_by_extension: HashMap<String, Language>, + languages_by_shebang: HashMap<String, Language>, + languages_glob_matcher: FileTypeGlobMatcher, language_server_configs: HashMap<String, LanguageServerConfiguration>, - scopes: ArcSwap<Vec<String>>, } @@ -325,96 +208,72 @@ pub type LoaderError = globset::Error; impl Loader { pub fn new(config: Configuration) -> Result<Self, LoaderError> { - let mut language_configs = Vec::new(); - let mut language_config_ids_by_extension = HashMap::new(); - let mut language_config_ids_by_shebang = HashMap::new(); + let mut languages = Vec::with_capacity(config.language.len()); + let mut languages_by_extension = HashMap::new(); + let mut languages_by_shebang = HashMap::new(); let mut file_type_globs = Vec::new(); - for config in config.language { - // get the next id - let language_id = language_configs.len(); + for mut config in config.language { + let language = Language(languages.len() as u32); + config.language = Some(language); for file_type in &config.file_types { - // entry().or_insert(Vec::new).push(language_id); match file_type { FileType::Extension(extension) => { - language_config_ids_by_extension.insert(extension.clone(), language_id); + languages_by_extension.insert(extension.clone(), language); } FileType::Glob(glob) => { - file_type_globs.push(FileTypeGlob::new(glob.to_owned(), language_id)); + file_type_globs.push(FileTypeGlob::new(glob.to_owned(), language)); } }; } for shebang in &config.shebangs { - language_config_ids_by_shebang.insert(shebang.clone(), language_id); + languages_by_shebang.insert(shebang.clone(), language); } - language_configs.push(Arc::new(config)); + languages.push(LanguageData::new(config)); } Ok(Self { - 
language_configs, - language_config_ids_by_extension, - language_config_ids_glob_matcher: FileTypeGlobMatcher::new(file_type_globs)?, - language_config_ids_by_shebang, + languages, + languages_by_extension, + languages_by_shebang, + languages_glob_matcher: FileTypeGlobMatcher::new(file_type_globs)?, language_server_configs: config.language_server, scopes: ArcSwap::from_pointee(Vec::new()), }) } - pub fn language_config_for_file_name(&self, path: &Path) -> Option<Arc<LanguageConfiguration>> { - // Find all the language configurations that match this file name - // or a suffix of the file name. - let configuration_id = self - .language_config_ids_glob_matcher - .language_id_for_path(path) - .or_else(|| { - path.extension() - .and_then(|extension| extension.to_str()) - .and_then(|extension| self.language_config_ids_by_extension.get(extension)) - }); - - configuration_id.and_then(|&id| self.language_configs.get(id).cloned()) - - // TODO: content_regex handling conflict resolution + pub fn languages(&self) -> impl ExactSizeIterator<Item = (Language, &LanguageData)> { + self.languages + .iter() + .enumerate() + .map(|(idx, data)| (Language(idx as u32), data)) } - pub fn language_config_for_shebang( - &self, - source: RopeSlice, - ) -> Option<Arc<LanguageConfiguration>> { - let line = Cow::from(source.line(0)); - static SHEBANG_REGEX: Lazy<Regex> = - Lazy::new(|| Regex::new(&["^", SHEBANG].concat()).unwrap()); - let configuration_id = SHEBANG_REGEX - .captures(&line) - .and_then(|cap| self.language_config_ids_by_shebang.get(&cap[1])); + pub fn language_configs(&self) -> impl ExactSizeIterator<Item = &LanguageConfiguration> { + self.languages.iter().map(|language| &*language.config) + } - configuration_id.and_then(|&id| self.language_configs.get(id).cloned()) + pub fn language(&self, lang: Language) -> &LanguageData { + &self.languages[lang.idx()] } - pub fn language_config_for_scope(&self, scope: &str) -> Option<Arc<LanguageConfiguration>> { - self.language_configs - 
.iter() - .find(|config| config.scope == scope) - .cloned() + pub fn language_for_name(&self, name: impl PartialEq<String>) -> Option<Language> { + self.languages.iter().enumerate().find_map(|(idx, config)| { + (name == config.config.language_id).then_some(Language(idx as u32)) + }) } - pub fn language_config_for_language_id( - &self, - id: impl PartialEq<String>, - ) -> Option<Arc<LanguageConfiguration>> { - self.language_configs - .iter() - .find(|config| id.eq(&config.language_id)) - .cloned() + pub fn language_for_scope(&self, scope: &str) -> Option<Language> { + self.languages.iter().enumerate().find_map(|(idx, config)| { + (scope == config.config.scope).then_some(Language(idx as u32)) + }) } - /// Unlike `language_config_for_language_id`, which only returns Some for an exact id, this - /// function will perform a regex match on the given string to find the closest language match. - pub fn language_config_for_name(&self, slice: RopeSlice) -> Option<Arc<LanguageConfiguration>> { + pub fn language_for_match(&self, text: RopeSlice) -> Option<Language> { // PERF: If the name matches up with the id, then this saves the need to do expensive regex. 
- let shortcircuit = self.language_config_for_language_id(slice); + let shortcircuit = self.language_for_name(text); if shortcircuit.is_some() { return shortcircuit; } @@ -423,129 +282,145 @@ impl Loader { let mut best_match_length = 0; let mut best_match_position = None; - for (i, configuration) in self.language_configs.iter().enumerate() { - if let Some(injection_regex) = &configuration.injection_regex { - if let Some(mat) = injection_regex.find(slice.regex_input()) { + for (idx, data) in self.languages.iter().enumerate() { + if let Some(injection_regex) = &data.config.injection_regex { + if let Some(mat) = injection_regex.find(text.regex_input()) { let length = mat.end() - mat.start(); if length > best_match_length { - best_match_position = Some(i); + best_match_position = Some(idx); best_match_length = length; } } } } - best_match_position.map(|i| self.language_configs[i].clone()) + best_match_position.map(|i| Language(i as u32)) } - pub fn language_configuration_for_injection_string( - &self, - capture: &InjectionLanguageMarker, - ) -> Option<Arc<LanguageConfiguration>> { - match capture { - InjectionLanguageMarker::LanguageId(id) => self.language_config_for_language_id(*id), - InjectionLanguageMarker::Name(name) => self.language_config_for_name(*name), - InjectionLanguageMarker::Filename(file) => { - let path_str: Cow<str> = (*file).into(); - self.language_config_for_file_name(Path::new(path_str.as_ref())) - } - InjectionLanguageMarker::Shebang(shebang) => { - let shebang_str: Cow<str> = (*shebang).into(); - self.language_config_ids_by_shebang - .get(shebang_str.as_ref()) - .and_then(|&id| self.language_configs.get(id).cloned()) - } - } + pub fn language_for_filename(&self, path: &Path) -> Option<Language> { + // Find all the language configurations that match this file name + // or a suffix of the file name. 
+ + // TODO: content_regex handling conflict resolution + self.languages_glob_matcher + .language_for_path(path) + .or_else(|| { + path.extension() + .and_then(|extension| extension.to_str()) + .and_then(|extension| self.languages_by_extension.get(extension).copied()) + }) + } + + pub fn language_for_shebang(&self, text: RopeSlice) -> Option<Language> { + let shebang: Cow<str> = text.into(); + self.languages_by_shebang.get(shebang.as_ref()).copied() + } + + pub fn indent_query(&self, lang: Language) -> Option<&IndentQuery> { + self.language(lang).indent_query(self) } - pub fn language_configs(&self) -> impl Iterator<Item = &Arc<LanguageConfiguration>> { - self.language_configs.iter() + pub fn textobject_query(&self, lang: Language) -> Option<&TextObjectQuery> { + self.language(lang).textobject_query(self) } pub fn language_server_configs(&self) -> &HashMap<String, LanguageServerConfiguration> { &self.language_server_configs } + pub fn scopes(&self) -> Guard<Arc<Vec<String>>> { + self.scopes.load() + } + pub fn set_scopes(&self, scopes: Vec<String>) { self.scopes.store(Arc::new(scopes)); // Reconfigure existing grammars - for config in self - .language_configs - .iter() - .filter(|cfg| cfg.is_highlight_initialized()) - { - config.reconfigure(&self.scopes()); + for data in &self.languages { + data.reconfigure(&self.scopes()); } } +} - pub fn scopes(&self) -> Guard<Arc<Vec<String>>> { - self.scopes.load() +impl LanguageLoader for Loader { + fn language_for_marker(&self, marker: InjectionLanguageMarker) -> Option<Language> { + match marker { + InjectionLanguageMarker::Name(name) => self.language_for_name(name), + InjectionLanguageMarker::Match(text) => self.language_for_match(text), + InjectionLanguageMarker::Filename(text) => { + let path: Cow<str> = text.into(); + self.language_for_filename(Path::new(path.as_ref())) + } + InjectionLanguageMarker::Shebang(text) => self.language_for_shebang(text), + } + } + + fn get_config(&self, lang: Language) -> 
Option<&SyntaxConfig> { + self.languages[lang.idx()].syntax_config(self) } } -pub struct TsParser { - parser: tree_sitter::Parser, - pub cursors: Vec<QueryCursor>, +#[derive(Debug)] +struct FileTypeGlob { + glob: globset::Glob, + language: Language, } -// could also just use a pool, or a single instance? -thread_local! { - pub static PARSER: RefCell<TsParser> = RefCell::new(TsParser { - parser: Parser::new(), - cursors: Vec::new(), - }) +impl FileTypeGlob { + pub fn new(glob: globset::Glob, language: Language) -> Self { + Self { glob, language } + } } #[derive(Debug)] -pub struct Syntax { - layers: HopSlotMap<LayerId, LanguageLayer>, - root: LayerId, - loader: Arc<ArcSwap<Loader>>, +struct FileTypeGlobMatcher { + matcher: globset::GlobSet, + file_types: Vec<FileTypeGlob>, } -fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<str> { - Cow::from(source.byte_slice(range)) +impl Default for FileTypeGlobMatcher { + fn default() -> Self { + Self { + matcher: globset::GlobSet::empty(), + file_types: Default::default(), + } + } } -impl Syntax { - pub fn new( - source: RopeSlice, - config: Arc<HighlightConfiguration>, - loader: Arc<ArcSwap<Loader>>, - ) -> Option<Self> { - let root_layer = LanguageLayer { - tree: None, - config, - depth: 0, - flags: LayerUpdateFlags::empty(), - ranges: vec![Range { - start_byte: 0, - end_byte: usize::MAX, - start_point: Point::new(0, 0), - end_point: Point::new(usize::MAX, usize::MAX), - }], - parent: None, - }; +impl FileTypeGlobMatcher { + fn new(file_types: Vec<FileTypeGlob>) -> Result<Self, globset::Error> { + let mut builder = globset::GlobSetBuilder::new(); + for file_type in &file_types { + builder.add(file_type.glob.clone()); + } - // track scope_descriptor: a Vec of scopes for item in tree + Ok(Self { + matcher: builder.build()?, + file_types, + }) + } - let mut layers = HopSlotMap::default(); - let root = layers.insert(root_layer); + fn language_for_path(&self, path: &Path) -> Option<Language> { + 
self.matcher + .matches(path) + .iter() + .filter_map(|idx| self.file_types.get(*idx)) + .max_by_key(|file_type| file_type.glob.glob().len()) + .map(|file_type| file_type.language) + } +} - let mut syntax = Self { - root, - layers, - loader, - }; +#[derive(Debug)] +pub struct Syntax { + inner: tree_house::Syntax, +} - let res = syntax.update(source, source, &ChangeSet::new(source)); +const PARSE_TIMEOUT: Duration = Duration::from_millis(500); // half a second is pretty generous - if res.is_err() { - log::error!("TS parser failed, disabling TS for the current buffer: {res:?}"); - return None; - } - Some(syntax) +impl Syntax { + pub fn new(source: RopeSlice, language: Language, loader: &Loader) -> Result<Self, Error> { + let inner = tree_house::Syntax::new(source, language, PARSE_TIMEOUT, loader)?; + Ok(Self { inner }) } pub fn update( @@ -553,518 +428,82 @@ impl Syntax { old_source: RopeSlice, source: RopeSlice, changeset: &ChangeSet, + loader: &Loader, ) -> Result<(), Error> { - let mut queue = VecDeque::new(); - queue.push_back(self.root); - - let loader = self.loader.load(); - let scopes = loader.scopes.load(); - let injection_callback = |language: &InjectionLanguageMarker| { - loader - .language_configuration_for_injection_string(language) - .and_then(|language_config| language_config.highlight_config(&scopes)) - }; - - // Convert the changeset into tree sitter edits. let edits = generate_edits(old_source, changeset); - - // This table allows inverse indexing of `layers`. - // That is by hashing a `Layer` you can find - // the `LayerId` of an existing equivalent `Layer` in `layers`. - // - // It is used to determine if a new layer exists for an injection - // or if an existing layer needs to be updated. 
- let mut layers_table = RawTable::with_capacity(self.layers.len()); - let layers_hasher = RandomState::new(); - // Use the edits to update all layers markers - fn point_add(a: Point, b: Point) -> Point { - if b.row > 0 { - Point::new(a.row.saturating_add(b.row), b.column) - } else { - Point::new(0, a.column.saturating_add(b.column)) - } - } - fn point_sub(a: Point, b: Point) -> Point { - if a.row > b.row { - Point::new(a.row.saturating_sub(b.row), a.column) - } else { - Point::new(0, a.column.saturating_sub(b.column)) - } - } - - for (layer_id, layer) in self.layers.iter_mut() { - // The root layer always covers the whole range (0..usize::MAX) - if layer.depth == 0 { - layer.flags = LayerUpdateFlags::MODIFIED; - continue; - } - - if !edits.is_empty() { - for range in &mut layer.ranges { - // Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720 - for edit in edits.iter().rev() { - let is_pure_insertion = edit.old_end_byte == edit.start_byte; - - // if edit is after range, skip - if edit.start_byte > range.end_byte { - // TODO: || (is_noop && edit.start_byte == range.end_byte) - continue; - } - - // if edit is before range, shift entire range by len - if edit.old_end_byte < range.start_byte { - range.start_byte = - edit.new_end_byte + (range.start_byte - edit.old_end_byte); - range.start_point = point_add( - edit.new_end_position, - point_sub(range.start_point, edit.old_end_position), - ); - - range.end_byte = edit - .new_end_byte - .saturating_add(range.end_byte - edit.old_end_byte); - range.end_point = point_add( - edit.new_end_position, - point_sub(range.end_point, edit.old_end_position), - ); - - layer.flags |= LayerUpdateFlags::MOVED; - } - // if the edit starts in the space before and extends into the range - else if edit.start_byte < range.start_byte { - range.start_byte = edit.new_end_byte; - range.start_point = edit.new_end_position; - - range.end_byte = range - .end_byte - 
.saturating_sub(edit.old_end_byte) - .saturating_add(edit.new_end_byte); - range.end_point = point_add( - edit.new_end_position, - point_sub(range.end_point, edit.old_end_position), - ); - layer.flags = LayerUpdateFlags::MODIFIED; - } - // If the edit is an insertion at the start of the tree, shift - else if edit.start_byte == range.start_byte && is_pure_insertion { - range.start_byte = edit.new_end_byte; - range.start_point = edit.new_end_position; - layer.flags |= LayerUpdateFlags::MOVED; - } else { - range.end_byte = range - .end_byte - .saturating_sub(edit.old_end_byte) - .saturating_add(edit.new_end_byte); - range.end_point = point_add( - edit.new_end_position, - point_sub(range.end_point, edit.old_end_position), - ); - layer.flags = LayerUpdateFlags::MODIFIED; - } - } - } - } - - let hash = layers_hasher.hash_one(layer); - // Safety: insert_no_grow is unsafe because it assumes that the table - // has enough capacity to hold additional elements. - // This is always the case as we reserved enough capacity above. - unsafe { layers_table.insert_no_grow(hash, layer_id) }; - } - - PARSER.with(|ts_parser| { - let ts_parser = &mut ts_parser.borrow_mut(); - ts_parser.parser.set_timeout_micros(1000 * 500); // half a second is pretty generours - let mut cursor = ts_parser.cursors.pop().unwrap_or_default(); - // TODO: might need to set cursor range - cursor.set_byte_range(0..usize::MAX); - cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); - - let source_slice = source.slice(..); - - while let Some(layer_id) = queue.pop_front() { - let layer = &mut self.layers[layer_id]; - - // Mark the layer as touched - layer.flags |= LayerUpdateFlags::TOUCHED; - - // If a tree already exists, notify it of changes. - if let Some(tree) = &mut layer.tree { - if layer - .flags - .intersects(LayerUpdateFlags::MODIFIED | LayerUpdateFlags::MOVED) - { - for edit in edits.iter().rev() { - // Apply the edits in reverse. 
- // If we applied them in order then edit 1 would disrupt the positioning of edit 2. - tree.edit(edit); - } - } - - if layer.flags.contains(LayerUpdateFlags::MODIFIED) { - // Re-parse the tree. - layer.parse(&mut ts_parser.parser, source)?; - } - } else { - // always parse if this layer has never been parsed before - layer.parse(&mut ts_parser.parser, source)?; - } - - // Switch to an immutable borrow. - let layer = &self.layers[layer_id]; - - // Process injections. - let matches = cursor.matches( - &layer.config.injections_query, - layer.tree().root_node(), - RopeProvider(source_slice), - ); - let mut combined_injections = vec![ - (None, Vec::new(), IncludedChildren::default()); - layer.config.combined_injections_patterns.len() - ]; - let mut injections = Vec::new(); - let mut last_injection_end = 0; - for mat in matches { - let (injection_capture, content_node, included_children) = layer - .config - .injection_for_match(&layer.config.injections_query, &mat, source_slice); - - // in case this is a combined injection save it for more processing later - if let Some(combined_injection_idx) = layer - .config - .combined_injections_patterns - .iter() - .position(|&pattern| pattern == mat.pattern_index) - { - let entry = &mut combined_injections[combined_injection_idx]; - if injection_capture.is_some() { - entry.0 = injection_capture; - } - if let Some(content_node) = content_node { - if content_node.start_byte() >= last_injection_end { - entry.1.push(content_node); - last_injection_end = content_node.end_byte(); - } - } - entry.2 = included_children; - continue; - } - - // Explicitly remove this match so that none of its other captures will remain - // in the stream of captures. - mat.remove(); - - // If a language is found with the given name, then add a new language layer - // to the highlighted document. 
- if let (Some(injection_capture), Some(content_node)) = - (injection_capture, content_node) - { - if let Some(config) = (injection_callback)(&injection_capture) { - let ranges = - intersect_ranges(&layer.ranges, &[content_node], included_children); - - if !ranges.is_empty() { - if content_node.start_byte() < last_injection_end { - continue; - } - last_injection_end = content_node.end_byte(); - injections.push((config, ranges)); - } - } - } - } - - for (lang_name, content_nodes, included_children) in combined_injections { - if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) { - if let Some(config) = (injection_callback)(&lang_name) { - let ranges = - intersect_ranges(&layer.ranges, &content_nodes, included_children); - if !ranges.is_empty() { - injections.push((config, ranges)); - } - } - } - } - - let depth = layer.depth + 1; - // TODO: can't inline this since matches borrows self.layers - for (config, ranges) in injections { - let parent = Some(layer_id); - let new_layer = LanguageLayer { - tree: None, - config, - depth, - ranges, - flags: LayerUpdateFlags::empty(), - parent: None, - }; - - // Find an identical existing layer - let layer = layers_table - .get(layers_hasher.hash_one(&new_layer), |&it| { - self.layers[it] == new_layer - }) - .copied(); - - // ...or insert a new one. - let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer)); - self.layers[layer_id].parent = parent; - - queue.push_back(layer_id); - } - - // TODO: pre-process local scopes at this time, rather than highlight? - // would solve problems with locals not working across boundaries - } - - // Return the cursor back in the pool. 
- ts_parser.cursors.push(cursor); - - // Reset all `LayerUpdateFlags` and remove all untouched layers - self.layers.retain(|_, layer| { - replace(&mut layer.flags, LayerUpdateFlags::empty()) - .contains(LayerUpdateFlags::TOUCHED) - }); - + if edits.is_empty() { Ok(()) - }) + } else { + self.inner.update(source, PARSE_TIMEOUT, &edits, loader) + } } - pub fn tree(&self) -> &Tree { - self.layers[self.root].tree() + pub fn layer(&self, layer: Layer) -> &tree_house::LayerData { + self.inner.layer(layer) } - /// Iterate over the highlighted regions for a given slice of source code. - pub fn highlight_iter<'a>( - &'a self, - source: RopeSlice<'a>, - range: Option<std::ops::Range<usize>>, - cancellation_flag: Option<&'a AtomicUsize>, - ) -> impl Iterator<Item = Result<HighlightEvent, Error>> + 'a { - let mut layers = self - .layers - .iter() - .filter_map(|(_, layer)| { - // TODO: if range doesn't overlap layer range, skip it - - // Reuse a cursor from the pool if available. - let mut cursor = PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - highlighter.cursors.pop().unwrap_or_default() - }); - - // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which - // prevents them from being moved. But both of these values are really just - // pointers, so it's actually ok to move them. 
- let cursor_ref = unsafe { - mem::transmute::<&mut tree_sitter::QueryCursor, &mut tree_sitter::QueryCursor>( - &mut cursor, - ) - }; - - // if reusing cursors & no range this resets to whole range - cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX)); - cursor_ref.set_match_limit(TREE_SITTER_MATCH_LIMIT); - - let mut captures = cursor_ref - .captures( - &layer.config.query, - layer.tree().root_node(), - RopeProvider(source), - ) - .peekable(); - - // If there's no captures, skip the layer - captures.peek()?; - - Some(HighlightIterLayer { - highlight_end_stack: Vec::new(), - scope_stack: vec![LocalScope { - inherits: false, - range: 0..usize::MAX, - local_defs: Vec::new(), - }], - cursor, - _tree: None, - captures: RefCell::new(captures), - config: layer.config.as_ref(), // TODO: just reuse `layer` - depth: layer.depth, // TODO: just reuse `layer` - }) - }) - .collect::<Vec<_>>(); - - layers.sort_unstable_by_key(|layer| layer.sort_key()); - - let mut result = HighlightIter { - source, - byte_offset: range.map_or(0, |r| r.start), - cancellation_flag, - iter_count: 0, - layers, - next_event: None, - last_highlight_range: None, - }; - result.sort_layers(); - result + pub fn root_layer(&self) -> Layer { + self.inner.root() } - pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree { - let mut container_id = self.root; - - for (layer_id, layer) in self.layers.iter() { - if layer.depth > self.layers[container_id].depth - && layer.contains_byte_range(start, end) - { - container_id = layer_id; - } - } - - self.layers[container_id].tree() + pub fn layer_for_byte_range(&self, start: u32, end: u32) -> Layer { + self.inner.layer_for_byte_range(start, end) } - pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Node<'_>> { - self.tree_for_byte_range(start, end) - .root_node() - .named_descendant_for_byte_range(start, end) + pub fn root_language(&self) -> Language { + self.layer(self.root_layer()).language } - pub fn 
descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Node<'_>> { - self.tree_for_byte_range(start, end) - .root_node() - .descendant_for_byte_range(start, end) + pub fn tree(&self) -> &Tree { + self.inner.tree() } - pub fn walk(&self) -> TreeCursor<'_> { - // data structure to find the smallest range that contains a point - // when some of the ranges in the structure can overlap. - TreeCursor::new(&self.layers, self.root) + pub fn tree_for_byte_range(&self, start: u32, end: u32) -> &Tree { + self.inner.tree_for_byte_range(start, end) } - // Commenting - // comment_strings_for_pos - // is_commented - - // Indentation - // suggested_indent_for_line_at_buffer_row - // suggested_indent_for_buffer_row - // indent_level_for_line - - // TODO: Folding -} - -bitflags! { - /// Flags that track the status of a layer - /// in the `Sytaxn::update` function - #[derive(Debug)] - struct LayerUpdateFlags : u32{ - const MODIFIED = 0b001; - const MOVED = 0b010; - const TOUCHED = 0b100; + pub fn named_descendant_for_byte_range(&self, start: u32, end: u32) -> Option<Node> { + self.inner.named_descendant_for_byte_range(start, end) } -} - -#[derive(Debug)] -pub struct LanguageLayer { - // mode - // grammar - pub config: Arc<HighlightConfiguration>, - pub(crate) tree: Option<Tree>, - pub ranges: Vec<Range>, - pub depth: u32, - flags: LayerUpdateFlags, - parent: Option<LayerId>, -} -/// This PartialEq implementation only checks if that -/// two layers are theoretically identical (meaning they highlight the same text range with the same language). -/// It does not check whether the layers have the same internal treesitter -/// state. 
-impl PartialEq for LanguageLayer { - fn eq(&self, other: &Self) -> bool { - self.depth == other.depth - && self.config.language == other.config.language - && self.ranges == other.ranges + pub fn descendant_for_byte_range(&self, start: u32, end: u32) -> Option<Node> { + self.inner.descendant_for_byte_range(start, end) } -} -/// Hash implementation belongs to PartialEq implementation above. -/// See its documentation for details. -impl Hash for LanguageLayer { - fn hash<H: Hasher>(&self, state: &mut H) { - self.depth.hash(state); - self.config.language.hash(state); - self.ranges.hash(state); + pub fn walk(&self) -> TreeCursor { + self.inner.walk() } -} -impl LanguageLayer { - pub fn tree(&self) -> &Tree { - // TODO: no unwrap - self.tree.as_ref().unwrap() - } - - fn parse(&mut self, parser: &mut Parser, source: RopeSlice) -> Result<(), Error> { - parser - .set_included_ranges(&self.ranges) - .map_err(|_| Error::InvalidRanges)?; - - parser - .set_language(&self.config.language) - .map_err(|_| Error::InvalidLanguage)?; - - // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) }; - let tree = parser - .parse_with( - &mut |byte, _| { - if byte <= source.len_bytes() { - let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); - &chunk.as_bytes()[byte - start_byte..] - } else { - // out of range - &[] - } - }, - self.tree.as_ref(), - ) - .ok_or(Error::Cancelled)?; - // unsafe { ts_parser.parser.set_cancellation_flag(None) }; - self.tree = Some(tree); - Ok(()) + pub fn highlighter<'a>( + &'a self, + source: RopeSlice<'a>, + loader: &'a Loader, + range: impl RangeBounds<u32>, + ) -> Highlighter<'a> { + Highlighter::new(&self.inner, source, loader, range) } - /// Whether the layer contains the given byte range. - /// - /// If the layer has multiple ranges (i.e. 
combined injections), the - /// given range is considered contained if it is within the start and - /// end bytes of the first and last ranges **and** if the given range - /// starts or ends within any of the layer's ranges. - fn contains_byte_range(&self, start: usize, end: usize) -> bool { - let layer_start = self - .ranges - .first() - .expect("ranges should not be empty") - .start_byte; - let layer_end = self - .ranges - .last() - .expect("ranges should not be empty") - .end_byte; - - layer_start <= start - && layer_end >= end - && self.ranges.iter().any(|range| { - let byte_range = range.start_byte..range.end_byte; - byte_range.contains(&start) || byte_range.contains(&end) - }) + pub fn query_iter<'a, QueryLoader, LayerState, Range>( + &'a self, + source: RopeSlice<'a>, + loader: QueryLoader, + range: Range, + ) -> QueryIter<'a, 'a, QueryLoader, LayerState> + where + QueryLoader: FnMut(Language) -> Option<&'a Query> + 'a, + LayerState: Default, + Range: RangeBounds<u32>, + { + QueryIter::new(&self.inner, source, loader, range) } } -pub(crate) fn generate_edits( - old_text: RopeSlice, - changeset: &ChangeSet, -) -> Vec<tree_sitter::InputEdit> { - use Operation::*; +pub type Highlighter<'a> = highlighter::Highlighter<'a, 'a, Loader>; + +fn generate_edits(old_text: RopeSlice, changeset: &ChangeSet) -> Vec<InputEdit> { + use crate::Operation::*; + use tree_sitter::Point; + let mut old_pos = 0; let mut edits = Vec::new(); @@ -1076,35 +515,6 @@ pub(crate) fn generate_edits( let mut iter = changeset.changes.iter().peekable(); // TODO; this is a lot easier with Change instead of Operation. 
- - fn point_at_pos(text: RopeSlice, pos: usize) -> (usize, Point) { - let byte = text.char_to_byte(pos); // <- attempted to index past end - let line = text.char_to_line(pos); - let line_start_byte = text.line_to_byte(line); - let col = byte - line_start_byte; - - (byte, Point::new(line, col)) - } - - fn traverse(point: Point, text: &Tendril) -> Point { - let Point { - mut row, - mut column, - } = point; - - // TODO: there should be a better way here. - let mut chars = text.chars().peekable(); - while let Some(ch) = chars.next() { - if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) { - row += 1; - column = 0; - } else { - column += 1; - } - } - Point { row, column } - } - while let Some(change) = iter.next() { let len = match change { Delete(i) | Retain(i) => *i, @@ -1115,47 +525,47 @@ pub(crate) fn generate_edits( match change { Retain(_) => {} Delete(_) => { - let (start_byte, start_position) = point_at_pos(old_text, old_pos); - let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); + let start_byte = old_text.char_to_byte(old_pos) as u32; + let old_end_byte = old_text.char_to_byte(old_end) as u32; // deletion - edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte, // old_end to byte - new_end_byte: start_byte, // old_pos to byte - start_position, // old pos to coords - old_end_position, // old_end to coords - new_end_position: start_position, // old pos to coords + edits.push(InputEdit { + start_byte, // old_pos to byte + old_end_byte, // old_end to byte + new_end_byte: start_byte, // old_pos to byte + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO, }); } Insert(s) => { - let (start_byte, start_position) = point_at_pos(old_text, old_pos); + let start_byte = old_text.char_to_byte(old_pos) as u32; // a subsequent delete means a replace, consume it if let Some(Delete(len)) = iter.peek() { old_end = old_pos + len; - let (old_end_byte, old_end_position) = 
point_at_pos(old_text, old_end); + let old_end_byte = old_text.char_to_byte(old_end) as u32; iter.next(); // replacement - edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte, // old_end to byte - new_end_byte: start_byte + s.len(), // old_pos to byte + s.len() - start_position, // old pos to coords - old_end_position, // old_end to coords - new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) + edits.push(InputEdit { + start_byte, // old_pos to byte + old_end_byte, // old_end to byte + new_end_byte: start_byte + s.len() as u32, // old_pos to byte + s.len() + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO, }); } else { // insert - edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte: start_byte, // same - new_end_byte: start_byte + s.len(), // old_pos + s.len() - start_position, // old pos to coords - old_end_position: start_position, // same - new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) + edits.push(InputEdit { + start_byte, // old_pos to byte + old_end_byte: start_byte, // same + new_end_byte: start_byte + s.len() as u32, // old_pos + s.len() + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO, }); } } @@ -1165,949 +575,295 @@ pub(crate) fn generate_edits( edits } -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{iter, mem, ops, str}; -use tree_sitter::{ - Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, - QueryMatch, Range, TextProvider, Tree, -}; - -const CANCELLATION_CHECK_INTERVAL: usize = 100; - -/// Indicates which highlight should be applied to a region of source code. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub struct Highlight(pub usize); - -/// Represents the reason why syntax highlighting failed. 
-#[derive(Debug, PartialEq, Eq)] -pub enum Error { - Cancelled, - InvalidLanguage, - InvalidRanges, - Unknown, -} - -/// Represents a single step in rendering a syntax-highlighted document. -#[derive(Copy, Clone, Debug)] -pub enum HighlightEvent { - Source { start: usize, end: usize }, - HighlightStart(Highlight), - HighlightEnd, -} - -/// Contains the data needed to highlight code written in a particular language. +/// A set of "overlay" highlights and ranges they apply to. /// -/// This struct is immutable and can be shared between threads. +/// As overlays, the styles for the given `Highlight`s are merged on top of the syntax highlights. #[derive(Debug)] -pub struct HighlightConfiguration { - pub language: Grammar, - pub query: Query, - injections_query: Query, - combined_injections_patterns: Vec<usize>, - highlights_pattern_index: usize, - highlight_indices: ArcSwap<Vec<Option<Highlight>>>, - non_local_variable_patterns: Vec<bool>, - injection_content_capture_index: Option<u32>, - injection_language_capture_index: Option<u32>, - injection_filename_capture_index: Option<u32>, - injection_shebang_capture_index: Option<u32>, - local_scope_capture_index: Option<u32>, - local_def_capture_index: Option<u32>, - local_def_value_capture_index: Option<u32>, - local_ref_capture_index: Option<u32>, -} - -#[derive(Debug)] -struct LocalDef<'a> { - name: Cow<'a, str>, - value_range: ops::Range<usize>, - highlight: Option<Highlight>, -} - -#[derive(Debug)] -struct LocalScope<'a> { - inherits: bool, - range: ops::Range<usize>, - local_defs: Vec<LocalDef<'a>>, -} - -#[derive(Debug)] -struct HighlightIter<'a> { - source: RopeSlice<'a>, - byte_offset: usize, - cancellation_flag: Option<&'a AtomicUsize>, - layers: Vec<HighlightIterLayer<'a>>, - iter_count: usize, - next_event: Option<HighlightEvent>, - last_highlight_range: Option<(usize, usize, u32)>, -} - -// Adapter to convert rope chunks to bytes -pub struct ChunksBytes<'a> { - chunks: ropey::iter::Chunks<'a>, -} -impl<'a> 
Iterator for ChunksBytes<'a> { - type Item = &'a [u8]; - fn next(&mut self) -> Option<Self::Item> { - self.chunks.next().map(str::as_bytes) - } +pub enum OverlayHighlights { + /// All highlights use a single `Highlight`. + /// + /// Note that, currently, all ranges are assumed to be non-overlapping. This could change in + /// the future though. + Homogeneous { + highlight: Highlight, + ranges: Vec<ops::Range<usize>>, + }, + /// A collection of different highlights for given ranges. + /// + /// Note that the ranges **must be non-overlapping**. + Heterogenous { + highlights: Vec<(Highlight, ops::Range<usize>)>, + }, } -pub struct RopeProvider<'a>(pub RopeSlice<'a>); -impl<'a> TextProvider<&'a [u8]> for RopeProvider<'a> { - type I = ChunksBytes<'a>; - - fn text(&mut self, node: Node) -> Self::I { - let fragment = self.0.byte_slice(node.start_byte()..node.end_byte()); - ChunksBytes { - chunks: fragment.chunks(), +impl OverlayHighlights { + pub fn single(highlight: Highlight, range: ops::Range<usize>) -> Self { + Self::Homogeneous { + highlight, + ranges: vec![range], } } -} - -struct HighlightIterLayer<'a> { - _tree: Option<Tree>, - cursor: QueryCursor, - captures: RefCell<iter::Peekable<QueryCaptures<'a, 'a, RopeProvider<'a>, &'a [u8]>>>, - config: &'a HighlightConfiguration, - highlight_end_stack: Vec<usize>, - scope_stack: Vec<LocalScope<'a>>, - depth: u32, -} -impl fmt::Debug for HighlightIterLayer<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("HighlightIterLayer").finish() + fn is_empty(&self) -> bool { + match self { + Self::Homogeneous { ranges, .. } => ranges.is_empty(), + Self::Heterogenous { highlights } => highlights.is_empty(), + } } } -impl HighlightConfiguration { - /// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting - /// queries. 
- /// - /// # Parameters +#[derive(Debug)] +struct Overlay { + highlights: OverlayHighlights, + /// The position of the highlighter into the Vec of ranges of the overlays. /// - /// * `language` - The Tree-sitter `Grammar` that should be used for parsing. - /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This - /// should be non-empty, otherwise no syntax highlights will be added. - /// * `injections_query` - A string containing tree patterns for injecting other languages - /// into the document. This can be empty if no injections are desired. - /// * `locals_query` - A string containing tree patterns for tracking local variable - /// definitions and references. This can be empty if local variable tracking is not needed. + /// Used by the `OverlayHighlighter`. + idx: usize, + /// The currently active highlight (and the ending character index) for this overlay. /// - /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method. - pub fn new( - language: Grammar, - highlights_query: &str, - injection_query: &str, - locals_query: &str, - ) -> Result<Self, QueryError> { - // Concatenate the query strings, keeping track of the start offset of each section. - let mut query_source = String::new(); - query_source.push_str(locals_query); - let highlights_query_offset = query_source.len(); - query_source.push_str(highlights_query); - - // Construct a single query by concatenating the three query strings, but record the - // range of pattern indices that belong to each individual string. 
- let query = Query::new(&language, &query_source)?; - let mut highlights_pattern_index = 0; - for i in 0..(query.pattern_count()) { - let pattern_offset = query.start_byte_for_pattern(i); - if pattern_offset < highlights_query_offset { - highlights_pattern_index += 1; - } - } - - let injections_query = Query::new(&language, injection_query)?; - let combined_injections_patterns = (0..injections_query.pattern_count()) - .filter(|&i| { - injections_query - .property_settings(i) - .iter() - .any(|s| &*s.key == "injection.combined") - }) - .collect(); - - // Find all of the highlighting patterns that are disabled for nodes that - // have been identified as local variables. - let non_local_variable_patterns = (0..query.pattern_count()) - .map(|i| { - query - .property_predicates(i) - .iter() - .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local") - }) - .collect(); - - // Store the numeric ids for all of the special captures. - let mut injection_content_capture_index = None; - let mut injection_language_capture_index = None; - let mut injection_filename_capture_index = None; - let mut injection_shebang_capture_index = None; - let mut local_def_capture_index = None; - let mut local_def_value_capture_index = None; - let mut local_ref_capture_index = None; - let mut local_scope_capture_index = None; - for (i, name) in query.capture_names().iter().enumerate() { - let i = Some(i as u32); - match *name { - "local.definition" => local_def_capture_index = i, - "local.definition-value" => local_def_value_capture_index = i, - "local.reference" => local_ref_capture_index = i, - "local.scope" => local_scope_capture_index = i, - _ => {} - } - } - - for (i, name) in injections_query.capture_names().iter().enumerate() { - let i = Some(i as u32); - match *name { - "injection.content" => injection_content_capture_index = i, - "injection.language" => injection_language_capture_index = i, - "injection.filename" => injection_filename_capture_index = i, - "injection.shebang" 
=> injection_shebang_capture_index = i, - _ => {} - } - } + /// Used by the `OverlayHighlighter`. + active_highlight: Option<(Highlight, usize)>, +} - let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]); - Ok(Self { - language, - query, - injections_query, - combined_injections_patterns, - highlights_pattern_index, - highlight_indices, - non_local_variable_patterns, - injection_content_capture_index, - injection_language_capture_index, - injection_filename_capture_index, - injection_shebang_capture_index, - local_scope_capture_index, - local_def_capture_index, - local_def_value_capture_index, - local_ref_capture_index, +impl Overlay { + fn new(highlights: OverlayHighlights) -> Option<Self> { + (!highlights.is_empty()).then_some(Self { + highlights, + idx: 0, + active_highlight: None, }) } - /// Get a slice containing all of the highlight names used in the configuration. - pub fn names(&self) -> &[&str] { - self.query.capture_names() - } - - /// Set the list of recognized highlight names. - /// - /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated - /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of - /// these queries can choose to recognize highlights with different levels of specificity. - /// For example, the string `function.builtin` will match against `function.builtin.constructor` - /// but will not match `function.method.builtin` and `function.method`. - /// - /// When highlighting, results are returned as `Highlight` values, which contain the index - /// of the matched highlight this list of highlight names. 
- pub fn configure(&self, recognized_names: &[String]) { - let mut capture_parts = Vec::new(); - let indices: Vec<_> = self - .query - .capture_names() - .iter() - .map(move |capture_name| { - capture_parts.clear(); - capture_parts.extend(capture_name.split('.')); - - let mut best_index = None; - let mut best_match_len = 0; - for (i, recognized_name) in recognized_names.iter().enumerate() { - let mut len = 0; - let mut matches = true; - for (i, part) in recognized_name.split('.').enumerate() { - match capture_parts.get(i) { - Some(capture_part) if *capture_part == part => len += 1, - _ => { - matches = false; - break; - } - } - } - if matches && len > best_match_len { - best_index = Some(i); - best_match_len = len; - } - } - best_index.map(Highlight) - }) - .collect(); - - self.highlight_indices.store(Arc::new(indices)); - } - - fn injection_pair<'a>( - &self, - query_match: &QueryMatch<'a, 'a>, - source: RopeSlice<'a>, - ) -> (Option<InjectionLanguageMarker<'a>>, Option<Node<'a>>) { - let mut injection_capture = None; - let mut content_node = None; - - for capture in query_match.captures { - let index = Some(capture.index); - if index == self.injection_language_capture_index { - injection_capture = Some(InjectionLanguageMarker::Name( - source.byte_slice(capture.node.byte_range()), - )); - } else if index == self.injection_filename_capture_index { - injection_capture = Some(InjectionLanguageMarker::Filename( - source.byte_slice(capture.node.byte_range()), - )); - } else if index == self.injection_shebang_capture_index { - let node_slice = source.byte_slice(capture.node.byte_range()); - - // some languages allow space and newlines before the actual string content - // so a shebang could be on either the first or second line - let lines = if let Ok(end) = node_slice.try_line_to_byte(2) { - node_slice.byte_slice(..end) - } else { - node_slice - }; - - static SHEBANG_REGEX: Lazy<rope::Regex> = - Lazy::new(|| rope::Regex::new(SHEBANG).unwrap()); - - injection_capture = 
SHEBANG_REGEX - .captures_iter(lines.regex_input()) - .map(|cap| { - let cap = lines.byte_slice(cap.get_group(1).unwrap().range()); - InjectionLanguageMarker::Shebang(cap) - }) - .next() - } else if index == self.injection_content_capture_index { - content_node = Some(capture.node); - } - } - (injection_capture, content_node) - } - - fn injection_for_match<'a>( - &self, - query: &'a Query, - query_match: &QueryMatch<'a, 'a>, - source: RopeSlice<'a>, - ) -> ( - Option<InjectionLanguageMarker<'a>>, - Option<Node<'a>>, - IncludedChildren, - ) { - let (mut injection_capture, content_node) = self.injection_pair(query_match, source); - - let mut included_children = IncludedChildren::default(); - for prop in query.property_settings(query_match.pattern_index) { - match prop.key.as_ref() { - // In addition to specifying the language name via the text of a - // captured node, it can also be hard-coded via a `#set!` predicate - // that sets the injection.language key. - "injection.language" if injection_capture.is_none() => { - injection_capture = prop - .value - .as_deref() - .map(InjectionLanguageMarker::LanguageId); - } - - // By default, injections do not include the *children* of an - // `injection.content` node - only the ranges that belong to the - // node itself. This can be changed using a `#set!` predicate that - // sets the `injection.include-children` key. - "injection.include-children" => included_children = IncludedChildren::All, - - // Some queries might only exclude named children but include unnamed - // children in their `injection.content` node. This can be enabled using - // a `#set!` predicate that sets the `injection.include-unnamed-children` key. 
- "injection.include-unnamed-children" => { - included_children = IncludedChildren::Unnamed - } - _ => {} - } + fn current(&self) -> Option<(Highlight, ops::Range<usize>)> { + match &self.highlights { + OverlayHighlights::Homogeneous { highlight, ranges } => ranges + .get(self.idx) + .map(|range| (*highlight, range.clone())), + OverlayHighlights::Heterogenous { highlights } => highlights.get(self.idx).cloned(), } - - (injection_capture, content_node, included_children) } -} -impl HighlightIterLayer<'_> { - // First, sort scope boundaries by their byte offset in the document. At a - // given position, emit scope endings before scope beginnings. Finally, emit - // scope boundaries from deeper layers first. - fn sort_key(&self) -> Option<(usize, bool, isize)> { - let depth = -(self.depth as isize); - let next_start = self - .captures - .borrow_mut() - .peek() - .map(|(m, i)| m.captures[*i].node.start_byte()); - let next_end = self.highlight_end_stack.last().cloned(); - match (next_start, next_end) { - (Some(start), Some(end)) => { - if start < end { - Some((start, true, depth)) - } else { - Some((end, false, depth)) - } + fn start(&self) -> Option<usize> { + match &self.highlights { + OverlayHighlights::Homogeneous { ranges, .. } => { + ranges.get(self.idx).map(|range| range.start) } - (Some(i), None) => Some((i, true, depth)), - (None, Some(j)) => Some((j, false, depth)), - _ => None, + OverlayHighlights::Heterogenous { highlights } => highlights + .get(self.idx) + .map(|(_highlight, range)| range.start), } } } -#[derive(Clone)] -enum IncludedChildren { - None, - All, - Unnamed, -} - -impl Default for IncludedChildren { - fn default() -> Self { - Self::None - } +/// A collection of highlights to apply when rendering which merge on top of syntax highlights. +#[derive(Debug)] +pub struct OverlayHighlighter { + overlays: Vec<Overlay>, + next_highlight_start: usize, + next_highlight_end: usize, } -// Compute the ranges that should be included when parsing an injection. 
-// This takes into account three things: -// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. -// * `nodes` - Every injection takes place within a set of nodes. The injection ranges -// are the ranges of those nodes. -// * `includes_children` - For some injections, the content nodes' children should be -// excluded from the nested document, so that only the content nodes' *own* content -// is reparsed. For other injections, the content nodes' entire ranges should be -// reparsed, including the ranges of their children. -fn intersect_ranges( - parent_ranges: &[Range], - nodes: &[Node], - included_children: IncludedChildren, -) -> Vec<Range> { - let mut cursor = nodes[0].walk(); - let mut result = Vec::new(); - let mut parent_range_iter = parent_ranges.iter(); - let mut parent_range = parent_range_iter - .next() - .expect("Layers should only be constructed with non-empty ranges vectors"); - for node in nodes.iter() { - let mut preceding_range = Range { - start_byte: 0, - start_point: Point::new(0, 0), - end_byte: node.start_byte(), - end_point: node.start_position(), - }; - let following_range = Range { - start_byte: node.end_byte(), - start_point: node.end_position(), - end_byte: usize::MAX, - end_point: Point::new(usize::MAX, usize::MAX), - }; - - for excluded_range in node - .children(&mut cursor) - .filter_map(|child| match included_children { - IncludedChildren::None => Some(child.range()), - IncludedChildren::All => None, - IncludedChildren::Unnamed => { - if child.is_named() { - Some(child.range()) - } else { - None - } - } - }) - .chain([following_range].iter().cloned()) - { - let mut range = Range { - start_byte: preceding_range.end_byte, - start_point: preceding_range.end_point, - end_byte: excluded_range.start_byte, - end_point: excluded_range.start_point, - }; - preceding_range = excluded_range; - - if range.end_byte < parent_range.start_byte { - continue; - } - - while parent_range.start_byte <= range.end_byte { - if 
parent_range.end_byte > range.start_byte { - if range.start_byte < parent_range.start_byte { - range.start_byte = parent_range.start_byte; - range.start_point = parent_range.start_point; - } - - if parent_range.end_byte < range.end_byte { - if range.start_byte < parent_range.end_byte { - result.push(Range { - start_byte: range.start_byte, - start_point: range.start_point, - end_byte: parent_range.end_byte, - end_point: parent_range.end_point, - }); - } - range.start_byte = parent_range.end_byte; - range.start_point = parent_range.end_point; - } else { - if range.start_byte < range.end_byte { - result.push(range); - } - break; - } - } +impl OverlayHighlighter { + pub fn new(overlays: impl IntoIterator<Item = OverlayHighlights>) -> Self { + let overlays: Vec<_> = overlays.into_iter().filter_map(Overlay::new).collect(); + let next_highlight_start = overlays + .iter() + .filter_map(|overlay| overlay.start()) + .min() + .unwrap_or(usize::MAX); - if let Some(next_range) = parent_range_iter.next() { - parent_range = next_range; - } else { - return result; - } - } + Self { + overlays, + next_highlight_start, + next_highlight_end: usize::MAX, } } - result -} -impl HighlightIter<'_> { - fn emit_event( - &mut self, - offset: usize, - event: Option<HighlightEvent>, - ) -> Option<Result<HighlightEvent, Error>> { - let result; - if self.byte_offset < offset { - result = Some(Ok(HighlightEvent::Source { - start: self.byte_offset, - end: offset, - })); - self.byte_offset = offset; - self.next_event = event; - } else { - result = event.map(Ok); - } - self.sort_layers(); - result - } - - fn sort_layers(&mut self) { - while !self.layers.is_empty() { - if let Some(sort_key) = self.layers[0].sort_key() { - let mut i = 0; - while i + 1 < self.layers.len() { - if let Some(next_offset) = self.layers[i + 1].sort_key() { - if next_offset < sort_key { - i += 1; - continue; - } - } else { - let layer = self.layers.remove(i + 1); - PARSER.with(|ts_parser| { - let highlighter = &mut 
ts_parser.borrow_mut(); - highlighter.cursors.push(layer.cursor); - }); - } - break; - } - if i > 0 { - self.layers[0..(i + 1)].rotate_left(1); - } - break; - } else { - let layer = self.layers.remove(0); - PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - highlighter.cursors.push(layer.cursor); - }); - } - } + /// The current position in the overlay highlights. + /// + /// This method is meant to be used when treating this type as a cursor over the overlay + /// highlights. + /// + /// `usize::MAX` is returned when there are no more overlay highlights. + pub fn next_event_offset(&self) -> usize { + self.next_highlight_start.min(self.next_highlight_end) } -} - -impl Iterator for HighlightIter<'_> { - type Item = Result<HighlightEvent, Error>; - - fn next(&mut self) -> Option<Self::Item> { - 'main: loop { - // If we've already determined the next highlight boundary, just return it. - if let Some(e) = self.next_event.take() { - return Some(Ok(e)); - } - - // Periodically check for cancellation, returning `Cancelled` error if the - // cancellation flag was flipped. - if let Some(cancellation_flag) = self.cancellation_flag { - self.iter_count += 1; - if self.iter_count >= CANCELLATION_CHECK_INTERVAL { - self.iter_count = 0; - if cancellation_flag.load(Ordering::Relaxed) != 0 { - return Some(Err(Error::Cancelled)); - } - } - } - - // If none of the layers have any more highlight boundaries, terminate. - if self.layers.is_empty() { - let len = self.source.len_bytes(); - return if self.byte_offset < len { - let result = Some(Ok(HighlightEvent::Source { - start: self.byte_offset, - end: len, - })); - self.byte_offset = len; - result - } else { - None - }; - } - - // Get the next capture from whichever layer has the earliest highlight boundary. 
- let range; - let layer = &mut self.layers[0]; - let captures = layer.captures.get_mut(); - if let Some((next_match, capture_index)) = captures.peek() { - let next_capture = next_match.captures[*capture_index]; - range = next_capture.node.byte_range(); - - // If any previous highlight ends before this node starts, then before - // processing this capture, emit the source code up until the end of the - // previous highlight, and an end event for that highlight. - if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { - if end_byte <= range.start { - layer.highlight_end_stack.pop(); - return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - } - } - } - // If there are no more captures, then emit any remaining highlight end events. - // And if there are none of those, then just advance to the end of the document. - else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { - layer.highlight_end_stack.pop(); - return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - } else { - return self.emit_event(self.source.len_bytes(), None); - }; - - let (mut match_, capture_index) = captures.next().unwrap(); - let mut capture = match_.captures[capture_index]; - - // Remove from the local scope stack any local scopes that have already ended. - while range.start > layer.scope_stack.last().unwrap().range.end { - layer.scope_stack.pop(); - } - - // If this capture is for tracking local variables, then process the - // local variable info. - let mut reference_highlight = None; - let mut definition_highlight = None; - while match_.pattern_index < layer.config.highlights_pattern_index { - // If the node represents a local scope, push a new local scope onto - // the scope stack. 
- if Some(capture.index) == layer.config.local_scope_capture_index { - definition_highlight = None; - let mut scope = LocalScope { - inherits: true, - range: range.clone(), - local_defs: Vec::new(), - }; - for prop in layer.config.query.property_settings(match_.pattern_index) { - if let "local.scope-inherits" = prop.key.as_ref() { - scope.inherits = - prop.value.as_ref().map_or(true, |r| r.as_ref() == "true"); - } - } - layer.scope_stack.push(scope); - } - // If the node represents a definition, add a new definition to the - // local scope at the top of the scope stack. - else if Some(capture.index) == layer.config.local_def_capture_index { - reference_highlight = None; - let scope = layer.scope_stack.last_mut().unwrap(); - - let mut value_range = 0..0; - for capture in match_.captures { - if Some(capture.index) == layer.config.local_def_value_capture_index { - value_range = capture.node.byte_range(); - } - } - let name = byte_range_to_str(range.clone(), self.source); - scope.local_defs.push(LocalDef { - name, - value_range, - highlight: None, - }); - definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight); - } - // If the node represents a reference, then try to find the corresponding - // definition in the scope stack. 
- else if Some(capture.index) == layer.config.local_ref_capture_index - && definition_highlight.is_none() + pub fn advance(&mut self) -> (HighlightEvent, impl Iterator<Item = Highlight> + '_) { + let mut refresh = false; + let prev_stack_size = self + .overlays + .iter() + .filter(|overlay| overlay.active_highlight.is_some()) + .count(); + let pos = self.next_event_offset(); + + if self.next_highlight_end == pos { + for overlay in self.overlays.iter_mut() { + if overlay + .active_highlight + .is_some_and(|(_highlight, end)| end == pos) { - definition_highlight = None; - let name = byte_range_to_str(range.clone(), self.source); - for scope in layer.scope_stack.iter().rev() { - if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| { - if def.name == name && range.start >= def.value_range.end { - Some(def.highlight) - } else { - None - } - }) { - reference_highlight = highlight; - break; - } - if !scope.inherits { - break; - } - } - } - - // Continue processing any additional matches for the same node. - if let Some((next_match, next_capture_index)) = captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - capture = next_capture; - match_ = captures.next().unwrap().0; - continue; - } - } - - self.sort_layers(); - continue 'main; - } - - // Otherwise, this capture must represent a highlight. - // If this exact range has already been highlighted by an earlier pattern, or by - // a different layer, then skip over this one. - if let Some((last_start, last_end, last_depth)) = self.last_highlight_range { - if range.start == last_start && range.end == last_end && layer.depth < last_depth { - self.sort_layers(); - continue 'main; + overlay.active_highlight.take(); } } - // If the current node was found to be a local variable, then skip over any - // highlighting patterns that are disabled for local variables. 
- if definition_highlight.is_some() || reference_highlight.is_some() { - while layer.config.non_local_variable_patterns[match_.pattern_index] { - match_.remove(); - if let Some((next_match, next_capture_index)) = captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - capture = next_capture; - match_ = captures.next().unwrap().0; - continue; - } - } - - self.sort_layers(); - continue 'main; - } - } + refresh = true; + } - // Use the last capture found for the current node, skipping over any - // highlight patterns that also match this node. Captures - // for a given node are ordered by pattern index, so these subsequent - // captures are guaranteed to be for highlighting, not injections or - // local variables. - while let Some((next_match, next_capture_index)) = captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - match_.remove(); - capture = next_capture; - match_ = captures.next().unwrap().0; - } else { - break; + while self.next_highlight_start == pos { + let mut activated_idx = usize::MAX; + for (idx, overlay) in self.overlays.iter_mut().enumerate() { + let Some((highlight, range)) = overlay.current() else { + continue; + }; + if range.start != self.next_highlight_start { + continue; } - } - let current_highlight = layer.config.highlight_indices.load()[capture.index as usize]; + // If this overlay has a highlight at this start index, set its active highlight + // and increment the cursor position within the overlay. + overlay.active_highlight = Some((highlight, range.end)); + overlay.idx += 1; - // If this node represents a local definition, then store the current - // highlight value on the local scope entry representing this node. 
- if let Some(definition_highlight) = definition_highlight { - *definition_highlight = current_highlight; + activated_idx = activated_idx.min(idx); } - // Emit a scope start event and push the node's end position to the stack. - if let Some(highlight) = reference_highlight.or(current_highlight) { - self.last_highlight_range = Some((range.start, range.end, layer.depth)); - layer.highlight_end_stack.push(range.end); - return self - .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); - } - - self.sort_layers(); + // If `self.next_highlight_start == pos` that means that some overlay was ready to + // emit a highlight, so `activated_idx` must have been set to an existing index. + assert!( + (0..self.overlays.len()).contains(&activated_idx), + "expected an overlay to highlight (at pos {pos}, there are {} overlays)", + self.overlays.len() + ); + + // If any overlays are active after the (lowest) one which was just activated, the + // highlights need to be refreshed. + refresh |= self.overlays[activated_idx..] + .iter() + .any(|overlay| overlay.active_highlight.is_some()); + + self.next_highlight_start = self + .overlays + .iter() + .filter_map(|overlay| overlay.start()) + .min() + .unwrap_or(usize::MAX); } - } -} - -#[derive(Debug, Clone)] -pub enum InjectionLanguageMarker<'a> { - /// The language is specified by `LanguageConfiguration`'s `language_id` field. - /// - /// This marker is used when a pattern sets the `injection.language` property, for example - /// `(#set! injection.language "rust")`. - LanguageId(&'a str), - /// The language is specified in the document and captured by `@injection.language`. - /// - /// This is used for markdown code fences for example. While the `LanguageId` variant can be - /// looked up by finding the language config that sets an `language_id`, this variant contains - /// text from the document being highlighted, so the text is checked against each language's - /// `injection_regex`. 
- Name(RopeSlice<'a>), - Filename(RopeSlice<'a>), - Shebang(RopeSlice<'a>), -} -const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)"; - -pub struct Merge<I> { - iter: I, - spans: Box<dyn Iterator<Item = (usize, std::ops::Range<usize>)>>, + self.next_highlight_end = self + .overlays + .iter() + .filter_map(|overlay| Some(overlay.active_highlight?.1)) + .min() + .unwrap_or(usize::MAX); - next_event: Option<HighlightEvent>, - next_span: Option<(usize, std::ops::Range<usize>)>, + let (event, start) = if refresh { + (HighlightEvent::Refresh, 0) + } else { + (HighlightEvent::Push, prev_stack_size) + }; - queue: Vec<HighlightEvent>, + ( + event, + self.overlays + .iter() + .flat_map(|overlay| overlay.active_highlight) + .map(|(highlight, _end)| highlight) + .skip(start), + ) + } } -/// Merge a list of spans into the highlight event stream. -pub fn merge<I: Iterator<Item = HighlightEvent>>( - iter: I, - spans: Vec<(usize, std::ops::Range<usize>)>, -) -> Merge<I> { - let spans = Box::new(spans.into_iter()); - let mut merge = Merge { - iter, - spans, - next_event: None, - next_span: None, - queue: Vec::new(), - }; - merge.next_event = merge.iter.next(); - merge.next_span = merge.spans.next(); - merge +#[derive(Debug)] +pub enum CapturedNode<'a> { + Single(Node<'a>), + /// Guaranteed to be not empty + Grouped(Vec<Node<'a>>), } -impl<I: Iterator<Item = HighlightEvent>> Iterator for Merge<I> { - type Item = HighlightEvent; - fn next(&mut self) -> Option<Self::Item> { - use HighlightEvent::*; - if let Some(event) = self.queue.pop() { - return Some(event); +impl CapturedNode<'_> { + pub fn start_byte(&self) -> usize { + match self { + Self::Single(n) => n.start_byte() as usize, + Self::Grouped(ns) => ns[0].start_byte() as usize, } + } - loop { - match (self.next_event, &self.next_span) { - // this happens when range is partially or fully offscreen - (Some(Source { start, .. 
}), Some((span, range))) if start > range.start => { - if start > range.end { - self.next_span = self.spans.next(); - } else { - self.next_span = Some((*span, start..range.end)); - }; - } - _ => break, - } + pub fn end_byte(&self) -> usize { + match self { + Self::Single(n) => n.end_byte() as usize, + Self::Grouped(ns) => ns.last().unwrap().end_byte() as usize, } + } - match (self.next_event, &self.next_span) { - (Some(HighlightStart(i)), _) => { - self.next_event = self.iter.next(); - Some(HighlightStart(i)) - } - (Some(HighlightEnd), _) => { - self.next_event = self.iter.next(); - Some(HighlightEnd) - } - (Some(Source { start, end }), Some((_, range))) if start < range.start => { - let intersect = range.start.min(end); - let event = Source { - start, - end: intersect, - }; - - if end == intersect { - // the event is complete - self.next_event = self.iter.next(); - } else { - // subslice the event - self.next_event = Some(Source { - start: intersect, - end, - }); - }; - - Some(event) - } - (Some(Source { start, end }), Some((span, range))) if start == range.start => { - let intersect = range.end.min(end); - let event = HighlightStart(Highlight(*span)); - - // enqueue in reverse order - self.queue.push(HighlightEnd); - self.queue.push(Source { - start, - end: intersect, - }); + pub fn byte_range(&self) -> ops::Range<usize> { + self.start_byte()..self.end_byte() + } +} - if end == intersect { - // the event is complete - self.next_event = self.iter.next(); - } else { - // subslice the event - self.next_event = Some(Source { - start: intersect, - end, - }); - }; +#[derive(Debug)] +pub struct TextObjectQuery { + query: Query, +} - if intersect == range.end { - self.next_span = self.spans.next(); - } else { - self.next_span = Some((*span, intersect..range.end)); - } +impl TextObjectQuery { + pub fn new(query: Query) -> Self { + Self { query } + } - Some(event) - } - (Some(event), None) => { - self.next_event = self.iter.next(); - Some(event) - } - // Can happen if 
cursor at EOF and/or diagnostic reaches past the end. - // We need to actually emit events for the cursor-at-EOF situation, - // even though the range is past the end of the text. This needs to be - // handled appropriately by the drawing code by not assuming that - // all `Source` events point to valid indices in the rope. - (None, Some((span, range))) => { - let event = HighlightStart(Highlight(*span)); - self.queue.push(HighlightEnd); - self.queue.push(Source { - start: range.start, - end: range.end, - }); - self.next_span = self.spans.next(); - Some(event) - } - (None, None) => None, - e => unreachable!("{:?}", e), - } + /// Run the query on the given node and return sub nodes which match given + /// capture ("function.inside", "class.around", etc). + /// + /// Captures may contain multiple nodes by using quantifiers (+, *, etc), + /// and support for this is partial and could use improvement. + /// + /// ```query + /// (comment)+ @capture + /// + /// ; OR + /// ( + /// (comment)* + /// . + /// (function) + /// ) @capture + /// ``` + pub fn capture_nodes<'a>( + &'a self, + capture_name: &str, + node: &Node<'a>, + slice: RopeSlice<'a>, + ) -> Option<impl Iterator<Item = CapturedNode<'a>>> { + self.capture_nodes_any(&[capture_name], node, slice) } -} -fn node_is_visible(node: &Node) -> bool { - node.is_missing() || (node.is_named() && node.language().node_kind_is_visible(node.kind_id())) -} + /// Find the first capture that exists out of all given `capture_names` + /// and return sub nodes that match this capture. 
+ pub fn capture_nodes_any<'a>( + &'a self, + capture_names: &[&str], + node: &Node<'a>, + slice: RopeSlice<'a>, + ) -> Option<impl Iterator<Item = CapturedNode<'a>>> { + let capture = capture_names + .iter() + .find_map(|cap| self.query.get_capture(cap))?; -fn format_anonymous_node_kind(kind: &str) -> Cow<str> { - if kind.contains('"') { - Cow::Owned(kind.replace('"', "\\\"")) - } else { - Cow::Borrowed(kind) + let mut cursor = InactiveQueryCursor::new(); + cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); + let mut cursor = cursor.execute_query(&self.query, node, RopeInput::new(slice)); + let capture_node = iter::from_fn(move || { + let (mat, _) = cursor.next_matched_node()?; + Some(mat.nodes_for_capture(capture).cloned().collect()) + }) + .filter_map(move |nodes: Vec<_>| { + if nodes.len() > 1 { + Some(CapturedNode::Grouped(nodes)) + } else { + nodes.into_iter().map(CapturedNode::Single).next() + } + }); + Some(capture_node) } } @@ -2123,6 +879,18 @@ pub fn pretty_print_tree<W: fmt::Write>(fmt: &mut W, node: Node) -> fmt::Result } } +fn node_is_visible(node: &Node) -> bool { + node.is_missing() || (node.is_named() && node.grammar().node_kind_is_visible(node.kind_id())) +} + +fn format_anonymous_node_kind(kind: &str) -> Cow<str> { + if kind.contains('"') { + Cow::Owned(kind.replace('"', "\\\"")) + } else { + Cow::Borrowed(kind) + } +} + fn pretty_print_tree_impl<W: fmt::Write>( fmt: &mut W, cursor: &mut tree_sitter::TreeCursor, @@ -2173,9 +941,13 @@ fn pretty_print_tree_impl<W: fmt::Write>( #[cfg(test)] mod test { + use once_cell::sync::Lazy; + use super::*; use crate::{Rope, Transaction}; + static LOADER: Lazy<Loader> = Lazy::new(|| crate::config::user_lang_loader().unwrap()); + #[test] fn test_textobject_queries() { let query_str = r#" @@ -2190,29 +962,16 @@ mod test { "#, ); - let loader = Loader::new(Configuration { - language: vec![], - language_server: HashMap::new(), - }) - .unwrap(); - let language = get_language("rust").unwrap(); - - let query = 
Query::new(&language, query_str).unwrap(); - let textobject = TextObjectQuery { query }; - let mut cursor = QueryCursor::new(); - - let config = HighlightConfiguration::new(language, "", "", "").unwrap(); - let syntax = Syntax::new( - source.slice(..), - Arc::new(config), - Arc::new(ArcSwap::from_pointee(loader)), - ) - .unwrap(); + let language = LOADER.language_for_name("rust").unwrap(); + let grammar = LOADER.get_config(language).unwrap().grammar; + let query = Query::new(grammar, query_str, |_, _| Ok(())).unwrap(); + let textobject = TextObjectQuery::new(query); + let syntax = Syntax::new(source.slice(..), language, &LOADER).unwrap(); let root = syntax.tree().root_node(); - let mut test = |capture, range| { + let test = |capture, range| { let matches: Vec<_> = textobject - .capture_nodes(capture, root, source.slice(..), &mut cursor) + .capture_nodes(capture, &root, source.slice(..)) .unwrap() .collect(); @@ -2232,82 +991,8 @@ mod test { } #[test] - fn test_parser() { - let highlight_names: Vec<String> = [ - "attribute", - "constant", - "function.builtin", - "function", - "keyword", - "operator", - "property", - "punctuation", - "punctuation.bracket", - "punctuation.delimiter", - "string", - "string.special", - "tag", - "type", - "type.builtin", - "variable", - "variable.builtin", - "variable.parameter", - ] - .iter() - .cloned() - .map(String::from) - .collect(); - - let loader = Loader::new(Configuration { - language: vec![], - language_server: HashMap::new(), - }) - .unwrap(); - - let language = get_language("rust").unwrap(); - let config = HighlightConfiguration::new( - language, - &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm") - .unwrap(), - &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/injections.scm") - .unwrap(), - "", // locals.scm - ) - .unwrap(); - config.configure(&highlight_names); - - let source = Rope::from_str( - " - struct Stuff {} - fn main() {} - ", - ); - let syntax = Syntax::new( - 
source.slice(..), - Arc::new(config), - Arc::new(ArcSwap::from_pointee(loader)), - ) - .unwrap(); - let tree = syntax.tree(); - let root = tree.root_node(); - assert_eq!(root.kind(), "source_file"); - - assert_eq!( - root.to_sexp(), - concat!( - "(source_file ", - "(struct_item name: (type_identifier) body: (field_declaration_list)) ", - "(function_item name: (identifier) parameters: (parameters) body: (block)))" - ) - ); - - let struct_node = root.child(0).unwrap(); - assert_eq!(struct_node.kind(), "struct_item"); - } - - #[test] fn test_input_edits() { - use tree_sitter::InputEdit; + use tree_sitter::{InputEdit, Point}; let doc = Rope::from("hello world!\ntest 123"); let transaction = Transaction::change( @@ -2324,17 +1009,17 @@ mod test { start_byte: 6, old_end_byte: 11, new_end_byte: 10, - start_position: Point { row: 0, column: 6 }, - old_end_position: Point { row: 0, column: 11 }, - new_end_position: Point { row: 0, column: 10 } + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO }, InputEdit { start_byte: 12, old_end_byte: 17, new_end_byte: 12, - start_position: Point { row: 0, column: 12 }, - old_end_position: Point { row: 1, column: 4 }, - new_end_position: Point { row: 0, column: 12 } + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO } ] ); @@ -2353,9 +1038,9 @@ mod test { start_byte: 8, old_end_byte: 8, new_end_byte: 14, - start_position: Point { row: 0, column: 8 }, - old_end_position: Point { row: 0, column: 8 }, - new_end_position: Point { row: 0, column: 14 } + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO }] ); } @@ -2369,26 +1054,13 @@ mod test { end: usize, ) { let source = Rope::from_str(source); - - let loader = Loader::new(Configuration { - language: vec![], - language_server: HashMap::new(), - }) - .unwrap(); - let language = get_language(language_name).unwrap(); - - let config = HighlightConfiguration::new(language, "", "", 
"").unwrap(); - let syntax = Syntax::new( - source.slice(..), - Arc::new(config), - Arc::new(ArcSwap::from_pointee(loader)), - ) - .unwrap(); + let language = LOADER.language_for_name(language_name).unwrap(); + let syntax = Syntax::new(source.slice(..), language, &LOADER).unwrap(); let root = syntax .tree() .root_node() - .descendant_for_byte_range(start, end) + .descendant_for_byte_range(start as u32, end as u32) .unwrap(); let mut output = String::new(); @@ -2456,14 +1128,4 @@ mod test { source.len(), ); } - - #[test] - fn test_load_runtime_file() { - // Test to make sure we can load some data from the runtime directory. - let contents = load_runtime_file("rust", "indents.scm").unwrap(); - assert!(!contents.is_empty()); - - let results = load_runtime_file("rust", "does-not-exist"); - assert!(results.is_err()); - } } |