Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-core/src/syntax.rs')
| -rw-r--r-- | helix-core/src/syntax.rs | 2790 |
1 files changed, 1666 insertions, 1124 deletions
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 4bc177ef..f4b4535b 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -1,1262 +1,1898 @@ -pub mod config; +use crate::{ + chars::char_is_line_ending, + regex::Regex, + transaction::{ChangeSet, Operation}, + Rope, RopeSlice, Tendril, +}; + +pub use helix_syntax::get_language; + +use arc_swap::ArcSwap; use std::{ borrow::Cow, - collections::HashMap, - fmt, iter, - ops::{self, RangeBounds}, + cell::RefCell, + collections::{HashMap, HashSet}, + fmt, path::Path, sync::Arc, - time::Duration, }; -use anyhow::{Context, Result}; -use arc_swap::{ArcSwap, Guard}; -use config::{Configuration, FileType, LanguageConfiguration, LanguageServerConfiguration}; -use foldhash::HashSet; -use helix_loader::grammar::get_language; -use helix_stdx::rope::RopeSliceExt as _; -use once_cell::sync::OnceCell; -use ropey::RopeSlice; -use tree_house::{ - highlighter, - query_iter::QueryIter, - tree_sitter::{ - query::{InvalidPredicateError, UserPredicate}, - Capture, Grammar, InactiveQueryCursor, InputEdit, Node, Pattern, Query, RopeInput, Tree, - }, - Error, InjectionLanguageMarker, LanguageConfig as SyntaxConfig, Layer, -}; +use once_cell::sync::{Lazy, OnceCell}; +use serde::{Deserialize, Serialize}; -use crate::{indent::IndentQuery, tree_sitter, ChangeSet, Language}; +fn deserialize_regex<'de, D>(deserializer: D) -> Result<Option<Regex>, D::Error> +where + D: serde::Deserializer<'de>, +{ + Option::<String>::deserialize(deserializer)? + .map(|buf| Regex::new(&buf).map_err(serde::de::Error::custom)) + .transpose() +} -pub use tree_house::{ - highlighter::{Highlight, HighlightEvent}, - query_iter::QueryIterEvent, - Error as HighlighterError, LanguageLoader, TreeCursor, TREE_SITTER_MATCH_LIMIT, -}; +fn deserialize_lsp_config<'de, D>(deserializer: D) -> Result<Option<serde_json::Value>, D::Error> +where + D: serde::Deserializer<'de>, +{ + Option::<toml::Value>::deserialize(deserializer)? + .map(|toml| toml.try_into().map_err(serde::de::Error::custom)) + .transpose() +} -#[derive(Debug)] -pub struct LanguageData { - config: Arc<LanguageConfiguration>, - syntax: OnceCell<Option<SyntaxConfig>>, - indent_query: OnceCell<Option<IndentQuery>>, - textobject_query: OnceCell<Option<TextObjectQuery>>, - tag_query: OnceCell<Option<TagQuery>>, - rainbow_query: OnceCell<Option<RainbowQuery>>, +#[derive(Debug, Serialize, Deserialize)] +pub struct Configuration { + pub language: Vec<LanguageConfiguration>, } -impl LanguageData { - fn new(config: LanguageConfiguration) -> Self { - Self { - config: Arc::new(config), - syntax: OnceCell::new(), - indent_query: OnceCell::new(), - textobject_query: OnceCell::new(), - tag_query: OnceCell::new(), - rainbow_query: OnceCell::new(), - } - } +// largely based on tree-sitter/cli/src/loader.rs +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct LanguageConfiguration { + #[serde(rename = "name")] + pub language_id: String, + pub scope: String, // source.rust + pub file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc> + pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml> + pub comment_token: Option<String>, + + #[serde(default, skip_serializing, deserialize_with = "deserialize_lsp_config")] + pub config: Option<serde_json::Value>, + + #[serde(default)] + pub auto_format: bool, + + // content_regex + #[serde(default, skip_serializing, deserialize_with = "deserialize_regex")] + pub injection_regex: Option<Regex>, + // first_line_regex + // + #[serde(skip)] + pub(crate) highlight_config: OnceCell<Option<Arc<HighlightConfiguration>>>, + // tags_config OnceCell<> https://github.com/tree-sitter/tree-sitter/pull/583 + #[serde(skip_serializing_if = "Option::is_none")] + pub language_server: Option<LanguageServerConfiguration>, + #[serde(skip_serializing_if = "Option::is_none")] + pub indent: Option<IndentationConfiguration>, + + #[serde(skip)] + pub(crate) indent_query: OnceCell<Option<IndentQuery>>, + #[serde(skip)] + pub(crate) textobject_query: OnceCell<Option<TextObjectQuery>>, +} - pub fn config(&self) -> &Arc<LanguageConfiguration> { - &self.config - } +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct LanguageServerConfiguration { + pub command: String, + #[serde(default)] + #[serde(skip_serializing_if = "Vec::is_empty")] + pub args: Vec<String>, +} - /// Loads the grammar and compiles the highlights, injections and locals for the language. - /// This function should only be used by this module or the xtask crate. - pub fn compile_syntax_config( - config: &LanguageConfiguration, - loader: &Loader, - ) -> Result<Option<SyntaxConfig>> { - let name = &config.language_id; - let parser_name = config.grammar.as_deref().unwrap_or(name); - let Some(grammar) = get_language(parser_name)? else { - log::info!("Skipping syntax config for '{name}' because the parser's shared library does not exist"); - return Ok(None); - }; - let highlight_query_text = read_query(name, "highlights.scm"); - let injection_query_text = read_query(name, "injections.scm"); - let local_query_text = read_query(name, "locals.scm"); - let config = SyntaxConfig::new( - grammar, - &highlight_query_text, - &injection_query_text, - &local_query_text, - ) - .with_context(|| format!("Failed to compile highlights for '{name}'"))?; +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct IndentationConfiguration { + pub tab_width: usize, + pub unit: String, +} - reconfigure_highlights(&config, &loader.scopes()); +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct IndentQuery { + #[serde(default)] + #[serde(skip_serializing_if = "HashSet::is_empty")] + pub indent: HashSet<String>, + #[serde(default)] + #[serde(skip_serializing_if = "HashSet::is_empty")] + pub outdent: HashSet<String>, +} - Ok(Some(config)) - } +#[derive(Debug)] +pub struct TextObjectQuery { + pub query: Query, +} - pub fn syntax_config(&self, loader: &Loader) -> Option<&SyntaxConfig> { - self.syntax - .get_or_init(|| { - Self::compile_syntax_config(&self.config, loader) - .map_err(|err| { - log::error!("{err:#}"); - }) - .ok() - .flatten() +impl TextObjectQuery { + /// Run the query on the given node and return sub nodes which match given + /// capture ("function.inside", "class.around", etc). + pub fn capture_nodes<'a>( + &'a self, + capture_name: &str, + node: Node<'a>, + slice: RopeSlice<'a>, + cursor: &'a mut QueryCursor, + ) -> Option<impl Iterator<Item = Node<'a>>> { + let capture_idx = self.query.capture_index_for_name(capture_name)?; + let captures = cursor.captures(&self.query, node, RopeProvider(slice)); + + captures + .filter_map(move |(mat, idx)| { + (mat.captures[idx].index == capture_idx).then(|| mat.captures[idx].node) }) - .as_ref() + .into() } +} - /// Compiles the indents.scm query for a language. - /// This function should only be used by this module or the xtask crate. - pub fn compile_indent_query( - grammar: Grammar, - config: &LanguageConfiguration, - ) -> Result<Option<IndentQuery>> { - let name = &config.language_id; - let text = read_query(name, "indents.scm"); - if text.is_empty() { - return Ok(None); - } - let indent_query = IndentQuery::new(grammar, &text) - .with_context(|| format!("Failed to compile indents.scm query for '{name}'"))?; - Ok(Some(indent_query)) +fn load_runtime_file(language: &str, filename: &str) -> Result<String, std::io::Error> { + let path = crate::RUNTIME_DIR + .join("queries") + .join(language) + .join(filename); + std::fs::read_to_string(&path) +} + +fn read_query(language: &str, filename: &str) -> String { + static INHERITS_REGEX: Lazy<Regex> = + Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()]+)\s*").unwrap()); + + let query = load_runtime_file(language, filename).unwrap_or_default(); + + // TODO: the collect() is not ideal + let inherits = INHERITS_REGEX + .captures_iter(&query) + .flat_map(|captures| { + captures[1] + .split(',') + .map(str::to_owned) + .collect::<Vec<_>>() + }) + .collect::<Vec<_>>(); + + if inherits.is_empty() { + return query; } - fn indent_query(&self, loader: &Loader) -> Option<&IndentQuery> { - self.indent_query - .get_or_init(|| { - let grammar = self.syntax_config(loader)?.grammar; - Self::compile_indent_query(grammar, &self.config) - .map_err(|err| { - log::error!("{err}"); - }) - .ok() - .flatten() - }) - .as_ref() + let mut queries = inherits + .iter() + .map(|language| read_query(language, filename)) + .collect::<Vec<_>>(); + + queries.push(query); + + queries.concat() +} + +impl LanguageConfiguration { + fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> { + let language = self.language_id.to_ascii_lowercase(); + + let highlights_query = read_query(&language, "highlights.scm"); + // always highlight syntax errors + // highlights_query += "\n(ERROR) @error"; + + let injections_query = read_query(&language, "injections.scm"); + let locals_query = read_query(&language, "locals.scm"); + + if highlights_query.is_empty() { + None + } else { + let language = get_language(&crate::RUNTIME_DIR, &self.language_id).ok()?; + let config = HighlightConfiguration::new( + language, + &highlights_query, + &injections_query, + &locals_query, + ); + + let config = match config { + Ok(config) => config, + Err(err) => panic!("{}", err), + }; // TODO: avoid panic + config.configure(scopes); + Some(Arc::new(config)) + } } - /// Compiles the textobjects.scm query for a language. - /// This function should only be used by this module or the xtask crate. - pub fn compile_textobject_query( - grammar: Grammar, - config: &LanguageConfiguration, - ) -> Result<Option<TextObjectQuery>> { - let name = &config.language_id; - let text = read_query(name, "textobjects.scm"); - if text.is_empty() { - return Ok(None); + pub fn reconfigure(&self, scopes: &[String]) { + if let Some(Some(config)) = self.highlight_config.get() { + config.configure(scopes); } - let query = Query::new(grammar, &text, |_, _| Ok(())) - .with_context(|| format!("Failed to compile textobjects.scm queries for '{name}'"))?; - Ok(Some(TextObjectQuery::new(query))) } - fn textobject_query(&self, loader: &Loader) -> Option<&TextObjectQuery> { - self.textobject_query - .get_or_init(|| { - let grammar = self.syntax_config(loader)?.grammar; - Self::compile_textobject_query(grammar, &self.config) - .map_err(|err| { - log::error!("{err}"); - }) - .ok() - .flatten() - }) - .as_ref() + pub fn highlight_config(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> { + self.highlight_config + .get_or_init(|| self.initialize_highlight(scopes)) + .clone() } - /// Compiles the tags.scm query for a language. - /// This function should only be used by this module or the xtask crate. - pub fn compile_tag_query( - grammar: Grammar, - config: &LanguageConfiguration, - ) -> Result<Option<TagQuery>> { - let name = &config.language_id; - let text = read_query(name, "tags.scm"); - if text.is_empty() { - return Ok(None); - } - let query = Query::new(grammar, &text, |_pattern, predicate| match predicate { - // TODO: these predicates are allowed in tags.scm queries but not yet used. - UserPredicate::IsPropertySet { key: "local", .. } => Ok(()), - UserPredicate::Other(pred) => match pred.name() { - "strip!" | "select-adjacent!" => Ok(()), - _ => Err(InvalidPredicateError::unknown(predicate)), - }, - _ => Err(InvalidPredicateError::unknown(predicate)), - }) - .with_context(|| format!("Failed to compile tags.scm query for '{name}'"))?; - Ok(Some(TagQuery { query })) + pub fn is_highlight_initialized(&self) -> bool { + self.highlight_config.get().is_some() } - fn tag_query(&self, loader: &Loader) -> Option<&TagQuery> { - self.tag_query + pub fn indent_query(&self) -> Option<&IndentQuery> { + self.indent_query .get_or_init(|| { - let grammar = self.syntax_config(loader)?.grammar; - Self::compile_tag_query(grammar, &self.config) - .map_err(|err| { - log::error!("{err}"); - }) - .ok() - .flatten() + let language = self.language_id.to_ascii_lowercase(); + + let toml = load_runtime_file(&language, "indents.toml").ok()?; + toml::from_slice(toml.as_bytes()).ok() }) .as_ref() } - /// Compiles the rainbows.scm query for a language. - /// This function should only be used by this module or the xtask crate. - pub fn compile_rainbow_query( - grammar: Grammar, - config: &LanguageConfiguration, - ) -> Result<Option<RainbowQuery>> { - let name = &config.language_id; - let text = read_query(name, "rainbows.scm"); - if text.is_empty() { - return Ok(None); - } - let rainbow_query = RainbowQuery::new(grammar, &text) - .with_context(|| format!("Failed to compile rainbows.scm query for '{name}'"))?; - Ok(Some(rainbow_query)) - } - - fn rainbow_query(&self, loader: &Loader) -> Option<&RainbowQuery> { - self.rainbow_query - .get_or_init(|| { - let grammar = self.syntax_config(loader)?.grammar; - Self::compile_rainbow_query(grammar, &self.config) - .map_err(|err| { - log::error!("{err}"); - }) - .ok() - .flatten() + pub fn textobject_query(&self) -> Option<&TextObjectQuery> { + self.textobject_query + .get_or_init(|| -> Option<TextObjectQuery> { + let lang_name = self.language_id.to_ascii_lowercase(); + let query_text = read_query(&lang_name, "textobjects.scm"); + let lang = self.highlight_config.get()?.as_ref()?.language; + let query = Query::new(lang, &query_text).ok()?; + Some(TextObjectQuery { query }) }) .as_ref() } - fn reconfigure(&self, scopes: &[String]) { - if let Some(Some(config)) = self.syntax.get() { - reconfigure_highlights(config, scopes); - } + pub fn scope(&self) -> &str { + &self.scope } } -pub fn reconfigure_highlights(config: &SyntaxConfig, recognized_names: &[String]) { - config.configure(move |capture_name| { - let capture_parts: Vec<_> = capture_name.split('.').collect(); - - let mut best_index = None; - let mut best_match_len = 0; - for (i, recognized_name) in recognized_names.iter().enumerate() { - let mut len = 0; - let mut matches = true; - for (i, part) in recognized_name.split('.').enumerate() { - match capture_parts.get(i) { - Some(capture_part) if *capture_part == part => len += 1, - _ => { - matches = false; - break; - } - } - } - if matches && len > best_match_len { - best_index = Some(i); - best_match_len = len; - } - } - best_index.map(|idx| Highlight::new(idx as u32)) - }); -} - -pub fn read_query(lang: &str, query_filename: &str) -> String { - tree_house::read_query(lang, |language| { - helix_loader::grammar::load_runtime_file(language, query_filename).unwrap_or_default() - }) -} - -#[derive(Debug, Default)] +#[derive(Debug)] pub struct Loader { - languages: Vec<LanguageData>, - languages_by_extension: HashMap<String, Language>, - languages_by_shebang: HashMap<String, Language>, - languages_glob_matcher: FileTypeGlobMatcher, - language_server_configs: HashMap<String, LanguageServerConfiguration>, - scopes: ArcSwap<Vec<String>>, + // highlight_names ? + language_configs: Vec<Arc<LanguageConfiguration>>, + language_config_ids_by_file_type: HashMap<String, usize>, // Vec<usize> } -pub type LoaderError = globset::Error; - impl Loader { - pub fn new(config: Configuration) -> Result<Self, LoaderError> { - let mut languages = Vec::with_capacity(config.language.len()); - let mut languages_by_extension = HashMap::new(); - let mut languages_by_shebang = HashMap::new(); - let mut file_type_globs = Vec::new(); + pub fn new(config: Configuration) -> Self { + let mut loader = Self { + language_configs: Vec::new(), + language_config_ids_by_file_type: HashMap::new(), + }; - for mut config in config.language { - let language = Language(languages.len() as u32); - config.language = Some(language); + for config in config.language { + // get the next id + let language_id = loader.language_configs.len(); for file_type in &config.file_types { - match file_type { - FileType::Extension(extension) => { - languages_by_extension.insert(extension.clone(), language); - } - FileType::Glob(glob) => { - file_type_globs.push(FileTypeGlob::new(glob.to_owned(), language)); - } - }; - } - for shebang in &config.shebangs { - languages_by_shebang.insert(shebang.clone(), language); + // entry().or_insert(Vec::new).push(language_id); + loader + .language_config_ids_by_file_type + .insert(file_type.clone(), language_id); } - languages.push(LanguageData::new(config)); + loader.language_configs.push(Arc::new(config)); } - Ok(Self { - languages, - languages_by_extension, - languages_by_shebang, - languages_glob_matcher: FileTypeGlobMatcher::new(file_type_globs)?, - language_server_configs: config.language_server, - scopes: ArcSwap::from_pointee(Vec::new()), - }) + loader } - pub fn languages(&self) -> impl ExactSizeIterator<Item = (Language, &LanguageData)> { - self.languages - .iter() - .enumerate() - .map(|(idx, data)| (Language(idx as u32), data)) - } - - pub fn language_configs(&self) -> impl ExactSizeIterator<Item = &LanguageConfiguration> { - self.languages.iter().map(|language| &*language.config) - } + pub fn language_config_for_file_name(&self, path: &Path) -> Option<Arc<LanguageConfiguration>> { + // Find all the language configurations that match this file name + // or a suffix of the file name. + let configuration_id = path + .file_name() + .and_then(|n| n.to_str()) + .and_then(|file_name| self.language_config_ids_by_file_type.get(file_name)) + .or_else(|| { + path.extension() + .and_then(|extension| extension.to_str()) + .and_then(|extension| self.language_config_ids_by_file_type.get(extension)) + }); - pub fn language(&self, lang: Language) -> &LanguageData { - &self.languages[lang.idx()] - } + configuration_id.and_then(|&id| self.language_configs.get(id).cloned()) - pub fn language_for_name(&self, name: impl PartialEq<String>) -> Option<Language> { - self.languages.iter().enumerate().find_map(|(idx, config)| { - (name == config.config.language_id).then_some(Language(idx as u32)) - }) + // TODO: content_regex handling conflict resolution } - pub fn language_for_scope(&self, scope: &str) -> Option<Language> { - self.languages.iter().enumerate().find_map(|(idx, config)| { - (scope == config.config.scope).then_some(Language(idx as u32)) - }) + pub fn language_config_for_scope(&self, scope: &str) -> Option<Arc<LanguageConfiguration>> { + self.language_configs + .iter() + .find(|config| config.scope == scope) + .cloned() } - pub fn language_for_match(&self, text: RopeSlice) -> Option<Language> { - // PERF: If the name matches up with the id, then this saves the need to do expensive regex. - let shortcircuit = self.language_for_name(text); - if shortcircuit.is_some() { - return shortcircuit; - } - - // If the name did not match up with a known id, then match on injection regex. - + pub fn language_configuration_for_injection_string( + &self, + string: &str, + ) -> Option<Arc<LanguageConfiguration>> { let mut best_match_length = 0; let mut best_match_position = None; - for (idx, data) in self.languages.iter().enumerate() { - if let Some(injection_regex) = &data.config.injection_regex { - if let Some(mat) = injection_regex.find(text.regex_input()) { + for (i, configuration) in self.language_configs.iter().enumerate() { + if let Some(injection_regex) = &configuration.injection_regex { + if let Some(mat) = injection_regex.find(string) { let length = mat.end() - mat.start(); if length > best_match_length { - best_match_position = Some(idx); + best_match_position = Some(i); best_match_length = length; } } } } - best_match_position.map(|i| Language(i as u32)) + if let Some(i) = best_match_position { + let configuration = &self.language_configs[i]; + return Some(configuration.clone()); + } + None } - - pub fn language_for_filename(&self, path: &Path) -> Option<Language> { - // Find all the language configurations that match this file name - // or a suffix of the file name. - - // TODO: content_regex handling conflict resolution - self.languages_glob_matcher - .language_for_path(path) - .or_else(|| { - path.extension() - .and_then(|extension| extension.to_str()) - .and_then(|extension| self.languages_by_extension.get(extension).copied()) - }) + pub fn language_configs_iter(&self) -> impl Iterator<Item = &Arc<LanguageConfiguration>> { + self.language_configs.iter() } +} - pub fn language_for_shebang(&self, text: RopeSlice) -> Option<Language> { - // NOTE: this is slightly different than the one for injection markers in tree-house. It - // is anchored at the beginning. - use helix_stdx::rope::Regex; - use once_cell::sync::Lazy; - const SHEBANG: &str = r"^#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)"; - static SHEBANG_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(SHEBANG).unwrap()); - - let marker = SHEBANG_REGEX - .captures_iter(regex_cursor::Input::new(text)) - .map(|cap| text.byte_slice(cap.get_group(1).unwrap().range())) - .next()?; - self.language_for_shebang_marker(marker) - } +pub struct TsParser { + parser: tree_sitter::Parser, + cursors: Vec<QueryCursor>, +} - fn language_for_shebang_marker(&self, marker: RopeSlice) -> Option<Language> { - let shebang: Cow<str> = marker.into(); - self.languages_by_shebang.get(shebang.as_ref()).copied() +impl fmt::Debug for TsParser { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("TsParser").finish() } +} - pub fn indent_query(&self, lang: Language) -> Option<&IndentQuery> { - self.language(lang).indent_query(self) - } +// could also just use a pool, or a single instance? +thread_local! { + pub static PARSER: RefCell<TsParser> = RefCell::new(TsParser { + parser: Parser::new(), + cursors: Vec::new(), + }) +} - pub fn textobject_query(&self, lang: Language) -> Option<&TextObjectQuery> { - self.language(lang).textobject_query(self) - } +#[derive(Debug)] +pub struct Syntax { + config: Arc<HighlightConfiguration>, - pub fn tag_query(&self, lang: Language) -> Option<&TagQuery> { - self.language(lang).tag_query(self) - } + root_layer: LanguageLayer, +} - fn rainbow_query(&self, lang: Language) -> Option<&RainbowQuery> { - self.language(lang).rainbow_query(self) - } +fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<str> { + let start_char = source.byte_to_char(range.start); + let end_char = source.byte_to_char(range.end); + Cow::from(source.slice(start_char..end_char)) +} - pub fn language_server_configs(&self) -> &HashMap<String, LanguageServerConfiguration> { - &self.language_server_configs - } +impl Syntax { + // buffer, grammar, config, grammars, sync_timeout? + pub fn new( + /*language: Lang,*/ source: &Rope, + config: Arc<HighlightConfiguration>, + ) -> Self { + let root_layer = LanguageLayer { tree: None }; + + // track markers of injections + // track scope_descriptor: a Vec of scopes for item in tree + + let mut syntax = Self { + // grammar, + config, + root_layer, + }; - pub fn scopes(&self) -> Guard<Arc<Vec<String>>> { - self.scopes.load() + // update root layer + PARSER.with(|ts_parser| { + // TODO: handle the returned `Result` properly. + let _ = syntax.root_layer.parse( + &mut ts_parser.borrow_mut(), + &syntax.config, + source, + 0, + vec![Range { + start_byte: 0, + end_byte: usize::MAX, + start_point: Point::new(0, 0), + end_point: Point::new(usize::MAX, usize::MAX), + }], + ); + }); + syntax } - pub fn set_scopes(&self, scopes: Vec<String>) { - self.scopes.store(Arc::new(scopes)); + pub fn update( + &mut self, + old_source: &Rope, + source: &Rope, + changeset: &ChangeSet, + ) -> Result<(), Error> { + PARSER.with(|ts_parser| { + self.root_layer.update( + &mut ts_parser.borrow_mut(), + &self.config, + old_source, + source, + changeset, + ) + }) - // Reconfigure existing grammars - for data in &self.languages { - data.reconfigure(&self.scopes()); - } + // TODO: deal with injections and update them too } -} -impl LanguageLoader for Loader { - fn language_for_marker(&self, marker: InjectionLanguageMarker) -> Option<Language> { - match marker { - InjectionLanguageMarker::Name(name) => self.language_for_name(name), - InjectionLanguageMarker::Match(text) => self.language_for_match(text), - InjectionLanguageMarker::Filename(text) => { - let path: Cow<str> = text.into(); - self.language_for_filename(Path::new(path.as_ref())) - } - InjectionLanguageMarker::Shebang(text) => self.language_for_shebang_marker(text), - } - } + // fn buffer_changed -> call layer.update(range, new_text) on root layer and then all marker layers - fn get_config(&self, lang: Language) -> Option<&SyntaxConfig> { - self.languages[lang.idx()].syntax_config(self) + // call this on transaction.apply() -> buffer_changed(changes) + // + // fn parse(language, old_tree, ranges) + // + pub fn tree(&self) -> &Tree { + self.root_layer.tree() } -} + // + // <!--update_for_injection(grammar)--> -#[derive(Debug)] -struct FileTypeGlob { - glob: globset::Glob, - language: Language, -} + // Highlighting -impl FileTypeGlob { - pub fn new(glob: globset::Glob, language: Language) -> Self { - Self { glob, language } - } -} - -#[derive(Debug)] -struct FileTypeGlobMatcher { - matcher: globset::GlobSet, - file_types: Vec<FileTypeGlob>, -} + /// Iterate over the highlighted regions for a given slice of source code. + pub fn highlight_iter<'a>( + &self, + source: RopeSlice<'a>, + range: Option<std::ops::Range<usize>>, + cancellation_flag: Option<&'a AtomicUsize>, + injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, + ) -> impl Iterator<Item = Result<HighlightEvent, Error>> + 'a { + // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which + // prevents them from being moved. But both of these values are really just + // pointers, so it's actually ok to move them. + + // reuse a cursor from the pool if possible + let mut cursor = PARSER.with(|ts_parser| { + let highlighter = &mut ts_parser.borrow_mut(); + highlighter.cursors.pop().unwrap_or_else(QueryCursor::new) + }); + let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(self.tree()) }; + let cursor_ref = unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; + let query_ref = unsafe { mem::transmute::<_, &'static Query>(&self.config.query) }; + let config_ref = + unsafe { mem::transmute::<_, &'static HighlightConfiguration>(self.config.as_ref()) }; + + // if reusing cursors & no range this resets to whole range + cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX)); + + let captures = cursor_ref + .captures(query_ref, tree_ref.root_node(), RopeProvider(source)) + .peekable(); + + // manually craft the root layer based on the existing tree + let layer = HighlightIterLayer { + highlight_end_stack: Vec::new(), + scope_stack: vec![LocalScope { + inherits: false, + range: 0..usize::MAX, + local_defs: Vec::new(), + }], + cursor, + depth: 0, + _tree: None, + captures, + config: config_ref, + ranges: vec![Range { + start_byte: 0, + end_byte: usize::MAX, + start_point: Point::new(0, 0), + end_point: Point::new(usize::MAX, usize::MAX), + }], + }; -impl Default for FileTypeGlobMatcher { - fn default() -> Self { - Self { - matcher: globset::GlobSet::empty(), - file_types: Default::default(), - } + let mut result = HighlightIter { + source, + byte_offset: range.map_or(0, |r| r.start), // TODO: simplify + injection_callback, + cancellation_flag, + iter_count: 0, + layers: vec![layer], + next_event: None, + last_highlight_range: None, + }; + result.sort_layers(); + result } -} + // on_tokenize + // on_change_highlighting -impl FileTypeGlobMatcher { - fn new(file_types: Vec<FileTypeGlob>) -> Result<Self, globset::Error> { - let mut builder = globset::GlobSetBuilder::new(); - for file_type in &file_types { - builder.add(file_type.glob.clone()); - } + // Commenting + // comment_strings_for_pos + // is_commented - Ok(Self { - matcher: builder.build()?, - file_types, - }) - } + // Indentation + // suggested_indent_for_line_at_buffer_row + // suggested_indent_for_buffer_row + // indent_level_for_line - fn language_for_path(&self, path: &Path) -> Option<Language> { - self.matcher - .matches(path) - .iter() - .filter_map(|idx| self.file_types.get(*idx)) - .max_by_key(|file_type| file_type.glob.glob().len()) - .map(|file_type| file_type.language) - } + // TODO: Folding + + // Syntax APIs + // get_syntax_node_containing_range -> + // ... + // get_syntax_node_at_pos + // buffer_range_for_scope_at_pos } #[derive(Debug)] -pub struct Syntax { - pub inner: tree_house::Syntax, +pub struct LanguageLayer { + // mode + // grammar + // depth + pub(crate) tree: Option<Tree>, } -const PARSE_TIMEOUT: Duration = Duration::from_millis(500); // half a second is pretty generous +impl LanguageLayer { + // pub fn new() -> Self { + // Self { tree: None } + // } -impl Syntax { - pub fn new(source: RopeSlice, language: Language, loader: &Loader) -> Result<Self, Error> { - let inner = tree_house::Syntax::new(source, language, PARSE_TIMEOUT, loader)?; - Ok(Self { inner }) + pub fn tree(&self) -> &Tree { + // TODO: no unwrap + self.tree.as_ref().unwrap() } - pub fn update( + fn parse( &mut self, - old_source: RopeSlice, - source: RopeSlice, - changeset: &ChangeSet, - loader: &Loader, + ts_parser: &mut TsParser, + config: &HighlightConfiguration, + source: &Rope, + _depth: usize, + ranges: Vec<Range>, ) -> Result<(), Error> { - let edits = generate_edits(old_source, changeset); - if edits.is_empty() { - Ok(()) - } else { - self.inner.update(source, PARSE_TIMEOUT, &edits, loader) + if ts_parser.parser.set_included_ranges(&ranges).is_ok() { + ts_parser + .parser + .set_language(config.language) + .map_err(|_| Error::InvalidLanguage)?; + + // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) }; + let tree = ts_parser + .parser + .parse_with( + &mut |byte, _| { + if byte <= source.len_bytes() { + let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); + chunk[byte - start_byte..].as_bytes() + } else { + // out of range + &[] + } + }, + self.tree.as_ref(), + ) + .ok_or(Error::Cancelled)?; + + self.tree = Some(tree) } + Ok(()) } - pub fn layer(&self, layer: Layer) -> &tree_house::LayerData { - self.inner.layer(layer) - } + pub(crate) fn generate_edits( + old_text: RopeSlice, + changeset: &ChangeSet, + ) -> Vec<tree_sitter::InputEdit> { + use Operation::*; + let mut old_pos = 0; - pub fn root_layer(&self) -> Layer { - self.inner.root() - } + let mut edits = Vec::new(); - pub fn layer_for_byte_range(&self, start: u32, end: u32) -> Layer { - self.inner.layer_for_byte_range(start, end) - } + let mut iter = changeset.changes.iter().peekable(); - pub fn root_language(&self) -> Language { - self.layer(self.root_layer()).language - } + // TODO; this is a lot easier with Change instead of Operation. - pub fn tree(&self) -> &Tree { - self.inner.tree() - } + fn point_at_pos(text: RopeSlice, pos: usize) -> (usize, Point) { + let byte = text.char_to_byte(pos); // <- attempted to index past end + let line = text.char_to_line(pos); + let line_start_byte = text.line_to_byte(line); + let col = byte - line_start_byte; - pub fn tree_for_byte_range(&self, start: u32, end: u32) -> &Tree { - self.inner.tree_for_byte_range(start, end) - } + (byte, Point::new(line, col)) + } - pub fn named_descendant_for_byte_range(&self, start: u32, end: u32) -> Option<Node<'_>> { - self.inner.named_descendant_for_byte_range(start, end) - } + fn traverse(point: Point, text: &Tendril) -> Point { + let Point { + mut row, + mut column, + } = point; + + // TODO: there should be a better way here. + let mut chars = text.chars().peekable(); + while let Some(ch) = chars.next() { + if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) { + row += 1; + column = 0; + } else { + column += 1; + } + } + Point { row, column } + } - pub fn descendant_for_byte_range(&self, start: u32, end: u32) -> Option<Node<'_>> { - self.inner.descendant_for_byte_range(start, end) + while let Some(change) = iter.next() { + let len = match change { + Delete(i) | Retain(i) => *i, + Insert(_) => 0, + }; + let mut old_end = old_pos + len; + + match change { + Retain(_) => {} + Delete(_) => { + let (start_byte, start_position) = point_at_pos(old_text, old_pos); + let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); + + // TODO: Position also needs to be byte based... + // let byte = char_to_byte(old_pos) + // let line = char_to_line(old_pos) + // let line_start_byte = line_to_byte() + // Position::new(line, line_start_byte - byte) + + // deletion + edits.push(tree_sitter::InputEdit { + start_byte, // old_pos to byte + old_end_byte, // old_end to byte + new_end_byte: start_byte, // old_pos to byte + start_position, // old pos to coords + old_end_position, // old_end to coords + new_end_position: start_position, // old pos to coords + }); + } + Insert(s) => { + let (start_byte, start_position) = point_at_pos(old_text, old_pos); + + // a subsequent delete means a replace, consume it + if let Some(Delete(len)) = iter.peek() { + old_end = old_pos + len; + let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); + + iter.next(); + + // replacement + edits.push(tree_sitter::InputEdit { + start_byte, // old_pos to byte + old_end_byte, // old_end to byte + new_end_byte: start_byte + s.len(), // old_pos to byte + s.len() + start_position, // old pos to coords + old_end_position, // old_end to coords + new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) + }); + } else { + // insert + edits.push(tree_sitter::InputEdit { + start_byte, // old_pos to byte + old_end_byte: start_byte, // same + new_end_byte: start_byte + s.len(), // old_pos + s.len() + start_position, // old pos to coords + old_end_position: start_position, // same + new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) + }); + } + } + } + old_pos = old_end; + } + edits } - pub fn walk(&self) -> TreeCursor<'_> { - self.inner.walk() - } + fn update( + &mut self, + ts_parser: &mut TsParser, + config: &HighlightConfiguration, + old_source: &Rope, + source: &Rope, + changeset: &ChangeSet, + ) -> Result<(), Error> { + if changeset.is_empty() { + return Ok(()); + } - pub fn highlighter<'a>( - &'a self, - source: RopeSlice<'a>, - loader: &'a Loader, - range: impl RangeBounds<u32>, - ) -> Highlighter<'a> { - Highlighter::new(&self.inner, source, loader, range) - } + let edits = Self::generate_edits(old_source.slice(..), changeset); - pub fn query_iter<'a, QueryLoader, LayerState, Range>( - &'a self, - source: RopeSlice<'a>, - loader: QueryLoader, - range: Range, - ) -> QueryIter<'a, 'a, QueryLoader, LayerState> - where - QueryLoader: FnMut(Language) -> Option<&'a Query> + 'a, - LayerState: Default, - Range: RangeBounds<u32>, - { - QueryIter::new(&self.inner, source, loader, range) - } + // Notify the tree about all the changes + for edit in edits.iter().rev() { + // apply the edits in reverse. If we applied them in order then edit 1 would disrupt + // the positioning of edit 2 + self.tree.as_mut().unwrap().edit(edit); + } - pub fn tags<'a>( - &'a self, - source: RopeSlice<'a>, - loader: &'a Loader, - range: impl RangeBounds<u32>, - ) -> QueryIter<'a, 'a, impl FnMut(Language) -> Option<&'a Query> + 'a, ()> { - self.query_iter( + self.parse( + ts_parser, + config, source, - |lang| loader.tag_query(lang).map(|q| &q.query), - range, + 0, + // TODO: what to do about this range on update + vec![Range { + start_byte: 0, + end_byte: usize::MAX, + start_point: Point::new(0, 0), + end_point: Point::new(usize::MAX, usize::MAX), + }], ) } - pub fn rainbow_highlights( - &self, - source: RopeSlice, - rainbow_length: usize, - loader: &Loader, - range: impl RangeBounds<u32>, - ) -> OverlayHighlights { - struct RainbowScope<'tree> { - end: u32, - node: Option<Node<'tree>>, - highlight: Highlight, - } - - let mut scope_stack = Vec::<RainbowScope>::new(); - let mut highlights = Vec::new(); - let mut query_iter = self.query_iter::<_, (), _>( - source, - |lang| loader.rainbow_query(lang).map(|q| &q.query), - range, - ); + // fn highlight_iter() -> same as Mode but for this layer. Mode composits these + // fn buffer_changed + // fn update(range) + // fn update_injections() +} - while let Some(event) = query_iter.next() { - let QueryIterEvent::Match(mat) = event else { - continue; - }; +// -- refactored from tree-sitter-highlight to be able to retain state +// TODO: add seek() to iter + +// problem: any time a layer is updated it must update it's injections on the parent (potentially +// removing some from use) +// can't modify to vec and exist in it at the same time since that would violate borrows +// maybe we can do with an arena +// maybe just caching on the top layer and nevermind the injections for now? +// +// Grammar { +// layers: Vec<Box<Layer>> to prevent memory moves when vec is modified +// } +// injections tracked by marker: +// if marker areas match it's fine and update +// if not found add new layer +// if length 0 then area got removed, clean up the layer +// +// layer update: +// if range.len = 0 then remove the layer +// for change in changes { tree.edit(change) } +// tree = parser.parse(.., tree, ..) +// calculate affected range and update injections +// injection update: +// look for existing injections +// if present, range = (first injection start, last injection end) +// +// For now cheat and just throw out non-root layers if they exist. This should still improve +// parsing in majority of cases. + +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::{iter, mem, ops, str, usize}; +use tree_sitter::{ + Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, + QueryMatch, Range, TextProvider, Tree, +}; - let rainbow_query = loader - .rainbow_query(query_iter.current_language()) - .expect("language must have a rainbow query to emit matches"); +const CANCELLATION_CHECK_INTERVAL: usize = 100; - let byte_range = mat.node.byte_range(); - // Pop any scopes that end before this capture begins. - while scope_stack - .last() - .is_some_and(|scope| byte_range.start >= scope.end) - { - scope_stack.pop(); - } +/// Indicates which highlight should be applied to a region of source code. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct Highlight(pub usize); - let capture = Some(mat.capture); - if capture == rainbow_query.scope_capture { - scope_stack.push(RainbowScope { - end: byte_range.end, - node: if rainbow_query - .include_children_patterns - .contains(&mat.pattern) - { - None - } else { - Some(mat.node.clone()) - }, - highlight: Highlight::new((scope_stack.len() % rainbow_length) as u32), - }); - } else if capture == rainbow_query.bracket_capture { - if let Some(scope) = scope_stack.last() { - if !scope - .node - .as_ref() - .is_some_and(|node| mat.node.parent().as_ref() != Some(node)) - { - let start = source - .byte_to_char(source.floor_char_boundary(byte_range.start as usize)); - let end = - source.byte_to_char(source.ceil_char_boundary(byte_range.end as usize)); - highlights.push((scope.highlight, start..end)); - } - } - } - } +/// Represents the reason why syntax highlighting failed. +#[derive(Debug, PartialEq, Eq)] +pub enum Error { + Cancelled, + InvalidLanguage, + Unknown, +} - OverlayHighlights::Heterogenous { highlights } - } +/// Represents a single step in rendering a syntax-highlighted document. +#[derive(Copy, Clone, Debug)] +pub enum HighlightEvent { + Source { start: usize, end: usize }, + HighlightStart(Highlight), + HighlightEnd, } -pub type Highlighter<'a> = highlighter::Highlighter<'a, 'a, Loader>; +/// Contains the data neeeded to higlight code written in a particular language. +/// +/// This struct is immutable and can be shared between threads. +#[derive(Debug)] +pub struct HighlightConfiguration { + pub language: Grammar, + pub query: Query, + combined_injections_query: Option<Query>, + locals_pattern_index: usize, + highlights_pattern_index: usize, + highlight_indices: ArcSwap<Vec<Option<Highlight>>>, + non_local_variable_patterns: Vec<bool>, + injection_content_capture_index: Option<u32>, + injection_language_capture_index: Option<u32>, + local_scope_capture_index: Option<u32>, + local_def_capture_index: Option<u32>, + local_def_value_capture_index: Option<u32>, + local_ref_capture_index: Option<u32>, +} -fn generate_edits(old_text: RopeSlice, changeset: &ChangeSet) -> Vec<InputEdit> { - use crate::Operation::*; - use tree_sitter::Point; +#[derive(Debug)] +struct LocalDef<'a> { + name: Cow<'a, str>, + value_range: ops::Range<usize>, + highlight: Option<Highlight>, +} - let mut old_pos = 0; +#[derive(Debug)] +struct LocalScope<'a> { + inherits: bool, + range: ops::Range<usize>, + local_defs: Vec<LocalDef<'a>>, +} - let mut edits = Vec::new(); +#[derive(Debug)] +struct HighlightIter<'a, F> +where + F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, +{ + source: RopeSlice<'a>, + byte_offset: usize, + injection_callback: F, + cancellation_flag: Option<&'a AtomicUsize>, + layers: Vec<HighlightIterLayer<'a>>, + iter_count: usize, + next_event: Option<HighlightEvent>, + last_highlight_range: Option<(usize, usize, usize)>, +} - if changeset.changes.is_empty() { - return edits; +// Adapter to convert rope chunks to bytes +struct ChunksBytes<'a> { + chunks: ropey::iter::Chunks<'a>, +} +impl<'a> Iterator for ChunksBytes<'a> { + type Item = &'a [u8]; + fn next(&mut self) -> Option<Self::Item> { + self.chunks.next().map(str::as_bytes) } +} - let mut iter = changeset.changes.iter().peekable(); +struct RopeProvider<'a>(RopeSlice<'a>); +impl<'a> TextProvider<'a> for RopeProvider<'a> { + type I = ChunksBytes<'a>; - // TODO; this is a lot easier with Change instead of Operation. - while let Some(change) = iter.next() { - let len = match change { - Delete(i) | Retain(i) => *i, - Insert(_) => 0, - }; - let mut old_end = old_pos + len; - - match change { - Retain(_) => {} - Delete(_) => { - let start_byte = old_text.char_to_byte(old_pos) as u32; - let old_end_byte = old_text.char_to_byte(old_end) as u32; - - // deletion - edits.push(InputEdit { - start_byte, // old_pos to byte - old_end_byte, // old_end to byte - new_end_byte: start_byte, // old_pos to byte - start_point: Point::ZERO, - old_end_point: Point::ZERO, - new_end_point: Point::ZERO, - }); - } - Insert(s) => { - let start_byte = old_text.char_to_byte(old_pos) as u32; - - // a subsequent delete means a replace, consume it - if let Some(Delete(len)) = iter.peek() { - old_end = old_pos + len; - let old_end_byte = old_text.char_to_byte(old_end) as u32; - - iter.next(); - - // replacement - edits.push(InputEdit { - start_byte, // old_pos to byte - old_end_byte, // old_end to byte - new_end_byte: start_byte + s.len() as u32, // old_pos to byte + s.len() - start_point: Point::ZERO, - old_end_point: Point::ZERO, - new_end_point: Point::ZERO, - }); - } else { - // insert - edits.push(InputEdit { - start_byte, // old_pos to byte - old_end_byte: start_byte, // same - new_end_byte: start_byte + s.len() as u32, // old_pos + s.len() - start_point: Point::ZERO, - old_end_point: Point::ZERO, - new_end_point: Point::ZERO, - }); - } - } + fn text(&mut self, node: Node) -> Self::I { + let start_char = self.0.byte_to_char(node.start_byte()); + let end_char = self.0.byte_to_char(node.end_byte()); + let fragment = self.0.slice(start_char..end_char); + ChunksBytes { + chunks: fragment.chunks(), } - old_pos = old_end; } - edits } -/// A set of "overlay" highlights and ranges they apply to. -/// -/// As overlays, the styles for the given `Highlight`s are merged on top of the syntax highlights. -#[derive(Debug)] -pub enum OverlayHighlights { - /// All highlights use a single `Highlight`. - /// - /// Note that, currently, all ranges are assumed to be non-overlapping. This could change in - /// the future though. - Homogeneous { - highlight: Highlight, - ranges: Vec<ops::Range<usize>>, - }, - /// A collection of different highlights for given ranges. - /// - /// Note that the ranges **must be non-overlapping**. - Heterogenous { - highlights: Vec<(Highlight, ops::Range<usize>)>, - }, +struct HighlightIterLayer<'a> { + _tree: Option<Tree>, + cursor: QueryCursor, + captures: iter::Peekable<QueryCaptures<'a, 'a, RopeProvider<'a>>>, + config: &'a HighlightConfiguration, + highlight_end_stack: Vec<usize>, + scope_stack: Vec<LocalScope<'a>>, + ranges: Vec<Range>, + depth: usize, } -impl OverlayHighlights { - pub fn single(highlight: Highlight, range: ops::Range<usize>) -> Self { - Self::Homogeneous { - highlight, - ranges: vec![range], - } - } - - fn is_empty(&self) -> bool { - match self { - Self::Homogeneous { ranges, .. } => ranges.is_empty(), - Self::Heterogenous { highlights } => highlights.is_empty(), - } +impl<'a> fmt::Debug for HighlightIterLayer<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("HighlightIterLayer").finish() } } -#[derive(Debug)] -struct Overlay { - highlights: OverlayHighlights, - /// The position of the highlighter into the Vec of ranges of the overlays. +impl HighlightConfiguration { + /// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting + /// queries. /// - /// Used by the `OverlayHighlighter`. - idx: usize, - /// The currently active highlight (and the ending character index) for this overlay. + /// # Parameters /// - /// Used by the `OverlayHighlighter`. - active_highlight: Option<(Highlight, usize)>, -} - -impl Overlay { - fn new(highlights: OverlayHighlights) -> Option<Self> { - (!highlights.is_empty()).then_some(Self { - highlights, - idx: 0, - active_highlight: None, - }) - } + /// * `language` - The Tree-sitter `Grammar` that should be used for parsing. + /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This + /// should be non-empty, otherwise no syntax highlights will be added. + /// * `injections_query` - A string containing tree patterns for injecting other languages + /// into the document. This can be empty if no injections are desired. + /// * `locals_query` - A string containing tree patterns for tracking local variable + /// definitions and references. This can be empty if local variable tracking is not needed. + /// + /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method. + pub fn new( + language: Grammar, + highlights_query: &str, + injection_query: &str, + locals_query: &str, + ) -> Result<Self, QueryError> { + // Concatenate the query strings, keeping track of the start offset of each section. + let mut query_source = String::new(); + query_source.push_str(injection_query); + let locals_query_offset = query_source.len(); + query_source.push_str(locals_query); + let highlights_query_offset = query_source.len(); + query_source.push_str(highlights_query); + + // Construct a single query by concatenating the three query strings, but record the + // range of pattern indices that belong to each individual string. + let mut query = Query::new(language, &query_source)?; + let mut locals_pattern_index = 0; + let mut highlights_pattern_index = 0; + for i in 0..(query.pattern_count()) { + let pattern_offset = query.start_byte_for_pattern(i); + if pattern_offset < highlights_query_offset { + if pattern_offset < highlights_query_offset { + highlights_pattern_index += 1; + } + if pattern_offset < locals_query_offset { + locals_pattern_index += 1; + } + } + } - fn current(&self) -> Option<(Highlight, ops::Range<usize>)> { - match &self.highlights { - OverlayHighlights::Homogeneous { highlight, ranges } => ranges - .get(self.idx) - .map(|range| (*highlight, range.clone())), - OverlayHighlights::Heterogenous { highlights } => highlights.get(self.idx).cloned(), + // Construct a separate query just for dealing with the 'combined injections'. + // Disable the combined injection patterns in the main query. + let mut combined_injections_query = Query::new(language, injection_query)?; + let mut has_combined_queries = false; + for pattern_index in 0..locals_pattern_index { + let settings = query.property_settings(pattern_index); + if settings.iter().any(|s| &*s.key == "injection.combined") { + has_combined_queries = true; + query.disable_pattern(pattern_index); + } else { + combined_injections_query.disable_pattern(pattern_index); + } } - } + let combined_injections_query = if has_combined_queries { + Some(combined_injections_query) + } else { + None + }; - fn start(&self) -> Option<usize> { - match &self.highlights { - OverlayHighlights::Homogeneous { ranges, .. } => { - ranges.get(self.idx).map(|range| range.start) + // Find all of the highlighting patterns that are disabled for nodes that + // have been identified as local variables. + let non_local_variable_patterns = (0..query.pattern_count()) + .map(|i| { + query + .property_predicates(i) + .iter() + .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local") + }) + .collect(); + + // Store the numeric ids for all of the special captures. + let mut injection_content_capture_index = None; + let mut injection_language_capture_index = None; + let mut local_def_capture_index = None; + let mut local_def_value_capture_index = None; + let mut local_ref_capture_index = None; + let mut local_scope_capture_index = None; + for (i, name) in query.capture_names().iter().enumerate() { + let i = Some(i as u32); + match name.as_str() { + "injection.content" => injection_content_capture_index = i, + "injection.language" => injection_language_capture_index = i, + "local.definition" => local_def_capture_index = i, + "local.definition-value" => local_def_value_capture_index = i, + "local.reference" => local_ref_capture_index = i, + "local.scope" => local_scope_capture_index = i, + _ => {} } - OverlayHighlights::Heterogenous { highlights } => highlights - .get(self.idx) - .map(|(_highlight, range)| range.start), } - } -} -/// A collection of highlights to apply when rendering which merge on top of syntax highlights. -#[derive(Debug)] -pub struct OverlayHighlighter { - overlays: Vec<Overlay>, - next_highlight_start: usize, - next_highlight_end: usize, -} + let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]); + Ok(Self { + language, + query, + combined_injections_query, + locals_pattern_index, + highlights_pattern_index, + highlight_indices, + non_local_variable_patterns, + injection_content_capture_index, + injection_language_capture_index, + local_scope_capture_index, + local_def_capture_index, + local_def_value_capture_index, + local_ref_capture_index, + }) + } -impl OverlayHighlighter { - pub fn new(overlays: impl IntoIterator<Item = OverlayHighlights>) -> Self { - let overlays: Vec<_> = overlays.into_iter().filter_map(Overlay::new).collect(); - let next_highlight_start = overlays - .iter() - .filter_map(|overlay| overlay.start()) - .min() - .unwrap_or(usize::MAX); - - Self { - overlays, - next_highlight_start, - next_highlight_end: usize::MAX, - } + /// Get a slice containing all of the highlight names used in the configuration. + pub fn names(&self) -> &[String] { + self.query.capture_names() } - /// The current position in the overlay highlights. + /// Set the list of recognized highlight names. /// - /// This method is meant to be used when treating this type as a cursor over the overlay - /// highlights. + /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated + /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of + /// these queries can choose to recognize highlights with different levels of specificity. + /// For example, the string `function.builtin` will match against `function.method.builtin` + /// and `function.builtin.constructor`, but will not match `function.method`. /// - /// `usize::MAX` is returned when there are no more overlay highlights. - pub fn next_event_offset(&self) -> usize { - self.next_highlight_start.min(self.next_highlight_end) - } - - pub fn advance(&mut self) -> (HighlightEvent, impl Iterator<Item = Highlight> + '_) { - let mut refresh = false; - let prev_stack_size = self - .overlays + /// When highlighting, results are returned as `Highlight` values, which contain the index + /// of the matched highlight this list of highlight names. + pub fn configure(&self, recognized_names: &[String]) { + let mut capture_parts = Vec::new(); + let indices: Vec<_> = self + .query + .capture_names() .iter() - .filter(|overlay| overlay.active_highlight.is_some()) - .count(); - let pos = self.next_event_offset(); - - if self.next_highlight_end == pos { - for overlay in self.overlays.iter_mut() { - if overlay - .active_highlight - .is_some_and(|(_highlight, end)| end == pos) - { - overlay.active_highlight.take(); + .map(move |capture_name| { + capture_parts.clear(); + capture_parts.extend(capture_name.split('.')); + + let mut best_index = None; + let mut best_match_len = 0; + for (i, recognized_name) in recognized_names.iter().enumerate() { + let recognized_name = recognized_name; + let mut len = 0; + let mut matches = true; + for part in recognized_name.split('.') { + len += 1; + if !capture_parts.contains(&part) { + matches = false; + break; + } + } + if matches && len > best_match_len { + best_index = Some(i); + best_match_len = len; + } } - } + best_index.map(Highlight) + }) + .collect(); - refresh = true; - } + self.highlight_indices.store(Arc::new(indices)); + } +} - while self.next_highlight_start == pos { - let mut activated_idx = usize::MAX; - for (idx, overlay) in self.overlays.iter_mut().enumerate() { - let Some((highlight, range)) = overlay.current() else { - continue; - }; - if range.start != self.next_highlight_start { - continue; +impl<'a> HighlightIterLayer<'a> { + /// Create a new 'layer' of highlighting for this document. + /// + /// In the even that the new layer contains "combined injections" (injections where multiple + /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and + /// added to the returned vector. + fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>( + source: RopeSlice<'a>, + cancellation_flag: Option<&'a AtomicUsize>, + injection_callback: &mut F, + mut config: &'a HighlightConfiguration, + mut depth: usize, + mut ranges: Vec<Range>, + ) -> Result<Vec<Self>, Error> { + let mut result = Vec::with_capacity(1); + let mut queue = Vec::new(); + loop { + // --> Tree parsing part + + PARSER.with(|ts_parser| { + let highlighter = &mut ts_parser.borrow_mut(); + + if highlighter.parser.set_included_ranges(&ranges).is_ok() { + highlighter + .parser + .set_language(config.language) + .map_err(|_| Error::InvalidLanguage)?; + + unsafe { highlighter.parser.set_cancellation_flag(cancellation_flag) }; + let tree = highlighter + .parser + .parse_with( + &mut |byte, _| { + if byte <= source.len_bytes() { + let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); + chunk[byte - start_byte..].as_bytes() + } else { + // out of range + &[] + } + }, + None, + ) + .ok_or(Error::Cancelled)?; + unsafe { highlighter.parser.set_cancellation_flag(None) }; + let mut cursor = highlighter.cursors.pop().unwrap_or_else(QueryCursor::new); + + // Process combined injections. + if let Some(combined_injections_query) = &config.combined_injections_query { + let mut injections_by_pattern_index = vec![ + (None, Vec::new(), false); + combined_injections_query + .pattern_count() + ]; + let matches = cursor.matches( + combined_injections_query, + tree.root_node(), + RopeProvider(source), + ); + for mat in matches { + let entry = &mut injections_by_pattern_index[mat.pattern_index]; + let (language_name, content_node, include_children) = + injection_for_match( + config, + combined_injections_query, + &mat, + source, + ); + if language_name.is_some() { + entry.0 = language_name; + } + if let Some(content_node) = content_node { + entry.1.push(content_node); + } + entry.2 = include_children; + } + for (lang_name, content_nodes, includes_children) in + injections_by_pattern_index + { + if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) + { + if let Some(next_config) = (injection_callback)(&lang_name) { + let ranges = Self::intersect_ranges( + &ranges, + &content_nodes, + includes_children, + ); + if !ranges.is_empty() { + queue.push((next_config, depth + 1, ranges)); + } + } + } + } + } + + // --> Highlighting query part + + // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which + // prevents them from being moved. But both of these values are really just + // pointers, so it's actually ok to move them. + let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(&tree) }; + let cursor_ref = + unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) }; + let captures = cursor_ref + .captures(&config.query, tree_ref.root_node(), RopeProvider(source)) + .peekable(); + + result.push(HighlightIterLayer { + highlight_end_stack: Vec::new(), + scope_stack: vec![LocalScope { + inherits: false, + range: 0..usize::MAX, + local_defs: Vec::new(), + }], + cursor, + depth, + _tree: Some(tree), + captures, + config, + ranges, + }); } - // If this overlay has a highlight at this start index, set its active highlight - // and increment the cursor position within the overlay. - overlay.active_highlight = Some((highlight, range.end)); - overlay.idx += 1; + Ok(()) // so we can use the try operator + })?; - activated_idx = activated_idx.min(idx); + if queue.is_empty() { + break; } - // If `self.next_highlight_start == pos` that means that some overlay was ready to - // emit a highlight, so `activated_idx` must have been set to an existing index. - assert!( - (0..self.overlays.len()).contains(&activated_idx), - "expected an overlay to highlight (at pos {pos}, there are {} overlays)", - self.overlays.len() - ); - - // If any overlays are active after the (lowest) one which was just activated, the - // highlights need to be refreshed. - refresh |= self.overlays[activated_idx..] - .iter() - .any(|overlay| overlay.active_highlight.is_some()); - - self.next_highlight_start = self - .overlays - .iter() - .filter_map(|overlay| overlay.start()) - .min() - .unwrap_or(usize::MAX); + let (next_config, next_depth, next_ranges) = queue.remove(0); + config = next_config; + depth = next_depth; + ranges = next_ranges; } - self.next_highlight_end = self - .overlays - .iter() - .filter_map(|overlay| Some(overlay.active_highlight?.1)) - .min() - .unwrap_or(usize::MAX); + Ok(result) + } + + // Compute the ranges that should be included when parsing an injection. + // This takes into account three things: + // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. + // * `nodes` - Every injection takes place within a set of nodes. The injection ranges + // are the ranges of those nodes. + // * `includes_children` - For some injections, the content nodes' children should be + // excluded from the nested document, so that only the content nodes' *own* content + // is reparsed. For other injections, the content nodes' entire ranges should be + // reparsed, including the ranges of their children. + fn intersect_ranges( + parent_ranges: &[Range], + nodes: &[Node], + includes_children: bool, + ) -> Vec<Range> { + let mut cursor = nodes[0].walk(); + let mut result = Vec::new(); + let mut parent_range_iter = parent_ranges.iter(); + let mut parent_range = parent_range_iter + .next() + .expect("Layers should only be constructed with non-empty ranges vectors"); + for node in nodes.iter() { + let mut preceding_range = Range { + start_byte: 0, + start_point: Point::new(0, 0), + end_byte: node.start_byte(), + end_point: node.start_position(), + }; + let following_range = Range { + start_byte: node.end_byte(), + start_point: node.end_position(), + end_byte: usize::MAX, + end_point: Point::new(usize::MAX, usize::MAX), + }; - let (event, start) = if refresh { - (HighlightEvent::Refresh, 0) - } else { - (HighlightEvent::Push, prev_stack_size) - }; + for excluded_range in node + .children(&mut cursor) + .filter_map(|child| { + if includes_children { + None + } else { + Some(child.range()) + } + }) + .chain([following_range].iter().cloned()) + { + let mut range = Range { + start_byte: preceding_range.end_byte, + start_point: preceding_range.end_point, + end_byte: excluded_range.start_byte, + end_point: excluded_range.start_point, + }; + preceding_range = excluded_range; - ( - event, - self.overlays - .iter() - .flat_map(|overlay| overlay.active_highlight) - .map(|(highlight, _end)| highlight) - .skip(start), - ) - } -} + if range.end_byte < parent_range.start_byte { + continue; + } -#[derive(Debug)] -pub enum CapturedNode<'a> { - Single(Node<'a>), - /// Guaranteed to be not empty - Grouped(Vec<Node<'a>>), -} + while parent_range.start_byte <= range.end_byte { + if parent_range.end_byte > range.start_byte { + if range.start_byte < parent_range.start_byte { + range.start_byte = parent_range.start_byte; + range.start_point = parent_range.start_point; + } + + if parent_range.end_byte < range.end_byte { + if range.start_byte < parent_range.end_byte { + result.push(Range { + start_byte: range.start_byte, + start_point: range.start_point, + end_byte: parent_range.end_byte, + end_point: parent_range.end_point, + }); + } + range.start_byte = parent_range.end_byte; + range.start_point = parent_range.end_point; + } else { + if range.start_byte < range.end_byte { + result.push(range); + } + break; + } + } -impl CapturedNode<'_> { - pub fn start_byte(&self) -> usize { - match self { - Self::Single(n) => n.start_byte() as usize, - Self::Grouped(ns) => ns[0].start_byte() as usize, + if let Some(next_range) = parent_range_iter.next() { + parent_range = next_range; + } else { + return result; + } + } + } + } + result + } + + // First, sort scope boundaries by their byte offset in the document. At a + // given position, emit scope endings before scope beginnings. Finally, emit + // scope boundaries from deeper layers first. + fn sort_key(&mut self) -> Option<(usize, bool, isize)> { + let depth = -(self.depth as isize); + let next_start = self + .captures + .peek() + .map(|(m, i)| m.captures[*i].node.start_byte()); + let next_end = self.highlight_end_stack.last().cloned(); + match (next_start, next_end) { + (Some(start), Some(end)) => { + if start < end { + Some((start, true, depth)) + } else { + Some((end, false, depth)) + } + } + (Some(i), None) => Some((i, true, depth)), + (None, Some(j)) => Some((j, false, depth)), + _ => None, } } +} - pub fn end_byte(&self) -> usize { - match self { - Self::Single(n) => n.end_byte() as usize, - Self::Grouped(ns) => ns.last().unwrap().end_byte() as usize, +impl<'a, F> HighlightIter<'a, F> +where + F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, +{ + fn emit_event( + &mut self, + offset: usize, + event: Option<HighlightEvent>, + ) -> Option<Result<HighlightEvent, Error>> { + let result; + if self.byte_offset < offset { + result = Some(Ok(HighlightEvent::Source { + start: self.byte_offset, + end: offset, + })); + self.byte_offset = offset; + self.next_event = event; + } else { + result = event.map(Ok); + } + self.sort_layers(); + result + } + + fn sort_layers(&mut self) { + while !self.layers.is_empty() { + if let Some(sort_key) = self.layers[0].sort_key() { + let mut i = 0; + while i + 1 < self.layers.len() { + if let Some(next_offset) = self.layers[i + 1].sort_key() { + if next_offset < sort_key { + i += 1; + continue; + } + } + break; + } + if i > 0 { + self.layers[0..(i + 1)].rotate_left(1); + } + break; + } else { + let layer = self.layers.remove(0); + PARSER.with(|ts_parser| { + let highlighter = &mut ts_parser.borrow_mut(); + highlighter.cursors.push(layer.cursor); + }); + } } } - pub fn byte_range(&self) -> ops::Range<usize> { - self.start_byte()..self.end_byte() + fn insert_layer(&mut self, mut layer: HighlightIterLayer<'a>) { + if let Some(sort_key) = layer.sort_key() { + let mut i = 1; + while i < self.layers.len() { + if let Some(sort_key_i) = self.layers[i].sort_key() { + if sort_key_i > sort_key { + self.layers.insert(i, layer); + return; + } + i += 1; + } else { + self.layers.remove(i); + } + } + self.layers.push(layer); + } } } -#[derive(Debug)] -pub struct TextObjectQuery { - query: Query, -} - -impl TextObjectQuery { - pub fn new(query: Query) -> Self { - Self { query } - } +impl<'a, F> Iterator for HighlightIter<'a, F> +where + F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, +{ + type Item = Result<HighlightEvent, Error>; + + fn next(&mut self) -> Option<Self::Item> { + 'main: loop { + // If we've already determined the next highlight boundary, just return it. + if let Some(e) = self.next_event.take() { + return Some(Ok(e)); + } - /// Run the query on the given node and return sub nodes which match given - /// capture ("function.inside", "class.around", etc). - /// - /// Captures may contain multiple nodes by using quantifiers (+, *, etc), - /// and support for this is partial and could use improvement. - /// - /// ```query - /// (comment)+ @capture - /// - /// ; OR - /// ( - /// (comment)* - /// . - /// (function) - /// ) @capture - /// ``` - pub fn capture_nodes<'a>( - &'a self, - capture_name: &str, - node: &Node<'a>, - slice: RopeSlice<'a>, - ) -> Option<impl Iterator<Item = CapturedNode<'a>>> { - self.capture_nodes_any(&[capture_name], node, slice) - } + // Periodically check for cancellation, returning `Cancelled` error if the + // cancellation flag was flipped. + if let Some(cancellation_flag) = self.cancellation_flag { + self.iter_count += 1; + if self.iter_count >= CANCELLATION_CHECK_INTERVAL { + self.iter_count = 0; + if cancellation_flag.load(Ordering::Relaxed) != 0 { + return Some(Err(Error::Cancelled)); + } + } + } - /// Find the first capture that exists out of all given `capture_names` - /// and return sub nodes that match this capture. - pub fn capture_nodes_any<'a>( - &'a self, - capture_names: &[&str], - node: &Node<'a>, - slice: RopeSlice<'a>, - ) -> Option<impl Iterator<Item = CapturedNode<'a>>> { - let capture = capture_names - .iter() - .find_map(|cap| self.query.get_capture(cap))?; + // If none of the layers have any more highlight boundaries, terminate. + if self.layers.is_empty() { + let len = self.source.len_bytes(); + return if self.byte_offset < len { + let result = Some(Ok(HighlightEvent::Source { + start: self.byte_offset, + end: len, + })); + self.byte_offset = len; + result + } else { + None + }; + } - let mut cursor = InactiveQueryCursor::new(0..u32::MAX, TREE_SITTER_MATCH_LIMIT) - .execute_query(&self.query, node, RopeInput::new(slice)); - let capture_node = iter::from_fn(move || { - let (mat, _) = cursor.next_matched_node()?; - Some(mat.nodes_for_capture(capture).cloned().collect()) - }) - .filter_map(move |nodes: Vec<_>| { - if nodes.len() > 1 { - Some(CapturedNode::Grouped(nodes)) + // Get the next capture from whichever layer has the earliest highlight boundary. + let range; + let layer = &mut self.layers[0]; + if let Some((next_match, capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*capture_index]; + range = next_capture.node.byte_range(); + + // If any previous highlight ends before this node starts, then before + // processing this capture, emit the source code up until the end of the + // previous highlight, and an end event for that highlight. + if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { + if end_byte <= range.start { + layer.highlight_end_stack.pop(); + return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); + } + } + } + // If there are no more captures, then emit any remaining highlight end events. + // And if there are none of those, then just advance to the end of the document. + else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { + layer.highlight_end_stack.pop(); + return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); } else { - nodes.into_iter().map(CapturedNode::Single).next() + // return self.emit_event(self.source.len(), None); + return None; + }; + + let (mut match_, capture_index) = layer.captures.next().unwrap(); + let mut capture = match_.captures[capture_index]; + + // If this capture represents an injection, then process the injection. + if match_.pattern_index < layer.config.locals_pattern_index { + let (language_name, content_node, include_children) = + injection_for_match(layer.config, &layer.config.query, &match_, self.source); + + // Explicitly remove this match so that none of its other captures will remain + // in the stream of captures. + match_.remove(); + + // If a language is found with the given name, then add a new language layer + // to the highlighted document. + if let (Some(language_name), Some(content_node)) = (language_name, content_node) { + if let Some(config) = (self.injection_callback)(&language_name) { + let ranges = HighlightIterLayer::intersect_ranges( + &self.layers[0].ranges, + &[content_node], + include_children, + ); + if !ranges.is_empty() { + match HighlightIterLayer::new( + self.source, + self.cancellation_flag, + &mut self.injection_callback, + config, + self.layers[0].depth + 1, + ranges, + ) { + Ok(layers) => { + for layer in layers { + self.insert_layer(layer); + } + } + Err(e) => return Some(Err(e)), + } + } + } + } + + self.sort_layers(); + continue 'main; } - }); - Some(capture_node) - } -} -#[derive(Debug)] -pub struct TagQuery { - pub query: Query, -} + // Remove from the local scope stack any local scopes that have already ended. + while range.start > layer.scope_stack.last().unwrap().range.end { + layer.scope_stack.pop(); + } -pub fn pretty_print_tree<W: fmt::Write>(fmt: &mut W, node: Node) -> fmt::Result { - if node.child_count() == 0 { - if node_is_visible(&node) { - write!(fmt, "({})", node.kind()) - } else { - write!(fmt, "\"{}\"", format_anonymous_node_kind(node.kind())) - } - } else { - pretty_print_tree_impl(fmt, &mut node.walk(), 0) - } -} + // If this capture is for tracking local variables, then process the + // local variable info. + let mut reference_highlight = None; + let mut definition_highlight = None; + while match_.pattern_index < layer.config.highlights_pattern_index { + // If the node represents a local scope, push a new local scope onto + // the scope stack. + if Some(capture.index) == layer.config.local_scope_capture_index { + definition_highlight = None; + let mut scope = LocalScope { + inherits: true, + range: range.clone(), + local_defs: Vec::new(), + }; + for prop in layer.config.query.property_settings(match_.pattern_index) { + if let "local.scope-inherits" = prop.key.as_ref() { + scope.inherits = + prop.value.as_ref().map_or(true, |r| r.as_ref() == "true"); + } + } + layer.scope_stack.push(scope); + } + // If the node represents a definition, add a new definition to the + // local scope at the top of the scope stack. + else if Some(capture.index) == layer.config.local_def_capture_index { + reference_highlight = None; + let scope = layer.scope_stack.last_mut().unwrap(); + + let mut value_range = 0..0; + for capture in match_.captures { + if Some(capture.index) == layer.config.local_def_value_capture_index { + value_range = capture.node.byte_range(); + } + } -fn node_is_visible(node: &Node) -> bool { - node.is_missing() || (node.is_named() && node.grammar().node_kind_is_visible(node.kind_id())) -} + let name = byte_range_to_str(range.clone(), self.source); + scope.local_defs.push(LocalDef { + name, + value_range, + highlight: None, + }); + definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight); + } + // If the node represents a reference, then try to find the corresponding + // definition in the scope stack. + else if Some(capture.index) == layer.config.local_ref_capture_index + && definition_highlight.is_none() + { + definition_highlight = None; + let name = byte_range_to_str(range.clone(), self.source); + for scope in layer.scope_stack.iter().rev() { + if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| { + if def.name == name && range.start >= def.value_range.end { + Some(def.highlight) + } else { + None + } + }) { + reference_highlight = highlight; + break; + } + if !scope.inherits { + break; + } + } + } -fn format_anonymous_node_kind(kind: &str) -> Cow<'_, str> { - if kind.contains('"') { - Cow::Owned(kind.replace('"', "\\\"")) - } else { - Cow::Borrowed(kind) - } -} + // Continue processing any additional matches for the same node. + if let Some((next_match, next_capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + capture = next_capture; + match_ = layer.captures.next().unwrap().0; + continue; + } + } -fn pretty_print_tree_impl<W: fmt::Write>( - fmt: &mut W, - cursor: &mut tree_sitter::TreeCursor, - depth: usize, -) -> fmt::Result { - let node = cursor.node(); - let visible = node_is_visible(&node); + self.sort_layers(); + continue 'main; + } - if visible { - let indentation_columns = depth * 2; - write!(fmt, "{:indentation_columns$}", "")?; + // Otherwise, this capture must represent a highlight. + // If this exact range has already been highlighted by an earlier pattern, or by + // a different layer, then skip over this one. + if let Some((last_start, last_end, last_depth)) = self.last_highlight_range { + if range.start == last_start && range.end == last_end && layer.depth < last_depth { + self.sort_layers(); + continue 'main; + } + } - if let Some(field_name) = cursor.field_name() { - write!(fmt, "{}: ", field_name)?; - } + // If the current node was found to be a local variable, then skip over any + // highlighting patterns that are disabled for local variables. + if definition_highlight.is_some() || reference_highlight.is_some() { + while layer.config.non_local_variable_patterns[match_.pattern_index] { + if let Some((next_match, next_capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + capture = next_capture; + match_ = layer.captures.next().unwrap().0; + continue; + } + } - write!(fmt, "({}", node.kind())?; - } else { - write!(fmt, " \"{}\"", format_anonymous_node_kind(node.kind()))?; - } + self.sort_layers(); + continue 'main; + } + } - // Handle children. - if cursor.goto_first_child() { - loop { - if node_is_visible(&cursor.node()) { - fmt.write_char('\n')?; + // Once a highlighting pattern is found for the current node, skip over + // any later highlighting patterns that also match this node. Captures + // for a given node are ordered by pattern index, so these subsequent + // captures are guaranteed to be for highlighting, not injections or + // local variables. + while let Some((next_match, next_capture_index)) = layer.captures.peek() { + let next_capture = next_match.captures[*next_capture_index]; + if next_capture.node == capture.node { + layer.captures.next(); + } else { + break; + } } - pretty_print_tree_impl(fmt, cursor, depth + 1)?; + let current_highlight = layer.config.highlight_indices.load()[capture.index as usize]; - if !cursor.goto_next_sibling() { - break; + // If this node represents a local definition, then store the current + // highlight value on the local scope entry representing this node. + if let Some(definition_highlight) = definition_highlight { + *definition_highlight = current_highlight; } + + // Emit a scope start event and push the node's end position to the stack. + if let Some(highlight) = reference_highlight.or(current_highlight) { + self.last_highlight_range = Some((range.start, range.end, layer.depth)); + layer.highlight_end_stack.push(range.end); + return self + .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); + } + + self.sort_layers(); } + } +} - let moved = cursor.goto_parent(); - // The parent of the first child must exist, and must be `node`. - debug_assert!(moved); - debug_assert!(cursor.node() == node); +fn injection_for_match<'a>( + config: &HighlightConfiguration, + query: &'a Query, + query_match: &QueryMatch<'a, 'a>, + source: RopeSlice<'a>, +) -> (Option<Cow<'a, str>>, Option<Node<'a>>, bool) { + let content_capture_index = config.injection_content_capture_index; + let language_capture_index = config.injection_language_capture_index; + + let mut language_name = None; + let mut content_node = None; + for capture in query_match.captures { + let index = Some(capture.index); + if index == language_capture_index { + let name = byte_range_to_str(capture.node.byte_range(), source); + language_name = Some(name); + } else if index == content_capture_index { + content_node = Some(capture.node); + } } - if visible { - fmt.write_char(')')?; + let mut include_children = false; + for prop in query.property_settings(query_match.pattern_index) { + match prop.key.as_ref() { + // In addition to specifying the language name via the text of a + // captured node, it can also be hard-coded via a `#set!` predicate + // that sets the injection.language key. + "injection.language" => { + if language_name.is_none() { + language_name = prop.value.as_ref().map(|s| s.as_ref().into()) + } + } + + // By default, injections do not include the *children* of an + // `injection.content` node - only the ranges that belong to the + // node itself. This can be changed using a `#set!` predicate that + // sets the `injection.include-children` key. + "injection.include-children" => include_children = true, + _ => {} + } } - Ok(()) + (language_name, content_node, include_children) } -/// Finds the child of `node` which contains the given byte range. -pub fn child_for_byte_range<'a>(node: &Node<'a>, range: ops::Range<u32>) -> Option<Node<'a>> { - for child in node.children() { - let child_range = child.byte_range(); +// fn shrink_and_clear<T>(vec: &mut Vec<T>, capacity: usize) { +// if vec.len() > capacity { +// vec.truncate(capacity); +// vec.shrink_to_fit(); +// } +// vec.clear(); +// } - if range.start >= child_range.start && range.end <= child_range.end { - return Some(child); - } - } +pub struct Merge<I> { + iter: I, + spans: Box<dyn Iterator<Item = (usize, std::ops::Range<usize>)>>, + + next_event: Option<HighlightEvent>, + next_span: Option<(usize, std::ops::Range<usize>)>, - None + queue: Vec<HighlightEvent>, } -#[derive(Debug)] -pub struct RainbowQuery { - query: Query, - include_children_patterns: HashSet<Pattern>, - scope_capture: Option<Capture>, - bracket_capture: Option<Capture>, +/// Merge a list of spans into the highlight event stream. +pub fn merge<I: Iterator<Item = HighlightEvent>>( + iter: I, + spans: Vec<(usize, std::ops::Range<usize>)>, +) -> Merge<I> { + let spans = Box::new(spans.into_iter()); + let mut merge = Merge { + iter, + spans, + next_event: None, + next_span: None, + queue: Vec::new(), + }; + merge.next_event = merge.iter.next(); + merge.next_span = merge.spans.next(); + merge } -impl RainbowQuery { - fn new(grammar: Grammar, source: &str) -> Result<Self, tree_sitter::query::ParseError> { - let mut include_children_patterns = HashSet::default(); - - let query = Query::new(grammar, source, |pattern, predicate| match predicate { - UserPredicate::SetProperty { - key: "rainbow.include-children", - val, - } => { - if val.is_some() { - return Err( - "property 'rainbow.include-children' does not take an argument".into(), - ); +impl<I: Iterator<Item = HighlightEvent>> Iterator for Merge<I> { + type Item = HighlightEvent; + fn next(&mut self) -> Option<Self::Item> { + use HighlightEvent::*; + if let Some(event) = self.queue.pop() { + return Some(event); + } + + loop { + match (self.next_event, &self.next_span) { + // this happens when range is partially or fully offscreen + (Some(Source { start, .. }), Some((span, range))) if start > range.start => { + if start > range.end { + self.next_span = self.spans.next(); + } else { + self.next_span = Some((*span, start..range.end)); + }; } - include_children_patterns.insert(pattern); - Ok(()) + _ => break, + } + } + + match (self.next_event, &self.next_span) { + (Some(HighlightStart(i)), _) => { + self.next_event = self.iter.next(); + Some(HighlightStart(i)) } - _ => Err(InvalidPredicateError::unknown(predicate)), - })?; + (Some(HighlightEnd), _) => { + self.next_event = self.iter.next(); + Some(HighlightEnd) + } + (Some(Source { start, end }), Some((_, range))) if start < range.start => { + let intersect = range.start.min(end); + let event = Source { + start, + end: intersect, + }; - Ok(Self { - include_children_patterns, - scope_capture: query.get_capture("rainbow.scope"), - bracket_capture: query.get_capture("rainbow.bracket"), - query, - }) + if end == intersect { + // the event is complete + self.next_event = self.iter.next(); + } else { + // subslice the event + self.next_event = Some(Source { + start: intersect, + end, + }); + }; + + Some(event) + } + (Some(Source { start, end }), Some((span, range))) if start == range.start => { + let intersect = range.end.min(end); + let event = HighlightStart(Highlight(*span)); + + // enqueue in reverse order + self.queue.push(HighlightEnd); + self.queue.push(Source { + start, + end: intersect, + }); + + if end == intersect { + // the event is complete + self.next_event = self.iter.next(); + } else { + // subslice the event + self.next_event = Some(Source { + start: intersect, + end, + }); + }; + + if intersect == range.end { + self.next_span = self.spans.next(); + } else { + self.next_span = Some((*span, intersect..range.end)); + } + + Some(event) + } + (Some(event), None) => { + self.next_event = self.iter.next(); + Some(event) + } + // Can happen if cursor at EOF and/or diagnostic reaches past the end. + // We need to actually emit events for the cursor-at-EOF situation, + // even though the range is past the end of the text. This needs to be + // handled appropriately by the drawing code by not assuming that + // all `Source` events point to valid indices in the rope. + (None, Some((span, range))) => { + let event = HighlightStart(Highlight(*span)); + self.queue.push(HighlightEnd); + self.queue.push(Source { + start: range.start, + end: range.end, + }); + self.next_span = self.spans.next(); + Some(event) + } + (None, None) => None, + e => unreachable!("{:?}", e), + } } } #[cfg(test)] mod test { - use once_cell::sync::Lazy; - use super::*; use crate::{Rope, Transaction}; - static LOADER: Lazy<Loader> = Lazy::new(crate::config::default_lang_loader); - #[test] - fn test_textobject_queries() { - let query_str = r#" - (line_comment)+ @quantified_nodes - ((line_comment)+) @quantified_nodes_grouped - ((line_comment) (line_comment)) @multiple_nodes_grouped - "#; - let source = Rope::from_str( - r#" -/// a comment on -/// multiple lines - "#, - ); + fn test_parser() { + let highlight_names: Vec<String> = [ + "attribute", + "constant", + "function.builtin", + "function", + "keyword", + "operator", + "property", + "punctuation", + "punctuation.bracket", + "punctuation.delimiter", + "string", + "string.special", + "tag", + "type", + "type.builtin", + "variable", + "variable.builtin", + "variable.parameter", + ] + .iter() + .cloned() + .map(String::from) + .collect(); + + let language = get_language(&crate::RUNTIME_DIR, "Rust").unwrap(); + let config = HighlightConfiguration::new( + language, + &std::fs::read_to_string( + "../helix-syntax/languages/tree-sitter-rust/queries/highlights.scm", + ) + .unwrap(), + &std::fs::read_to_string( + "../helix-syntax/languages/tree-sitter-rust/queries/injections.scm", + ) + .unwrap(), + "", // locals.scm + ) + .unwrap(); + config.configure(&highlight_names); - let language = LOADER.language_for_name("rust").unwrap(); - dbg!(language); - let grammar = LOADER.get_config(language).unwrap().grammar; - dbg!(grammar); - let syntax = Syntax::new(source.slice(..), language, &LOADER).unwrap(); - let mut h = syntax.highlighter( - "fn main() { 4 + 2; }".into(), - &LOADER, - 0.."fn main() { 4 + 2; }".len() as u32, + let source = Rope::from_str( + " + struct Stuff {} + fn main() {} + ", ); + let syntax = Syntax::new(&source, Arc::new(config)); + let tree = syntax.tree(); + let root = tree.root_node(); + assert_eq!(root.kind(), "source_file"); - for n in 0..5 { - dbg!(h.active_highlights().collect::<Vec<_>>()); - dbg!(h.next_event_offset()); - let (e, h) = h.advance(); - dbg!(h.collect::<Vec<_>>(), e); - // panic!() - } - - let query = Query::new(grammar, query_str, |_, _| Ok(())).unwrap(); - let textobject = TextObjectQuery::new(query); - let syntax = Syntax::new(source.slice(..), language, &LOADER).unwrap(); - - let root = syntax.tree().root_node(); - let test = |capture, range| { - let matches: Vec<_> = textobject - .capture_nodes(capture, &root, source.slice(..)) - .unwrap() - .collect(); - - assert_eq!( - matches[0].byte_range(), - range, - "@{} expected {:?}", - capture, - range + assert_eq!( + root.to_sexp(), + concat!( + "(source_file ", + "(struct_item name: (type_identifier) body: (field_declaration_list)) ", + "(function_item name: (identifier) parameters: (parameters) body: (block)))" ) - }; + ); - test("quantified_nodes", 1..37); - // NOTE: Enable after implementing proper node group capturing - // test("quantified_nodes_grouped", 1..37); - // test("multiple_nodes_grouped", 1..37); + let struct_node = root.child(0).unwrap(); + assert_eq!(struct_node.kind(), "struct_item"); } #[test] fn test_input_edits() { - use tree_sitter::{InputEdit, Point}; + use tree_sitter::InputEdit; let doc = Rope::from("hello world!\ntest 123"); let transaction = Transaction::change( &doc, vec![(6, 11, Some("test".into())), (12, 17, None)].into_iter(), ); - let edits = generate_edits(doc.slice(..), transaction.changes()); + let edits = LanguageLayer::generate_edits(doc.slice(..), transaction.changes()); // transaction.apply(&mut state); assert_eq!( @@ -1266,17 +1902,17 @@ mod test { start_byte: 6, old_end_byte: 11, new_end_byte: 10, - start_point: Point::ZERO, - old_end_point: Point::ZERO, - new_end_point: Point::ZERO + start_position: Point { row: 0, column: 6 }, + old_end_position: Point { row: 0, column: 11 }, + new_end_position: Point { row: 0, column: 10 } }, InputEdit { start_byte: 12, old_end_byte: 17, new_end_byte: 12, - start_point: Point::ZERO, - old_end_point: Point::ZERO, - new_end_point: Point::ZERO + start_position: Point { row: 0, column: 12 }, + old_end_position: Point { row: 1, column: 4 }, + new_end_position: Point { row: 0, column: 12 } } ] ); @@ -1285,7 +1921,7 @@ mod test { let mut doc = Rope::from("fn test() {}"); let transaction = Transaction::change(&doc, vec![(8, 8, Some("a: u32".into()))].into_iter()); - let edits = generate_edits(doc.slice(..), transaction.changes()); + let edits = LanguageLayer::generate_edits(doc.slice(..), transaction.changes()); transaction.apply(&mut doc); assert_eq!(doc, "fn test(a: u32) {}"); @@ -1295,114 +1931,20 @@ mod test { start_byte: 8, old_end_byte: 8, new_end_byte: 14, - start_point: Point::ZERO, - old_end_point: Point::ZERO, - new_end_point: Point::ZERO + start_position: Point { row: 0, column: 8 }, + old_end_position: Point { row: 0, column: 8 }, + new_end_position: Point { row: 0, column: 14 } }] ); } - #[track_caller] - fn assert_pretty_print( - language_name: &str, - source: &str, - expected: &str, - start: usize, - end: usize, - ) { - let source = Rope::from_str(source); - let language = LOADER.language_for_name(language_name).unwrap(); - let syntax = Syntax::new(source.slice(..), language, &LOADER).unwrap(); - - let root = syntax - .tree() - .root_node() - .descendant_for_byte_range(start as u32, end as u32) - .unwrap(); - - let mut output = String::new(); - pretty_print_tree(&mut output, root).unwrap(); - - assert_eq!(expected, output); - } - #[test] - fn test_pretty_print() { - let source = r#"// Hello"#; - assert_pretty_print("rust", source, "(line_comment \"//\")", 0, source.len()); - - // A large tree should be indented with fields: - let source = r#"fn main() { - println!("Hello, World!"); - }"#; - assert_pretty_print( - "rust", - source, - concat!( - "(function_item \"fn\"\n", - " name: (identifier)\n", - " parameters: (parameters \"(\" \")\")\n", - " body: (block \"{\"\n", - " (expression_statement\n", - " (macro_invocation\n", - " macro: (identifier) \"!\"\n", - " (token_tree \"(\"\n", - " (string_literal \"\\\"\"\n", - " (string_content) \"\\\"\") \")\")) \";\") \"}\"))", - ), - 0, - source.len(), - ); + fn test_load_runtime_file() { + // Test to make sure we can load some data from the runtime directory. + let contents = load_runtime_file("rust", "indents.toml").unwrap(); + assert!(!contents.is_empty()); - // Selecting a token should print just that token: - let source = r#"fn main() {}"#; - assert_pretty_print("rust", source, r#""fn""#, 0, 1); - - // Error nodes are printed as errors: - let source = r#"}{"#; - assert_pretty_print("rust", source, "(ERROR \"}\" \"{\")", 0, source.len()); - - // Fields broken under unnamed nodes are determined correctly. - // In the following source, `object` belongs to the `singleton_method` - // rule but `name` and `body` belong to an unnamed helper `_method_rest`. - // This can cause a bug with a pretty-printing implementation that - // uses `Node::field_name_for_child` to determine field names but is - // fixed when using `tree_sitter::TreeCursor::field_name`. - let source = "def self.method_name - true - end"; - assert_pretty_print( - "ruby", - source, - concat!( - "(singleton_method \"def\"\n", - " object: (self) \".\"\n", - " name: (identifier)\n", - " body: (body_statement\n", - " (true)) \"end\")" - ), - 0, - source.len(), - ); - } - #[test] - fn highlight() { - let source = Rope::from_str(r#"assert_eq!(0, Some(0));"#); - let loader = crate::config::default_lang_loader(); - loader.set_scopes(vec!["punctuation".to_string()]); - let language = loader.language_for_name("rust").unwrap(); - - let syntax = Syntax::new(source.slice(..), language, &loader).unwrap(); - println!( - "{}", - tree_house::fixtures::highlighter_fixture( - "", - &loader, - |_| "punct".to_string(), - &syntax.inner, - source.slice(..), - .., - ) - ); + let results = load_runtime_file("rust", "does-not-exist"); + assert!(results.is_err()); } } |