Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-core/src/syntax.rs')
| -rw-r--r-- | helix-core/src/syntax.rs | 1979 |
1 files changed, 934 insertions, 1045 deletions
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 4bc177ef..e2d862f4 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -1,474 +1,737 @@ -pub mod config; - -use std::{ - borrow::Cow, - collections::HashMap, - fmt, iter, - ops::{self, RangeBounds}, - path::Path, - sync::Arc, - time::Duration, -}; +use std::borrow::Cow; +use std::fmt::{self, Display}; +use std::path::{Path, PathBuf}; +use std::str::FromStr; +use std::sync::Arc; -use anyhow::{Context, Result}; use arc_swap::{ArcSwap, Guard}; -use config::{Configuration, FileType, LanguageConfiguration, LanguageServerConfiguration}; -use foldhash::HashSet; -use helix_loader::grammar::get_language; -use helix_stdx::rope::RopeSliceExt as _; -use once_cell::sync::OnceCell; -use ropey::RopeSlice; -use tree_house::{ - highlighter, - query_iter::QueryIter, - tree_sitter::{ - query::{InvalidPredicateError, UserPredicate}, - Capture, Grammar, InactiveQueryCursor, InputEdit, Node, Pattern, Query, RopeInput, Tree, - }, - Error, InjectionLanguageMarker, LanguageConfig as SyntaxConfig, Layer, +use globset::GlobSet; +pub use helix_syntax::highlighter::{Highlight, HighlightEvent}; +pub use helix_syntax::{ + merge, pretty_print_tree, HighlightConfiguration, InjectionLanguageMarker, RopeProvider, + TextObjectQuery, TreeCursor, }; +pub use helix_syntax::{with_cursor, Syntax}; +use once_cell::sync::{Lazy, OnceCell}; +use regex::Regex; +use ropey::RopeSlice; +use serde::ser::SerializeSeq; +use serde::{Deserialize, Serialize}; +use std::collections::{HashMap, HashSet}; +use tree_sitter::{Point, Query}; + +use crate::auto_pairs::AutoPairs; +use crate::chars::char_is_line_ending; +use crate::diagnostic::Severity; +use crate::{ChangeSet, Operation, Tendril}; + +use helix_loader::grammar::{get_language, load_runtime_file}; + +fn deserialize_regex<'de, D>(deserializer: D) -> Result<Option<Regex>, D::Error> +where + D: serde::Deserializer<'de>, +{ + Option::<String>::deserialize(deserializer)? + .map(|buf| Regex::new(&buf).map_err(serde::de::Error::custom)) + .transpose() +} -use crate::{indent::IndentQuery, tree_sitter, ChangeSet, Language}; +fn deserialize_lsp_config<'de, D>(deserializer: D) -> Result<Option<serde_json::Value>, D::Error> +where + D: serde::Deserializer<'de>, +{ + Option::<toml::Value>::deserialize(deserializer)? + .map(|toml| toml.try_into().map_err(serde::de::Error::custom)) + .transpose() +} -pub use tree_house::{ - highlighter::{Highlight, HighlightEvent}, - query_iter::QueryIterEvent, - Error as HighlighterError, LanguageLoader, TreeCursor, TREE_SITTER_MATCH_LIMIT, -}; +fn deserialize_tab_width<'de, D>(deserializer: D) -> Result<usize, D::Error> +where + D: serde::Deserializer<'de>, +{ + usize::deserialize(deserializer).and_then(|n| { + if n > 0 && n <= 16 { + Ok(n) + } else { + Err(serde::de::Error::custom( + "tab width must be a value from 1 to 16 inclusive", + )) + } + }) +} -#[derive(Debug)] -pub struct LanguageData { - config: Arc<LanguageConfiguration>, - syntax: OnceCell<Option<SyntaxConfig>>, - indent_query: OnceCell<Option<IndentQuery>>, - textobject_query: OnceCell<Option<TextObjectQuery>>, - tag_query: OnceCell<Option<TagQuery>>, - rainbow_query: OnceCell<Option<RainbowQuery>>, +pub fn deserialize_auto_pairs<'de, D>(deserializer: D) -> Result<Option<AutoPairs>, D::Error> +where + D: serde::Deserializer<'de>, +{ + Ok(Option::<AutoPairConfig>::deserialize(deserializer)?.and_then(AutoPairConfig::into)) } -impl LanguageData { - fn new(config: LanguageConfiguration) -> Self { - Self { - config: Arc::new(config), - syntax: OnceCell::new(), - indent_query: OnceCell::new(), - textobject_query: OnceCell::new(), - tag_query: OnceCell::new(), - rainbow_query: OnceCell::new(), - } - } +fn default_timeout() -> u64 { + 20 +} - pub fn config(&self) -> &Arc<LanguageConfiguration> { - &self.config - } +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct Configuration { + pub language: Vec<LanguageConfiguration>, + #[serde(default)] + pub language_server: HashMap<String, LanguageServerConfiguration>, +} - /// Loads the grammar and compiles the highlights, injections and locals for the language. - /// This function should only be used by this module or the xtask crate. - pub fn compile_syntax_config( - config: &LanguageConfiguration, - loader: &Loader, - ) -> Result<Option<SyntaxConfig>> { - let name = &config.language_id; - let parser_name = config.grammar.as_deref().unwrap_or(name); - let Some(grammar) = get_language(parser_name)? else { - log::info!("Skipping syntax config for '{name}' because the parser's shared library does not exist"); - return Ok(None); - }; - let highlight_query_text = read_query(name, "highlights.scm"); - let injection_query_text = read_query(name, "injections.scm"); - let local_query_text = read_query(name, "locals.scm"); - let config = SyntaxConfig::new( - grammar, - &highlight_query_text, - &injection_query_text, - &local_query_text, - ) - .with_context(|| format!("Failed to compile highlights for '{name}'"))?; +// largely based on tree-sitter/cli/src/loader.rs +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case", deny_unknown_fields)] +pub struct LanguageConfiguration { + #[serde(rename = "name")] + pub language_id: String, // c-sharp, rust, tsx + #[serde(rename = "language-id")] + // see the table under https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocumentItem + pub language_server_language_id: Option<String>, // csharp, rust, typescriptreact, for the language-server + pub scope: String, // source.rust + pub file_types: Vec<FileType>, // filename extension or ends_with? <Gemfile, rb, etc> + #[serde(default)] + pub shebangs: Vec<String>, // interpreter(s) associated with language + #[serde(default)] + pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml> + #[serde( + default, + skip_serializing, + deserialize_with = "from_comment_tokens", + alias = "comment-token" + )] + pub comment_tokens: Option<Vec<String>>, + #[serde( + default, + skip_serializing, + deserialize_with = "from_block_comment_tokens" + )] + pub block_comment_tokens: Option<Vec<BlockCommentToken>>, + pub text_width: Option<usize>, + pub soft_wrap: Option<SoftWrap>, + + #[serde(default)] + pub auto_format: bool, + + #[serde(skip_serializing_if = "Option::is_none")] + pub formatter: Option<FormatterConfiguration>, + + #[serde(default)] + pub diagnostic_severity: Severity, + + pub grammar: Option<String>, // tree-sitter grammar name, defaults to language_id + + // content_regex + #[serde(default, skip_serializing, deserialize_with = "deserialize_regex")] + pub injection_regex: Option<Regex>, + // first_line_regex + // + #[serde(skip)] + pub(crate) highlight_config: OnceCell<Option<Arc<HighlightConfiguration>>>, + // tags_config OnceCell<> https://github.com/tree-sitter/tree-sitter/pull/583 + #[serde( + default, + skip_serializing_if = "Vec::is_empty", + serialize_with = "serialize_lang_features", + deserialize_with = "deserialize_lang_features" + )] + pub language_servers: Vec<LanguageServerFeatures>, + #[serde(skip_serializing_if = "Option::is_none")] + pub indent: Option<IndentationConfiguration>, + + #[serde(skip)] + pub(crate) indent_query: OnceCell<Option<Query>>, + #[serde(skip)] + pub(crate) textobject_query: OnceCell<Option<TextObjectQuery>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub debugger: Option<DebugAdapterConfig>, + + /// Automatic insertion of pairs to parentheses, brackets, + /// etc. Defaults to true. Optionally, this can be a list of 2-tuples + /// to specify a list of characters to pair. This overrides the + /// global setting. + #[serde(default, skip_serializing, deserialize_with = "deserialize_auto_pairs")] + pub auto_pairs: Option<AutoPairs>, + + pub rulers: Option<Vec<u16>>, // if set, override editor's rulers + + /// Hardcoded LSP root directories relative to the workspace root, like `examples` or `tools/fuzz`. + /// Falling back to the current working directory if none are configured. + pub workspace_lsp_roots: Option<Vec<PathBuf>>, + #[serde(default)] + pub persistent_diagnostic_sources: Vec<String>, +} - reconfigure_highlights(&config, &loader.scopes()); +fn read_query(language: &str, filename: &str) -> String { + helix_syntax::read_query(language, filename, |lang, filename| { + load_runtime_file(lang, filename).unwrap_or_default() + }) +} +impl LanguageConfiguration { + fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> { + let highlights_query = read_query(&self.language_id, "highlights.scm"); + // always highlight syntax errors + // highlights_query += "\n(ERROR) @error"; - Ok(Some(config)) - } + let injections_query = read_query(&self.language_id, "injections.scm"); + let locals_query = read_query(&self.language_id, "locals.scm"); - pub fn syntax_config(&self, loader: &Loader) -> Option<&SyntaxConfig> { - self.syntax - .get_or_init(|| { - Self::compile_syntax_config(&self.config, loader) - .map_err(|err| { - log::error!("{err:#}"); - }) - .ok() - .flatten() - }) - .as_ref() + if highlights_query.is_empty() { + None + } else { + let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id)) + .map_err(|err| { + log::error!( + "Failed to load tree-sitter parser for language {:?}: {}", + self.language_id, + err + ) + }) + .ok()?; + let config = HighlightConfiguration::new( + language, + &highlights_query, + &injections_query, + &locals_query, + ) + .map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?}", self.language_id, err)) + .ok()?; + + config.configure(scopes); + Some(Arc::new(config)) + } } - /// Compiles the indents.scm query for a language. - /// This function should only be used by this module or the xtask crate. - pub fn compile_indent_query( - grammar: Grammar, - config: &LanguageConfiguration, - ) -> Result<Option<IndentQuery>> { - let name = &config.language_id; - let text = read_query(name, "indents.scm"); - if text.is_empty() { - return Ok(None); + pub fn reconfigure(&self, scopes: &[String]) { + if let Some(Some(config)) = self.highlight_config.get() { + config.configure(scopes); } - let indent_query = IndentQuery::new(grammar, &text) - .with_context(|| format!("Failed to compile indents.scm query for '{name}'"))?; - Ok(Some(indent_query)) } - fn indent_query(&self, loader: &Loader) -> Option<&IndentQuery> { - self.indent_query - .get_or_init(|| { - let grammar = self.syntax_config(loader)?.grammar; - Self::compile_indent_query(grammar, &self.config) - .map_err(|err| { - log::error!("{err}"); - }) - .ok() - .flatten() - }) - .as_ref() + pub fn get_highlight_config(&self) -> Option<Arc<HighlightConfiguration>> { + self.highlight_config.get().cloned().flatten() } - /// Compiles the textobjects.scm query for a language. - /// This function should only be used by this module or the xtask crate. - pub fn compile_textobject_query( - grammar: Grammar, - config: &LanguageConfiguration, - ) -> Result<Option<TextObjectQuery>> { - let name = &config.language_id; - let text = read_query(name, "textobjects.scm"); - if text.is_empty() { - return Ok(None); - } - let query = Query::new(grammar, &text, |_, _| Ok(())) - .with_context(|| format!("Failed to compile textobjects.scm queries for '{name}'"))?; - Ok(Some(TextObjectQuery::new(query))) + pub fn highlight_config(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> { + self.highlight_config + .get_or_init(|| self.initialize_highlight(scopes)) + .clone() } - fn textobject_query(&self, loader: &Loader) -> Option<&TextObjectQuery> { - self.textobject_query - .get_or_init(|| { - let grammar = self.syntax_config(loader)?.grammar; - Self::compile_textobject_query(grammar, &self.config) - .map_err(|err| { - log::error!("{err}"); - }) - .ok() - .flatten() - }) - .as_ref() + pub fn is_highlight_initialized(&self) -> bool { + self.highlight_config.get().is_some() } - /// Compiles the tags.scm query for a language. - /// This function should only be used by this module or the xtask crate. - pub fn compile_tag_query( - grammar: Grammar, - config: &LanguageConfiguration, - ) -> Result<Option<TagQuery>> { - let name = &config.language_id; - let text = read_query(name, "tags.scm"); - if text.is_empty() { - return Ok(None); - } - let query = Query::new(grammar, &text, |_pattern, predicate| match predicate { - // TODO: these predicates are allowed in tags.scm queries but not yet used. - UserPredicate::IsPropertySet { key: "local", .. } => Ok(()), - UserPredicate::Other(pred) => match pred.name() { - "strip!" | "select-adjacent!" => Ok(()), - _ => Err(InvalidPredicateError::unknown(predicate)), - }, - _ => Err(InvalidPredicateError::unknown(predicate)), - }) - .with_context(|| format!("Failed to compile tags.scm query for '{name}'"))?; - Ok(Some(TagQuery { query })) + pub fn indent_query(&self) -> Option<&Query> { + self.indent_query + .get_or_init(|| self.load_query("indents.scm")) + .as_ref() } - fn tag_query(&self, loader: &Loader) -> Option<&TagQuery> { - self.tag_query + pub fn textobject_query(&self) -> Option<&TextObjectQuery> { + self.textobject_query .get_or_init(|| { - let grammar = self.syntax_config(loader)?.grammar; - Self::compile_tag_query(grammar, &self.config) - .map_err(|err| { - log::error!("{err}"); - }) - .ok() - .flatten() + self.load_query("textobjects.scm") + .map(|query| TextObjectQuery { query }) }) .as_ref() } - /// Compiles the rainbows.scm query for a language. - /// This function should only be used by this module or the xtask crate. - pub fn compile_rainbow_query( - grammar: Grammar, - config: &LanguageConfiguration, - ) -> Result<Option<RainbowQuery>> { - let name = &config.language_id; - let text = read_query(name, "rainbows.scm"); - if text.is_empty() { - return Ok(None); - } - let rainbow_query = RainbowQuery::new(grammar, &text) - .with_context(|| format!("Failed to compile rainbows.scm query for '{name}'"))?; - Ok(Some(rainbow_query)) + pub fn scope(&self) -> &str { + &self.scope } - fn rainbow_query(&self, loader: &Loader) -> Option<&RainbowQuery> { - self.rainbow_query - .get_or_init(|| { - let grammar = self.syntax_config(loader)?.grammar; - Self::compile_rainbow_query(grammar, &self.config) - .map_err(|err| { - log::error!("{err}"); - }) - .ok() - .flatten() + fn load_query(&self, kind: &str) -> Option<Query> { + let query_text = read_query(&self.language_id, kind); + if query_text.is_empty() { + return None; + } + let lang = &self.highlight_config.get()?.as_ref()?.grammar; + Query::new(lang, &query_text) + .map_err(|e| { + log::error!( + "Failed to parse {} queries for {}: {}", + kind, + self.language_id, + e + ) }) - .as_ref() + .ok() } +} + +#[derive(Debug, PartialEq, Eq, Hash)] +pub enum FileType { + /// The extension of the file, either the `Path::extension` or the full + /// filename if the file does not have an extension. + Extension(String), + /// A Unix-style path glob. This is compared to the file's absolute path, so + /// it can be used to detect files based on their directories. If the glob + /// is not an absolute path and does not already start with a glob pattern, + /// a glob pattern will be prepended to it. + Glob(globset::Glob), +} - fn reconfigure(&self, scopes: &[String]) { - if let Some(Some(config)) = self.syntax.get() { - reconfigure_highlights(config, scopes); +impl Serialize for FileType { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + use serde::ser::SerializeMap; + + match self { + FileType::Extension(extension) => serializer.serialize_str(extension), + FileType::Glob(glob) => { + let mut map = serializer.serialize_map(Some(1))?; + map.serialize_entry("glob", glob.glob())?; + map.end() + } } } } -pub fn reconfigure_highlights(config: &SyntaxConfig, recognized_names: &[String]) { - config.configure(move |capture_name| { - let capture_parts: Vec<_> = capture_name.split('.').collect(); - - let mut best_index = None; - let mut best_match_len = 0; - for (i, recognized_name) in recognized_names.iter().enumerate() { - let mut len = 0; - let mut matches = true; - for (i, part) in recognized_name.split('.').enumerate() { - match capture_parts.get(i) { - Some(capture_part) if *capture_part == part => len += 1, - _ => { - matches = false; - break; +impl<'de> Deserialize<'de> for FileType { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::de::Deserializer<'de>, + { + struct FileTypeVisitor; + + impl<'de> serde::de::Visitor<'de> for FileTypeVisitor { + type Value = FileType; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("string or table") + } + + fn visit_str<E>(self, value: &str) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + Ok(FileType::Extension(value.to_string())) + } + + fn visit_map<M>(self, mut map: M) -> Result<Self::Value, M::Error> + where + M: serde::de::MapAccess<'de>, + { + match map.next_entry::<String, String>()? { + Some((key, mut glob)) if key == "glob" => { + // If the glob isn't an absolute path or already starts + // with a glob pattern, add a leading glob so we + // properly match relative paths. + if !glob.starts_with('/') && !glob.starts_with("*/") { + glob.insert_str(0, "*/"); + } + + globset::Glob::new(glob.as_str()) + .map(FileType::Glob) + .map_err(|err| { + serde::de::Error::custom(format!("invalid `glob` pattern: {}", err)) + }) } + Some((key, _value)) => Err(serde::de::Error::custom(format!( + "unknown key in `file-types` list: {}", + key + ))), + None => Err(serde::de::Error::custom( + "expected a `suffix` key in the `file-types` entry", + )), } } - if matches && len > best_match_len { - best_index = Some(i); - best_match_len = len; - } } - best_index.map(|idx| Highlight::new(idx as u32)) - }); -} -pub fn read_query(lang: &str, query_filename: &str) -> String { - tree_house::read_query(lang, |language| { - helix_loader::grammar::load_runtime_file(language, query_filename).unwrap_or_default() - }) + deserializer.deserialize_any(FileTypeVisitor) + } } -#[derive(Debug, Default)] -pub struct Loader { - languages: Vec<LanguageData>, - languages_by_extension: HashMap<String, Language>, - languages_by_shebang: HashMap<String, Language>, - languages_glob_matcher: FileTypeGlobMatcher, - language_server_configs: HashMap<String, LanguageServerConfiguration>, - scopes: ArcSwap<Vec<String>>, +fn from_comment_tokens<'de, D>(deserializer: D) -> Result<Option<Vec<String>>, D::Error> +where + D: serde::Deserializer<'de>, +{ + #[derive(Deserialize)] + #[serde(untagged)] + enum CommentTokens { + Multiple(Vec<String>), + Single(String), + } + Ok( + Option::<CommentTokens>::deserialize(deserializer)?.map(|tokens| match tokens { + CommentTokens::Single(val) => vec![val], + CommentTokens::Multiple(vals) => vals, + }), + ) } -pub type LoaderError = globset::Error; - -impl Loader { - pub fn new(config: Configuration) -> Result<Self, LoaderError> { - let mut languages = Vec::with_capacity(config.language.len()); - let mut languages_by_extension = HashMap::new(); - let mut languages_by_shebang = HashMap::new(); - let mut file_type_globs = Vec::new(); +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct BlockCommentToken { + pub start: String, + pub end: String, +} - for mut config in config.language { - let language = Language(languages.len() as u32); - config.language = Some(language); +impl Default for BlockCommentToken { + fn default() -> Self { + BlockCommentToken { + start: "/*".to_string(), + end: "*/".to_string(), + } + } +} - for file_type in &config.file_types { - match file_type { - FileType::Extension(extension) => { - languages_by_extension.insert(extension.clone(), language); - } - FileType::Glob(glob) => { - file_type_globs.push(FileTypeGlob::new(glob.to_owned(), language)); - } - }; - } - for shebang in &config.shebangs { - languages_by_shebang.insert(shebang.clone(), language); - } +fn from_block_comment_tokens<'de, D>( + deserializer: D, +) -> Result<Option<Vec<BlockCommentToken>>, D::Error> +where + D: serde::Deserializer<'de>, +{ + #[derive(Deserialize)] + #[serde(untagged)] + enum BlockCommentTokens { + Multiple(Vec<BlockCommentToken>), + Single(BlockCommentToken), + } + Ok( + Option::<BlockCommentTokens>::deserialize(deserializer)?.map(|tokens| match tokens { + BlockCommentTokens::Single(val) => vec![val], + BlockCommentTokens::Multiple(vals) => vals, + }), + ) +} - languages.push(LanguageData::new(config)); - } +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "kebab-case")] +pub enum LanguageServerFeature { + Format, + GotoDeclaration, + GotoDefinition, + GotoTypeDefinition, + GotoReference, + GotoImplementation, + // Goto, use bitflags, combining previous Goto members? + SignatureHelp, + Hover, + DocumentHighlight, + Completion, + CodeAction, + WorkspaceCommand, + DocumentSymbols, + WorkspaceSymbols, + // Symbols, use bitflags, see above? + Diagnostics, + RenameSymbol, + InlayHints, +} - Ok(Self { - languages, - languages_by_extension, - languages_by_shebang, - languages_glob_matcher: FileTypeGlobMatcher::new(file_type_globs)?, - language_server_configs: config.language_server, - scopes: ArcSwap::from_pointee(Vec::new()), - }) +impl Display for LanguageServerFeature { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use LanguageServerFeature::*; + let feature = match self { + Format => "format", + GotoDeclaration => "goto-declaration", + GotoDefinition => "goto-definition", + GotoTypeDefinition => "goto-type-definition", + GotoReference => "goto-reference", + GotoImplementation => "goto-implementation", + SignatureHelp => "signature-help", + Hover => "hover", + DocumentHighlight => "document-highlight", + Completion => "completion", + CodeAction => "code-action", + WorkspaceCommand => "workspace-command", + DocumentSymbols => "document-symbols", + WorkspaceSymbols => "workspace-symbols", + Diagnostics => "diagnostics", + RenameSymbol => "rename-symbol", + InlayHints => "inlay-hints", + }; + write!(f, "{feature}",) } +} - pub fn languages(&self) -> impl ExactSizeIterator<Item = (Language, &LanguageData)> { - self.languages - .iter() - .enumerate() - .map(|(idx, data)| (Language(idx as u32), data)) - } +#[derive(Debug, Serialize, Deserialize)] +#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)] +enum LanguageServerFeatureConfiguration { + #[serde(rename_all = "kebab-case")] + Features { + #[serde(default, skip_serializing_if = "HashSet::is_empty")] + only_features: HashSet<LanguageServerFeature>, + #[serde(default, skip_serializing_if = "HashSet::is_empty")] + except_features: HashSet<LanguageServerFeature>, + name: String, + }, + Simple(String), +} - pub fn language_configs(&self) -> impl ExactSizeIterator<Item = &LanguageConfiguration> { - self.languages.iter().map(|language| &*language.config) - } +#[derive(Debug, Default)] +pub struct LanguageServerFeatures { + pub name: String, + pub only: HashSet<LanguageServerFeature>, + pub excluded: HashSet<LanguageServerFeature>, +} - pub fn language(&self, lang: Language) -> &LanguageData { - &self.languages[lang.idx()] +impl LanguageServerFeatures { + pub fn has_feature(&self, feature: LanguageServerFeature) -> bool { + (self.only.is_empty() || self.only.contains(&feature)) && !self.excluded.contains(&feature) } +} - pub fn language_for_name(&self, name: impl PartialEq<String>) -> Option<Language> { - self.languages.iter().enumerate().find_map(|(idx, config)| { - (name == config.config.language_id).then_some(Language(idx as u32)) +fn deserialize_lang_features<'de, D>( + deserializer: D, +) -> Result<Vec<LanguageServerFeatures>, D::Error> +where + D: serde::Deserializer<'de>, +{ + let raw: Vec<LanguageServerFeatureConfiguration> = Deserialize::deserialize(deserializer)?; + let res = raw + .into_iter() + .map(|config| match config { + LanguageServerFeatureConfiguration::Simple(name) => LanguageServerFeatures { + name, + ..Default::default() + }, + LanguageServerFeatureConfiguration::Features { + only_features, + except_features, + name, + } => LanguageServerFeatures { + name, + only: only_features, + excluded: except_features, + }, }) + .collect(); + Ok(res) +} +fn serialize_lang_features<S>( + map: &Vec<LanguageServerFeatures>, + serializer: S, +) -> Result<S::Ok, S::Error> +where + S: serde::Serializer, +{ + let mut serializer = serializer.serialize_seq(Some(map.len()))?; + for features in map { + let features = if features.only.is_empty() && features.excluded.is_empty() { + LanguageServerFeatureConfiguration::Simple(features.name.to_owned()) + } else { + LanguageServerFeatureConfiguration::Features { + only_features: features.only.clone(), + except_features: features.excluded.clone(), + name: features.name.to_owned(), + } + }; + serializer.serialize_element(&features)?; } + serializer.end() +} - pub fn language_for_scope(&self, scope: &str) -> Option<Language> { - self.languages.iter().enumerate().find_map(|(idx, config)| { - (scope == config.config.scope).then_some(Language(idx as u32)) - }) +fn deserialize_required_root_patterns<'de, D>(deserializer: D) -> Result<Option<GlobSet>, D::Error> +where + D: serde::Deserializer<'de>, +{ + let patterns = Vec::<String>::deserialize(deserializer)?; + if patterns.is_empty() { + return Ok(None); } + let mut builder = globset::GlobSetBuilder::new(); + for pattern in patterns { + let glob = globset::Glob::new(&pattern).map_err(serde::de::Error::custom)?; + builder.add(glob); + } + builder.build().map(Some).map_err(serde::de::Error::custom) +} - pub fn language_for_match(&self, text: RopeSlice) -> Option<Language> { - // PERF: If the name matches up with the id, then this saves the need to do expensive regex. - let shortcircuit = self.language_for_name(text); - if shortcircuit.is_some() { - return shortcircuit; - } - - // If the name did not match up with a known id, then match on injection regex. +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct LanguageServerConfiguration { + pub command: String, + #[serde(default)] + #[serde(skip_serializing_if = "Vec::is_empty")] + pub args: Vec<String>, + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub environment: HashMap<String, String>, + #[serde(default, skip_serializing, deserialize_with = "deserialize_lsp_config")] + pub config: Option<serde_json::Value>, + #[serde(default = "default_timeout")] + pub timeout: u64, + #[serde( + default, + skip_serializing, + deserialize_with = "deserialize_required_root_patterns" + )] + pub required_root_patterns: Option<GlobSet>, +} - let mut best_match_length = 0; - let mut best_match_position = None; - for (idx, data) in self.languages.iter().enumerate() { - if let Some(injection_regex) = &data.config.injection_regex { - if let Some(mat) = injection_regex.find(text.regex_input()) { - let length = mat.end() - mat.start(); - if length > best_match_length { - best_match_position = Some(idx); - best_match_length = length; - } - } - } - } +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct FormatterConfiguration { + pub command: String, + #[serde(default)] + #[serde(skip_serializing_if = "Vec::is_empty")] + pub args: Vec<String>, +} - best_match_position.map(|i| Language(i as u32)) - } +#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case")] +pub struct AdvancedCompletion { + pub name: Option<String>, + pub completion: Option<String>, + pub default: Option<String>, +} - pub fn language_for_filename(&self, path: &Path) -> Option<Language> { - // Find all the language configurations that match this file name - // or a suffix of the file name. +#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case", untagged)] +pub enum DebugConfigCompletion { + Named(String), + Advanced(AdvancedCompletion), +} - // TODO: content_regex handling conflict resolution - self.languages_glob_matcher - .language_for_path(path) - .or_else(|| { - path.extension() - .and_then(|extension| extension.to_str()) - .and_then(|extension| self.languages_by_extension.get(extension).copied()) - }) - } +#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] +#[serde(untagged)] +pub enum DebugArgumentValue { + String(String), + Array(Vec<String>), + Boolean(bool), +} - pub fn language_for_shebang(&self, text: RopeSlice) -> Option<Language> { - // NOTE: this is slightly different than the one for injection markers in tree-house. It - // is anchored at the beginning. - use helix_stdx::rope::Regex; - use once_cell::sync::Lazy; - const SHEBANG: &str = r"^#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)"; - static SHEBANG_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(SHEBANG).unwrap()); - - let marker = SHEBANG_REGEX - .captures_iter(regex_cursor::Input::new(text)) - .map(|cap| text.byte_slice(cap.get_group(1).unwrap().range())) - .next()?; - self.language_for_shebang_marker(marker) - } +#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case")] +pub struct DebugTemplate { + pub name: String, + pub request: String, + #[serde(default)] + pub completion: Vec<DebugConfigCompletion>, + pub args: HashMap<String, DebugArgumentValue>, +} - fn language_for_shebang_marker(&self, marker: RopeSlice) -> Option<Language> { - let shebang: Cow<str> = marker.into(); - self.languages_by_shebang.get(shebang.as_ref()).copied() - } +#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case")] +pub struct DebugAdapterConfig { + pub name: String, + pub transport: String, + #[serde(default)] + pub command: String, + #[serde(default)] + pub args: Vec<String>, + pub port_arg: Option<String>, + pub templates: Vec<DebugTemplate>, + #[serde(default)] + pub quirks: DebuggerQuirks, +} - pub fn indent_query(&self, lang: Language) -> Option<&IndentQuery> { - self.language(lang).indent_query(self) - } +// Different workarounds for adapters' differences +#[derive(Debug, Default, PartialEq, Eq, Clone, Serialize, Deserialize)] +pub struct DebuggerQuirks { + #[serde(default)] + pub absolute_paths: bool, +} - pub fn textobject_query(&self, lang: Language) -> Option<&TextObjectQuery> { - self.language(lang).textobject_query(self) - } +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct IndentationConfiguration { + #[serde(deserialize_with = "deserialize_tab_width")] + pub tab_width: usize, + pub unit: String, +} - pub fn tag_query(&self, lang: Language) -> Option<&TagQuery> { - self.language(lang).tag_query(self) - } +/// How the indentation for a newly inserted line should be determined. +/// If the selected heuristic is not available (e.g. because the current +/// language has no tree-sitter indent queries), a simpler one will be used. +#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum IndentationHeuristic { + /// Just copy the indentation of the line that the cursor is currently on. + Simple, + /// Use tree-sitter indent queries to compute the expected absolute indentation level of the new line. + TreeSitter, + /// Use tree-sitter indent queries to compute the expected difference in indentation between the new line + /// and the line before. Add this to the actual indentation level of the line before. + #[default] + Hybrid, +} - fn rainbow_query(&self, lang: Language) -> Option<&RainbowQuery> { - self.language(lang).rainbow_query(self) - } +/// Configuration for auto pairs +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case", deny_unknown_fields, untagged)] +pub enum AutoPairConfig { + /// Enables or disables auto pairing. False means disabled. True means to use the default pairs. + Enable(bool), - pub fn language_server_configs(&self) -> &HashMap<String, LanguageServerConfiguration> { - &self.language_server_configs - } + /// The mappings of pairs. + Pairs(HashMap<char, char>), +} - pub fn scopes(&self) -> Guard<Arc<Vec<String>>> { - self.scopes.load() +impl Default for AutoPairConfig { + fn default() -> Self { + AutoPairConfig::Enable(true) } +} - pub fn set_scopes(&self, scopes: Vec<String>) { - self.scopes.store(Arc::new(scopes)); - - // Reconfigure existing grammars - for data in &self.languages { - data.reconfigure(&self.scopes()); +impl From<&AutoPairConfig> for Option<AutoPairs> { + fn from(auto_pair_config: &AutoPairConfig) -> Self { + match auto_pair_config { + AutoPairConfig::Enable(false) => None, + AutoPairConfig::Enable(true) => Some(AutoPairs::default()), + AutoPairConfig::Pairs(pairs) => Some(AutoPairs::new(pairs.iter())), } } } -impl LanguageLoader for Loader { - fn language_for_marker(&self, marker: InjectionLanguageMarker) -> Option<Language> { - match marker { - InjectionLanguageMarker::Name(name) => self.language_for_name(name), - InjectionLanguageMarker::Match(text) => self.language_for_match(text), - InjectionLanguageMarker::Filename(text) => { - let path: Cow<str> = text.into(); - self.language_for_filename(Path::new(path.as_ref())) - } - InjectionLanguageMarker::Shebang(text) => self.language_for_shebang_marker(text), - } +impl From<AutoPairConfig> for Option<AutoPairs> { + fn from(auto_pairs_config: AutoPairConfig) -> Self { + (&auto_pairs_config).into() } +} + +impl FromStr for AutoPairConfig { + type Err = std::str::ParseBoolError; - fn get_config(&self, lang: Language) -> Option<&SyntaxConfig> { - self.languages[lang.idx()].syntax_config(self) + // only do bool parsing for runtime setting + fn from_str(s: &str) -> Result<Self, Self::Err> { + let enable: bool = s.parse()?; + Ok(AutoPairConfig::Enable(enable)) } } +#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(default, rename_all = "kebab-case", deny_unknown_fields)] +pub struct SoftWrap { + /// Soft wrap lines that exceed viewport width. Default to off + // NOTE: Option on purpose because the struct is shared between language config and global config. + // By default the option is None so that the language config falls back to the global config unless explicitly set. + pub enable: Option<bool>, + /// Maximum space left free at the end of the line. + /// This space is used to wrap text at word boundaries. If that is not possible within this limit + /// the word is simply split at the end of the line. + /// + /// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views. + /// + /// Default to 20 + pub max_wrap: Option<u16>, + /// Maximum number of indentation that can be carried over from the previous line when softwrapping. + /// If a line is indented further then this limit it is rendered at the start of the viewport instead. + /// + /// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views. + /// + /// Default to 40 + pub max_indent_retain: Option<u16>, + /// Indicator placed at the beginning of softwrapped lines + /// + /// Defaults to ↪ + pub wrap_indicator: Option<String>, + /// Softwrap at `text_width` instead of viewport width if it is shorter + pub wrap_at_text_width: Option<bool>, +} + #[derive(Debug)] struct FileTypeGlob { glob: globset::Glob, - language: Language, + language_id: usize, } impl FileTypeGlob { - pub fn new(glob: globset::Glob, language: Language) -> Self { - Self { glob, language } + fn new(glob: globset::Glob, language_id: usize) -> Self { + Self { glob, language_id } } } @@ -478,15 +741,6 @@ struct FileTypeGlobMatcher { file_types: Vec<FileTypeGlob>, } -impl Default for FileTypeGlobMatcher { - fn default() -> Self { - Self { - matcher: globset::GlobSet::empty(), - file_types: Default::default(), - } - } -} - impl FileTypeGlobMatcher { fn new(file_types: Vec<FileTypeGlob>) -> Result<Self, globset::Error> { let mut builder = globset::GlobSetBuilder::new(); @@ -500,196 +754,180 @@ impl FileTypeGlobMatcher { }) } - fn language_for_path(&self, path: &Path) -> Option<Language> { + fn language_id_for_path(&self, path: &Path) -> Option<&usize> { self.matcher .matches(path) .iter() .filter_map(|idx| self.file_types.get(*idx)) .max_by_key(|file_type| file_type.glob.glob().len()) - .map(|file_type| file_type.language) + .map(|file_type| &file_type.language_id) } } +// Expose loader as Lazy<> global since it's always static? + #[derive(Debug)] -pub struct Syntax { - pub inner: tree_house::Syntax, +pub struct Loader { + // highlight_names ? + language_configs: Vec<Arc<LanguageConfiguration>>, + language_config_ids_by_extension: HashMap<String, usize>, // Vec<usize> + language_config_ids_glob_matcher: FileTypeGlobMatcher, + language_config_ids_by_shebang: HashMap<String, usize>, + + language_server_configs: HashMap<String, LanguageServerConfiguration>, + + scopes: ArcSwap<Vec<String>>, } -const PARSE_TIMEOUT: Duration = Duration::from_millis(500); // half a second is pretty generous +pub type LoaderError = globset::Error; -impl Syntax { - pub fn new(source: RopeSlice, language: Language, loader: &Loader) -> Result<Self, Error> { - let inner = tree_house::Syntax::new(source, language, PARSE_TIMEOUT, loader)?; - Ok(Self { inner }) - } +impl Loader { + pub fn new(config: Configuration) -> Result<Self, LoaderError> { + let mut language_configs = Vec::new(); + let mut language_config_ids_by_extension = HashMap::new(); + let mut language_config_ids_by_shebang = HashMap::new(); + let mut file_type_globs = Vec::new(); - pub fn update( - &mut self, - old_source: RopeSlice, - source: RopeSlice, - changeset: &ChangeSet, - loader: &Loader, - ) -> Result<(), Error> { - let edits = generate_edits(old_source, changeset); - if edits.is_empty() { - Ok(()) - } else { - self.inner.update(source, PARSE_TIMEOUT, &edits, loader) - } - } + for config in config.language { + // get the next id + let language_id = language_configs.len(); - pub fn layer(&self, layer: Layer) -> &tree_house::LayerData { - self.inner.layer(layer) - } + for file_type in &config.file_types { + // entry().or_insert(Vec::new).push(language_id); + match file_type { + FileType::Extension(extension) => { + language_config_ids_by_extension.insert(extension.clone(), language_id); + } + FileType::Glob(glob) => { + file_type_globs.push(FileTypeGlob::new(glob.to_owned(), language_id)); + } + }; + } + for shebang in &config.shebangs { + language_config_ids_by_shebang.insert(shebang.clone(), language_id); + } - pub fn root_layer(&self) -> Layer { - self.inner.root() - } + language_configs.push(Arc::new(config)); + } - pub fn layer_for_byte_range(&self, start: u32, end: u32) -> Layer { - self.inner.layer_for_byte_range(start, end) + Ok(Self { + language_configs, + language_config_ids_by_extension, + language_config_ids_glob_matcher: FileTypeGlobMatcher::new(file_type_globs)?, + language_config_ids_by_shebang, + language_server_configs: config.language_server, + scopes: ArcSwap::from_pointee(Vec::new()), + }) } - pub fn root_language(&self) -> Language { - self.layer(self.root_layer()).language - } + pub fn language_config_for_file_name(&self, path: &Path) -> Option<Arc<LanguageConfiguration>> { + // Find all the language configurations that match this file name + // or a suffix of the file name. + let configuration_id = self + .language_config_ids_glob_matcher + .language_id_for_path(path) + .or_else(|| { + path.extension() + .and_then(|extension| extension.to_str()) + .and_then(|extension| self.language_config_ids_by_extension.get(extension)) + }); - pub fn tree(&self) -> &Tree { - self.inner.tree() - } + configuration_id.and_then(|&id| self.language_configs.get(id).cloned()) - pub fn tree_for_byte_range(&self, start: u32, end: u32) -> &Tree { - self.inner.tree_for_byte_range(start, end) + // TODO: content_regex handling conflict resolution } - pub fn named_descendant_for_byte_range(&self, start: u32, end: u32) -> Option<Node<'_>> { - self.inner.named_descendant_for_byte_range(start, end) - } + pub fn language_config_for_shebang( + &self, + source: RopeSlice, + ) -> Option<Arc<LanguageConfiguration>> { + let line = Cow::from(source.line(0)); + // TODO: resue detection from helix-syntax + const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)"; + static SHEBANG_REGEX: Lazy<Regex> = + Lazy::new(|| Regex::new(&["^", SHEBANG].concat()).unwrap()); + let configuration_id = SHEBANG_REGEX + .captures(&line) + .and_then(|cap| self.language_config_ids_by_shebang.get(&cap[1])); - pub fn descendant_for_byte_range(&self, start: u32, end: u32) -> Option<Node<'_>> { - self.inner.descendant_for_byte_range(start, end) + configuration_id.and_then(|&id| self.language_configs.get(id).cloned()) } - pub fn walk(&self) -> TreeCursor<'_> { - self.inner.walk() + pub fn language_config_for_scope(&self, scope: &str) -> Option<Arc<LanguageConfiguration>> { + self.language_configs + .iter() + .find(|config| config.scope == scope) + .cloned() } - pub fn highlighter<'a>( - &'a self, - source: RopeSlice<'a>, - loader: &'a Loader, - range: impl RangeBounds<u32>, - ) -> Highlighter<'a> { - Highlighter::new(&self.inner, source, loader, range) + pub fn language_config_for_language_id(&self, id: &str) -> Option<Arc<LanguageConfiguration>> { + self.language_configs + .iter() + .find(|config| config.language_id == id) + .cloned() } - pub fn query_iter<'a, QueryLoader, LayerState, Range>( - &'a self, - source: RopeSlice<'a>, - loader: QueryLoader, - range: Range, - ) -> QueryIter<'a, 'a, QueryLoader, LayerState> - where - QueryLoader: FnMut(Language) -> Option<&'a Query> + 'a, - LayerState: Default, - Range: RangeBounds<u32>, - { - QueryIter::new(&self.inner, source, loader, range) - } + /// Unlike language_config_for_language_id, which only returns Some for an exact id, this + /// function will perform a regex match on the given string to find the closest language match. + pub fn language_config_for_name(&self, name: &str) -> Option<Arc<LanguageConfiguration>> { + let mut best_match_length = 0; + let mut best_match_position = None; + for (i, configuration) in self.language_configs.iter().enumerate() { + if let Some(injection_regex) = &configuration.injection_regex { + if let Some(mat) = injection_regex.find(name) { + let length = mat.end() - mat.start(); + if length > best_match_length { + best_match_position = Some(i); + best_match_length = length; + } + } + } + } - pub fn tags<'a>( - &'a self, - source: RopeSlice<'a>, - loader: &'a Loader, - range: impl RangeBounds<u32>, - ) -> QueryIter<'a, 'a, impl FnMut(Language) -> Option<&'a Query> + 'a, ()> { - self.query_iter( - source, - |lang| loader.tag_query(lang).map(|q| &q.query), - range, - ) + best_match_position.map(|i| self.language_configs[i].clone()) } - pub fn rainbow_highlights( + pub fn language_configuration_for_injection_string( &self, - source: RopeSlice, - rainbow_length: usize, - loader: &Loader, - range: impl RangeBounds<u32>, - ) -> OverlayHighlights { - struct RainbowScope<'tree> { - end: u32, - node: Option<Node<'tree>>, - highlight: Highlight, + capture: &InjectionLanguageMarker, + ) -> Option<Arc<LanguageConfiguration>> { + match capture { + InjectionLanguageMarker::Name(string) => self.language_config_for_name(string), + InjectionLanguageMarker::Filename(file) => self.language_config_for_file_name(file), + InjectionLanguageMarker::Shebang(shebang) => { + self.language_config_for_language_id(shebang) + } } + } - let mut scope_stack = Vec::<RainbowScope>::new(); - let mut highlights = Vec::new(); - let mut query_iter = self.query_iter::<_, (), _>( - source, - |lang| loader.rainbow_query(lang).map(|q| &q.query), - range, - ); - - while let Some(event) = query_iter.next() { - let QueryIterEvent::Match(mat) = event else { - continue; - }; + pub fn language_configs(&self) -> impl Iterator<Item = &Arc<LanguageConfiguration>> { + self.language_configs.iter() + } - let rainbow_query = loader - .rainbow_query(query_iter.current_language()) - .expect("language must have a rainbow query to emit matches"); + pub fn language_server_configs(&self) -> &HashMap<String, LanguageServerConfiguration> { + &self.language_server_configs + } - let byte_range = mat.node.byte_range(); - // Pop any scopes that end before this capture begins. - while scope_stack - .last() - .is_some_and(|scope| byte_range.start >= scope.end) - { - scope_stack.pop(); - } + pub fn set_scopes(&self, scopes: Vec<String>) { + self.scopes.store(Arc::new(scopes)); - let capture = Some(mat.capture); - if capture == rainbow_query.scope_capture { - scope_stack.push(RainbowScope { - end: byte_range.end, - node: if rainbow_query - .include_children_patterns - .contains(&mat.pattern) - { - None - } else { - Some(mat.node.clone()) - }, - highlight: Highlight::new((scope_stack.len() % rainbow_length) as u32), - }); - } else if capture == rainbow_query.bracket_capture { - if let Some(scope) = scope_stack.last() { - if !scope - .node - .as_ref() - .is_some_and(|node| mat.node.parent().as_ref() != Some(node)) - { - let start = source - .byte_to_char(source.floor_char_boundary(byte_range.start as usize)); - let end = - source.byte_to_char(source.ceil_char_boundary(byte_range.end as usize)); - highlights.push((scope.highlight, start..end)); - } - } - } + // Reconfigure existing grammars + for config in self + .language_configs + .iter() + .filter(|cfg| cfg.is_highlight_initialized()) + { + config.reconfigure(&self.scopes()); } + } - OverlayHighlights::Heterogenous { highlights } + pub fn scopes(&self) -> Guard<Arc<Vec<String>>> { + self.scopes.load() } } -pub type Highlighter<'a> = highlighter::Highlighter<'a, 'a, Loader>; - -fn generate_edits(old_text: RopeSlice, changeset: &ChangeSet) -> Vec<InputEdit> { - use crate::Operation::*; - use tree_sitter::Point; - +pub fn generate_edits(old_text: RopeSlice, changeset: &ChangeSet) -> Vec<tree_sitter::InputEdit> { + use Operation::*; let mut old_pos = 0; let mut edits = Vec::new(); @@ -701,6 +939,35 @@ fn generate_edits(old_text: RopeSlice, changeset: &ChangeSet) -> Vec<InputEdit> let mut iter = changeset.changes.iter().peekable(); // TODO; this is a lot easier with Change instead of Operation. + + fn point_at_pos(text: RopeSlice, pos: usize) -> (usize, Point) { + let byte = text.char_to_byte(pos); // <- attempted to index past end + let line = text.char_to_line(pos); + let line_start_byte = text.line_to_byte(line); + let col = byte - line_start_byte; + + (byte, Point::new(line, col)) + } + + fn traverse(point: Point, text: &Tendril) -> Point { + let Point { + mut row, + mut column, + } = point; + + // TODO: there should be a better way here. + let mut chars = text.chars().peekable(); + while let Some(ch) = chars.next() { + if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) { + row += 1; + column = 0; + } else { + column += 1; + } + } + Point { row, column } + } + while let Some(change) = iter.next() { let len = match change { Delete(i) | Retain(i) => *i, @@ -711,47 +978,47 @@ fn generate_edits(old_text: RopeSlice, changeset: &ChangeSet) -> Vec<InputEdit> match change { Retain(_) => {} Delete(_) => { - let start_byte = old_text.char_to_byte(old_pos) as u32; - let old_end_byte = old_text.char_to_byte(old_end) as u32; + let (start_byte, start_position) = point_at_pos(old_text, old_pos); + let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); // deletion - edits.push(InputEdit { - start_byte, // old_pos to byte - old_end_byte, // old_end to byte - new_end_byte: start_byte, // old_pos to byte - start_point: Point::ZERO, - old_end_point: Point::ZERO, - new_end_point: Point::ZERO, + edits.push(tree_sitter::InputEdit { + start_byte, // old_pos to byte + old_end_byte, // old_end to byte + new_end_byte: start_byte, // old_pos to byte + start_position, // old pos to coords + old_end_position, // old_end to coords + new_end_position: start_position, // old pos to coords }); } Insert(s) => { - let start_byte = old_text.char_to_byte(old_pos) as u32; + let (start_byte, start_position) = point_at_pos(old_text, old_pos); // a subsequent delete means a replace, consume it if let Some(Delete(len)) = iter.peek() { old_end = old_pos + len; - let old_end_byte = old_text.char_to_byte(old_end) as u32; + let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); iter.next(); // replacement - edits.push(InputEdit { - start_byte, // old_pos to byte - old_end_byte, // old_end to byte - new_end_byte: start_byte + s.len() as u32, // old_pos to byte + s.len() - start_point: Point::ZERO, - old_end_point: Point::ZERO, - new_end_point: Point::ZERO, + edits.push(tree_sitter::InputEdit { + start_byte, // old_pos to byte + old_end_byte, // old_end to byte + new_end_byte: start_byte + s.len(), // old_pos to byte + s.len() + start_position, // old pos to coords + old_end_position, // old_end to coords + new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) }); } else { // insert - edits.push(InputEdit { - start_byte, // old_pos to byte - old_end_byte: start_byte, // same - new_end_byte: start_byte + s.len() as u32, // old_pos + s.len() - start_point: Point::ZERO, - old_end_point: Point::ZERO, - new_end_point: Point::ZERO, + edits.push(tree_sitter::InputEdit { + start_byte, // old_pos to byte + old_end_byte: start_byte, // same + new_end_byte: start_byte + s.len(), // old_pos + s.len() + start_position, // old pos to coords + old_end_position: start_position, // same + new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) }); } } @@ -761,433 +1028,13 @@ fn generate_edits(old_text: RopeSlice, changeset: &ChangeSet) -> Vec<InputEdit> edits } -/// A set of "overlay" highlights and ranges they apply to. -/// -/// As overlays, the styles for the given `Highlight`s are merged on top of the syntax highlights. -#[derive(Debug)] -pub enum OverlayHighlights { - /// All highlights use a single `Highlight`. - /// - /// Note that, currently, all ranges are assumed to be non-overlapping. This could change in - /// the future though. - Homogeneous { - highlight: Highlight, - ranges: Vec<ops::Range<usize>>, - }, - /// A collection of different highlights for given ranges. - /// - /// Note that the ranges **must be non-overlapping**. - Heterogenous { - highlights: Vec<(Highlight, ops::Range<usize>)>, - }, -} - -impl OverlayHighlights { - pub fn single(highlight: Highlight, range: ops::Range<usize>) -> Self { - Self::Homogeneous { - highlight, - ranges: vec![range], - } - } - - fn is_empty(&self) -> bool { - match self { - Self::Homogeneous { ranges, .. } => ranges.is_empty(), - Self::Heterogenous { highlights } => highlights.is_empty(), - } - } -} - -#[derive(Debug)] -struct Overlay { - highlights: OverlayHighlights, - /// The position of the highlighter into the Vec of ranges of the overlays. - /// - /// Used by the `OverlayHighlighter`. - idx: usize, - /// The currently active highlight (and the ending character index) for this overlay. - /// - /// Used by the `OverlayHighlighter`. - active_highlight: Option<(Highlight, usize)>, -} - -impl Overlay { - fn new(highlights: OverlayHighlights) -> Option<Self> { - (!highlights.is_empty()).then_some(Self { - highlights, - idx: 0, - active_highlight: None, - }) - } - - fn current(&self) -> Option<(Highlight, ops::Range<usize>)> { - match &self.highlights { - OverlayHighlights::Homogeneous { highlight, ranges } => ranges - .get(self.idx) - .map(|range| (*highlight, range.clone())), - OverlayHighlights::Heterogenous { highlights } => highlights.get(self.idx).cloned(), - } - } - - fn start(&self) -> Option<usize> { - match &self.highlights { - OverlayHighlights::Homogeneous { ranges, .. } => { - ranges.get(self.idx).map(|range| range.start) - } - OverlayHighlights::Heterogenous { highlights } => highlights - .get(self.idx) - .map(|(_highlight, range)| range.start), - } - } -} - -/// A collection of highlights to apply when rendering which merge on top of syntax highlights. -#[derive(Debug)] -pub struct OverlayHighlighter { - overlays: Vec<Overlay>, - next_highlight_start: usize, - next_highlight_end: usize, -} - -impl OverlayHighlighter { - pub fn new(overlays: impl IntoIterator<Item = OverlayHighlights>) -> Self { - let overlays: Vec<_> = overlays.into_iter().filter_map(Overlay::new).collect(); - let next_highlight_start = overlays - .iter() - .filter_map(|overlay| overlay.start()) - .min() - .unwrap_or(usize::MAX); - - Self { - overlays, - next_highlight_start, - next_highlight_end: usize::MAX, - } - } - - /// The current position in the overlay highlights. - /// - /// This method is meant to be used when treating this type as a cursor over the overlay - /// highlights. - /// - /// `usize::MAX` is returned when there are no more overlay highlights. - pub fn next_event_offset(&self) -> usize { - self.next_highlight_start.min(self.next_highlight_end) - } - - pub fn advance(&mut self) -> (HighlightEvent, impl Iterator<Item = Highlight> + '_) { - let mut refresh = false; - let prev_stack_size = self - .overlays - .iter() - .filter(|overlay| overlay.active_highlight.is_some()) - .count(); - let pos = self.next_event_offset(); - - if self.next_highlight_end == pos { - for overlay in self.overlays.iter_mut() { - if overlay - .active_highlight - .is_some_and(|(_highlight, end)| end == pos) - { - overlay.active_highlight.take(); - } - } - - refresh = true; - } - - while self.next_highlight_start == pos { - let mut activated_idx = usize::MAX; - for (idx, overlay) in self.overlays.iter_mut().enumerate() { - let Some((highlight, range)) = overlay.current() else { - continue; - }; - if range.start != self.next_highlight_start { - continue; - } - - // If this overlay has a highlight at this start index, set its active highlight - // and increment the cursor position within the overlay. - overlay.active_highlight = Some((highlight, range.end)); - overlay.idx += 1; - - activated_idx = activated_idx.min(idx); - } - - // If `self.next_highlight_start == pos` that means that some overlay was ready to - // emit a highlight, so `activated_idx` must have been set to an existing index. - assert!( - (0..self.overlays.len()).contains(&activated_idx), - "expected an overlay to highlight (at pos {pos}, there are {} overlays)", - self.overlays.len() - ); - - // If any overlays are active after the (lowest) one which was just activated, the - // highlights need to be refreshed. - refresh |= self.overlays[activated_idx..] - .iter() - .any(|overlay| overlay.active_highlight.is_some()); - - self.next_highlight_start = self - .overlays - .iter() - .filter_map(|overlay| overlay.start()) - .min() - .unwrap_or(usize::MAX); - } - - self.next_highlight_end = self - .overlays - .iter() - .filter_map(|overlay| Some(overlay.active_highlight?.1)) - .min() - .unwrap_or(usize::MAX); - - let (event, start) = if refresh { - (HighlightEvent::Refresh, 0) - } else { - (HighlightEvent::Push, prev_stack_size) - }; - - ( - event, - self.overlays - .iter() - .flat_map(|overlay| overlay.active_highlight) - .map(|(highlight, _end)| highlight) - .skip(start), - ) - } -} - -#[derive(Debug)] -pub enum CapturedNode<'a> { - Single(Node<'a>), - /// Guaranteed to be not empty - Grouped(Vec<Node<'a>>), -} - -impl CapturedNode<'_> { - pub fn start_byte(&self) -> usize { - match self { - Self::Single(n) => n.start_byte() as usize, - Self::Grouped(ns) => ns[0].start_byte() as usize, - } - } - - pub fn end_byte(&self) -> usize { - match self { - Self::Single(n) => n.end_byte() as usize, - Self::Grouped(ns) => ns.last().unwrap().end_byte() as usize, - } - } - - pub fn byte_range(&self) -> ops::Range<usize> { - self.start_byte()..self.end_byte() - } -} - -#[derive(Debug)] -pub struct TextObjectQuery { - query: Query, -} - -impl TextObjectQuery { - pub fn new(query: Query) -> Self { - Self { query } - } - - /// Run the query on the given node and return sub nodes which match given - /// capture ("function.inside", "class.around", etc). - /// - /// Captures may contain multiple nodes by using quantifiers (+, *, etc), - /// and support for this is partial and could use improvement. - /// - /// ```query - /// (comment)+ @capture - /// - /// ; OR - /// ( - /// (comment)* - /// . - /// (function) - /// ) @capture - /// ``` - pub fn capture_nodes<'a>( - &'a self, - capture_name: &str, - node: &Node<'a>, - slice: RopeSlice<'a>, - ) -> Option<impl Iterator<Item = CapturedNode<'a>>> { - self.capture_nodes_any(&[capture_name], node, slice) - } - - /// Find the first capture that exists out of all given `capture_names` - /// and return sub nodes that match this capture. - pub fn capture_nodes_any<'a>( - &'a self, - capture_names: &[&str], - node: &Node<'a>, - slice: RopeSlice<'a>, - ) -> Option<impl Iterator<Item = CapturedNode<'a>>> { - let capture = capture_names - .iter() - .find_map(|cap| self.query.get_capture(cap))?; - - let mut cursor = InactiveQueryCursor::new(0..u32::MAX, TREE_SITTER_MATCH_LIMIT) - .execute_query(&self.query, node, RopeInput::new(slice)); - let capture_node = iter::from_fn(move || { - let (mat, _) = cursor.next_matched_node()?; - Some(mat.nodes_for_capture(capture).cloned().collect()) - }) - .filter_map(move |nodes: Vec<_>| { - if nodes.len() > 1 { - Some(CapturedNode::Grouped(nodes)) - } else { - nodes.into_iter().map(CapturedNode::Single).next() - } - }); - Some(capture_node) - } -} - -#[derive(Debug)] -pub struct TagQuery { - pub query: Query, -} - -pub fn pretty_print_tree<W: fmt::Write>(fmt: &mut W, node: Node) -> fmt::Result { - if node.child_count() == 0 { - if node_is_visible(&node) { - write!(fmt, "({})", node.kind()) - } else { - write!(fmt, "\"{}\"", format_anonymous_node_kind(node.kind())) - } - } else { - pretty_print_tree_impl(fmt, &mut node.walk(), 0) - } -} - -fn node_is_visible(node: &Node) -> bool { - node.is_missing() || (node.is_named() && node.grammar().node_kind_is_visible(node.kind_id())) -} - -fn format_anonymous_node_kind(kind: &str) -> Cow<'_, str> { - if kind.contains('"') { - Cow::Owned(kind.replace('"', "\\\"")) - } else { - Cow::Borrowed(kind) - } -} - -fn pretty_print_tree_impl<W: fmt::Write>( - fmt: &mut W, - cursor: &mut tree_sitter::TreeCursor, - depth: usize, -) -> fmt::Result { - let node = cursor.node(); - let visible = node_is_visible(&node); - - if visible { - let indentation_columns = depth * 2; - write!(fmt, "{:indentation_columns$}", "")?; - - if let Some(field_name) = cursor.field_name() { - write!(fmt, "{}: ", field_name)?; - } - - write!(fmt, "({}", node.kind())?; - } else { - write!(fmt, " \"{}\"", format_anonymous_node_kind(node.kind()))?; - } - - // Handle children. - if cursor.goto_first_child() { - loop { - if node_is_visible(&cursor.node()) { - fmt.write_char('\n')?; - } - - pretty_print_tree_impl(fmt, cursor, depth + 1)?; - - if !cursor.goto_next_sibling() { - break; - } - } - - let moved = cursor.goto_parent(); - // The parent of the first child must exist, and must be `node`. - debug_assert!(moved); - debug_assert!(cursor.node() == node); - } - - if visible { - fmt.write_char(')')?; - } - - Ok(()) -} - -/// Finds the child of `node` which contains the given byte range. -pub fn child_for_byte_range<'a>(node: &Node<'a>, range: ops::Range<u32>) -> Option<Node<'a>> { - for child in node.children() { - let child_range = child.byte_range(); - - if range.start >= child_range.start && range.end <= child_range.end { - return Some(child); - } - } - - None -} - -#[derive(Debug)] -pub struct RainbowQuery { - query: Query, - include_children_patterns: HashSet<Pattern>, - scope_capture: Option<Capture>, - bracket_capture: Option<Capture>, -} - -impl RainbowQuery { - fn new(grammar: Grammar, source: &str) -> Result<Self, tree_sitter::query::ParseError> { - let mut include_children_patterns = HashSet::default(); - - let query = Query::new(grammar, source, |pattern, predicate| match predicate { - UserPredicate::SetProperty { - key: "rainbow.include-children", - val, - } => { - if val.is_some() { - return Err( - "property 'rainbow.include-children' does not take an argument".into(), - ); - } - include_children_patterns.insert(pattern); - Ok(()) - } - _ => Err(InvalidPredicateError::unknown(predicate)), - })?; - - Ok(Self { - include_children_patterns, - scope_capture: query.get_capture("rainbow.scope"), - bracket_capture: query.get_capture("rainbow.bracket"), - query, - }) - } -} - #[cfg(test)] mod test { - use once_cell::sync::Lazy; + use tree_sitter::QueryCursor; use super::*; use crate::{Rope, Transaction}; - static LOADER: Lazy<Loader> = Lazy::new(crate::config::default_lang_loader); - #[test] fn test_textobject_queries() { let query_str = r#" @@ -1202,33 +1049,19 @@ mod test { "#, ); - let language = LOADER.language_for_name("rust").unwrap(); - dbg!(language); - let grammar = LOADER.get_config(language).unwrap().grammar; - dbg!(grammar); - let syntax = Syntax::new(source.slice(..), language, &LOADER).unwrap(); - let mut h = syntax.highlighter( - "fn main() { 4 + 2; }".into(), - &LOADER, - 0.."fn main() { 4 + 2; }".len() as u32, - ); + let language = get_language("rust").unwrap(); - for n in 0..5 { - dbg!(h.active_highlights().collect::<Vec<_>>()); - dbg!(h.next_event_offset()); - let (e, h) = h.advance(); - dbg!(h.collect::<Vec<_>>(), e); - // panic!() - } + let query = Query::new(&language, query_str).unwrap(); + let textobject = TextObjectQuery { query }; + let mut cursor = QueryCursor::new(); - let query = Query::new(grammar, query_str, |_, _| Ok(())).unwrap(); - let textobject = TextObjectQuery::new(query); - let syntax = Syntax::new(source.slice(..), language, &LOADER).unwrap(); + let config = HighlightConfiguration::new(language, "", "", "").unwrap(); + let syntax = Syntax::new(source.slice(..), Arc::new(config), |_| None).unwrap(); let root = syntax.tree().root_node(); - let test = |capture, range| { + let mut test = |capture, range| { let matches: Vec<_> = textobject - .capture_nodes(capture, &root, source.slice(..)) + .capture_nodes(capture, root, source.slice(..), &mut cursor) .unwrap() .collect(); @@ -1248,8 +1081,71 @@ mod test { } #[test] + fn test_parser() { + let highlight_names: Vec<String> = [ + "attribute", + "constant", + "function.builtin", + "function", + "keyword", + "operator", + "property", + "punctuation", + "punctuation.bracket", + "punctuation.delimiter", + "string", + "string.special", + "tag", + "type", + "type.builtin", + "variable", + "variable.builtin", + "variable.parameter", + ] + .iter() + .cloned() + .map(String::from) + .collect(); + + let language = get_language("rust").unwrap(); + let config = HighlightConfiguration::new( + language, + &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm") + .unwrap(), + &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/injections.scm") + .unwrap(), + "", // locals.scm + ) + .unwrap(); + config.configure(&highlight_names); + + let source = Rope::from_str( + " + struct Stuff {} + fn main() {} + ", + ); + let syntax = Syntax::new(source.slice(..), Arc::new(config), |_| None).unwrap(); + let tree = syntax.tree(); + let root = tree.root_node(); + assert_eq!(root.kind(), "source_file"); + + assert_eq!( + root.to_sexp(), + concat!( + "(source_file ", + "(struct_item name: (type_identifier) body: (field_declaration_list)) ", + "(function_item name: (identifier) parameters: (parameters) body: (block)))" + ) + ); + + let struct_node = root.child(0).unwrap(); + assert_eq!(struct_node.kind(), "struct_item"); + } + + #[test] fn test_input_edits() { - use tree_sitter::{InputEdit, Point}; + use tree_sitter::InputEdit; let doc = Rope::from("hello world!\ntest 123"); let transaction = Transaction::change( @@ -1266,17 +1162,17 @@ mod test { start_byte: 6, old_end_byte: 11, new_end_byte: 10, - start_point: Point::ZERO, - old_end_point: Point::ZERO, - new_end_point: Point::ZERO + start_position: Point { row: 0, column: 6 }, + old_end_position: Point { row: 0, column: 11 }, + new_end_position: Point { row: 0, column: 10 } }, InputEdit { start_byte: 12, old_end_byte: 17, new_end_byte: 12, - start_point: Point::ZERO, - old_end_point: Point::ZERO, - new_end_point: Point::ZERO + start_position: Point { row: 0, column: 12 }, + old_end_position: Point { row: 1, column: 4 }, + new_end_position: Point { row: 0, column: 12 } } ] ); @@ -1295,9 +1191,9 @@ mod test { start_byte: 8, old_end_byte: 8, new_end_byte: 14, - start_point: Point::ZERO, - old_end_point: Point::ZERO, - new_end_point: Point::ZERO + start_position: Point { row: 0, column: 8 }, + old_end_position: Point { row: 0, column: 8 }, + new_end_position: Point { row: 0, column: 14 } }] ); } @@ -1311,13 +1207,16 @@ mod test { end: usize, ) { let source = Rope::from_str(source); - let language = LOADER.language_for_name(language_name).unwrap(); - let syntax = Syntax::new(source.slice(..), language, &LOADER).unwrap(); + + let language = get_language(language_name).unwrap(); + + let config = HighlightConfiguration::new(language, "", "", "").unwrap(); + let syntax = Syntax::new(source.slice(..), Arc::new(config), |_| None).unwrap(); let root = syntax .tree() .root_node() - .descendant_for_byte_range(start as u32, end as u32) + .descendant_for_byte_range(start, end) .unwrap(); let mut output = String::new(); @@ -1329,7 +1228,7 @@ mod test { #[test] fn test_pretty_print() { let source = r#"// Hello"#; - assert_pretty_print("rust", source, "(line_comment \"//\")", 0, source.len()); + assert_pretty_print("rust", source, "(line_comment)", 0, source.len()); // A large tree should be indented with fields: let source = r#"fn main() { @@ -1339,16 +1238,16 @@ mod test { "rust", source, concat!( - "(function_item \"fn\"\n", + "(function_item\n", " name: (identifier)\n", - " parameters: (parameters \"(\" \")\")\n", - " body: (block \"{\"\n", + " parameters: (parameters)\n", + " body: (block\n", " (expression_statement\n", " (macro_invocation\n", - " macro: (identifier) \"!\"\n", - " (token_tree \"(\"\n", - " (string_literal \"\\\"\"\n", - " (string_content) \"\\\"\") \")\")) \";\") \"}\"))", + " macro: (identifier)\n", + " (token_tree\n", + " (string_literal\n", + " (string_content)))))))", ), 0, source.len(), @@ -1360,7 +1259,7 @@ mod test { // Error nodes are printed as errors: let source = r#"}{"#; - assert_pretty_print("rust", source, "(ERROR \"}\" \"{\")", 0, source.len()); + assert_pretty_print("rust", source, "(ERROR)", 0, source.len()); // Fields broken under unnamed nodes are determined correctly. // In the following source, `object` belongs to the `singleton_method` @@ -1375,34 +1274,24 @@ mod test { "ruby", source, concat!( - "(singleton_method \"def\"\n", - " object: (self) \".\"\n", + "(singleton_method\n", + " object: (self)\n", " name: (identifier)\n", " body: (body_statement\n", - " (true)) \"end\")" + " (true)))" ), 0, source.len(), ); } + #[test] - fn highlight() { - let source = Rope::from_str(r#"assert_eq!(0, Some(0));"#); - let loader = crate::config::default_lang_loader(); - loader.set_scopes(vec!["punctuation".to_string()]); - let language = loader.language_for_name("rust").unwrap(); - - let syntax = Syntax::new(source.slice(..), language, &loader).unwrap(); - println!( - "{}", - tree_house::fixtures::highlighter_fixture( - "", - &loader, - |_| "punct".to_string(), - &syntax.inner, - source.slice(..), - .., - ) - ); + fn test_load_runtime_file() { + // Test to make sure we can load some data from the runtime directory. + let contents = load_runtime_file("rust", "indents.scm").unwrap(); + assert!(!contents.is_empty()); + + let results = load_runtime_file("rust", "does-not-exist"); + assert!(results.is_err()); } } |