Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-syntax/src/config.rs')
| -rw-r--r-- | helix-syntax/src/config.rs | 257 |
1 files changed, 136 insertions, 121 deletions
diff --git a/helix-syntax/src/config.rs b/helix-syntax/src/config.rs index a983d293..35774fdd 100644 --- a/helix-syntax/src/config.rs +++ b/helix-syntax/src/config.rs @@ -1,36 +1,38 @@ +use std::borrow::Cow; use std::path::Path; use std::sync::Arc; +use crate::tree_sitter::query::{Capture, Pattern, QueryStr, UserPredicate}; +use crate::tree_sitter::{query, Grammar, Query, QueryMatch, SyntaxTreeNode}; use arc_swap::ArcSwap; use helix_stdx::rope::{self, RopeSliceExt}; use once_cell::sync::Lazy; use regex::Regex; use ropey::RopeSlice; -use tree_sitter::{Language as Grammar, Node, Query, QueryError, QueryMatch}; +use crate::byte_range_to_str; use crate::highlighter::Highlight; -use crate::{byte_range_to_str, IncludedChildren, InjectionLanguageMarker, SHEBANG}; /// Contains the data needed to highlight code written in a particular language. /// /// This struct is immutable and can be shared between threads. #[derive(Debug)] pub struct HighlightConfiguration { - pub language: Grammar, + pub grammar: Grammar, pub query: Query, pub(crate) injections_query: Query, - pub(crate) combined_injections_patterns: Vec<usize>, - pub(crate) highlights_pattern_index: usize, - pub(crate) highlight_indices: ArcSwap<Vec<Option<Highlight>>>, + pub(crate) combined_injections_patterns: Vec<Pattern>, + first_highlights_pattern: Pattern, + pub(crate) highlight_indices: ArcSwap<Vec<Highlight>>, pub(crate) non_local_variable_patterns: Vec<bool>, - pub(crate) injection_content_capture_index: Option<u32>, - pub(crate) injection_language_capture_index: Option<u32>, - pub(crate) injection_filename_capture_index: Option<u32>, - pub(crate) injection_shebang_capture_index: Option<u32>, - pub(crate) local_scope_capture_index: Option<u32>, - pub(crate) local_def_capture_index: Option<u32>, - pub(crate) local_def_value_capture_index: Option<u32>, - pub(crate) local_ref_capture_index: Option<u32>, + pub(crate) injection_content_capture: Option<Capture>, + pub(crate) injection_language_capture: Option<Capture>, + pub(crate) injection_filename_capture: Option<Capture>, + pub(crate) injection_shebang_capture: Option<Capture>, + pub(crate) local_scope_capture: Option<Capture>, + pub(crate) local_def_capture: Option<Capture>, + pub(crate) local_def_value_capture: Option<Capture>, + pub(crate) local_ref_capture: Option<Capture>, } impl HighlightConfiguration { @@ -49,105 +51,89 @@ impl HighlightConfiguration { /// /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method. pub fn new( - language: Grammar, + grammar: Grammar, + path: impl AsRef<Path>, highlights_query: &str, injection_query: &str, locals_query: &str, - ) -> Result<Self, QueryError> { + ) -> Result<Self, query::ParseError> { // Concatenate the query strings, keeping track of the start offset of each section. let mut query_source = String::new(); query_source.push_str(locals_query); let highlights_query_offset = query_source.len(); query_source.push_str(highlights_query); + let mut non_local_variable_patterns = Vec::with_capacity(32); // Construct a single query by concatenating the three query strings, but record the // range of pattern indices that belong to each individual string. - let query = Query::new(&language, &query_source)?; - let mut highlights_pattern_index = 0; - for i in 0..(query.pattern_count()) { - let pattern_offset = query.start_byte_for_pattern(i); - if pattern_offset < highlights_query_offset { - highlights_pattern_index += 1; + let query = Query::new(grammar, &query_source, path, |pattern, predicate| { + match predicate { + UserPredicate::IsPropertySet { + negate: true, + key: "local", + val: None, + } => { + if non_local_variable_patterns.len() < pattern.idx() { + non_local_variable_patterns.resize(pattern.idx(), false) + } + non_local_variable_patterns[pattern.idx()] = true; + } + predicate => { + return Err(format!("unsupported predicate {predicate}").into()); + } } - } - - let injections_query = Query::new(&language, injection_query)?; - let combined_injections_patterns = (0..injections_query.pattern_count()) - .filter(|&i| { - injections_query - .property_settings(i) - .iter() - .any(|s| &*s.key == "injection.combined") - }) - .collect(); - - // Find all of the highlighting patterns that are disabled for nodes that - // have been identified as local variables. - let non_local_variable_patterns = (0..query.pattern_count()) - .map(|i| { - query - .property_predicates(i) - .iter() - .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local") - }) - .collect(); + Ok(()) + })?; - // Store the numeric ids for all of the special captures. - let mut injection_content_capture_index = None; - let mut injection_language_capture_index = None; - let mut injection_filename_capture_index = None; - let mut injection_shebang_capture_index = None; - let mut local_def_capture_index = None; - let mut local_def_value_capture_index = None; - let mut local_ref_capture_index = None; - let mut local_scope_capture_index = None; - for (i, name) in query.capture_names().iter().enumerate() { - let i = Some(i as u32); - match *name { - "local.definition" => local_def_capture_index = i, - "local.definition-value" => local_def_value_capture_index = i, - "local.reference" => local_ref_capture_index = i, - "local.scope" => local_scope_capture_index = i, - _ => {} + let mut combined_injections_patterns = Vec::new(); + let injections_query = Query::new(grammar, injection_query, path, |pattern, predicate| { + match predicate { + UserPredicate::SetProperty { + key: "injection.combined", + val: None, + } => combined_injections_patterns.push(pattern), + predicate => { + return Err(format!("unsupported predicate {predicate}").into()); + } } - } + Ok(()) + })?; - for (i, name) in injections_query.capture_names().iter().enumerate() { - let i = Some(i as u32); - match *name { - "injection.content" => injection_content_capture_index = i, - "injection.language" => injection_language_capture_index = i, - "injection.filename" => injection_filename_capture_index = i, - "injection.shebang" => injection_shebang_capture_index = i, - _ => {} - } - } + let first_highlights_pattern = query + .patterns() + .find(|pattern| query.start_byte_for_pattern(*pattern) >= highlights_query_offset) + .unwrap_or(Pattern::SENTINEL); + + let injection_content_capture = query.get_capture("injection.content"); + let injection_language_capture = query.get_capture("injection.language"); + let injection_filename_capture = query.get_capture("injection.filename"); + let injection_shebang_capture = query.get_capture("injection.shebang"); + let local_def_capture = query.get_capture("local.definition"); + let local_def_value_capture = query.get_capture("local.definition-value"); + let local_ref_capture = query.get_capture("local.reference"); + let local_scope_capture = query.get_capture("local.scope"); - let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]); + let highlight_indices = + ArcSwap::from_pointee(vec![Highlight::NONE; query.num_captures() as usize]); Ok(Self { - language, + grammar, query, injections_query, combined_injections_patterns, - highlights_pattern_index, + first_highlights_pattern, highlight_indices, non_local_variable_patterns, - injection_content_capture_index, - injection_language_capture_index, - injection_filename_capture_index, - injection_shebang_capture_index, - local_scope_capture_index, - local_def_capture_index, - local_def_value_capture_index, - local_ref_capture_index, + injection_content_capture, + injection_language_capture, + injection_filename_capture, + injection_shebang_capture, + local_scope_capture, + local_def_capture, + local_def_value_capture, + local_ref_capture, }) } - /// Get a slice containing all of the highlight names used in the configuration. - pub fn names(&self) -> &[&str] { - self.query.capture_names() - } - /// Set the list of recognized highlight names. /// /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated @@ -162,13 +148,12 @@ impl HighlightConfiguration { let mut capture_parts = Vec::new(); let indices: Vec<_> = self .query - .capture_names() - .iter() - .map(move |capture_name| { + .captures() + .map(move |(_, capture_name)| { capture_parts.clear(); capture_parts.extend(capture_name.split('.')); - let mut best_index = None; + let mut best_index = u32::MAX; let mut best_match_len = 0; for (i, recognized_name) in recognized_names.iter().enumerate() { let mut len = 0; @@ -183,11 +168,11 @@ impl HighlightConfiguration { } } if matches && len > best_match_len { - best_index = Some(i); + best_index = i as u32; best_match_len = len; } } - best_index.map(Highlight) + Highlight(best_index) }) .collect(); @@ -198,21 +183,24 @@ impl HighlightConfiguration { &self, query_match: &QueryMatch<'a, 'a>, source: RopeSlice<'a>, - ) -> (Option<InjectionLanguageMarker<'a>>, Option<Node<'a>>) { + ) -> ( + Option<InjectionLanguageMarker<'a>>, + Option<SyntaxTreeNode<'a>>, + ) { let mut injection_capture = None; let mut content_node = None; - for capture in query_match.captures { - let index = Some(capture.index); - if index == self.injection_language_capture_index { - let name = byte_range_to_str(capture.node.byte_range(), source); + for matched_node in query_match.matched_nodes() { + let capture = Some(matched_node.capture); + if capture == self.injection_language_capture { + let name = byte_range_to_str(matched_node.syntax_node.byte_range(), source); injection_capture = Some(InjectionLanguageMarker::Name(name)); - } else if index == self.injection_filename_capture_index { - let name = byte_range_to_str(capture.node.byte_range(), source); + } else if capture == self.injection_filename_capture { + let name = byte_range_to_str(matched_node.syntax_node.byte_range(), source); let path = Path::new(name.as_ref()).to_path_buf(); injection_capture = Some(InjectionLanguageMarker::Filename(path.into())); - } else if index == self.injection_shebang_capture_index { - let node_slice = source.byte_slice(capture.node.byte_range()); + } else if capture == self.injection_shebang_capture { + let node_slice = source.byte_slice(matched_node.syntax_node.byte_range()); // some languages allow space and newlines before the actual string content // so a shebang could be on either the first or second line @@ -222,9 +210,6 @@ impl HighlightConfiguration { node_slice }; - static SHEBANG_REGEX: Lazy<rope::Regex> = - Lazy::new(|| rope::Regex::new(SHEBANG).unwrap()); - injection_capture = SHEBANG_REGEX .captures_iter(lines.regex_input()) .map(|cap| { @@ -232,8 +217,8 @@ impl HighlightConfiguration { InjectionLanguageMarker::Shebang(cap.into()) }) .next() - } else if index == self.injection_content_capture_index { - content_node = Some(capture.node); + } else if capture == self.injection_content_capture { + content_node = Some(matched_node.syntax_node.clone()); } } (injection_capture, content_node) @@ -246,7 +231,7 @@ impl HighlightConfiguration { source: RopeSlice<'a>, ) -> ( Option<InjectionLanguageMarker<'a>>, - Option<Node<'a>>, + Option<SyntaxTreeNode<'a>>, IncludedChildren, ) { let (mut injection_capture, content_node) = self.injection_pair(query_match, source); @@ -282,18 +267,20 @@ impl HighlightConfiguration { (injection_capture, content_node, included_children) } - pub fn load_query( - &self, - language: &str, - filename: &str, - read_query_text: impl FnMut(&str, &str) -> String, - ) -> Result<Option<Query>, QueryError> { - let query_text = read_query(language, filename, read_query_text); - if query_text.is_empty() { - return Ok(None); - } - Query::new(&self.language, &query_text).map(Some) - } + + // pub fn load_query( + // &self, + // language: &str, + // filename: &str, + // read_query_text: impl FnMut(&str, &str) -> String, + // ) -> Result<Option<Query>, QueryError> { + // let query_text = read_query(language, filename, read_query_text); + // if query_text.is_empty() { + // return Ok(None); + // } + + // Query::new(&self.grammar, &query_text, ).map(Some) + // } } /// reads a query by invoking `read_query_text`, handeles any `inherits` directives @@ -329,3 +316,31 @@ pub fn read_query( } read_query_impl(language, filename, &mut read_query_text) } + +const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)"; +static SHEBANG_REGEX: Lazy<rope::Regex> = Lazy::new(|| rope::Regex::new(SHEBANG).unwrap()); + +struct InjectionSettings { + include_children: IncludedChildren, + language: Option<QueryStr>, +} + +#[derive(Debug, Clone)] +pub enum InjectionLanguageMarker<'a> { + Name(Cow<'a, str>), + Filename(Cow<'a, Path>), + Shebang(String), +} + +#[derive(Clone)] +enum IncludedChildren { + None, + All, + Unnamed, +} + +impl Default for IncludedChildren { + fn default() -> Self { + Self::None + } +} |