Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-syntax/src/config.rs')
-rw-r--r--helix-syntax/src/config.rs257
1 files changed, 136 insertions, 121 deletions
diff --git a/helix-syntax/src/config.rs b/helix-syntax/src/config.rs
index a983d293..35774fdd 100644
--- a/helix-syntax/src/config.rs
+++ b/helix-syntax/src/config.rs
@@ -1,36 +1,38 @@
+use std::borrow::Cow;
use std::path::Path;
use std::sync::Arc;
+use crate::tree_sitter::query::{Capture, Pattern, QueryStr, UserPredicate};
+use crate::tree_sitter::{query, Grammar, Query, QueryMatch, SyntaxTreeNode};
use arc_swap::ArcSwap;
use helix_stdx::rope::{self, RopeSliceExt};
use once_cell::sync::Lazy;
use regex::Regex;
use ropey::RopeSlice;
-use tree_sitter::{Language as Grammar, Node, Query, QueryError, QueryMatch};
+use crate::byte_range_to_str;
use crate::highlighter::Highlight;
-use crate::{byte_range_to_str, IncludedChildren, InjectionLanguageMarker, SHEBANG};
/// Contains the data needed to highlight code written in a particular language.
///
/// This struct is immutable and can be shared between threads.
#[derive(Debug)]
pub struct HighlightConfiguration {
- pub language: Grammar,
+ pub grammar: Grammar,
pub query: Query,
pub(crate) injections_query: Query,
- pub(crate) combined_injections_patterns: Vec<usize>,
- pub(crate) highlights_pattern_index: usize,
- pub(crate) highlight_indices: ArcSwap<Vec<Option<Highlight>>>,
+ pub(crate) combined_injections_patterns: Vec<Pattern>,
+ first_highlights_pattern: Pattern,
+ pub(crate) highlight_indices: ArcSwap<Vec<Highlight>>,
pub(crate) non_local_variable_patterns: Vec<bool>,
- pub(crate) injection_content_capture_index: Option<u32>,
- pub(crate) injection_language_capture_index: Option<u32>,
- pub(crate) injection_filename_capture_index: Option<u32>,
- pub(crate) injection_shebang_capture_index: Option<u32>,
- pub(crate) local_scope_capture_index: Option<u32>,
- pub(crate) local_def_capture_index: Option<u32>,
- pub(crate) local_def_value_capture_index: Option<u32>,
- pub(crate) local_ref_capture_index: Option<u32>,
+ pub(crate) injection_content_capture: Option<Capture>,
+ pub(crate) injection_language_capture: Option<Capture>,
+ pub(crate) injection_filename_capture: Option<Capture>,
+ pub(crate) injection_shebang_capture: Option<Capture>,
+ pub(crate) local_scope_capture: Option<Capture>,
+ pub(crate) local_def_capture: Option<Capture>,
+ pub(crate) local_def_value_capture: Option<Capture>,
+ pub(crate) local_ref_capture: Option<Capture>,
}
impl HighlightConfiguration {
@@ -49,105 +51,89 @@ impl HighlightConfiguration {
///
/// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
pub fn new(
- language: Grammar,
+ grammar: Grammar,
+ path: impl AsRef<Path>,
highlights_query: &str,
injection_query: &str,
locals_query: &str,
- ) -> Result<Self, QueryError> {
+ ) -> Result<Self, query::ParseError> {
// Concatenate the query strings, keeping track of the start offset of each section.
let mut query_source = String::new();
query_source.push_str(locals_query);
let highlights_query_offset = query_source.len();
query_source.push_str(highlights_query);
+ let mut non_local_variable_patterns = Vec::with_capacity(32);
// Construct a single query by concatenating the three query strings, but record the
// range of pattern indices that belong to each individual string.
- let query = Query::new(&language, &query_source)?;
- let mut highlights_pattern_index = 0;
- for i in 0..(query.pattern_count()) {
- let pattern_offset = query.start_byte_for_pattern(i);
- if pattern_offset < highlights_query_offset {
- highlights_pattern_index += 1;
+ let query = Query::new(grammar, &query_source, path, |pattern, predicate| {
+ match predicate {
+ UserPredicate::IsPropertySet {
+ negate: true,
+ key: "local",
+ val: None,
+ } => {
+ if non_local_variable_patterns.len() < pattern.idx() {
+ non_local_variable_patterns.resize(pattern.idx(), false)
+ }
+ non_local_variable_patterns[pattern.idx()] = true;
+ }
+ predicate => {
+ return Err(format!("unsupported predicate {predicate}").into());
+ }
}
- }
-
- let injections_query = Query::new(&language, injection_query)?;
- let combined_injections_patterns = (0..injections_query.pattern_count())
- .filter(|&i| {
- injections_query
- .property_settings(i)
- .iter()
- .any(|s| &*s.key == "injection.combined")
- })
- .collect();
-
- // Find all of the highlighting patterns that are disabled for nodes that
- // have been identified as local variables.
- let non_local_variable_patterns = (0..query.pattern_count())
- .map(|i| {
- query
- .property_predicates(i)
- .iter()
- .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local")
- })
- .collect();
+ Ok(())
+ })?;
- // Store the numeric ids for all of the special captures.
- let mut injection_content_capture_index = None;
- let mut injection_language_capture_index = None;
- let mut injection_filename_capture_index = None;
- let mut injection_shebang_capture_index = None;
- let mut local_def_capture_index = None;
- let mut local_def_value_capture_index = None;
- let mut local_ref_capture_index = None;
- let mut local_scope_capture_index = None;
- for (i, name) in query.capture_names().iter().enumerate() {
- let i = Some(i as u32);
- match *name {
- "local.definition" => local_def_capture_index = i,
- "local.definition-value" => local_def_value_capture_index = i,
- "local.reference" => local_ref_capture_index = i,
- "local.scope" => local_scope_capture_index = i,
- _ => {}
+ let mut combined_injections_patterns = Vec::new();
+ let injections_query = Query::new(grammar, injection_query, path, |pattern, predicate| {
+ match predicate {
+ UserPredicate::SetProperty {
+ key: "injection.combined",
+ val: None,
+ } => combined_injections_patterns.push(pattern),
+ predicate => {
+ return Err(format!("unsupported predicate {predicate}").into());
+ }
}
- }
+ Ok(())
+ })?;
- for (i, name) in injections_query.capture_names().iter().enumerate() {
- let i = Some(i as u32);
- match *name {
- "injection.content" => injection_content_capture_index = i,
- "injection.language" => injection_language_capture_index = i,
- "injection.filename" => injection_filename_capture_index = i,
- "injection.shebang" => injection_shebang_capture_index = i,
- _ => {}
- }
- }
+ let first_highlights_pattern = query
+ .patterns()
+ .find(|pattern| query.start_byte_for_pattern(*pattern) >= highlights_query_offset)
+ .unwrap_or(Pattern::SENTINEL);
+
+ let injection_content_capture = query.get_capture("injection.content");
+ let injection_language_capture = query.get_capture("injection.language");
+ let injection_filename_capture = query.get_capture("injection.filename");
+ let injection_shebang_capture = query.get_capture("injection.shebang");
+ let local_def_capture = query.get_capture("local.definition");
+ let local_def_value_capture = query.get_capture("local.definition-value");
+ let local_ref_capture = query.get_capture("local.reference");
+ let local_scope_capture = query.get_capture("local.scope");
- let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]);
+ let highlight_indices =
+ ArcSwap::from_pointee(vec![Highlight::NONE; query.num_captures() as usize]);
Ok(Self {
- language,
+ grammar,
query,
injections_query,
combined_injections_patterns,
- highlights_pattern_index,
+ first_highlights_pattern,
highlight_indices,
non_local_variable_patterns,
- injection_content_capture_index,
- injection_language_capture_index,
- injection_filename_capture_index,
- injection_shebang_capture_index,
- local_scope_capture_index,
- local_def_capture_index,
- local_def_value_capture_index,
- local_ref_capture_index,
+ injection_content_capture,
+ injection_language_capture,
+ injection_filename_capture,
+ injection_shebang_capture,
+ local_scope_capture,
+ local_def_capture,
+ local_def_value_capture,
+ local_ref_capture,
})
}
- /// Get a slice containing all of the highlight names used in the configuration.
- pub fn names(&self) -> &[&str] {
- self.query.capture_names()
- }
-
/// Set the list of recognized highlight names.
///
/// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
@@ -162,13 +148,12 @@ impl HighlightConfiguration {
let mut capture_parts = Vec::new();
let indices: Vec<_> = self
.query
- .capture_names()
- .iter()
- .map(move |capture_name| {
+ .captures()
+ .map(move |(_, capture_name)| {
capture_parts.clear();
capture_parts.extend(capture_name.split('.'));
- let mut best_index = None;
+ let mut best_index = u32::MAX;
let mut best_match_len = 0;
for (i, recognized_name) in recognized_names.iter().enumerate() {
let mut len = 0;
@@ -183,11 +168,11 @@ impl HighlightConfiguration {
}
}
if matches && len > best_match_len {
- best_index = Some(i);
+ best_index = i as u32;
best_match_len = len;
}
}
- best_index.map(Highlight)
+ Highlight(best_index)
})
.collect();
@@ -198,21 +183,24 @@ impl HighlightConfiguration {
&self,
query_match: &QueryMatch<'a, 'a>,
source: RopeSlice<'a>,
- ) -> (Option<InjectionLanguageMarker<'a>>, Option<Node<'a>>) {
+ ) -> (
+ Option<InjectionLanguageMarker<'a>>,
+ Option<SyntaxTreeNode<'a>>,
+ ) {
let mut injection_capture = None;
let mut content_node = None;
- for capture in query_match.captures {
- let index = Some(capture.index);
- if index == self.injection_language_capture_index {
- let name = byte_range_to_str(capture.node.byte_range(), source);
+ for matched_node in query_match.matched_nodes() {
+ let capture = Some(matched_node.capture);
+ if capture == self.injection_language_capture {
+ let name = byte_range_to_str(matched_node.syntax_node.byte_range(), source);
injection_capture = Some(InjectionLanguageMarker::Name(name));
- } else if index == self.injection_filename_capture_index {
- let name = byte_range_to_str(capture.node.byte_range(), source);
+ } else if capture == self.injection_filename_capture {
+ let name = byte_range_to_str(matched_node.syntax_node.byte_range(), source);
let path = Path::new(name.as_ref()).to_path_buf();
injection_capture = Some(InjectionLanguageMarker::Filename(path.into()));
- } else if index == self.injection_shebang_capture_index {
- let node_slice = source.byte_slice(capture.node.byte_range());
+ } else if capture == self.injection_shebang_capture {
+ let node_slice = source.byte_slice(matched_node.syntax_node.byte_range());
// some languages allow space and newlines before the actual string content
// so a shebang could be on either the first or second line
@@ -222,9 +210,6 @@ impl HighlightConfiguration {
node_slice
};
- static SHEBANG_REGEX: Lazy<rope::Regex> =
- Lazy::new(|| rope::Regex::new(SHEBANG).unwrap());
-
injection_capture = SHEBANG_REGEX
.captures_iter(lines.regex_input())
.map(|cap| {
@@ -232,8 +217,8 @@ impl HighlightConfiguration {
InjectionLanguageMarker::Shebang(cap.into())
})
.next()
- } else if index == self.injection_content_capture_index {
- content_node = Some(capture.node);
+ } else if capture == self.injection_content_capture {
+ content_node = Some(matched_node.syntax_node.clone());
}
}
(injection_capture, content_node)
@@ -246,7 +231,7 @@ impl HighlightConfiguration {
source: RopeSlice<'a>,
) -> (
Option<InjectionLanguageMarker<'a>>,
- Option<Node<'a>>,
+ Option<SyntaxTreeNode<'a>>,
IncludedChildren,
) {
let (mut injection_capture, content_node) = self.injection_pair(query_match, source);
@@ -282,18 +267,20 @@ impl HighlightConfiguration {
(injection_capture, content_node, included_children)
}
- pub fn load_query(
- &self,
- language: &str,
- filename: &str,
- read_query_text: impl FnMut(&str, &str) -> String,
- ) -> Result<Option<Query>, QueryError> {
- let query_text = read_query(language, filename, read_query_text);
- if query_text.is_empty() {
- return Ok(None);
- }
- Query::new(&self.language, &query_text).map(Some)
- }
+
+ // pub fn load_query(
+ // &self,
+ // language: &str,
+ // filename: &str,
+ // read_query_text: impl FnMut(&str, &str) -> String,
+ // ) -> Result<Option<Query>, QueryError> {
+ // let query_text = read_query(language, filename, read_query_text);
+ // if query_text.is_empty() {
+ // return Ok(None);
+ // }
+
+ // Query::new(&self.grammar, &query_text, ).map(Some)
+ // }
}
/// reads a query by invoking `read_query_text`, handeles any `inherits` directives
@@ -329,3 +316,31 @@ pub fn read_query(
}
read_query_impl(language, filename, &mut read_query_text)
}
+
+const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
+static SHEBANG_REGEX: Lazy<rope::Regex> = Lazy::new(|| rope::Regex::new(SHEBANG).unwrap());
+
+struct InjectionSettings {
+ include_children: IncludedChildren,
+ language: Option<QueryStr>,
+}
+
+#[derive(Debug, Clone)]
+pub enum InjectionLanguageMarker<'a> {
+ Name(Cow<'a, str>),
+ Filename(Cow<'a, Path>),
+ Shebang(String),
+}
+
+#[derive(Clone)]
+enum IncludedChildren {
+ None,
+ All,
+ Unnamed,
+}
+
+impl Default for IncludedChildren {
+ fn default() -> Self {
+ Self::None
+ }
+}