Unnamed repository; edit this file 'description' to name the repository.
Pascal Kuthe 2024-07-27
parent f7686d7 · commit ae0d418
-rw-r--r--helix-core/src/syntax.rs2
-rw-r--r--helix-syntax/src/config.rs257
-rw-r--r--helix-syntax/src/highlighter.rs877
-rw-r--r--helix-syntax/src/highlighter2.rs206
-rw-r--r--helix-syntax/src/injections_tree.rs268
-rw-r--r--helix-syntax/src/lib.rs355
-rw-r--r--helix-syntax/src/parse.rs828
-rw-r--r--helix-syntax/src/query_iter.rs236
-rw-r--r--helix-syntax/src/ropey.rs28
-rw-r--r--helix-syntax/src/text_object.rs93
-rw-r--r--helix-syntax/src/tree_sitter.rs10
-rw-r--r--helix-syntax/src/tree_sitter/query.rs93
-rw-r--r--helix-syntax/src/tree_sitter/query/predicate.rs57
-rw-r--r--helix-syntax/src/tree_sitter/query/property.rs5
-rw-r--r--helix-syntax/src/tree_sitter/query_cursor.rs32
15 files changed, 1823 insertions, 1524 deletions
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index e9451ed4..e2d862f4 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -246,7 +246,7 @@ impl LanguageConfiguration {
if query_text.is_empty() {
return None;
}
- let lang = &self.highlight_config.get()?.as_ref()?.language;
+ let lang = &self.highlight_config.get()?.as_ref()?.grammar;
Query::new(lang, &query_text)
.map_err(|e| {
log::error!(
diff --git a/helix-syntax/src/config.rs b/helix-syntax/src/config.rs
index a983d293..35774fdd 100644
--- a/helix-syntax/src/config.rs
+++ b/helix-syntax/src/config.rs
@@ -1,36 +1,38 @@
+use std::borrow::Cow;
use std::path::Path;
use std::sync::Arc;
+use crate::tree_sitter::query::{Capture, Pattern, QueryStr, UserPredicate};
+use crate::tree_sitter::{query, Grammar, Query, QueryMatch, SyntaxTreeNode};
use arc_swap::ArcSwap;
use helix_stdx::rope::{self, RopeSliceExt};
use once_cell::sync::Lazy;
use regex::Regex;
use ropey::RopeSlice;
-use tree_sitter::{Language as Grammar, Node, Query, QueryError, QueryMatch};
+use crate::byte_range_to_str;
use crate::highlighter::Highlight;
-use crate::{byte_range_to_str, IncludedChildren, InjectionLanguageMarker, SHEBANG};
/// Contains the data needed to highlight code written in a particular language.
///
/// This struct is immutable and can be shared between threads.
#[derive(Debug)]
pub struct HighlightConfiguration {
- pub language: Grammar,
+ pub grammar: Grammar,
pub query: Query,
pub(crate) injections_query: Query,
- pub(crate) combined_injections_patterns: Vec<usize>,
- pub(crate) highlights_pattern_index: usize,
- pub(crate) highlight_indices: ArcSwap<Vec<Option<Highlight>>>,
+ pub(crate) combined_injections_patterns: Vec<Pattern>,
+ first_highlights_pattern: Pattern,
+ pub(crate) highlight_indices: ArcSwap<Vec<Highlight>>,
pub(crate) non_local_variable_patterns: Vec<bool>,
- pub(crate) injection_content_capture_index: Option<u32>,
- pub(crate) injection_language_capture_index: Option<u32>,
- pub(crate) injection_filename_capture_index: Option<u32>,
- pub(crate) injection_shebang_capture_index: Option<u32>,
- pub(crate) local_scope_capture_index: Option<u32>,
- pub(crate) local_def_capture_index: Option<u32>,
- pub(crate) local_def_value_capture_index: Option<u32>,
- pub(crate) local_ref_capture_index: Option<u32>,
+ pub(crate) injection_content_capture: Option<Capture>,
+ pub(crate) injection_language_capture: Option<Capture>,
+ pub(crate) injection_filename_capture: Option<Capture>,
+ pub(crate) injection_shebang_capture: Option<Capture>,
+ pub(crate) local_scope_capture: Option<Capture>,
+ pub(crate) local_def_capture: Option<Capture>,
+ pub(crate) local_def_value_capture: Option<Capture>,
+ pub(crate) local_ref_capture: Option<Capture>,
}
impl HighlightConfiguration {
@@ -49,105 +51,89 @@ impl HighlightConfiguration {
///
/// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
pub fn new(
- language: Grammar,
+ grammar: Grammar,
+ path: impl AsRef<Path>,
highlights_query: &str,
injection_query: &str,
locals_query: &str,
- ) -> Result<Self, QueryError> {
+ ) -> Result<Self, query::ParseError> {
// Concatenate the query strings, keeping track of the start offset of each section.
let mut query_source = String::new();
query_source.push_str(locals_query);
let highlights_query_offset = query_source.len();
query_source.push_str(highlights_query);
+ let mut non_local_variable_patterns = Vec::with_capacity(32);
        // Construct a single query by concatenating the two query strings (locals, then
        // highlights), but record the range of pattern indices that belong to each string.
- let query = Query::new(&language, &query_source)?;
- let mut highlights_pattern_index = 0;
- for i in 0..(query.pattern_count()) {
- let pattern_offset = query.start_byte_for_pattern(i);
- if pattern_offset < highlights_query_offset {
- highlights_pattern_index += 1;
+        let query = Query::new(grammar, &query_source, path.as_ref(), |pattern, predicate| {
+ match predicate {
+ UserPredicate::IsPropertySet {
+ negate: true,
+ key: "local",
+ val: None,
+ } => {
+                if non_local_variable_patterns.len() <= pattern.idx() {
+                    non_local_variable_patterns.resize(pattern.idx() + 1, false)
+                }
+                non_local_variable_patterns[pattern.idx()] = true;
+ }
+ predicate => {
+ return Err(format!("unsupported predicate {predicate}").into());
+ }
}
- }
-
- let injections_query = Query::new(&language, injection_query)?;
- let combined_injections_patterns = (0..injections_query.pattern_count())
- .filter(|&i| {
- injections_query
- .property_settings(i)
- .iter()
- .any(|s| &*s.key == "injection.combined")
- })
- .collect();
-
- // Find all of the highlighting patterns that are disabled for nodes that
- // have been identified as local variables.
- let non_local_variable_patterns = (0..query.pattern_count())
- .map(|i| {
- query
- .property_predicates(i)
- .iter()
- .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local")
- })
- .collect();
+ Ok(())
+ })?;
- // Store the numeric ids for all of the special captures.
- let mut injection_content_capture_index = None;
- let mut injection_language_capture_index = None;
- let mut injection_filename_capture_index = None;
- let mut injection_shebang_capture_index = None;
- let mut local_def_capture_index = None;
- let mut local_def_value_capture_index = None;
- let mut local_ref_capture_index = None;
- let mut local_scope_capture_index = None;
- for (i, name) in query.capture_names().iter().enumerate() {
- let i = Some(i as u32);
- match *name {
- "local.definition" => local_def_capture_index = i,
- "local.definition-value" => local_def_value_capture_index = i,
- "local.reference" => local_ref_capture_index = i,
- "local.scope" => local_scope_capture_index = i,
- _ => {}
+ let mut combined_injections_patterns = Vec::new();
+ let injections_query = Query::new(grammar, injection_query, path, |pattern, predicate| {
+ match predicate {
+ UserPredicate::SetProperty {
+ key: "injection.combined",
+ val: None,
+ } => combined_injections_patterns.push(pattern),
+ predicate => {
+ return Err(format!("unsupported predicate {predicate}").into());
+ }
}
- }
+ Ok(())
+ })?;
- for (i, name) in injections_query.capture_names().iter().enumerate() {
- let i = Some(i as u32);
- match *name {
- "injection.content" => injection_content_capture_index = i,
- "injection.language" => injection_language_capture_index = i,
- "injection.filename" => injection_filename_capture_index = i,
- "injection.shebang" => injection_shebang_capture_index = i,
- _ => {}
- }
- }
+ let first_highlights_pattern = query
+ .patterns()
+ .find(|pattern| query.start_byte_for_pattern(*pattern) >= highlights_query_offset)
+ .unwrap_or(Pattern::SENTINEL);
+
+ let injection_content_capture = query.get_capture("injection.content");
+ let injection_language_capture = query.get_capture("injection.language");
+ let injection_filename_capture = query.get_capture("injection.filename");
+ let injection_shebang_capture = query.get_capture("injection.shebang");
+ let local_def_capture = query.get_capture("local.definition");
+ let local_def_value_capture = query.get_capture("local.definition-value");
+ let local_ref_capture = query.get_capture("local.reference");
+ let local_scope_capture = query.get_capture("local.scope");
- let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]);
+ let highlight_indices =
+ ArcSwap::from_pointee(vec![Highlight::NONE; query.num_captures() as usize]);
Ok(Self {
- language,
+ grammar,
query,
injections_query,
combined_injections_patterns,
- highlights_pattern_index,
+ first_highlights_pattern,
highlight_indices,
non_local_variable_patterns,
- injection_content_capture_index,
- injection_language_capture_index,
- injection_filename_capture_index,
- injection_shebang_capture_index,
- local_scope_capture_index,
- local_def_capture_index,
- local_def_value_capture_index,
- local_ref_capture_index,
+ injection_content_capture,
+ injection_language_capture,
+ injection_filename_capture,
+ injection_shebang_capture,
+ local_scope_capture,
+ local_def_capture,
+ local_def_value_capture,
+ local_ref_capture,
})
}
- /// Get a slice containing all of the highlight names used in the configuration.
- pub fn names(&self) -> &[&str] {
- self.query.capture_names()
- }
-
/// Set the list of recognized highlight names.
///
/// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
@@ -162,13 +148,12 @@ impl HighlightConfiguration {
let mut capture_parts = Vec::new();
let indices: Vec<_> = self
.query
- .capture_names()
- .iter()
- .map(move |capture_name| {
+ .captures()
+ .map(move |(_, capture_name)| {
capture_parts.clear();
capture_parts.extend(capture_name.split('.'));
- let mut best_index = None;
+ let mut best_index = u32::MAX;
let mut best_match_len = 0;
for (i, recognized_name) in recognized_names.iter().enumerate() {
let mut len = 0;
@@ -183,11 +168,11 @@ impl HighlightConfiguration {
}
}
if matches && len > best_match_len {
- best_index = Some(i);
+ best_index = i as u32;
best_match_len = len;
}
}
- best_index.map(Highlight)
+ Highlight(best_index)
})
.collect();
@@ -198,21 +183,24 @@ impl HighlightConfiguration {
&self,
query_match: &QueryMatch<'a, 'a>,
source: RopeSlice<'a>,
- ) -> (Option<InjectionLanguageMarker<'a>>, Option<Node<'a>>) {
+ ) -> (
+ Option<InjectionLanguageMarker<'a>>,
+ Option<SyntaxTreeNode<'a>>,
+ ) {
let mut injection_capture = None;
let mut content_node = None;
- for capture in query_match.captures {
- let index = Some(capture.index);
- if index == self.injection_language_capture_index {
- let name = byte_range_to_str(capture.node.byte_range(), source);
+ for matched_node in query_match.matched_nodes() {
+ let capture = Some(matched_node.capture);
+ if capture == self.injection_language_capture {
+ let name = byte_range_to_str(matched_node.syntax_node.byte_range(), source);
injection_capture = Some(InjectionLanguageMarker::Name(name));
- } else if index == self.injection_filename_capture_index {
- let name = byte_range_to_str(capture.node.byte_range(), source);
+ } else if capture == self.injection_filename_capture {
+ let name = byte_range_to_str(matched_node.syntax_node.byte_range(), source);
let path = Path::new(name.as_ref()).to_path_buf();
injection_capture = Some(InjectionLanguageMarker::Filename(path.into()));
- } else if index == self.injection_shebang_capture_index {
- let node_slice = source.byte_slice(capture.node.byte_range());
+ } else if capture == self.injection_shebang_capture {
+ let node_slice = source.byte_slice(matched_node.syntax_node.byte_range());
// some languages allow space and newlines before the actual string content
// so a shebang could be on either the first or second line
@@ -222,9 +210,6 @@ impl HighlightConfiguration {
node_slice
};
- static SHEBANG_REGEX: Lazy<rope::Regex> =
- Lazy::new(|| rope::Regex::new(SHEBANG).unwrap());
-
injection_capture = SHEBANG_REGEX
.captures_iter(lines.regex_input())
.map(|cap| {
@@ -232,8 +217,8 @@ impl HighlightConfiguration {
InjectionLanguageMarker::Shebang(cap.into())
})
.next()
- } else if index == self.injection_content_capture_index {
- content_node = Some(capture.node);
+ } else if capture == self.injection_content_capture {
+ content_node = Some(matched_node.syntax_node.clone());
}
}
(injection_capture, content_node)
@@ -246,7 +231,7 @@ impl HighlightConfiguration {
source: RopeSlice<'a>,
) -> (
Option<InjectionLanguageMarker<'a>>,
- Option<Node<'a>>,
+ Option<SyntaxTreeNode<'a>>,
IncludedChildren,
) {
let (mut injection_capture, content_node) = self.injection_pair(query_match, source);
@@ -282,18 +267,20 @@ impl HighlightConfiguration {
(injection_capture, content_node, included_children)
}
- pub fn load_query(
- &self,
- language: &str,
- filename: &str,
- read_query_text: impl FnMut(&str, &str) -> String,
- ) -> Result<Option<Query>, QueryError> {
- let query_text = read_query(language, filename, read_query_text);
- if query_text.is_empty() {
- return Ok(None);
- }
- Query::new(&self.language, &query_text).map(Some)
- }
+
+ // pub fn load_query(
+ // &self,
+ // language: &str,
+ // filename: &str,
+ // read_query_text: impl FnMut(&str, &str) -> String,
+ // ) -> Result<Option<Query>, QueryError> {
+ // let query_text = read_query(language, filename, read_query_text);
+ // if query_text.is_empty() {
+ // return Ok(None);
+ // }
+
+ // Query::new(&self.grammar, &query_text, ).map(Some)
+ // }
}
/// Reads a query by invoking `read_query_text`, handles any `inherits` directives
@@ -329,3 +316,31 @@ pub fn read_query(
}
read_query_impl(language, filename, &mut read_query_text)
}
+
+const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
+static SHEBANG_REGEX: Lazy<rope::Regex> = Lazy::new(|| rope::Regex::new(SHEBANG).unwrap());
+
+struct InjectionSettings {
+ include_children: IncludedChildren,
+ language: Option<QueryStr>,
+}
+
+#[derive(Debug, Clone)]
+pub enum InjectionLanguageMarker<'a> {
+ Name(Cow<'a, str>),
+ Filename(Cow<'a, Path>),
+ Shebang(String),
+}
+
+#[derive(Clone)]
+enum IncludedChildren {
+ None,
+ All,
+ Unnamed,
+}
+
+impl Default for IncludedChildren {
+ fn default() -> Self {
+ Self::None
+ }
+}
diff --git a/helix-syntax/src/highlighter.rs b/helix-syntax/src/highlighter.rs
index 1b53672f..8801b13f 100644
--- a/helix-syntax/src/highlighter.rs
+++ b/helix-syntax/src/highlighter.rs
@@ -1,439 +1,438 @@
-use std::borrow::Cow;
-use std::cell::RefCell;
-use std::sync::atomic::{self, AtomicUsize};
-use std::{fmt, iter, mem, ops};
-
-use ropey::RopeSlice;
-use tree_sitter::{QueryCaptures, QueryCursor, Tree};
-
-use crate::ropey::RopeProvider;
-use crate::{
- byte_range_to_str, Error, HighlightConfiguration, Syntax, PARSER, TREE_SITTER_MATCH_LIMIT,
-};
-
-const CANCELLATION_CHECK_INTERVAL: usize = 100;
-
-/// Indicates which highlight should be applied to a region of source code.
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub struct Highlight(pub usize);
-
-/// Represents a single step in rendering a syntax-highlighted document.
-#[derive(Copy, Clone, Debug)]
-pub enum HighlightEvent {
- Source { start: usize, end: usize },
- HighlightStart(Highlight),
- HighlightEnd,
-}
-
-#[derive(Debug)]
-struct LocalDef<'a> {
- name: Cow<'a, str>,
- value_range: ops::Range<usize>,
- highlight: Option<Highlight>,
-}
-
-#[derive(Debug)]
-struct LocalScope<'a> {
- inherits: bool,
- range: ops::Range<usize>,
- local_defs: Vec<LocalDef<'a>>,
-}
-
-#[derive(Debug)]
-struct HighlightIter<'a> {
- source: RopeSlice<'a>,
- byte_offset: usize,
- cancellation_flag: Option<&'a AtomicUsize>,
- layers: Vec<HighlightIterLayer<'a>>,
- iter_count: usize,
- next_event: Option<HighlightEvent>,
- last_highlight_range: Option<(usize, usize, u32)>,
-}
-
-struct HighlightIterLayer<'a> {
- _tree: Option<Tree>,
- cursor: QueryCursor,
- captures: RefCell<iter::Peekable<QueryCaptures<'a, 'a, RopeProvider<'a>, &'a [u8]>>>,
- config: &'a HighlightConfiguration,
- highlight_end_stack: Vec<usize>,
- scope_stack: Vec<LocalScope<'a>>,
- depth: u32,
-}
-
-impl<'a> fmt::Debug for HighlightIterLayer<'a> {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- f.debug_struct("HighlightIterLayer").finish()
- }
-}
-
-impl<'a> HighlightIterLayer<'a> {
- // First, sort scope boundaries by their byte offset in the document. At a
- // given position, emit scope endings before scope beginnings. Finally, emit
- // scope boundaries from deeper layers first.
- fn sort_key(&self) -> Option<(usize, bool, isize)> {
- let depth = -(self.depth as isize);
- let next_start = self
- .captures
- .borrow_mut()
- .peek()
- .map(|(m, i)| m.captures[*i].node.start_byte());
- let next_end = self.highlight_end_stack.last().cloned();
- match (next_start, next_end) {
- (Some(start), Some(end)) => {
- if start < end {
- Some((start, true, depth))
- } else {
- Some((end, false, depth))
- }
- }
- (Some(i), None) => Some((i, true, depth)),
- (None, Some(j)) => Some((j, false, depth)),
- _ => None,
- }
- }
-}
-
-impl<'a> HighlightIter<'a> {
- fn emit_event(
- &mut self,
- offset: usize,
- event: Option<HighlightEvent>,
- ) -> Option<Result<HighlightEvent, Error>> {
- let result;
- if self.byte_offset < offset {
- result = Some(Ok(HighlightEvent::Source {
- start: self.byte_offset,
- end: offset,
- }));
- self.byte_offset = offset;
- self.next_event = event;
- } else {
- result = event.map(Ok);
- }
- self.sort_layers();
- result
- }
-
- fn sort_layers(&mut self) {
- while !self.layers.is_empty() {
- if let Some(sort_key) = self.layers[0].sort_key() {
- let mut i = 0;
- while i + 1 < self.layers.len() {
- if let Some(next_offset) = self.layers[i + 1].sort_key() {
- if next_offset < sort_key {
- i += 1;
- continue;
- }
- } else {
- let layer = self.layers.remove(i + 1);
- PARSER.with(|ts_parser| {
- let highlighter = &mut ts_parser.borrow_mut();
- highlighter.cursors.push(layer.cursor);
- });
- }
- break;
- }
- if i > 0 {
- self.layers[0..(i + 1)].rotate_left(1);
- }
- break;
- } else {
- let layer = self.layers.remove(0);
- PARSER.with(|ts_parser| {
- let highlighter = &mut ts_parser.borrow_mut();
- highlighter.cursors.push(layer.cursor);
- });
- }
- }
- }
-}
-
-impl<'a> Iterator for HighlightIter<'a> {
- type Item = Result<HighlightEvent, Error>;
-
- fn next(&mut self) -> Option<Self::Item> {
- 'main: loop {
- // If we've already determined the next highlight boundary, just return it.
- if let Some(e) = self.next_event.take() {
- return Some(Ok(e));
- }
-
- // Periodically check for cancellation, returning `Cancelled` error if the
- // cancellation flag was flipped.
- if let Some(cancellation_flag) = self.cancellation_flag {
- self.iter_count += 1;
- if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
- self.iter_count = 0;
- if cancellation_flag.load(atomic::Ordering::Relaxed) != 0 {
- return Some(Err(Error::Cancelled));
- }
- }
- }
-
- // If none of the layers have any more highlight boundaries, terminate.
- if self.layers.is_empty() {
- let len = self.source.len_bytes();
- return if self.byte_offset < len {
- let result = Some(Ok(HighlightEvent::Source {
- start: self.byte_offset,
- end: len,
- }));
- self.byte_offset = len;
- result
- } else {
- None
- };
- }
-
- // Get the next capture from whichever layer has the earliest highlight boundary.
- let range;
- let layer = &mut self.layers[0];
- let captures = layer.captures.get_mut();
- if let Some((next_match, capture_index)) = captures.peek() {
- let next_capture = next_match.captures[*capture_index];
- range = next_capture.node.byte_range();
-
- // If any previous highlight ends before this node starts, then before
- // processing this capture, emit the source code up until the end of the
- // previous highlight, and an end event for that highlight.
- if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
- if end_byte <= range.start {
- layer.highlight_end_stack.pop();
- return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
- }
- }
- }
- // If there are no more captures, then emit any remaining highlight end events.
- // And if there are none of those, then just advance to the end of the document.
- else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
- layer.highlight_end_stack.pop();
- return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
- } else {
- return self.emit_event(self.source.len_bytes(), None);
- };
-
- let (mut match_, capture_index) = captures.next().unwrap();
- let mut capture = match_.captures[capture_index];
-
- // Remove from the local scope stack any local scopes that have already ended.
- while range.start > layer.scope_stack.last().unwrap().range.end {
- layer.scope_stack.pop();
- }
-
- // If this capture is for tracking local variables, then process the
- // local variable info.
- let mut reference_highlight = None;
- let mut definition_highlight = None;
- while match_.pattern_index < layer.config.highlights_pattern_index {
- // If the node represents a local scope, push a new local scope onto
- // the scope stack.
- if Some(capture.index) == layer.config.local_scope_capture_index {
- definition_highlight = None;
- let mut scope = LocalScope {
- inherits: true,
- range: range.clone(),
- local_defs: Vec::new(),
- };
- for prop in layer.config.query.property_settings(match_.pattern_index) {
- if let "local.scope-inherits" = prop.key.as_ref() {
- scope.inherits =
- prop.value.as_ref().map_or(true, |r| r.as_ref() == "true");
- }
- }
- layer.scope_stack.push(scope);
- }
- // If the node represents a definition, add a new definition to the
- // local scope at the top of the scope stack.
- else if Some(capture.index) == layer.config.local_def_capture_index {
- reference_highlight = None;
- let scope = layer.scope_stack.last_mut().unwrap();
-
- let mut value_range = 0..0;
- for capture in match_.captures {
- if Some(capture.index) == layer.config.local_def_value_capture_index {
- value_range = capture.node.byte_range();
- }
- }
-
- let name = byte_range_to_str(range.clone(), self.source);
- scope.local_defs.push(LocalDef {
- name,
- value_range,
- highlight: None,
- });
- definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight);
- }
- // If the node represents a reference, then try to find the corresponding
- // definition in the scope stack.
- else if Some(capture.index) == layer.config.local_ref_capture_index
- && definition_highlight.is_none()
- {
- definition_highlight = None;
- let name = byte_range_to_str(range.clone(), self.source);
- for scope in layer.scope_stack.iter().rev() {
- if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
- if def.name == name && range.start >= def.value_range.end {
- Some(def.highlight)
- } else {
- None
- }
- }) {
- reference_highlight = highlight;
- break;
- }
- if !scope.inherits {
- break;
- }
- }
- }
-
- // Continue processing any additional matches for the same node.
- if let Some((next_match, next_capture_index)) = captures.peek() {
- let next_capture = next_match.captures[*next_capture_index];
- if next_capture.node == capture.node {
- capture = next_capture;
- match_ = captures.next().unwrap().0;
- continue;
- }
- }
-
- self.sort_layers();
- continue 'main;
- }
-
- // Otherwise, this capture must represent a highlight.
- // If this exact range has already been highlighted by an earlier pattern, or by
- // a different layer, then skip over this one.
- if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
- if range.start == last_start && range.end == last_end && layer.depth < last_depth {
- self.sort_layers();
- continue 'main;
- }
- }
-
- // If the current node was found to be a local variable, then skip over any
- // highlighting patterns that are disabled for local variables.
- if definition_highlight.is_some() || reference_highlight.is_some() {
- while layer.config.non_local_variable_patterns[match_.pattern_index] {
- match_.remove();
- if let Some((next_match, next_capture_index)) = captures.peek() {
- let next_capture = next_match.captures[*next_capture_index];
- if next_capture.node == capture.node {
- capture = next_capture;
- match_ = captures.next().unwrap().0;
- continue;
- }
- }
-
- self.sort_layers();
- continue 'main;
- }
- }
-
- // Once a highlighting pattern is found for the current node, skip over
- // any later highlighting patterns that also match this node. Captures
- // for a given node are ordered by pattern index, so these subsequent
- // captures are guaranteed to be for highlighting, not injections or
- // local variables.
- while let Some((next_match, next_capture_index)) = captures.peek() {
- let next_capture = next_match.captures[*next_capture_index];
- if next_capture.node == capture.node {
- captures.next();
- } else {
- break;
- }
- }
-
- let current_highlight = layer.config.highlight_indices.load()[capture.index as usize];
-
- // If this node represents a local definition, then store the current
- // highlight value on the local scope entry representing this node.
- if let Some(definition_highlight) = definition_highlight {
- *definition_highlight = current_highlight;
- }
-
- // Emit a scope start event and push the node's end position to the stack.
- if let Some(highlight) = reference_highlight.or(current_highlight) {
- self.last_highlight_range = Some((range.start, range.end, layer.depth));
- layer.highlight_end_stack.push(range.end);
- return self
- .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
- }
-
- self.sort_layers();
- }
- }
-}
-
-impl Syntax {
- /// Iterate over the highlighted regions for a given slice of source code.
- pub fn highlight_iter<'a>(
- &'a self,
- source: RopeSlice<'a>,
- range: Option<std::ops::Range<usize>>,
- cancellation_flag: Option<&'a AtomicUsize>,
- ) -> impl Iterator<Item = Result<HighlightEvent, Error>> + 'a {
- let mut layers = self
- .layers
- .iter()
- .filter_map(|(_, layer)| {
- // TODO: if range doesn't overlap layer range, skip it
-
- // Reuse a cursor from the pool if available.
- let mut cursor = PARSER.with(|ts_parser| {
- let highlighter = &mut ts_parser.borrow_mut();
- highlighter.cursors.pop().unwrap_or_else(QueryCursor::new)
- });
-
- // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
- // prevents them from being moved. But both of these values are really just
- // pointers, so it's actually ok to move them.
- let cursor_ref =
- unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
-
- // if reusing cursors & no range this resets to whole range
- cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX));
- cursor_ref.set_match_limit(TREE_SITTER_MATCH_LIMIT);
-
- let mut captures = cursor_ref
- .captures(
- &layer.config.query,
- layer.tree().root_node(),
- RopeProvider(source),
- )
- .peekable();
-
- // If there's no captures, skip the layer
- captures.peek()?;
-
- Some(HighlightIterLayer {
- highlight_end_stack: Vec::new(),
- scope_stack: vec![LocalScope {
- inherits: false,
- range: 0..usize::MAX,
- local_defs: Vec::new(),
- }],
- cursor,
- _tree: None,
- captures: RefCell::new(captures),
- config: layer.config.as_ref(), // TODO: just reuse `layer`
- depth: layer.depth, // TODO: just reuse `layer`
- })
- })
- .collect::<Vec<_>>();
-
- layers.sort_unstable_by_key(|layer| layer.sort_key());
-
- let mut result = HighlightIter {
- source,
- byte_offset: range.map_or(0, |r| r.start),
- cancellation_flag,
- iter_count: 0,
- layers,
- next_event: None,
- last_highlight_range: None,
- };
- result.sort_layers();
- result
- }
-}
+pub use super::highlighter2::*;
+
+// use std::borrow::Cow;
+// use std::cell::RefCell;
+// use std::sync::atomic::{self, AtomicUsize};
+// use std::{fmt, iter, mem, ops};
+
+// use ropey::RopeSlice;
+// use tree_sitter::{QueryCaptures, QueryCursor, Tree};
+
+// use crate::{byte_range_to_str, Error, HighlightConfiguration, Syntax, TREE_SITTER_MATCH_LIMIT};
+
+// const CANCELLATION_CHECK_INTERVAL: usize = 100;
+
+// /// Indicates which highlight should be applied to a region of source code.
+// #[derive(Copy, Clone, Debug, PartialEq, Eq)]
+// pub struct Highlight(pub usize);
+
+// /// Represents a single step in rendering a syntax-highlighted document.
+// #[derive(Copy, Clone, Debug)]
+// pub enum HighlightEvent {
+// Source { start: usize, end: usize },
+// HighlightStart(Highlight),
+// HighlightEnd,
+// }
+
+// #[derive(Debug)]
+// struct LocalDef<'a> {
+// name: Cow<'a, str>,
+// value_range: ops::Range<usize>,
+// highlight: Option<Highlight>,
+// }
+
+// #[derive(Debug)]
+// struct LocalScope<'a> {
+// inherits: bool,
+// range: ops::Range<usize>,
+// local_defs: Vec<LocalDef<'a>>,
+// }
+
+// #[derive(Debug)]
+// struct HighlightIter<'a> {
+// source: RopeSlice<'a>,
+// byte_offset: usize,
+// cancellation_flag: Option<&'a AtomicUsize>,
+// layers: Vec<HighlightIterLayer<'a>>,
+// iter_count: usize,
+// next_event: Option<HighlightEvent>,
+// last_highlight_range: Option<(usize, usize, u32)>,
+// }
+
+// struct HighlightIterLayer<'a> {
+// _tree: Option<Tree>,
+// cursor: QueryCursor,
+// captures: RefCell<iter::Peekable<QueryCaptures<'a, 'a, RopeProvider<'a>, &'a [u8]>>>,
+// config: &'a HighlightConfiguration,
+// highlight_end_stack: Vec<usize>,
+// scope_stack: Vec<LocalScope<'a>>,
+// depth: u32,
+// }
+
+// impl<'a> fmt::Debug for HighlightIterLayer<'a> {
+// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+// f.debug_struct("HighlightIterLayer").finish()
+// }
+// }
+
+// impl<'a> HighlightIterLayer<'a> {
+// // First, sort scope boundaries by their byte offset in the document. At a
+// // given position, emit scope endings before scope beginnings. Finally, emit
+// // scope boundaries from deeper layers first.
+// fn sort_key(&self) -> Option<(usize, bool, isize)> {
+// let depth = -(self.depth as isize);
+// let next_start = self
+// .captures
+// .borrow_mut()
+// .peek()
+// .map(|(m, i)| m.captures[*i].node.start_byte());
+// let next_end = self.highlight_end_stack.last().cloned();
+// match (next_start, next_end) {
+// (Some(start), Some(end)) => {
+// if start < end {
+// Some((start, true, depth))
+// } else {
+// Some((end, false, depth))
+// }
+// }
+// (Some(i), None) => Some((i, true, depth)),
+// (None, Some(j)) => Some((j, false, depth)),
+// _ => None,
+// }
+// }
+// }
+
+// impl<'a> HighlightIter<'a> {
+// fn emit_event(
+// &mut self,
+// offset: usize,
+// event: Option<HighlightEvent>,
+// ) -> Option<Result<HighlightEvent, Error>> {
+// let result;
+// if self.byte_offset < offset {
+// result = Some(Ok(HighlightEvent::Source {
+// start: self.byte_offset,
+// end: offset,
+// }));
+// self.byte_offset = offset;
+// self.next_event = event;
+// } else {
+// result = event.map(Ok);
+// }
+// self.sort_layers();
+// result
+// }
+
+// fn sort_layers(&mut self) {
+// while !self.layers.is_empty() {
+// if let Some(sort_key) = self.layers[0].sort_key() {
+// let mut i = 0;
+// while i + 1 < self.layers.len() {
+// if let Some(next_offset) = self.layers[i + 1].sort_key() {
+// if next_offset < sort_key {
+// i += 1;
+// continue;
+// }
+// } else {
+// let layer = self.layers.remove(i + 1);
+// PARSER.with(|ts_parser| {
+// let highlighter = &mut ts_parser.borrow_mut();
+// highlighter.cursors.push(layer.cursor);
+// });
+// }
+// break;
+// }
+// if i > 0 {
+// self.layers[0..(i + 1)].rotate_left(1);
+// }
+// break;
+// } else {
+// let layer = self.layers.remove(0);
+// PARSER.with(|ts_parser| {
+// let highlighter = &mut ts_parser.borrow_mut();
+// highlighter.cursors.push(layer.cursor);
+// });
+// }
+// }
+// }
+// }
+
+// impl<'a> Iterator for HighlightIter<'a> {
+// type Item = Result<HighlightEvent, Error>;
+
+// fn next(&mut self) -> Option<Self::Item> {
+// 'main: loop {
+// // If we've already determined the next highlight boundary, just return it.
+// if let Some(e) = self.next_event.take() {
+// return Some(Ok(e));
+// }
+
+// // Periodically check for cancellation, returning `Cancelled` error if the
+// // cancellation flag was flipped.
+// if let Some(cancellation_flag) = self.cancellation_flag {
+// self.iter_count += 1;
+// if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
+// self.iter_count = 0;
+// if cancellation_flag.load(atomic::Ordering::Relaxed) != 0 {
+// return Some(Err(Error::Cancelled));
+// }
+// }
+// }
+
+// // If none of the layers have any more highlight boundaries, terminate.
+// if self.layers.is_empty() {
+// let len = self.source.len_bytes();
+// return if self.byte_offset < len {
+// let result = Some(Ok(HighlightEvent::Source {
+// start: self.byte_offset,
+// end: len,
+// }));
+// self.byte_offset = len;
+// result
+// } else {
+// None
+// };
+// }
+
+// // Get the next capture from whichever layer has the earliest highlight boundary.
+// let range;
+// let layer = &mut self.layers[0];
+// let captures = layer.captures.get_mut();
+// if let Some((next_match, capture_index)) = captures.peek() {
+// let next_capture = next_match.captures[*capture_index];
+// range = next_capture.node.byte_range();
+
+// // If any previous highlight ends before this node starts, then before
+// // processing this capture, emit the source code up until the end of the
+// // previous highlight, and an end event for that highlight.
+// if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
+// if end_byte <= range.start {
+// layer.highlight_end_stack.pop();
+// return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
+// }
+// }
+// }
+// // If there are no more captures, then emit any remaining highlight end events.
+// // And if there are none of those, then just advance to the end of the document.
+// else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
+// layer.highlight_end_stack.pop();
+// return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
+// } else {
+// return self.emit_event(self.source.len_bytes(), None);
+// };
+
+// let (mut match_, capture_index) = captures.next().unwrap();
+// let mut capture = match_.captures[capture_index];
+
+// // Remove from the local scope stack any local scopes that have already ended.
+// while range.start > layer.scope_stack.last().unwrap().range.end {
+// layer.scope_stack.pop();
+// }
+
+// // If this capture is for tracking local variables, then process the
+// // local variable info.
+// let mut reference_highlight = None;
+// let mut definition_highlight = None;
+// while match_.pattern_index < layer.config.highlights_pattern_index {
+// // If the node represents a local scope, push a new local scope onto
+// // the scope stack.
+// if Some(capture.index) == layer.config.local_scope_capture_index {
+// definition_highlight = None;
+// let mut scope = LocalScope {
+// inherits: true,
+// range: range.clone(),
+// local_defs: Vec::new(),
+// };
+// for prop in layer.config.query.property_settings(match_.pattern_index) {
+// if let "local.scope-inherits" = prop.key.as_ref() {
+// scope.inherits =
+// prop.value.as_ref().map_or(true, |r| r.as_ref() == "true");
+// }
+// }
+// layer.scope_stack.push(scope);
+// }
+// // If the node represents a definition, add a new definition to the
+// // local scope at the top of the scope stack.
+// else if Some(capture.index) == layer.config.local_def_capture_index {
+// reference_highlight = None;
+// let scope = layer.scope_stack.last_mut().unwrap();
+
+// let mut value_range = 0..0;
+// for capture in match_.captures {
+// if Some(capture.index) == layer.config.local_def_value_capture_index {
+// value_range = capture.node.byte_range();
+// }
+// }
+
+// let name = byte_range_to_str(range.clone(), self.source);
+// scope.local_defs.push(LocalDef {
+// name,
+// value_range,
+// highlight: None,
+// });
+// definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight);
+// }
+// // If the node represents a reference, then try to find the corresponding
+// // definition in the scope stack.
+// else if Some(capture.index) == layer.config.local_ref_capture_index
+// && definition_highlight.is_none()
+// {
+// definition_highlight = None;
+// let name = byte_range_to_str(range.clone(), self.source);
+// for scope in layer.scope_stack.iter().rev() {
+// if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
+// if def.name == name && range.start >= def.value_range.end {
+// Some(def.highlight)
+// } else {
+// None
+// }
+// }) {
+// reference_highlight = highlight;
+// break;
+// }
+// if !scope.inherits {
+// break;
+// }
+// }
+// }
+
+// // Continue processing any additional matches for the same node.
+// if let Some((next_match, next_capture_index)) = captures.peek() {
+// let next_capture = next_match.captures[*next_capture_index];
+// if next_capture.node == capture.node {
+// capture = next_capture;
+// match_ = captures.next().unwrap().0;
+// continue;
+// }
+// }
+
+// self.sort_layers();
+// continue 'main;
+// }
+
+// // Otherwise, this capture must represent a highlight.
+// // If this exact range has already been highlighted by an earlier pattern, or by
+// // a different layer, then skip over this one.
+// if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
+// if range.start == last_start && range.end == last_end && layer.depth < last_depth {
+// self.sort_layers();
+// continue 'main;
+// }
+// }
+
+// // If the current node was found to be a local variable, then skip over any
+// // highlighting patterns that are disabled for local variables.
+// if definition_highlight.is_some() || reference_highlight.is_some() {
+// while layer.config.non_local_variable_patterns[match_.pattern_index] {
+// match_.remove();
+// if let Some((next_match, next_capture_index)) = captures.peek() {
+// let next_capture = next_match.captures[*next_capture_index];
+// if next_capture.node == capture.node {
+// capture = next_capture;
+// match_ = captures.next().unwrap().0;
+// continue;
+// }
+// }
+
+// self.sort_layers();
+// continue 'main;
+// }
+// }
+
+// // Once a highlighting pattern is found for the current node, skip over
+// // any later highlighting patterns that also match this node. Captures
+// // for a given node are ordered by pattern index, so these subsequent
+// // captures are guaranteed to be for highlighting, not injections or
+// // local variables.
+// while let Some((next_match, next_capture_index)) = captures.peek() {
+// let next_capture = next_match.captures[*next_capture_index];
+// if next_capture.node == capture.node {
+// captures.next();
+// } else {
+// break;
+// }
+// }
+
+// let current_highlight = layer.config.highlight_indices.load()[capture.index as usize];
+
+// // If this node represents a local definition, then store the current
+// // highlight value on the local scope entry representing this node.
+// if let Some(definition_highlight) = definition_highlight {
+// *definition_highlight = current_highlight;
+// }
+
+// // Emit a scope start event and push the node's end position to the stack.
+// if let Some(highlight) = reference_highlight.or(current_highlight) {
+// self.last_highlight_range = Some((range.start, range.end, layer.depth));
+// layer.highlight_end_stack.push(range.end);
+// return self
+// .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
+// }
+
+// self.sort_layers();
+// }
+// }
+// }
+
+// impl Syntax {
+// /// Iterate over the highlighted regions for a given slice of source code.
+// pub fn highlight_iter<'a>(
+// &'a self,
+// source: RopeSlice<'a>,
+// range: Option<std::ops::Range<usize>>,
+// cancellation_flag: Option<&'a AtomicUsize>,
+// ) -> impl Iterator<Item = Result<HighlightEvent, Error>> + 'a {
+// let mut layers = self
+// .layers
+// .iter()
+// .filter_map(|(_, layer)| {
+// // TODO: if range doesn't overlap layer range, skip it
+
+// // Reuse a cursor from the pool if available.
+// let mut cursor = PARSER.with(|ts_parser| {
+// let highlighter = &mut ts_parser.borrow_mut();
+// highlighter.cursors.pop().unwrap_or_else(QueryCursor::new)
+// });
+
+// // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
+// // prevents them from being moved. But both of these values are really just
+// // pointers, so it's actually ok to move them.
+// let cursor_ref =
+// unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
+
+// // if reusing cursors & no range this resets to whole range
+// cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX));
+// cursor_ref.set_match_limit(TREE_SITTER_MATCH_LIMIT);
+
+// let mut captures = cursor_ref
+// .captures(
+// &layer.config.query,
+// layer.tree().root_node(),
+// RopeProvider(source),
+// )
+// .peekable();
+
+// // If there's no captures, skip the layer
+// captures.peek()?;
+
+// Some(HighlightIterLayer {
+// highlight_end_stack: Vec::new(),
+// scope_stack: vec![LocalScope {
+// inherits: false,
+// range: 0..usize::MAX,
+// local_defs: Vec::new(),
+// }],
+// cursor,
+// _tree: None,
+// captures: RefCell::new(captures),
+// config: layer.config.as_ref(), // TODO: just reuse `layer`
+// depth: layer.depth, // TODO: just reuse `layer`
+// })
+// })
+// .collect::<Vec<_>>();
+
+// layers.sort_unstable_by_key(|layer| layer.sort_key());
+
+// let mut result = HighlightIter {
+// source,
+// byte_offset: range.map_or(0, |r| r.start),
+// cancellation_flag,
+// iter_count: 0,
+// layers,
+// next_event: None,
+// last_highlight_range: None,
+// };
+// result.sort_layers();
+// result
+// }
+// }
diff --git a/helix-syntax/src/highlighter2.rs b/helix-syntax/src/highlighter2.rs
new file mode 100644
index 00000000..9abe5726
--- /dev/null
+++ b/helix-syntax/src/highlighter2.rs
@@ -0,0 +1,206 @@
+use std::borrow::Cow;
+use std::iter::{self, Peekable};
+use std::mem::{replace, take};
+use std::slice;
+
+use hashbrown::HashMap;
+
+use crate::query_iter::{MatchedNode, QueryIter, QueryIterEvent};
+use crate::{Injection, LayerId, Range, Syntax};
+
+/// Indicates which highlight should be applied to a region of source code.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct Highlight(pub u32);
+impl Highlight{
+ pub(crate) const NONE: Highlight = Highlight(u32::MAX);
+}
+
+#[derive(Debug)]
+struct LocalDef<'a> {
+ name: Cow<'a, str>,
+ value_range: Range,
+ highlight: Option<Highlight>,
+}
+
+#[derive(Debug)]
+struct LocalScope<'a> {
+ inherits: bool,
+ range: Range,
+ local_defs: Vec<LocalDef<'a>>,
+}
+
+#[derive(Debug)]
+struct HighlightedNode {
+ end: u32,
+ highlight: Highlight,
+}
+
+#[derive(Debug, Default)]
+struct LayerData<'a> {
+ parent_highlights: usize,
+ dormant_highlights: Vec<HighlightedNode>,
+ scope_stack: Vec<LocalDef<'a>>,
+}
+
+struct HighlighterConfig<'a> {
+ new_precedance: bool,
+ highlight_indices: &'a [Highlight],
+}
+
+pub struct Highligther<'a> {
+ query: QueryIter<'a, LayerData<'a>>,
+ next_query_event: Option<QueryIterEvent<LayerData<'a>>>,
+ active_highlights: Vec<HighlightedNode>,
+ next_highlight_end: u32,
+ next_highlight_start: u32,
+ config: HighlighterConfig<'a>,
+}
+
+pub struct HighlightList<'a>(slice::Iter<'a, HighlightedNode>);
+
+impl<'a> Iterator for HighlightList<'a> {
+ type Item = Highlight;
+
+ fn next(&mut self) -> Option<Highlight> {
+ self.0.next().map(|node| node.highlight)
+ }
+}
+
+pub enum HighlighEvent<'a> {
+ RefreshHiglights(HighlightList<'a>),
+ PushHighlights(HighlightList<'a>),
+}
+
+impl<'a> Highligther<'a> {
+ pub fn active_highlights(&self) -> HighlightList<'_> {
+ HighlightList(self.active_highlights.iter())
+ }
+
+ pub fn next_event_offset(&self) -> u32 {
+ self.next_highlight_start.min(self.next_highlight_end)
+ }
+
+ pub fn advance(&mut self) -> HighlighEvent<'_> {
+ let mut refresh = false;
+ let prev_stack_size = self.active_highlights.len();
+
+ let pos = self.next_event_offset();
+ if self.next_highlight_end == pos {
+ self.process_injection_ends();
+ self.process_higlight_end();
+ refresh = true;
+ }
+
+ let mut first_highlight = true;
+ while self.next_highlight_start == pos {
+ let Some(query_event) = self.adance_query_iter() else {
+ break;
+ };
+ match query_event {
+ QueryIterEvent::EnterInjection(_) => self.enter_injection(),
+ QueryIterEvent::Match(node) => self.start_highlight(node, &mut first_highlight),
+ QueryIterEvent::ExitInjection { injection, state } => {
+ // state is returned if the layer is finished; if it isn't, we have
+ // a combined injection and need to deactivate its highlights
+ if state.is_none() {
+ self.deactive_layer(injection.layer);
+ refresh = true;
+ }
+ }
+ }
+ }
+ self.next_highlight_end = self
+ .active_highlights
+ .last()
+ .map_or(u32::MAX, |node| node.end);
+
+ if refresh {
+ HighlighEvent::RefreshHiglights(HighlightList(self.active_highlights.iter()))
+ } else {
+ HighlighEvent::PushHighlights(HighlightList(
+ self.active_highlights[prev_stack_size..].iter(),
+ ))
+ }
+ }
+
+ fn adance_query_iter(&mut self) -> Option<QueryIterEvent<LayerData<'a>>> {
+ let event = replace(&mut self.next_query_event, self.query.next());
+ self.next_highlight_start = self
+ .next_query_event
+ .as_ref()
+ .map_or(u32::MAX, |event| event.start());
+ event
+ }
+
+ fn process_higlight_end(&mut self) {
+ let i = self
+ .active_highlights
+ .iter()
+ .rposition(|highlight| highlight.end != self.next_highlight_end)
+ .unwrap();
+ self.active_highlights.truncate(i);
+ }
+
+ /// Processes injections that end at the same position as highlights first.
+ fn process_injection_ends(&mut self) {
+ while self.next_highlight_end == self.next_highlight_start {
+ match self.next_query_event.as_ref() {
+ Some(QueryIterEvent::ExitInjection { injection, state }) => {
+ if state.is_none() {
+ self.deactive_layer(injection.layer);
+ }
+ }
+ Some(QueryIterEvent::Match(matched_node)) if matched_node.byte_range.is_empty() => {
+ }
+ _ => break,
+ }
+ }
+ }
+
+ fn enter_injection(&mut self) {
+ self.query.current_layer_state().parent_highlights = self.active_highlights.len();
+ }
+
+ fn deactive_layer(&mut self, layer: LayerId) {
+ let LayerData {
+ parent_highlights,
+ ref mut dormant_highlights,
+ ..
+ } = *self.query.layer_state(layer);
+ let i = self.active_highlights[parent_highlights..]
+ .iter()
+ .rposition(|highlight| highlight.end != self.next_highlight_end)
+ .unwrap();
+ self.active_highlights.truncate(parent_highlights + i);
+ dormant_highlights.extend(self.active_highlights.drain(parent_highlights..))
+ }
+
+ fn start_highlight(&mut self, node: MatchedNode, first_highlight: &mut bool) {
+ if node.byte_range.is_empty() {
+ return;
+ }
+
+ // If there are multiple matches for the exact same node,
+ // only use one of them (the last with new/nvim precedence).
+ if !*first_highlight
+ && self.active_highlights.last().map_or(false, |prev_node| {
+ prev_node.end == node.byte_range.end as u32
+ })
+ {
+ if self.config.new_precedance {
+ self.active_highlights.pop();
+ } else {
+ return;
+ }
+ }
+ let highlight = self.config.highlight_indices[node.capture.idx()];
+ if highlight.0 == u32::MAX {
+ return;
+ }
+ self.active_highlights.push(HighlightedNode {
+ end: node.byte_range.end as u32,
+ highlight,
+ });
+ *first_highlight = false;
+ }
+}
diff --git a/helix-syntax/src/injections_tree.rs b/helix-syntax/src/injections_tree.rs
deleted file mode 100644
index 2290a0e6..00000000
--- a/helix-syntax/src/injections_tree.rs
+++ /dev/null
@@ -1,268 +0,0 @@
-use core::slice;
-use std::cell::RefCell;
-use std::iter::Peekable;
-use std::mem::replace;
-use std::sync::Arc;
-
-use hashbrown::HashMap;
-use ropey::RopeSlice;
-use slotmap::{new_key_type, SlotMap};
-
-use crate::parse::LayerUpdateFlags;
-use crate::tree_sitter::{
- self, Capture, InactiveQueryCursor, Parser, Query, QueryCursor, RopeTsInput, SyntaxTree,
- SyntaxTreeNode,
-};
-use crate::HighlightConfiguration;
-
-// TODO(perf): replace std::ops::Range<usize> with helix_stdx::Range<u32> once added
-type Range = std::ops::Range<usize>;
-
-new_key_type! {
- /// The default slot map key type.
- pub struct LayerId;
-}
-
-#[derive(Debug)]
-pub struct LanguageLayer {
- pub config: Arc<HighlightConfiguration>,
- pub(crate) parse_tree: Option<SyntaxTree>,
- /// internal flags used during parsing to track incremental invalidation
- pub(crate) flags: LayerUpdateFlags,
- ranges: Vec<tree_sitter::Range>,
- pub(crate) parent: Option<LayerId>,
- /// a list of **sorted** non-overlapping injection ranges. Note that
- /// injection ranges are not relative to the start of this layer but the
- /// start of the root layer
- pub(crate) injections: Box<[Injection]>,
-}
-
-#[derive(Debug, Clone)]
-pub(crate) struct Injection {
- pub byte_range: Range,
- pub layer: LayerId,
-}
-
-impl LanguageLayer {
- /// Returns the injection range **within this layers** that contains `idx`.
- /// This function will not descend into nested injections
- pub(crate) fn injection_at_byte_idx(&self, idx: usize) -> Option<&Injection> {
- let i = self
- .injections
- .partition_point(|range| range.byte_range.start <= idx);
- self.injections
- .get(i)
- .filter(|injection| injection.byte_range.end > idx)
- }
-}
-
-struct InjectionTree {
- layers: SlotMap<LayerId, LanguageLayer>,
- root: LayerId,
-}
-
-impl InjectionTree {
- pub fn layer_for_byte_range(&self, start: usize, end: usize) -> LayerId {
- let mut cursor = self.root;
- loop {
- let layer = &self.layers[cursor];
- let Some(start_injection) = layer.injection_at_byte_idx(start) else {
- break;
- };
- let Some(end_injection) = layer.injection_at_byte_idx(end) else {
- break;
- };
- if start_injection.layer == end_injection.layer {
- cursor = start_injection.layer;
- } else {
- break;
- }
- }
- cursor
- }
-}
-
-#[derive(Clone)]
-pub struct MatchedNode {
- pub capture: Capture,
- pub byte_range: Range,
-}
-
-struct LayerQueryIter<'a> {
- cursor: QueryCursor<'a, 'a, RopeTsInput<'a>>,
- peeked: Option<MatchedNode>,
-}
-
-impl<'a> LayerQueryIter<'a> {
- fn peek(&mut self) -> Option<&MatchedNode> {
- if self.peeked.is_none() {
- let (query_match, node_idx) = self.cursor.next_matched_node()?;
- let matched_node = query_match.matched_node(node_idx);
- self.peeked = Some(MatchedNode {
- capture: matched_node.capture,
- byte_range: matched_node.syntax_node.byte_range(),
- });
- }
- self.peeked.as_ref()
- }
-
- fn consume(&mut self) -> MatchedNode {
- self.peeked.take().unwrap()
- }
-}
-
-struct ActiveLayer<'a> {
- query_iter: LayerQueryIter<'a>,
- injections: Peekable<slice::Iter<'a, Injection>>,
-}
-
-struct QueryBuilder<'a, 'tree> {
- query: &'a Query,
- node: &'a SyntaxTreeNode<'tree>,
- src: RopeSlice<'a>,
- injection_tree: &'a InjectionTree,
-}
-
-pub struct QueryIter<'a, 'tree> {
- query_builder: Box<QueryBuilder<'a, 'tree>>,
- active_layers: HashMap<LayerId, ActiveLayer<'a>>,
- active_injections: Vec<Injection>,
- current_injection: Injection,
-}
-
-impl<'a> QueryIter<'a, '_> {
- fn enter_injection(&mut self, injection: Injection) -> bool {
- self.active_layers
- .entry(injection.layer)
- .or_insert_with(|| {
- let layer = &self.query_builder.injection_tree.layers[injection.layer];
- let injection_start = layer
- .injections
- .partition_point(|child| child.byte_range.start < injection.byte_range.start);
- let cursor = get_cursor().execute_query(
- self.query_builder.query,
- self.query_builder.node,
- RopeTsInput::new(self.query_builder.src),
- );
- ActiveLayer {
- query_iter: LayerQueryIter {
- cursor,
- peeked: None,
- },
- injections: layer.injections[injection_start..].iter().peekable(),
- }
- });
- let old_injection = replace(&mut self.current_injection, injection);
- self.active_injections.push(old_injection);
- true
- }
-
- fn exit_injection(&mut self) -> Option<Injection> {
- let injection = replace(&mut self.current_injection, self.active_injections.pop()?);
- let finished_layer = self.active_layers[&injection.layer]
- .query_iter
- .peeked
- .is_none();
- if finished_layer {
- let layer = self.active_layers.remove(&injection.layer).unwrap();
- reuse_cursor(layer.query_iter.cursor.reuse());
- }
- Some(injection)
- }
-}
-
-pub enum QueryIterEvent {
- EnterInjection(Injection),
- Match(MatchedNode),
- ExitInjection(Injection),
-}
-
-impl<'a> Iterator for QueryIter<'a, '_> {
- type Item = QueryIterEvent;
-
- fn next(&mut self) -> Option<QueryIterEvent> {
- loop {
- let active_layer = self
- .active_layers
- .get_mut(&self.current_injection.layer)
- .unwrap();
- let next_injection = active_layer.injections.peek().filter(|injection| {
- injection.byte_range.start < self.current_injection.byte_range.end
- });
- let next_match = active_layer.query_iter.peek().filter(|matched_node| {
- matched_node.byte_range.start < self.current_injection.byte_range.end
- });
-
- match (next_match, next_injection) {
- (None, None) => {
- return self.exit_injection().map(QueryIterEvent::ExitInjection);
- }
- (Some(_), None) => {
- // consume match
- let matched_node = active_layer.query_iter.consume();
- return Some(QueryIterEvent::Match(matched_node));
- }
- (Some(matched_node), Some(injection))
- if matched_node.byte_range.start <= injection.byte_range.end =>
- {
- // consume match
- let matched_node = active_layer.query_iter.consume();
- // ignore nodes that are overlapped by the injection
- if matched_node.byte_range.start <= injection.byte_range.start {
- return Some(QueryIterEvent::Match(matched_node));
- }
- }
- (Some(_), Some(_)) | (None, Some(_)) => {
- // consume injection
- let injection = active_layer.injections.next().unwrap();
- if self.enter_injection(injection.clone()) {
- return Some(QueryIterEvent::EnterInjection(injection.clone()));
- }
- }
- }
- }
- }
-}
-
-struct TsParser {
- parser: crate::tree_sitter::Parser,
- pub cursors: Vec<crate::tree_sitter::InactiveQueryCursor>,
-}
-
-// could also just use a pool, or a single instance?
-thread_local! {
- static PARSER: RefCell<TsParser> = RefCell::new(TsParser {
- parser: Parser::new(),
- cursors: Vec::new(),
- })
-}
-
-pub fn with_cursor<T>(f: impl FnOnce(&mut InactiveQueryCursor) -> T) -> T {
- PARSER.with(|parser| {
- let mut parser = parser.borrow_mut();
- let mut cursor = parser
- .cursors
- .pop()
- .unwrap_or_else(InactiveQueryCursor::new);
- let res = f(&mut cursor);
- parser.cursors.push(cursor);
- res
- })
-}
-
-pub fn get_cursor() -> InactiveQueryCursor {
- PARSER.with(|parser| {
- let mut parser = parser.borrow_mut();
- parser
- .cursors
- .pop()
- .unwrap_or_else(InactiveQueryCursor::new)
- })
-}
-
-pub fn reuse_cursor(cursor: InactiveQueryCursor) {
- PARSER.with(|parser| {
- let mut parser = parser.borrow_mut();
- parser.cursors.push(cursor)
- })
-}
diff --git a/helix-syntax/src/lib.rs b/helix-syntax/src/lib.rs
index b7331a3a..593fe3de 100644
--- a/helix-syntax/src/lib.rs
+++ b/helix-syntax/src/lib.rs
@@ -1,33 +1,82 @@
use ::ropey::RopeSlice;
-use ::tree_sitter::{Node, Parser, Point, Query, QueryCursor, Range, Tree};
-use slotmap::HopSlotMap;
+use slotmap::{new_key_type, HopSlotMap};
use std::borrow::Cow;
-use std::cell::RefCell;
use std::hash::{Hash, Hasher};
use std::path::Path;
use std::str;
use std::sync::Arc;
-use crate::injections_tree::LayerId;
use crate::parse::LayerUpdateFlags;
pub use crate::config::{read_query, HighlightConfiguration};
-pub use crate::ropey::RopeProvider;
-pub use merge::merge;
+use crate::tree_sitter::{SyntaxTree, SyntaxTreeNode};
pub use pretty_print::pretty_print_tree;
pub use tree_cursor::TreeCursor;
mod config;
pub mod highlighter;
-mod injections_tree;
-mod merge;
+pub mod highlighter2;
mod parse;
mod pretty_print;
-mod ropey;
+mod query_iter;
+pub mod text_object;
mod tree_cursor;
pub mod tree_sitter;
+new_key_type! {
+ /// The default slot map key type.
+ pub struct LayerId;
+}
+
+/// The maximum number of in-progress matches a TS cursor can consider at once.
+/// This is set to a constant in order to avoid performance problems for medium to large files. Set with `set_match_limit`.
+/// Using such a limit means that we lose valid captures, so there is fundamentally a tradeoff here.
+///
+///
+/// Old tree sitter versions used a limit of 32 by default until this limit was removed in version `0.19.5` (must now be set manually).
+/// However, this causes performance issues for medium to large files.
+/// In helix, this problem caused treesitter motions to take multiple seconds to complete in medium-sized rust files (3k loc).
+///
+///
+/// Neovim also encountered this problem and reintroduced this limit after it was removed upstream
+/// (see <https://github.com/neovim/neovim/issues/14897> and <https://github.com/neovim/neovim/pull/14915>).
+/// The number used here is fundamentally a tradeoff between breaking some obscure edge cases and performance.
+///
+///
+/// Neovim chose 64 for this value somewhat arbitrarily (<https://github.com/neovim/neovim/pull/18397>).
+/// 64 is too low for some languages though. In particular, it breaks some highlighting for record fields in Erlang record definitions.
+/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high.
+pub const TREE_SITTER_MATCH_LIMIT: u32 = 256;
+
+// TODO(perf): replace std::ops::Range<usize> with helix_stdx::Range<u32> once added
+type Range = std::ops::Range<usize>;
+
+/// The tree-sitter syntax tree for a single language.
+///
+/// This is really multiple nested syntax trees due to tree-sitter
+/// injections. A single syntax tree/parser is called a layer. Each layer
+/// is parsed as a single "file" by tree-sitter. There can be multiple layers
+/// for the same language. A layer corresponds to one of three things:
+/// * the root layer
+/// * a singular injection limited to a single node in its parent layer
+/// * multiple injections (multiple disjoint nodes in the parent layer) that are
+/// parsed as though they were a single uninterrupted file.
+///
+/// An injection always refers to a single node into which another layer is
+/// injected. As injections only correspond to syntax tree nodes, injections in
+/// the same layer do not intersect. However, the syntax tree in an injected
+/// layer can have nodes that intersect with nodes from the parent layer. For
+/// example:
+/// ```text
+/// layer2: | Sibling A | Sibling B (layer3) | Sibling C |
+/// layer1: | Sibling A (layer2) | Sibling B | Sibling C (layer2) |
+/// ```
+/// In this case Sibling B really spans across a "GAP" in layer2. While the syntax
+/// node can not be split up by tree-sitter directly, we can treat Sibling B as two
+/// separate injections. That is done while parsing/running the query capture. As
+/// a result the injections form a tree. Note that other queries must account for
+/// such multi-injection nodes.
#[derive(Debug)]
pub struct Syntax {
layers: HopSlotMap<LayerId, LanguageLayer>,
@@ -41,16 +90,20 @@ impl Syntax {
injection_callback: impl Fn(&InjectionLanguageMarker) -> Option<Arc<HighlightConfiguration>>,
) -> Option<Self> {
let root_layer = LanguageLayer {
- tree: None,
+ parse_tree: None,
config,
- depth: 0,
flags: LayerUpdateFlags::empty(),
- ranges: vec![Range {
+ ranges: vec![tree_sitter::Range {
start_byte: 0,
- end_byte: usize::MAX,
- start_point: Point::new(0, 0),
- end_point: Point::new(usize::MAX, usize::MAX),
- }],
+ end_byte: u32::MAX,
+ start_point: tree_sitter::Point { row: 0, col: 0 },
+ end_point: tree_sitter::Point {
+ row: u32::MAX,
+ col: u32::MAX,
+ },
+ }]
+ .into_boxed_slice(),
+ injections: Box::new([]),
parent: None,
};
@@ -70,49 +123,75 @@ impl Syntax {
Some(syntax)
}
- pub fn tree(&self) -> &Tree {
+ pub fn tree(&self) -> &SyntaxTree {
self.layers[self.root].tree()
}
- pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree {
- let mut container_id = self.root;
-
- for (layer_id, layer) in self.layers.iter() {
- if layer.depth > self.layers[container_id].depth
- && layer.contains_byte_range(start, end)
- {
- container_id = layer_id;
- }
- }
-
- self.layers[container_id].tree()
+ pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &SyntaxTree {
+ let layer = self.layer_for_byte_range(start, end);
+ self.layers[layer].tree()
}
- pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Node<'_>> {
+ pub fn named_descendant_for_byte_range(
+ &self,
+ start: usize,
+ end: usize,
+ ) -> Option<SyntaxTreeNode<'_>> {
self.tree_for_byte_range(start, end)
.root_node()
.named_descendant_for_byte_range(start, end)
}
- pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Node<'_>> {
+ pub fn descendant_for_byte_range(
+ &self,
+ start: usize,
+ end: usize,
+ ) -> Option<SyntaxTreeNode<'_>> {
self.tree_for_byte_range(start, end)
.root_node()
.descendant_for_byte_range(start, end)
}
+ pub fn layer_for_byte_range(&self, start: usize, end: usize) -> LayerId {
+ let mut cursor = self.root;
+ loop {
+ let layer = &self.layers[cursor];
+ let Some(start_injection) = layer.injection_at_byte_idx(start) else {
+ break;
+ };
+ let Some(end_injection) = layer.injection_at_byte_idx(end) else {
+ break;
+ };
+ if start_injection.layer == end_injection.layer {
+ cursor = start_injection.layer;
+ } else {
+ break;
+ }
+ }
+ cursor
+ }
+
pub fn walk(&self) -> TreeCursor<'_> {
TreeCursor::new(&self.layers, self.root)
}
}
+#[derive(Debug, Clone)]
+pub(crate) struct Injection {
+ pub byte_range: Range,
+ pub layer: LayerId,
+}
+
#[derive(Debug)]
pub struct LanguageLayer {
- // mode
- // grammar
pub config: Arc<HighlightConfiguration>,
- pub(crate) tree: Option<Tree>,
- pub ranges: Vec<Range>,
- pub depth: u32,
+ parse_tree: Option<SyntaxTree>,
+ ranges: Box<[tree_sitter::Range]>,
+ /// a list of **sorted** non-overlapping injection ranges. Note that
+ /// injection ranges are not relative to the start of this layer but the
+ /// start of the root layer
+ injections: Box<[Injection]>,
+ /// internal flags used during parsing to track incremental invalidation
flags: LayerUpdateFlags,
parent: Option<LayerId>,
}
@@ -123,8 +202,8 @@ pub struct LanguageLayer {
/// state.
impl PartialEq for LanguageLayer {
fn eq(&self, other: &Self) -> bool {
- self.depth == other.depth
- && self.config.language == other.config.language
+ self.parent == other.parent
+ && self.config.grammar == other.config.grammar
&& self.ranges == other.ranges
}
}
@@ -133,165 +212,27 @@ impl PartialEq for LanguageLayer {
/// See its documentation for details.
impl Hash for LanguageLayer {
fn hash<H: Hasher>(&self, state: &mut H) {
- self.depth.hash(state);
- self.config.language.hash(state);
+ self.parent.hash(state);
+ self.config.grammar.hash(state);
self.ranges.hash(state);
}
}
impl LanguageLayer {
- pub fn tree(&self) -> &Tree {
+ pub fn tree(&self) -> &SyntaxTree {
// TODO: no unwrap
- self.tree.as_ref().unwrap()
- }
-
- /// Whether the layer contains the given byte range.
- ///
- /// If the layer has multiple ranges (i.e. combined injections), the
- /// given range is considered contained if it is within the start and
- /// end bytes of the first and last ranges **and** if the given range
- /// starts or ends within any of the layer's ranges.
- fn contains_byte_range(&self, start: usize, end: usize) -> bool {
- let layer_start = self
- .ranges
- .first()
- .expect("ranges should not be empty")
- .start_byte;
- let layer_end = self
- .ranges
- .last()
- .expect("ranges should not be empty")
- .end_byte;
-
- layer_start <= start
- && layer_end >= end
- && self.ranges.iter().any(|range| {
- let byte_range = range.start_byte..range.end_byte;
- byte_range.contains(&start) || byte_range.contains(&end)
- })
- }
-}
-
-#[derive(Debug, Clone)]
-pub enum InjectionLanguageMarker<'a> {
- Name(Cow<'a, str>),
- Filename(Cow<'a, Path>),
- Shebang(String),
-}
-
-const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
-
-#[derive(Debug)]
-pub enum CapturedNode<'a> {
- Single(Node<'a>),
- /// Guaranteed to be not empty
- Grouped(Vec<Node<'a>>),
-}
-
-impl<'a> CapturedNode<'a> {
- pub fn start_byte(&self) -> usize {
- match self {
- Self::Single(n) => n.start_byte(),
- Self::Grouped(ns) => ns[0].start_byte(),
- }
- }
-
- pub fn end_byte(&self) -> usize {
- match self {
- Self::Single(n) => n.end_byte(),
- Self::Grouped(ns) => ns.last().unwrap().end_byte(),
- }
+ self.parse_tree.as_ref().unwrap()
}
- pub fn byte_range(&self) -> std::ops::Range<usize> {
- self.start_byte()..self.end_byte()
- }
-}
-
-/// The maximum number of in-progress matches a TS cursor can consider at once.
-/// This is set to a constant in order to avoid performance problems for medium to large files. Set with `set_match_limit`.
-/// Using such a limit means that we lose valid captures, so there is fundamentally a tradeoff here.
-///
-///
-/// Old tree sitter versions used a limit of 32 by default until this limit was removed in version `0.19.5` (must now be set manually).
-/// However, this causes performance issues for medium to large files.
-/// In helix, this problem caused treesitter motions to take multiple seconds to complete in medium-sized rust files (3k loc).
-///
-///
-/// Neovim also encountered this problem and reintroduced this limit after it was removed upstream
-/// (see <https://github.com/neovim/neovim/issues/14897> and <https://github.com/neovim/neovim/pull/14915>).
-/// The number used here is fundamentally a tradeoff between breaking some obscure edge cases and performance.
-///
-///
-/// Neovim chose 64 for this value somewhat arbitrarily (<https://github.com/neovim/neovim/pull/18397>).
-/// 64 is too low for some languages though. In particular, it breaks some highlighting for record fields in Erlang record definitions.
-/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high.
-const TREE_SITTER_MATCH_LIMIT: u32 = 256;
-
-#[derive(Debug)]
-pub struct TextObjectQuery {
- pub query: Query,
-}
-
-impl TextObjectQuery {
- /// Run the query on the given node and return sub nodes which match given
- /// capture ("function.inside", "class.around", etc).
- ///
- /// Captures may contain multiple nodes by using quantifiers (+, *, etc),
- /// and support for this is partial and could use improvement.
- ///
- /// ```query
- /// (comment)+ @capture
- ///
- /// ; OR
- /// (
- /// (comment)*
- /// .
- /// (function)
- /// ) @capture
- /// ```
- pub fn capture_nodes<'a>(
- &'a self,
- capture_name: &str,
- node: Node<'a>,
- slice: RopeSlice<'a>,
- cursor: &'a mut QueryCursor,
- ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
- self.capture_nodes_any(&[capture_name], node, slice, cursor)
- }
-
- /// Find the first capture that exists out of all given `capture_names`
- /// and return sub nodes that match this capture.
- pub fn capture_nodes_any<'a>(
- &'a self,
- capture_names: &[&str],
- node: Node<'a>,
- slice: RopeSlice<'a>,
- cursor: &'a mut QueryCursor,
- ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
- let capture_idx = capture_names
- .iter()
- .find_map(|cap| self.query.capture_index_for_name(cap))?;
-
- cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
-
- let nodes = cursor
- .captures(&self.query, node, RopeProvider(slice))
- .filter_map(move |(mat, _)| {
- let nodes: Vec<_> = mat
- .captures
- .iter()
- .filter_map(|cap| (cap.index == capture_idx).then_some(cap.node))
- .collect();
-
- if nodes.len() > 1 {
- Some(CapturedNode::Grouped(nodes))
- } else {
- nodes.into_iter().map(CapturedNode::Single).next()
- }
- });
-
- Some(nodes)
+ /// Returns the injection range **within this layer** that contains `idx`.
+ /// This function will not descend into nested injections
+ pub(crate) fn injection_at_byte_idx(&self, idx: usize) -> Option<&Injection> {
+ let i = self
+ .injections
+ .partition_point(|range| range.byte_range.start < idx);
+ self.injections
+ .get(i)
+ .filter(|injection| injection.byte_range.end > idx)
}
}
@@ -304,42 +245,6 @@ pub enum Error {
Unknown,
}
-#[derive(Clone)]
-enum IncludedChildren {
- None,
- All,
- Unnamed,
-}
-
-impl Default for IncludedChildren {
- fn default() -> Self {
- Self::None
- }
-}
-
fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<str> {
Cow::from(source.byte_slice(range))
}
-
-struct TsParser {
- parser: ::tree_sitter::Parser,
- pub cursors: Vec<QueryCursor>,
-}
-
-// could also just use a pool, or a single instance?
-thread_local! {
- static PARSER: RefCell<TsParser> = RefCell::new(TsParser {
- parser: Parser::new(),
- cursors: Vec::new(),
- })
-}
-
-pub fn with_cursor<T>(f: impl FnOnce(&mut QueryCursor) -> T) -> T {
- PARSER.with(|parser| {
- let mut parser = parser.borrow_mut();
- let mut cursor = parser.cursors.pop().unwrap_or_default();
- let res = f(&mut cursor);
- parser.cursors.push(cursor);
- res
- })
-}
diff --git a/helix-syntax/src/parse.rs b/helix-syntax/src/parse.rs
index de70f2a1..d2903b07 100644
--- a/helix-syntax/src/parse.rs
+++ b/helix-syntax/src/parse.rs
@@ -1,18 +1,18 @@
-use std::collections::VecDeque;
-use std::mem::replace;
-use std::sync::Arc;
+// use std::collections::VecDeque;
+// use std::mem::replace;
+// use std::sync::Arc;
-use ahash::RandomState;
+// use ahash::RandomState;
use bitflags::bitflags;
-use hashbrown::raw::RawTable;
-use ropey::RopeSlice;
-use tree_sitter::{Node, Parser, Point, QueryCursor, Range};
+// use hashbrown::raw::RawTable;
+// use ropey::RopeSlice;
+// use tree_sitter::{Node, Parser, Point, QueryCursor, Range};
-use crate::ropey::RopeProvider;
-use crate::{
- Error, HighlightConfiguration, IncludedChildren, InjectionLanguageMarker, LanguageLayer,
- Syntax, PARSER, TREE_SITTER_MATCH_LIMIT,
-};
+// use crate::ropey::RopeProvider;
+// use crate::{
+// Error, HighlightConfiguration, IncludedChildren, InjectionLanguageMarker, LanguageLayer,
+// Syntax, PARSER, TREE_SITTER_MATCH_LIMIT,
+// };
bitflags! {
/// Flags that track the status of a layer
@@ -25,405 +25,405 @@ bitflags! {
}
}
-impl Syntax {
- pub fn update(
- &mut self,
- source: RopeSlice,
- edits: Vec<tree_sitter::InputEdit>,
- injection_callback: impl Fn(&InjectionLanguageMarker) -> Option<Arc<HighlightConfiguration>>,
- ) -> Result<(), Error> {
- let mut queue = VecDeque::new();
- queue.push_back(self.root);
-
- // This table allows inverse indexing of `layers`.
- // That is by hashing a `Layer` you can find
- // the `LayerId` of an existing equivalent `Layer` in `layers`.
- //
- // It is used to determine if a new layer exists for an injection
- // or if an existing layer needs to be updated.
- let mut layers_table = RawTable::with_capacity(self.layers.len());
- let layers_hasher = RandomState::new();
- // Use the edits to update all layers markers
- fn point_add(a: Point, b: Point) -> Point {
- if b.row > 0 {
- Point::new(a.row.saturating_add(b.row), b.column)
- } else {
- Point::new(0, a.column.saturating_add(b.column))
- }
- }
- fn point_sub(a: Point, b: Point) -> Point {
- if a.row > b.row {
- Point::new(a.row.saturating_sub(b.row), a.column)
- } else {
- Point::new(0, a.column.saturating_sub(b.column))
- }
- }
-
- for (layer_id, layer) in self.layers.iter_mut() {
- // The root layer always covers the whole range (0..usize::MAX)
- if layer.depth == 0 {
- layer.flags = LayerUpdateFlags::MODIFIED;
- continue;
- }
-
- if !edits.is_empty() {
- for range in &mut layer.ranges {
- // Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720
- for edit in edits.iter().rev() {
- let is_pure_insertion = edit.old_end_byte == edit.start_byte;
-
- // if edit is after range, skip
- if edit.start_byte > range.end_byte {
- // TODO: || (is_noop && edit.start_byte == range.end_byte)
- continue;
- }
-
- // if edit is before range, shift entire range by len
- if edit.old_end_byte < range.start_byte {
- range.start_byte =
- edit.new_end_byte + (range.start_byte - edit.old_end_byte);
- range.start_point = point_add(
- edit.new_end_position,
- point_sub(range.start_point, edit.old_end_position),
- );
-
- range.end_byte = edit
- .new_end_byte
- .saturating_add(range.end_byte - edit.old_end_byte);
- range.end_point = point_add(
- edit.new_end_position,
- point_sub(range.end_point, edit.old_end_position),
- );
-
- layer.flags |= LayerUpdateFlags::MOVED;
- }
- // if the edit starts in the space before and extends into the range
- else if edit.start_byte < range.start_byte {
- range.start_byte = edit.new_end_byte;
- range.start_point = edit.new_end_position;
-
- range.end_byte = range
- .end_byte
- .saturating_sub(edit.old_end_byte)
- .saturating_add(edit.new_end_byte);
- range.end_point = point_add(
- edit.new_end_position,
- point_sub(range.end_point, edit.old_end_position),
- );
- layer.flags = LayerUpdateFlags::MODIFIED;
- }
- // If the edit is an insertion at the start of the tree, shift
- else if edit.start_byte == range.start_byte && is_pure_insertion {
- range.start_byte = edit.new_end_byte;
- range.start_point = edit.new_end_position;
- layer.flags |= LayerUpdateFlags::MOVED;
- } else {
- range.end_byte = range
- .end_byte
- .saturating_sub(edit.old_end_byte)
- .saturating_add(edit.new_end_byte);
- range.end_point = point_add(
- edit.new_end_position,
- point_sub(range.end_point, edit.old_end_position),
- );
- layer.flags = LayerUpdateFlags::MODIFIED;
- }
- }
- }
- }
-
- let hash = layers_hasher.hash_one(layer);
- // Safety: insert_no_grow is unsafe because it assumes that the table
- // has enough capacity to hold additional elements.
- // This is always the case as we reserved enough capacity above.
- unsafe { layers_table.insert_no_grow(hash, layer_id) };
- }
-
- PARSER.with(|ts_parser| {
- let ts_parser = &mut ts_parser.borrow_mut();
- ts_parser.parser.set_timeout_micros(1000 * 500); // half a second is pretty generours
- let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new);
- // TODO: might need to set cursor range
- cursor.set_byte_range(0..usize::MAX);
- cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
-
- let source_slice = source.slice(..);
-
- while let Some(layer_id) = queue.pop_front() {
- let layer = &mut self.layers[layer_id];
-
- // Mark the layer as touched
- layer.flags |= LayerUpdateFlags::TOUCHED;
-
- // If a tree already exists, notify it of changes.
- if let Some(tree) = &mut layer.tree {
- if layer
- .flags
- .intersects(LayerUpdateFlags::MODIFIED | LayerUpdateFlags::MOVED)
- {
- for edit in edits.iter().rev() {
- // Apply the edits in reverse.
- // If we applied them in order then edit 1 would disrupt the positioning of edit 2.
- tree.edit(edit);
- }
- }
-
- if layer.flags.contains(LayerUpdateFlags::MODIFIED) {
- // Re-parse the tree.
- layer.parse(&mut ts_parser.parser, source)?;
- }
- } else {
- // always parse if this layer has never been parsed before
- layer.parse(&mut ts_parser.parser, source)?;
- }
-
- // Switch to an immutable borrow.
- let layer = &self.layers[layer_id];
-
- // Process injections.
- let matches = cursor.matches(
- &layer.config.injections_query,
- layer.tree().root_node(),
- RopeProvider(source_slice),
- );
- let mut combined_injections = vec![
- (None, Vec::new(), IncludedChildren::default());
- layer.config.combined_injections_patterns.len()
- ];
- let mut injections = Vec::new();
- let mut last_injection_end = 0;
- for mat in matches {
- let (injection_capture, content_node, included_children) = layer
- .config
- .injection_for_match(&layer.config.injections_query, &mat, source_slice);
-
- // in case this is a combined injection save it for more processing later
- if let Some(combined_injection_idx) = layer
- .config
- .combined_injections_patterns
- .iter()
- .position(|&pattern| pattern == mat.pattern_index)
- {
- let entry = &mut combined_injections[combined_injection_idx];
- if injection_capture.is_some() {
- entry.0 = injection_capture;
- }
- if let Some(content_node) = content_node {
- if content_node.start_byte() >= last_injection_end {
- entry.1.push(content_node);
- last_injection_end = content_node.end_byte();
- }
- }
- entry.2 = included_children;
- continue;
- }
-
- // Explicitly remove this match so that none of its other captures will remain
- // in the stream of captures.
- mat.remove();
-
- // If a language is found with the given name, then add a new language layer
- // to the highlighted document.
- if let (Some(injection_capture), Some(content_node)) =
- (injection_capture, content_node)
- {
- if let Some(config) = (injection_callback)(&injection_capture) {
- let ranges =
- intersect_ranges(&layer.ranges, &[content_node], included_children);
-
- if !ranges.is_empty() {
- if content_node.start_byte() < last_injection_end {
- continue;
- }
- last_injection_end = content_node.end_byte();
- injections.push((config, ranges));
- }
- }
- }
- }
-
- for (lang_name, content_nodes, included_children) in combined_injections {
- if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
- if let Some(config) = (injection_callback)(&lang_name) {
- let ranges =
- intersect_ranges(&layer.ranges, &content_nodes, included_children);
- if !ranges.is_empty() {
- injections.push((config, ranges));
- }
- }
- }
- }
-
- let depth = layer.depth + 1;
- // TODO: can't inline this since matches borrows self.layers
- for (config, ranges) in injections {
- let parent = Some(layer_id);
- let new_layer = LanguageLayer {
- tree: None,
- config,
- depth,
- ranges,
- flags: LayerUpdateFlags::empty(),
- parent: None,
- };
-
- // Find an identical existing layer
- let layer = layers_table
- .get(layers_hasher.hash_one(&new_layer), |&it| {
- self.layers[it] == new_layer
- })
- .copied();
-
- // ...or insert a new one.
- let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer));
- self.layers[layer_id].parent = parent;
-
- queue.push_back(layer_id);
- }
-
- // TODO: pre-process local scopes at this time, rather than highlight?
- // would solve problems with locals not working across boundaries
- }
-
- // Return the cursor back in the pool.
- ts_parser.cursors.push(cursor);
-
- // Reset all `LayerUpdateFlags` and remove all untouched layers
- self.layers.retain(|_, layer| {
- replace(&mut layer.flags, LayerUpdateFlags::empty())
- .contains(LayerUpdateFlags::TOUCHED)
- });
-
- Ok(())
- })
- }
-}
-
-/// Compute the ranges that should be included when parsing an injection.
-/// This takes into account three things:
-/// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
-/// * `nodes` - Every injection takes place within a set of nodes. The injection ranges
-/// are the ranges of those nodes.
-/// * `includes_children` - For some injections, the content nodes' children should be
-/// excluded from the nested document, so that only the content nodes' *own* content
-/// is reparsed. For other injections, the content nodes' entire ranges should be
-/// reparsed, including the ranges of their children.
-fn intersect_ranges(
- parent_ranges: &[Range],
- nodes: &[Node],
- included_children: IncludedChildren,
-) -> Vec<Range> {
- let mut cursor = nodes[0].walk();
- let mut result = Vec::new();
- let mut parent_range_iter = parent_ranges.iter();
- let mut parent_range = parent_range_iter
- .next()
- .expect("Layers should only be constructed with non-empty ranges vectors");
- for node in nodes.iter() {
- let mut preceding_range = Range {
- start_byte: 0,
- start_point: Point::new(0, 0),
- end_byte: node.start_byte(),
- end_point: node.start_position(),
- };
- let following_range = Range {
- start_byte: node.end_byte(),
- start_point: node.end_position(),
- end_byte: usize::MAX,
- end_point: Point::new(usize::MAX, usize::MAX),
- };
-
- for excluded_range in node
- .children(&mut cursor)
- .filter_map(|child| match included_children {
- IncludedChildren::None => Some(child.range()),
- IncludedChildren::All => None,
- IncludedChildren::Unnamed => {
- if child.is_named() {
- Some(child.range())
- } else {
- None
- }
- }
- })
- .chain([following_range].iter().cloned())
- {
- let mut range = Range {
- start_byte: preceding_range.end_byte,
- start_point: preceding_range.end_point,
- end_byte: excluded_range.start_byte,
- end_point: excluded_range.start_point,
- };
- preceding_range = excluded_range;
-
- if range.end_byte < parent_range.start_byte {
- continue;
- }
-
- while parent_range.start_byte <= range.end_byte {
- if parent_range.end_byte > range.start_byte {
- if range.start_byte < parent_range.start_byte {
- range.start_byte = parent_range.start_byte;
- range.start_point = parent_range.start_point;
- }
-
- if parent_range.end_byte < range.end_byte {
- if range.start_byte < parent_range.end_byte {
- result.push(Range {
- start_byte: range.start_byte,
- start_point: range.start_point,
- end_byte: parent_range.end_byte,
- end_point: parent_range.end_point,
- });
- }
- range.start_byte = parent_range.end_byte;
- range.start_point = parent_range.end_point;
- } else {
- if range.start_byte < range.end_byte {
- result.push(range);
- }
- break;
- }
- }
-
- if let Some(next_range) = parent_range_iter.next() {
- parent_range = next_range;
- } else {
- return result;
- }
- }
- }
- }
- result
-}
-
-impl LanguageLayer {
- fn parse(&mut self, parser: &mut Parser, source: RopeSlice) -> Result<(), Error> {
- parser
- .set_included_ranges(&self.ranges)
- .map_err(|_| Error::InvalidRanges)?;
-
- parser
- .set_language(&self.config.language)
- .map_err(|_| Error::InvalidLanguage)?;
-
- // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) };
- let tree = parser
- .parse_with(
- &mut |byte, _| {
- if byte <= source.len_bytes() {
- let (chunk, start_byte, _, _) = source.chunk_at_byte(byte);
- &chunk.as_bytes()[byte - start_byte..]
- } else {
- // out of range
- &[]
- }
- },
- self.tree.as_ref(),
- )
- .ok_or(Error::Cancelled)?;
- // unsafe { ts_parser.parser.set_cancellation_flag(None) };
- self.tree = Some(tree);
- Ok(())
- }
-}
+// impl Syntax {
+// pub fn update(
+// &mut self,
+// source: RopeSlice,
+// edits: Vec<tree_sitter::InputEdit>,
+// injection_callback: impl Fn(&InjectionLanguageMarker) -> Option<Arc<HighlightConfiguration>>,
+// ) -> Result<(), Error> {
+// let mut queue = VecDeque::new();
+// queue.push_back(self.root);
+
+// // This table allows inverse indexing of `layers`.
+// // That is by hashing a `Layer` you can find
+// // the `LayerId` of an existing equivalent `Layer` in `layers`.
+// //
+// // It is used to determine if a new layer exists for an injection
+// // or if an existing layer needs to be updated.
+// let mut layers_table = RawTable::with_capacity(self.layers.len());
+// let layers_hasher = RandomState::new();
+// // Use the edits to update all layers markers
+// fn point_add(a: Point, b: Point) -> Point {
+// if b.row > 0 {
+// Point::new(a.row.saturating_add(b.row), b.column)
+// } else {
+// Point::new(0, a.column.saturating_add(b.column))
+// }
+// }
+// fn point_sub(a: Point, b: Point) -> Point {
+// if a.row > b.row {
+// Point::new(a.row.saturating_sub(b.row), a.column)
+// } else {
+// Point::new(0, a.column.saturating_sub(b.column))
+// }
+// }
+
+// for (layer_id, layer) in self.layers.iter_mut() {
+// // The root layer always covers the whole range (0..usize::MAX)
+// if layer.depth == 0 {
+// layer.flags = LayerUpdateFlags::MODIFIED;
+// continue;
+// }
+
+// if !edits.is_empty() {
+// for range in &mut layer.ranges {
+// // Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720
+// for edit in edits.iter().rev() {
+// let is_pure_insertion = edit.old_end_byte == edit.start_byte;
+
+// // if edit is after range, skip
+// if edit.start_byte > range.end_byte {
+// // TODO: || (is_noop && edit.start_byte == range.end_byte)
+// continue;
+// }
+
+// // if edit is before range, shift entire range by len
+// if edit.old_end_byte < range.start_byte {
+// range.start_byte =
+// edit.new_end_byte + (range.start_byte - edit.old_end_byte);
+// range.start_point = point_add(
+// edit.new_end_position,
+// point_sub(range.start_point, edit.old_end_position),
+// );
+
+// range.end_byte = edit
+// .new_end_byte
+// .saturating_add(range.end_byte - edit.old_end_byte);
+// range.end_point = point_add(
+// edit.new_end_position,
+// point_sub(range.end_point, edit.old_end_position),
+// );
+
+// layer.flags |= LayerUpdateFlags::MOVED;
+// }
+// // if the edit starts in the space before and extends into the range
+// else if edit.start_byte < range.start_byte {
+// range.start_byte = edit.new_end_byte;
+// range.start_point = edit.new_end_position;
+
+// range.end_byte = range
+// .end_byte
+// .saturating_sub(edit.old_end_byte)
+// .saturating_add(edit.new_end_byte);
+// range.end_point = point_add(
+// edit.new_end_position,
+// point_sub(range.end_point, edit.old_end_position),
+// );
+// layer.flags = LayerUpdateFlags::MODIFIED;
+// }
+// // If the edit is an insertion at the start of the tree, shift
+// else if edit.start_byte == range.start_byte && is_pure_insertion {
+// range.start_byte = edit.new_end_byte;
+// range.start_point = edit.new_end_position;
+// layer.flags |= LayerUpdateFlags::MOVED;
+// } else {
+// range.end_byte = range
+// .end_byte
+// .saturating_sub(edit.old_end_byte)
+// .saturating_add(edit.new_end_byte);
+// range.end_point = point_add(
+// edit.new_end_position,
+// point_sub(range.end_point, edit.old_end_position),
+// );
+// layer.flags = LayerUpdateFlags::MODIFIED;
+// }
+// }
+// }
+// }
+
+// let hash = layers_hasher.hash_one(layer);
+// // Safety: insert_no_grow is unsafe because it assumes that the table
+// // has enough capacity to hold additional elements.
+// // This is always the case as we reserved enough capacity above.
+// unsafe { layers_table.insert_no_grow(hash, layer_id) };
+// }
+
+// PARSER.with(|ts_parser| {
+// let ts_parser = &mut ts_parser.borrow_mut();
+// ts_parser.parser.set_timeout_micros(1000 * 500); // half a second is pretty generous
+// let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new);
+// // TODO: might need to set cursor range
+// cursor.set_byte_range(0..usize::MAX);
+// cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
+
+// let source_slice = source.slice(..);
+
+// while let Some(layer_id) = queue.pop_front() {
+// let layer = &mut self.layers[layer_id];
+
+// // Mark the layer as touched
+// layer.flags |= LayerUpdateFlags::TOUCHED;
+
+// // If a tree already exists, notify it of changes.
+// if let Some(tree) = &mut layer.parse_tree {
+// if layer
+// .flags
+// .intersects(LayerUpdateFlags::MODIFIED | LayerUpdateFlags::MOVED)
+// {
+// for edit in edits.iter().rev() {
+// // Apply the edits in reverse.
+// // If we applied them in order then edit 1 would disrupt the positioning of edit 2.
+// tree.edit(edit);
+// }
+// }
+
+// if layer.flags.contains(LayerUpdateFlags::MODIFIED) {
+// // Re-parse the tree.
+// layer.parse(&mut ts_parser.parser, source)?;
+// }
+// } else {
+// // always parse if this layer has never been parsed before
+// layer.parse(&mut ts_parser.parser, source)?;
+// }
+
+// // Switch to an immutable borrow.
+// let layer = &self.layers[layer_id];
+
+// // Process injections.
+// let matches = cursor.matches(
+// &layer.config.injections_query,
+// layer.tree().root_node(),
+// RopeProvider(source_slice),
+// );
+// let mut combined_injections = vec![
+// (None, Vec::new(), IncludedChildren::default());
+// layer.config.combined_injections_patterns.len()
+// ];
+// let mut injections = Vec::new();
+// let mut last_injection_end = 0;
+// for mat in matches {
+// let (injection_capture, content_node, included_children) = layer
+// .config
+// .injection_for_match(&layer.config.injections_query, &mat, source_slice);
+
+// // in case this is a combined injection save it for more processing later
+// if let Some(combined_injection_idx) = layer
+// .config
+// .combined_injections_patterns
+// .iter()
+// .position(|&pattern| pattern == mat.pattern_index)
+// {
+// let entry = &mut combined_injections[combined_injection_idx];
+// if injection_capture.is_some() {
+// entry.0 = injection_capture;
+// }
+// if let Some(content_node) = content_node {
+// if content_node.start_byte() >= last_injection_end {
+// entry.1.push(content_node);
+// last_injection_end = content_node.end_byte();
+// }
+// }
+// entry.2 = included_children;
+// continue;
+// }
+
+// // Explicitly remove this match so that none of its other captures will remain
+// // in the stream of captures.
+// mat.remove();
+
+// // If a language is found with the given name, then add a new language layer
+// // to the highlighted document.
+// if let (Some(injection_capture), Some(content_node)) =
+// (injection_capture, content_node)
+// {
+// if let Some(config) = (injection_callback)(&injection_capture) {
+// let ranges =
+// intersect_ranges(&layer.ranges, &[content_node], included_children);
+
+// if !ranges.is_empty() {
+// if content_node.start_byte() < last_injection_end {
+// continue;
+// }
+// last_injection_end = content_node.end_byte();
+// injections.push((config, ranges));
+// }
+// }
+// }
+// }
+
+// for (lang_name, content_nodes, included_children) in combined_injections {
+// if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
+// if let Some(config) = (injection_callback)(&lang_name) {
+// let ranges =
+// intersect_ranges(&layer.ranges, &content_nodes, included_children);
+// if !ranges.is_empty() {
+// injections.push((config, ranges));
+// }
+// }
+// }
+// }
+
+// let depth = layer.depth + 1;
+// // TODO: can't inline this since matches borrows self.layers
+// for (config, ranges) in injections {
+// let parent = Some(layer_id);
+// let new_layer = LanguageLayer {
+// parse_tree: None,
+// config,
+// depth,
+// ranges,
+// flags: LayerUpdateFlags::empty(),
+// parent: None,
+// };
+
+// // Find an identical existing layer
+// let layer = layers_table
+// .get(layers_hasher.hash_one(&new_layer), |&it| {
+// self.layers[it] == new_layer
+// })
+// .copied();
+
+// // ...or insert a new one.
+// let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer));
+// self.layers[layer_id].parent = parent;
+
+// queue.push_back(layer_id);
+// }
+
+// // TODO: pre-process local scopes at this time, rather than highlight?
+// // would solve problems with locals not working across boundaries
+// }
+
+// // Return the cursor back in the pool.
+// ts_parser.cursors.push(cursor);
+
+// // Reset all `LayerUpdateFlags` and remove all untouched layers
+// self.layers.retain(|_, layer| {
+// replace(&mut layer.flags, LayerUpdateFlags::empty())
+// .contains(LayerUpdateFlags::TOUCHED)
+// });
+
+// Ok(())
+// })
+// }
+// }
+
+// /// Compute the ranges that should be included when parsing an injection.
+// /// This takes into account three things:
+// /// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
+// /// * `nodes` - Every injection takes place within a set of nodes. The injection ranges
+// /// are the ranges of those nodes.
+// /// * `includes_children` - For some injections, the content nodes' children should be
+// /// excluded from the nested document, so that only the content nodes' *own* content
+// /// is reparsed. For other injections, the content nodes' entire ranges should be
+// /// reparsed, including the ranges of their children.
+// fn intersect_ranges(
+// parent_ranges: &[Range],
+// nodes: &[Node],
+// included_children: IncludedChildren,
+// ) -> Vec<Range> {
+// let mut cursor = nodes[0].walk();
+// let mut result = Vec::new();
+// let mut parent_range_iter = parent_ranges.iter();
+// let mut parent_range = parent_range_iter
+// .next()
+// .expect("Layers should only be constructed with non-empty ranges vectors");
+// for node in nodes.iter() {
+// let mut preceding_range = Range {
+// start_byte: 0,
+// start_point: Point::new(0, 0),
+// end_byte: node.start_byte(),
+// end_point: node.start_position(),
+// };
+// let following_range = Range {
+// start_byte: node.end_byte(),
+// start_point: node.end_position(),
+// end_byte: usize::MAX,
+// end_point: Point::new(usize::MAX, usize::MAX),
+// };
+
+// for excluded_range in node
+// .children(&mut cursor)
+// .filter_map(|child| match included_children {
+// IncludedChildren::None => Some(child.range()),
+// IncludedChildren::All => None,
+// IncludedChildren::Unnamed => {
+// if child.is_named() {
+// Some(child.range())
+// } else {
+// None
+// }
+// }
+// })
+// .chain([following_range].iter().cloned())
+// {
+// let mut range = Range {
+// start_byte: preceding_range.end_byte,
+// start_point: preceding_range.end_point,
+// end_byte: excluded_range.start_byte,
+// end_point: excluded_range.start_point,
+// };
+// preceding_range = excluded_range;
+
+// if range.end_byte < parent_range.start_byte {
+// continue;
+// }
+
+// while parent_range.start_byte <= range.end_byte {
+// if parent_range.end_byte > range.start_byte {
+// if range.start_byte < parent_range.start_byte {
+// range.start_byte = parent_range.start_byte;
+// range.start_point = parent_range.start_point;
+// }
+
+// if parent_range.end_byte < range.end_byte {
+// if range.start_byte < parent_range.end_byte {
+// result.push(Range {
+// start_byte: range.start_byte,
+// start_point: range.start_point,
+// end_byte: parent_range.end_byte,
+// end_point: parent_range.end_point,
+// });
+// }
+// range.start_byte = parent_range.end_byte;
+// range.start_point = parent_range.end_point;
+// } else {
+// if range.start_byte < range.end_byte {
+// result.push(range);
+// }
+// break;
+// }
+// }
+
+// if let Some(next_range) = parent_range_iter.next() {
+// parent_range = next_range;
+// } else {
+// return result;
+// }
+// }
+// }
+// }
+// result
+// }
+
+// impl LanguageLayer {
+// fn parse(&mut self, parser: &mut Parser, source: RopeSlice) -> Result<(), Error> {
+// parser
+// .set_included_ranges(&self.ranges)
+// .map_err(|_| Error::InvalidRanges)?;
+
+// parser
+// .set_language(&self.config.language)
+// .map_err(|_| Error::InvalidLanguage)?;
+
+// // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) };
+// let tree = parser
+// .parse_with(
+// &mut |byte, _| {
+// if byte <= source.len_bytes() {
+// let (chunk, start_byte, _, _) = source.chunk_at_byte(byte);
+// &chunk.as_bytes()[byte - start_byte..]
+// } else {
+// // out of range
+// &[]
+// }
+// },
+// self.parse_tree.as_ref(),
+// )
+// .ok_or(Error::Cancelled)?;
+// // unsafe { ts_parser.parser.set_cancellation_flag(None) };
+// self.parse_tree = Some(tree);
+// Ok(())
+// }
+// }
diff --git a/helix-syntax/src/query_iter.rs b/helix-syntax/src/query_iter.rs
new file mode 100644
index 00000000..e672fe77
--- /dev/null
+++ b/helix-syntax/src/query_iter.rs
@@ -0,0 +1,236 @@
+use core::slice;
+use std::iter::Peekable;
+use std::mem::replace;
+
+use hashbrown::HashMap;
+use ropey::RopeSlice;
+
+use crate::tree_sitter::{
+ Capture, InactiveQueryCursor, Query, QueryCursor, RopeTsInput, SyntaxTreeNode,
+};
+use crate::{Injection, LayerId, Range, Syntax};
+
+/// A single captured node produced by a `QueryIter`: the capture index
+/// together with the node's byte range in the source text.
+#[derive(Clone)]
+pub struct MatchedNode {
+    pub capture: Capture,
+    pub byte_range: Range,
+}
+
+/// Streams matches of a single layer's query with one item of lookahead,
+/// so `QueryIter` can merge pending matches with injection boundaries.
+struct LayerQueryIter<'a> {
+    cursor: QueryCursor<'a, 'a, RopeTsInput<'a>>,
+    peeked: Option<MatchedNode>,
+}
+
+impl<'a> LayerQueryIter<'a> {
+    /// Returns the next matched node without consuming it, pulling a new
+    /// match from the cursor only when nothing is buffered yet.
+    fn peek(&mut self) -> Option<&MatchedNode> {
+        if self.peeked.is_none() {
+            let (query_match, node_idx) = self.cursor.next_matched_node()?;
+            let matched_node = query_match.matched_node(node_idx);
+            self.peeked = Some(MatchedNode {
+                capture: matched_node.capture,
+                byte_range: matched_node.syntax_node.byte_range(),
+            });
+        }
+        self.peeked.as_ref()
+    }
+
+    /// Takes the buffered node. Must only be called after a successful
+    /// `peek`; otherwise the `unwrap` panics.
+    fn consume(&mut self) -> MatchedNode {
+        self.peeked.take().unwrap()
+    }
+}
+
+/// Per-layer iteration state while a layer is (or was) active in a
+/// `QueryIter` traversal.
+struct ActiveLayer<'a, S> {
+    state: S,
+    query_iter: LayerQueryIter<'a>,
+    injections: Peekable<slice::Iter<'a, Injection>>,
+}
+
+// data only needed when entering and exiting injections
+// separate struct to keep the QueryIter reasonably small
+struct QueryIterLayerManager<'a, S> {
+    query: &'a Query,
+    node: SyntaxTreeNode<'a>,
+    src: RopeSlice<'a>,
+    syntax: &'a Syntax,
+    active_layers: HashMap<LayerId, Box<ActiveLayer<'a, S>>>,
+    active_injections: Vec<Injection>,
+}
+
+impl<'a, S: Default> QueryIterLayerManager<'a, S> {
+    /// Fetches (or lazily creates) the `ActiveLayer` for `injection`,
+    /// reusing a previously suspended layer from the cache when possible.
+    fn init_layer(&mut self, injection: Injection) -> Box<ActiveLayer<'a, S>> {
+        self.active_layers
+            .remove(&injection.layer)
+            .unwrap_or_else(|| {
+                let layer = &self.syntax.layers[injection.layer];
+                // index of the first child injection that does not start
+                // before this injection
+                let injection_start = layer
+                    .injections
+                    .partition_point(|child| child.byte_range.start < injection.byte_range.start);
+                // NOTE(review): the query is executed against `self.node`
+                // (the node this QueryIter was started at) rather than a
+                // node belonging to the injected layer -- confirm intended.
+                let cursor = InactiveQueryCursor::new().execute_query(
+                    self.query,
+                    &self.node,
+                    RopeTsInput::new(self.src),
+                );
+                Box::new(ActiveLayer {
+                    state: S::default(),
+                    query_iter: LayerQueryIter {
+                        cursor,
+                        peeked: None,
+                    },
+                    injections: layer.injections[injection_start..].iter().peekable(),
+                })
+            })
+    }
+}
+
+/// Iterator over the matches of a query across all injection layers of a
+/// `Syntax` tree, yielding matches interleaved with injection enter/exit
+/// events in source order. `LayerState` is caller-defined per-layer state.
+pub struct QueryIter<'a, LayerState: Default = ()> {
+    layer_manager: Box<QueryIterLayerManager<'a, LayerState>>,
+    current_layer: Box<ActiveLayer<'a, LayerState>>,
+    current_injection: Injection,
+}
+
+impl<'a, LayerState: Default> QueryIter<'a, LayerState> {
+    /// Starts a query iterator at the root of the syntax tree.
+    pub fn new(syntax: &'a Syntax, src: RopeSlice<'a>, query: &'a Query) -> Self {
+        Self::at(syntax, src, query, syntax.tree().root_node(), syntax.root)
+    }
+
+    /// Starts a query iterator at an arbitrary `node` within `layer`.
+    pub fn at(
+        syntax: &'a Syntax,
+        src: RopeSlice<'a>,
+        query: &'a Query,
+        node: SyntaxTreeNode<'a>,
+        layer: LayerId,
+    ) -> Self {
+        // create fake injection for query root
+        let injection = Injection {
+            byte_range: node.byte_range(),
+            layer,
+        };
+        let mut layer_manager = Box::new(QueryIterLayerManager {
+            query,
+            node,
+            src,
+            syntax,
+            // TODO: reuse allocations with an allocation pool
+            active_layers: HashMap::with_capacity(8),
+            active_injections: Vec::with_capacity(8),
+        });
+        Self {
+            current_layer: layer_manager.init_layer(injection),
+            current_injection: injection,
+            layer_manager,
+        }
+    }
+
+    /// Mutable access to the state of the layer currently being iterated.
+    pub fn current_layer_state(&mut self) -> &mut LayerState {
+        &mut self.current_layer.state
+    }
+
+    /// Mutable access to the state of `layer`.
+    ///
+    /// Panics (`unwrap`) if `layer` is neither the current layer nor an
+    /// entered-but-not-yet-exited layer.
+    pub fn layer_state(&mut self, layer: LayerId) -> &mut LayerState {
+        if layer == self.current_injection.layer {
+            self.current_layer_state()
+        } else {
+            &mut self
+                .layer_manager
+                .active_layers
+                .get_mut(&layer)
+                .unwrap()
+                .state
+        }
+    }
+
+    /// Makes `injection` current, suspending the previous layer and
+    /// injection onto the active stacks until the injection is exited.
+    fn enter_injection(&mut self, injection: Injection) {
+        let active_layer = self.layer_manager.init_layer(injection);
+        let old_injection = replace(&mut self.current_injection, injection);
+        let old_layer = replace(&mut self.current_layer, active_layer);
+        self.layer_manager
+            .active_layers
+            .insert(old_injection.layer, old_layer);
+        self.layer_manager.active_injections.push(old_injection);
+    }
+
+    /// Leaves the current injection, restoring its parent injection/layer.
+    ///
+    /// Returns the exited injection plus its final `LayerState` when the
+    /// layer's query is exhausted; if the layer still has a buffered match
+    /// it is kept cached and the state is returned as `None`. Returns
+    /// `None` once the root injection would be exited.
+    fn exit_injection(&mut self) -> Option<(Injection, Option<LayerState>)> {
+        let injection = replace(
+            &mut self.current_injection,
+            self.layer_manager.active_injections.pop()?,
+        );
+        let layer = replace(
+            &mut self.current_layer,
+            self.layer_manager
+                .active_layers
+                .remove(&self.current_injection.layer)?,
+        );
+        let layer_unfinished = layer.query_iter.peeked.is_some();
+        if layer_unfinished {
+            // Keep the unfinished layer cached so its buffered match is not
+            // lost. The exited layer was the *current* layer and is normally
+            // absent from the map, so `insert` returns `None` here; the
+            // previous `.unwrap()` on that return value panicked in exactly
+            // this (expected) case.
+            self.layer_manager
+                .active_layers
+                .insert(injection.layer, layer);
+            Some((injection, None))
+        } else {
+            Some((injection, Some(layer.state)))
+        }
+    }
+}
+
+impl<'a, S: Default> Iterator for QueryIter<'a, S> {
+    type Item = QueryIterEvent<S>;
+
+    fn next(&mut self) -> Option<QueryIterEvent<S>> {
+        loop {
+            // Only consider injections and matches that start inside the
+            // injection currently being iterated.
+            let next_injection = self.current_layer.injections.peek().filter(|injection| {
+                injection.byte_range.start < self.current_injection.byte_range.end
+            });
+            let next_match = self.current_layer.query_iter.peek().filter(|matched_node| {
+                matched_node.byte_range.start < self.current_injection.byte_range.end
+            });
+
+            match (next_match, next_injection) {
+                (None, None) => {
+                    // nothing left in this injection: pop back to the parent
+                    return self.exit_injection().map(|(injection, state)| {
+                        QueryIterEvent::ExitInjection { injection, state }
+                    });
+                }
+                (Some(_), None) => {
+                    // consume match
+                    let matched_node = self.current_layer.query_iter.consume();
+                    return Some(QueryIterEvent::Match(matched_node));
+                }
+                (Some(matched_node), Some(injection))
+                    if matched_node.byte_range.start <= injection.byte_range.end =>
+                {
+                    // consume match
+                    let matched_node = self.current_layer.query_iter.consume();
+                    // ignore nodes that are overlapped by the injection:
+                    // a match starting at/before the injection start is
+                    // emitted, one starting inside the injection is dropped
+                    if matched_node.byte_range.start <= injection.byte_range.start {
+                        return Some(QueryIterEvent::Match(matched_node));
+                    }
+                }
+                (Some(_), Some(_)) | (None, Some(_)) => {
+                    // consume injection
+                    let injection = self.current_layer.injections.next().unwrap();
+                    self.enter_injection(injection.clone());
+                    return Some(QueryIterEvent::EnterInjection(injection.clone()));
+                }
+            }
+        }
+    }
+}
+
+/// Events emitted by a `QueryIter`, in source order.
+pub enum QueryIterEvent<State = ()> {
+    /// Iteration descended into an injected layer.
+    EnterInjection(Injection),
+    /// A node matched by the query.
+    Match(MatchedNode),
+    /// Iteration left an injected layer; `state` carries the layer's final
+    /// state when its query is fully exhausted, `None` otherwise.
+    ExitInjection {
+        injection: Injection,
+        state: Option<State>,
+    },
+}
+
+impl<S> QueryIterEvent<S> {
+    /// Byte offset in the source associated with this event.
+    pub fn start(&self) -> u32 {
+        match self {
+            QueryIterEvent::EnterInjection(injection) => injection.byte_range.start as u32,
+            QueryIterEvent::Match(mat) => mat.byte_range.start as u32,
+            // NOTE(review): an exit event occurs at the *end* of the
+            // injection; using `start` here may mis-order events relative
+            // to matches inside the injection -- confirm intended.
+            QueryIterEvent::ExitInjection { injection, .. } => injection.byte_range.start as u32,
+        }
+    }
+}
diff --git a/helix-syntax/src/ropey.rs b/helix-syntax/src/ropey.rs
index 650fcfb9..8b137891 100644
--- a/helix-syntax/src/ropey.rs
+++ b/helix-syntax/src/ropey.rs
@@ -1,29 +1 @@
-// glue code for using TS with ropey, this should be put behind a feature flag
-// in the future (and potentially be partially removed)
-use ropey::RopeSlice;
-use tree_sitter::{Node, TextProvider};
-
-// Adapter to convert rope chunks to bytes
-pub struct ChunksBytes<'a> {
- chunks: ropey::iter::Chunks<'a>,
-}
-impl<'a> Iterator for ChunksBytes<'a> {
- type Item = &'a [u8];
- fn next(&mut self) -> Option<Self::Item> {
- self.chunks.next().map(str::as_bytes)
- }
-}
-
-pub struct RopeProvider<'a>(pub RopeSlice<'a>);
-
-impl<'a> TextProvider<&'a [u8]> for RopeProvider<'a> {
- type I = ChunksBytes<'a>;
-
- fn text(&mut self, node: Node) -> Self::I {
- let fragment = self.0.byte_slice(node.start_byte()..node.end_byte());
- ChunksBytes {
- chunks: fragment.chunks(),
- }
- }
-}
diff --git a/helix-syntax/src/text_object.rs b/helix-syntax/src/text_object.rs
new file mode 100644
index 00000000..09cb8a10
--- /dev/null
+++ b/helix-syntax/src/text_object.rs
@@ -0,0 +1,93 @@
+// TODO: rework using query iter
+
+use std::iter;
+
+use ropey::RopeSlice;
+
+use crate::tree_sitter::{InactiveQueryCursor, Query, RopeTsInput, SyntaxTreeNode};
+use crate::TREE_SITTER_MATCH_LIMIT;
+
+/// One result of a textobject capture: either a single node or a group of
+/// nodes captured together via quantifiers (`+`, `*`, ...).
+#[derive(Debug)]
+pub enum CapturedNode<'a> {
+    Single(SyntaxTreeNode<'a>),
+    /// Guaranteed to be not empty
+    Grouped(Vec<SyntaxTreeNode<'a>>),
+}
+
+impl<'a> CapturedNode<'a> {
+    /// Byte offset where the captured range starts (first node of a group).
+    pub fn start_byte(&self) -> usize {
+        match self {
+            Self::Single(n) => n.start_byte(),
+            Self::Grouped(ns) => ns[0].start_byte(),
+        }
+    }
+
+    /// Byte offset where the captured range ends (last node of a group).
+    pub fn end_byte(&self) -> usize {
+        match self {
+            Self::Single(n) => n.end_byte(),
+            Self::Grouped(ns) => ns.last().unwrap().end_byte(),
+        }
+    }
+}
+
+/// A compiled textobjects query (e.g. `textobjects.scm`) used to resolve
+/// captures such as `function.inside` or `class.around` to node ranges.
+#[derive(Debug)]
+pub struct TextObjectQuery {
+    pub query: Query,
+}
+
+impl TextObjectQuery {
+    /// Run the query on the given node and return sub nodes which match given
+    /// capture ("function.inside", "class.around", etc).
+    ///
+    /// Captures may contain multiple nodes by using quantifiers (+, *, etc),
+    /// and support for this is partial and could use improvement.
+    ///
+    /// ```query
+    /// (comment)+ @capture
+    ///
+    /// ; OR
+    /// (
+    ///   (comment)*
+    ///   .
+    ///   (function)
+    /// ) @capture
+    /// ```
+    pub fn capture_nodes<'a>(
+        &'a self,
+        capture_name: &str,
+        node: SyntaxTreeNode<'a>,
+        slice: RopeSlice<'a>,
+        cursor: InactiveQueryCursor,
+    ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
+        self.capture_nodes_any(&[capture_name], node, slice, cursor)
+    }
+
+    /// Find the first capture that exists out of all given `capture_names`
+    /// and return sub nodes that match this capture.
+    pub fn capture_nodes_any<'a>(
+        &'a self,
+        capture_names: &[&str],
+        node: SyntaxTreeNode<'a>,
+        slice: RopeSlice<'a>,
+        mut cursor: InactiveQueryCursor,
+    ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
+        // first capture name that this query actually defines wins
+        let capture = capture_names
+            .iter()
+            .find_map(|cap| self.query.get_capture(cap))?;
+
+        // cap in-progress matches to avoid pathological memory use
+        cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
+        let mut cursor = cursor.execute_query(&self.query, &node, RopeTsInput::new(slice));
+        let capture_node = iter::from_fn(move || {
+            let (mat, _) = cursor.next_matched_node()?;
+            Some(mat.nodes_for_capture(capture).cloned().collect())
+        })
+        // matches with no nodes for this capture are dropped here, which
+        // upholds the "Grouped is never empty" invariant
+        .filter_map(move |nodes: Vec<_>| {
+            if nodes.len() > 1 {
+                Some(CapturedNode::Grouped(nodes))
+            } else {
+                nodes.into_iter().map(CapturedNode::Single).next()
+            }
+        });
+        Some(capture_node)
+    }
+}
diff --git a/helix-syntax/src/tree_sitter.rs b/helix-syntax/src/tree_sitter.rs
index bb188d12..78e3d4a0 100644
--- a/helix-syntax/src/tree_sitter.rs
+++ b/helix-syntax/src/tree_sitter.rs
@@ -1,6 +1,6 @@
mod grammar;
mod parser;
-mod query;
+pub mod query;
mod query_cursor;
mod query_match;
mod ropey;
@@ -11,21 +11,21 @@ use std::ops;
pub use grammar::Grammar;
pub use parser::{Parser, ParserInputRaw};
-pub use query::{Capture, ParserErrorLocation, Pattern, Query, QueryStr};
+pub use query::{Capture, Pattern, Query, QueryStr};
pub use query_cursor::{InactiveQueryCursor, MatchedNode, MatchedNodeIdx, QueryCursor, QueryMatch};
pub use ropey::RopeTsInput;
pub use syntax_tree::{InputEdit, SyntaxTree};
pub use syntax_tree_node::SyntaxTreeNode;
#[repr(C)]
-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Point {
pub row: u32,
- pub column: u32,
+ pub col: u32,
}
#[repr(C)]
-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Range {
pub start_point: Point,
pub end_point: Point,
diff --git a/helix-syntax/src/tree_sitter/query.rs b/helix-syntax/src/tree_sitter/query.rs
index 3fb1fc18..69a39417 100644
--- a/helix-syntax/src/tree_sitter/query.rs
+++ b/helix-syntax/src/tree_sitter/query.rs
@@ -5,28 +5,64 @@ use std::ptr::NonNull;
use std::{slice, str};
use crate::tree_sitter::query::predicate::{InvalidPredicateError, Predicate, TextPredicate};
-use crate::tree_sitter::query::property::QueryProperty;
use crate::tree_sitter::Grammar;
mod predicate;
mod property;
+/// Query predicates that are not interpreted by the query engine itself
+/// and are instead surfaced to the caller while the query is parsed.
+pub enum UserPredicate<'a> {
+    /// An `is?`/`is-not?` property check.
+    IsPropertySet {
+        negate: bool,
+        key: &'a str,
+        val: Option<&'a str>,
+    },
+    /// A `set!` property assignment.
+    SetProperty {
+        key: &'a str,
+        val: Option<&'a str>,
+    },
+    /// Any other (custom) predicate, passed through unparsed.
+    Other(Predicate<'a>),
+}
+
+impl Display for UserPredicate<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match *self {
+            UserPredicate::IsPropertySet { negate, key, val } => {
+                let predicate = if negate { "is-not?" } else { "is?" };
+                // NOTE(review): the leading space (and trailing space when
+                // `val` is None) looks accidental -- the `set!` arm below
+                // has neither; confirm the intended output format.
+                write!(f, " ({predicate} {key} {})", val.unwrap_or(""))
+            }
+            UserPredicate::SetProperty { key, val } => {
+                write!(f, "(set! {key} {})", val.unwrap_or(""))
+            }
+            UserPredicate::Other(ref predicate) => {
+                write!(f, "{}", predicate.name())
+            }
+        }
+    }
+}
+
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Pattern(pub(crate) u32);
+impl Pattern {
+    /// Marker value used where no valid pattern index is available.
+    pub const SENTINEL: Pattern = Pattern(u32::MAX);
+    /// This pattern's index, for use with per-pattern lookup tables.
+    pub fn idx(&self) -> usize {
+        self.0 as usize
+    }
+}
+
pub enum QueryData {}
+#[derive(Debug)]
pub(super) struct PatternData {
text_predicates: Range<u32>,
- properties: Range<u32>,
}
+#[derive(Debug)]
pub struct Query {
pub(crate) raw: NonNull<QueryData>,
num_captures: u32,
num_strings: u32,
text_predicates: Vec<TextPredicate>,
- properties: Vec<QueryProperty>,
patterns: Box<[PatternData]>,
}
@@ -41,7 +77,7 @@ impl Query {
grammar: Grammar,
source: &str,
path: impl AsRef<Path>,
- mut custom_predicate: impl FnMut(Pattern, Predicate) -> Result<(), InvalidPredicateError>,
+ mut custom_predicate: impl FnMut(Pattern, UserPredicate) -> Result<(), InvalidPredicateError>,
) -> Result<Self, ParseError> {
assert!(
source.len() <= i32::MAX as usize,
@@ -136,7 +172,6 @@ impl Query {
num_captures,
num_strings,
text_predicates: Vec::new(),
- properties: Vec::new(),
patterns: Box::default(),
};
let patterns: Result<_, ParseError> = (0..num_patterns)
@@ -190,15 +225,54 @@ impl Query {
}
}
- pub fn pattern_properies(&self, pattern_idx: Pattern) -> &[QueryProperty] {
- let range = self.patterns[pattern_idx.0 as usize].properties.clone();
- &self.properties[range.start as usize..range.end as usize]
+    /// Iterate over all captures in this query together with their names.
+    #[inline]
+    pub fn captures(&self) -> impl ExactSizeIterator<Item = (Capture, &str)> {
+        (0..self.num_captures).map(|cap| (Capture(cap), self.capture_name(Capture(cap))))
+    }
+
+    /// The number of captures defined by this query.
+    #[inline]
+    pub fn num_captures(&self) -> u32 {
+        self.num_captures
+    }
+
+    /// Look up a capture by name, returning `None` if this query does not
+    /// define a capture with that name.
+    #[inline]
+    pub fn get_capture(&self, capture_name: &str) -> Option<Capture> {
+        // queries only have a handful of captures, so a linear scan is fine
+        (0..self.num_captures)
+            .map(Capture)
+            .find(|&capture| self.capture_name(capture) == capture_name)
}
pub(crate) fn pattern_text_predicates(&self, pattern_idx: u16) -> &[TextPredicate] {
let range = self.patterns[pattern_idx as usize].text_predicates.clone();
&self.text_predicates[range.start as usize..range.end as usize]
}
+
+    /// Get the byte offset where the given pattern starts in the query's
+    /// source.
+    ///
+    /// # Panics
+    /// Panics if `pattern` is out of bounds for this query.
+    #[doc(alias = "ts_query_start_byte_for_pattern")]
+    #[must_use]
+    pub fn start_byte_for_pattern(&self, pattern: Pattern) -> usize {
+        // Bounds-check against `patterns` (one `PatternData` per pattern);
+        // `text_predicates` is a flat list across all patterns and is not a
+        // valid bound. The message previously interpolated a nonexistent
+        // `pattern_index` binding, which does not compile.
+        assert!(
+            pattern.idx() < self.patterns.len(),
+            "Pattern index is {pattern:?} but the pattern count is {}",
+            self.patterns.len(),
+        );
+        unsafe { ts_query_start_byte_for_pattern(self.raw, pattern.0) as usize }
+    }
+
+    /// Get the number of patterns in the query.
+    #[must_use]
+    pub fn pattern_count(&self) -> usize {
+        unsafe { ts_query_pattern_count(self.raw) as usize }
+    }
+    /// Iterate over all patterns in the query.
+    #[must_use]
+    pub fn patterns(&self) -> impl ExactSizeIterator<Item = Pattern> {
+        (0..self.pattern_count() as u32).map(Pattern)
+    }
}
impl Drop for Query {
@@ -215,6 +289,9 @@ impl Capture {
pub fn name(self, query: &Query) -> &str {
query.capture_name(self)
}
+ pub fn idx(self) -> usize {
+ self.0 as usize
+ }
}
/// A reference to a string stroed in a query
diff --git a/helix-syntax/src/tree_sitter/query/predicate.rs b/helix-syntax/src/tree_sitter/query/predicate.rs
index 7a2f858e..91f3dabe 100644
--- a/helix-syntax/src/tree_sitter/query/predicate.rs
+++ b/helix-syntax/src/tree_sitter/query/predicate.rs
@@ -5,7 +5,9 @@ use std::ptr::NonNull;
use std::{fmt, slice};
use crate::tree_sitter::query::property::QueryProperty;
-use crate::tree_sitter::query::{Capture, Pattern, PatternData, Query, QueryData, QueryStr};
+use crate::tree_sitter::query::{
+ Capture, Pattern, PatternData, Query, QueryData, QueryStr, UserPredicate,
+};
use crate::tree_sitter::query_cursor::MatchedNode;
use crate::tree_sitter::TsInput;
@@ -34,6 +36,7 @@ pub(super) enum TextPredicateKind {
AnyString(Box<[QueryStr]>),
}
+#[derive(Debug)]
pub(crate) struct TextPredicate {
capture: Capture,
kind: TextPredicateKind,
@@ -161,10 +164,9 @@ impl Query {
pub(super) fn parse_pattern_predicates(
&mut self,
pattern: Pattern,
- mut custom_predicate: impl FnMut(Pattern, Predicate) -> Result<(), InvalidPredicateError>,
+ mut custom_predicate: impl FnMut(Pattern, UserPredicate) -> Result<(), InvalidPredicateError>,
) -> Result<PatternData, InvalidPredicateError> {
let text_predicate_start = self.text_predicates.len() as u32;
- let property_start = self.properties.len() as u32;
let predicate_steps = unsafe {
let mut len = 0u32;
@@ -203,7 +205,7 @@ impl Query {
"match?" | "not-match?" | "any-match?" | "any-not-match?" => {
predicate.check_arg_count(2)?;
let capture_idx = predicate.capture_arg(0)?;
- let regex = predicate.str_arg(1)?.get(self);
+ let regex = predicate.query_str_arg(1)?.get(self);
let negated = matches!(predicate.name(), "not-match?" | "any-not-match?");
let match_all = matches!(predicate.name(), "match?" | "not-match?");
@@ -219,14 +221,34 @@ impl Query {
});
}
- "set!" => self.properties.push(QueryProperty::parse(&predicate)?),
+ "set!" => {
+ let property = QueryProperty::parse(&predicate)?;
+ custom_predicate(
+ pattern,
+ UserPredicate::SetProperty {
+ key: property.key.get(&self),
+ val: property.val.map(|val| val.get(&self)),
+ },
+ )?
+ }
+ "is-not?" | "is?" => {
+ let property = QueryProperty::parse(&predicate)?;
+ custom_predicate(
+ pattern,
+ UserPredicate::IsPropertySet {
+ negate: predicate.name() == "is-not?",
+ key: property.key.get(&self),
+ val: property.val.map(|val| val.get(&self)),
+ },
+ )?
+ }
"any-of?" | "not-any-of?" => {
predicate.check_min_arg_count(1)?;
let capture = predicate.capture_arg(0)?;
let negated = predicate.name() == "not-any-of?";
let values: Result<_, InvalidPredicateError> = (1..predicate.num_args())
- .map(|i| predicate.str_arg(i))
+ .map(|i| predicate.query_str_arg(i))
.collect();
self.text_predicates.push(TextPredicate {
capture,
@@ -239,12 +261,11 @@ impl Query {
// is and is-not are better handeled as custom predicates since interpreting is context dependent
// "is?" => property_predicates.push((QueryProperty::parse(&predicate), false)),
// "is-not?" => property_predicates.push((QueryProperty::parse(&predicate), true)),
- _ => custom_predicate(pattern, predicate)?,
+ _ => custom_predicate(pattern, UserPredicate::Other(predicate))?,
}
}
Ok(PatternData {
text_predicates: text_predicate_start..self.text_predicates.len() as u32,
- properties: property_start..self.properties.len() as u32,
})
}
}
@@ -312,7 +333,7 @@ impl<'a> Predicate<'a> {
Ok(())
}
- pub fn str_arg(&self, i: usize) -> Result<QueryStr, InvalidPredicateError> {
+ pub fn query_str_arg(&self, i: usize) -> Result<QueryStr, InvalidPredicateError> {
match self.arg(i) {
PredicateArg::String(str) => Ok(str),
PredicateArg::Capture(capture) => bail!(
@@ -323,6 +344,10 @@ impl<'a> Predicate<'a> {
}
}
+ pub fn str_arg(&self, i: usize) -> Result<&str, InvalidPredicateError> {
+ Ok(self.query_str_arg(i)?.get(self.query))
+ }
+
pub fn num_args(&self) -> usize {
self.args.len()
}
@@ -352,6 +377,20 @@ pub struct InvalidPredicateError {
pub(super) msg: Box<str>,
}
+impl From<String> for InvalidPredicateError {
+    /// Build an error directly from an owned message.
+    fn from(value: String) -> Self {
+        InvalidPredicateError {
+            msg: value.into_boxed_str(),
+        }
+    }
+}
+
+impl<'a> From<&'a str> for InvalidPredicateError {
+    /// Build an error by copying a borrowed message.
+    fn from(value: &'a str) -> Self {
+        InvalidPredicateError { msg: value.into() }
+    }
+}
+
impl fmt::Display for InvalidPredicateError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.msg)
diff --git a/helix-syntax/src/tree_sitter/query/property.rs b/helix-syntax/src/tree_sitter/query/property.rs
index 037644b9..53f162c5 100644
--- a/helix-syntax/src/tree_sitter/query/property.rs
+++ b/helix-syntax/src/tree_sitter/query/property.rs
@@ -1,6 +1,7 @@
use crate::tree_sitter::query::predicate::{InvalidPredicateError, Predicate};
use crate::tree_sitter::query::QueryStr;
+#[derive(Debug)]
pub struct QueryProperty {
pub key: QueryStr,
pub val: Option<QueryStr>,
@@ -10,9 +11,9 @@ impl QueryProperty {
pub fn parse(predicate: &Predicate) -> Result<Self, InvalidPredicateError> {
predicate.check_min_arg_count(1)?;
predicate.check_max_arg_count(2)?;
- let key = predicate.str_arg(0)?;
+ let key = predicate.query_str_arg(0)?;
let val = (predicate.num_args() == 1)
- .then(|| predicate.str_arg(1))
+ .then(|| predicate.query_str_arg(1))
.transpose()?;
Ok(QueryProperty { key, val })
}
diff --git a/helix-syntax/src/tree_sitter/query_cursor.rs b/helix-syntax/src/tree_sitter/query_cursor.rs
index 368aeadf..83df365d 100644
--- a/helix-syntax/src/tree_sitter/query_cursor.rs
+++ b/helix-syntax/src/tree_sitter/query_cursor.rs
@@ -1,4 +1,5 @@
use core::slice;
+use std::cell::UnsafeCell;
use std::marker::PhantomData;
use std::mem::replace;
use std::ops::Range;
@@ -10,6 +11,15 @@ use crate::tree_sitter::{SyntaxTree, SyntaxTreeNode, TsInput};
enum QueryCursorData {}
+thread_local! {
+    // Per-thread pool of query cursors so that dropped cursors can be
+    // reused instead of reallocated through the C API.
+    static CURSOR_CACHE: UnsafeCell<Vec<InactiveQueryCursor>> = UnsafeCell::new(Vec::with_capacity(8));
+}
+
+/// SAFETY: `f` must not (directly or indirectly) call `with_cache`
+/// recursively, otherwise two `&mut` borrows of the thread-local cache
+/// would exist at the same time.
+unsafe fn with_cache<T>(f: impl FnOnce(&mut Vec<InactiveQueryCursor>) -> T) -> T {
+    CURSOR_CACHE.with(|cache| f(&mut *cache.get()))
+}
+
pub struct QueryCursor<'a, 'tree, I: TsInput> {
query: &'a Query,
ptr: *mut QueryCursorData,
@@ -115,8 +125,8 @@ impl<I: TsInput> Drop for QueryCursor<'_, '_, I> {
fn drop(&mut self) {
// we allow moving the cursor data out so we need the null check here
// would be cleaner with a subtype but doesn't really matter at the end of the day
- if !self.ptr.is_null() {
- unsafe { ts_query_cursor_delete(self.ptr) }
+ if let Some(ptr) = NonNull::new(self.ptr) {
+ unsafe { with_cache(|cache| cache.push(InactiveQueryCursor { ptr })) }
}
}
}
@@ -128,8 +138,12 @@ pub struct InactiveQueryCursor {
impl InactiveQueryCursor {
pub fn new() -> Self {
- InactiveQueryCursor {
- ptr: unsafe { NonNull::new_unchecked(ts_query_cursor_new()) },
+ unsafe {
+ with_cache(|cache| {
+ cache.pop().unwrap_or_else(|| InactiveQueryCursor {
+ ptr: NonNull::new_unchecked(ts_query_cursor_new()),
+ })
+ })
}
}
@@ -208,6 +222,16 @@ impl<'tree> QueryMatch<'_, 'tree> {
self.matched_nodes.iter()
}
+    /// Iterate over the nodes of this match captured by `capture`
+    /// (a quantified capture may yield several nodes).
+    pub fn nodes_for_capture(
+        &self,
+        capture: Capture,
+    ) -> impl Iterator<Item = &SyntaxTreeNode<'tree>> {
+        self.matched_nodes
+            .iter()
+            .filter(move |mat| mat.capture == capture)
+            .map(|mat| &mat.syntax_node)
+    }
+
pub fn matched_node(&self, i: MatchedNodeIdx) -> &MatchedNode {
&self.matched_nodes[i as usize]
}