Unnamed repository; edit this file 'description' to name the repository.
-rw-r--r--Cargo.lock1
-rw-r--r--helix-syntax/src/injections_tree.rs216
-rw-r--r--helix-syntax/src/lib.rs2
-rw-r--r--helix-syntax/src/tree_sitter.rs19
-rw-r--r--helix-syntax/src/tree_sitter/parser.rs44
-rw-r--r--helix-syntax/src/tree_sitter/query.rs56
-rw-r--r--helix-syntax/src/tree_sitter/query/predicate.rs137
-rw-r--r--helix-syntax/src/tree_sitter/query_captures.rs66
-rw-r--r--helix-syntax/src/tree_sitter/query_cursor.rs319
-rw-r--r--helix-syntax/src/tree_sitter/query_match.rs1
-rw-r--r--helix-syntax/src/tree_sitter/ropey.rs40
-rw-r--r--helix-syntax/src/tree_sitter/syntax_tree_node.rs3
12 files changed, 731 insertions, 173 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 610f7e65..397606e3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1433,6 +1433,7 @@ dependencies = [
"ahash",
"arc-swap",
"bitflags 2.6.0",
+ "cc",
"hashbrown 0.14.5",
"helix-stdx",
"libloading",
diff --git a/helix-syntax/src/injections_tree.rs b/helix-syntax/src/injections_tree.rs
index 793039a3..2290a0e6 100644
--- a/helix-syntax/src/injections_tree.rs
+++ b/helix-syntax/src/injections_tree.rs
@@ -1,15 +1,21 @@
use core::slice;
+use std::cell::RefCell;
use std::iter::Peekable;
+use std::mem::replace;
use std::sync::Arc;
use hashbrown::HashMap;
+use ropey::RopeSlice;
use slotmap::{new_key_type, SlotMap};
use crate::parse::LayerUpdateFlags;
-use crate::tree_sitter::SyntaxTree;
-use crate::{HighlightConfiguration, RopeProvider};
+use crate::tree_sitter::{
+ self, Capture, InactiveQueryCursor, Parser, Query, QueryCursor, RopeTsInput, SyntaxTree,
+ SyntaxTreeNode,
+};
+use crate::HighlightConfiguration;
-// TODO(perf): replace std::ops::Range with helix_core::Range once added
+// TODO(perf): replace std::ops::Range<usize> with helix_stdx::Range<u32> once added
type Range = std::ops::Range<usize>;
new_key_type! {
@@ -23,15 +29,16 @@ pub struct LanguageLayer {
pub(crate) parse_tree: Option<SyntaxTree>,
/// internal flags used during parsing to track incremental invalidation
pub(crate) flags: LayerUpdateFlags,
+ ranges: Vec<tree_sitter::Range>,
pub(crate) parent: Option<LayerId>,
- /// a list of **sorted** non-overlapping injection ranges note that
+ /// a list of **sorted** non-overlapping injection ranges. Note that
/// injection ranges are not relative to the start of this layer but the
/// start of the root layer
- pub(crate) injection_ranges: Box<[InjectionRange]>,
+ pub(crate) injections: Box<[Injection]>,
}
-#[derive(Debug)]
-pub(crate) struct InjectionRange {
+#[derive(Debug, Clone)]
+pub(crate) struct Injection {
pub byte_range: Range,
pub layer: LayerId,
}
@@ -39,11 +46,11 @@ pub(crate) struct InjectionRange {
impl LanguageLayer {
/// Returns the injection range **within this layers** that contains `idx`.
/// This function will not descend into nested injections
- pub(crate) fn injection_at_byte_idx(&self, idx: usize) -> Option<&InjectionRange> {
+ pub(crate) fn injection_at_byte_idx(&self, idx: usize) -> Option<&Injection> {
let i = self
- .injection_ranges
+ .injections
.partition_point(|range| range.byte_range.start <= idx);
- self.injection_ranges
+ self.injections
.get(i)
.filter(|injection| injection.byte_range.end > idx)
}
@@ -75,20 +82,187 @@ impl InjectionTree {
}
}
-struct ActiveInjection<'a> {
- injections: Peekable<slice::Iter<'a, InjectionTree>>,
- range: InjectionRange,
+#[derive(Clone)]
+pub struct MatchedNode {
+ pub capture: Capture,
+ pub byte_range: Range,
+}
+
+struct LayerQueryIter<'a> {
+ cursor: QueryCursor<'a, 'a, RopeTsInput<'a>>,
+ peeked: Option<MatchedNode>,
+}
+
+impl<'a> LayerQueryIter<'a> {
+ fn peek(&mut self) -> Option<&MatchedNode> {
+ if self.peeked.is_none() {
+ let (query_match, node_idx) = self.cursor.next_matched_node()?;
+ let matched_node = query_match.matched_node(node_idx);
+ self.peeked = Some(MatchedNode {
+ capture: matched_node.capture,
+ byte_range: matched_node.syntax_node.byte_range(),
+ });
+ }
+ self.peeked.as_ref()
+ }
+
+ fn consume(&mut self) -> MatchedNode {
+ self.peeked.take().unwrap()
+ }
}
-struct ActiveLayer<'a, State> {
- state: State,
- /// the query captures just for this layer
- layer_captures: Peekable<LayerQueryCaptures<'a>>,
+struct ActiveLayer<'a> {
+ query_iter: LayerQueryIter<'a>,
+ injections: Peekable<slice::Iter<'a, Injection>>,
}
-type LayerQueryCaptures<'a> = tree_sitter::QueryCaptures<'a, 'a, RopeProvider<'a>, &'a [u8]>;
+struct QueryBuilder<'a, 'tree> {
+ query: &'a Query,
+ node: &'a SyntaxTreeNode<'tree>,
+ src: RopeSlice<'a>,
+ injection_tree: &'a InjectionTree,
+}
+
+pub struct QueryIter<'a, 'tree> {
+ query_builder: Box<QueryBuilder<'a, 'tree>>,
+ active_layers: HashMap<LayerId, ActiveLayer<'a>>,
+ active_injections: Vec<Injection>,
+ current_injection: Injection,
+}
+
+impl<'a> QueryIter<'a, '_> {
+ fn enter_injection(&mut self, injection: Injection) -> bool {
+ self.active_layers
+ .entry(injection.layer)
+ .or_insert_with(|| {
+ let layer = &self.query_builder.injection_tree.layers[injection.layer];
+ let injection_start = layer
+ .injections
+ .partition_point(|child| child.byte_range.start < injection.byte_range.start);
+ let cursor = get_cursor().execute_query(
+ self.query_builder.query,
+ self.query_builder.node,
+ RopeTsInput::new(self.query_builder.src),
+ );
+ ActiveLayer {
+ query_iter: LayerQueryIter {
+ cursor,
+ peeked: None,
+ },
+ injections: layer.injections[injection_start..].iter().peekable(),
+ }
+ });
+ let old_injection = replace(&mut self.current_injection, injection);
+ self.active_injections.push(old_injection);
+ true
+ }
+
+ fn exit_injection(&mut self) -> Option<Injection> {
+ let injection = replace(&mut self.current_injection, self.active_injections.pop()?);
+ let finished_layer = self.active_layers[&injection.layer]
+ .query_iter
+ .peeked
+ .is_none();
+ if finished_layer {
+ let layer = self.active_layers.remove(&injection.layer).unwrap();
+ reuse_cursor(layer.query_iter.cursor.reuse());
+ }
+ Some(injection)
+ }
+}
+
+pub enum QueryIterEvent {
+ EnterInjection(Injection),
+ Match(MatchedNode),
+ ExitInjection(Injection),
+}
+
+impl<'a> Iterator for QueryIter<'a, '_> {
+ type Item = QueryIterEvent;
+
+ fn next(&mut self) -> Option<QueryIterEvent> {
+ loop {
+ let active_layer = self
+ .active_layers
+ .get_mut(&self.current_injection.layer)
+ .unwrap();
+ let next_injection = active_layer.injections.peek().filter(|injection| {
+ injection.byte_range.start < self.current_injection.byte_range.end
+ });
+ let next_match = active_layer.query_iter.peek().filter(|matched_node| {
+ matched_node.byte_range.start < self.current_injection.byte_range.end
+ });
+
+ match (next_match, next_injection) {
+ (None, None) => {
+ return self.exit_injection().map(QueryIterEvent::ExitInjection);
+ }
+ (Some(_), None) => {
+ // consume match
+ let matched_node = active_layer.query_iter.consume();
+ return Some(QueryIterEvent::Match(matched_node));
+ }
+ (Some(matched_node), Some(injection))
+ if matched_node.byte_range.start <= injection.byte_range.end =>
+ {
+ // consume match
+ let matched_node = active_layer.query_iter.consume();
+ // ignore nodes that are overlapped by the injection
+ if matched_node.byte_range.start <= injection.byte_range.start {
+ return Some(QueryIterEvent::Match(matched_node));
+ }
+ }
+ (Some(_), Some(_)) | (None, Some(_)) => {
+ // consume injection
+ let injection = active_layer.injections.next().unwrap();
+ if self.enter_injection(injection.clone()) {
+ return Some(QueryIterEvent::EnterInjection(injection.clone()));
+ }
+ }
+ }
+ }
+ }
+}
+
+struct TsParser {
+ parser: crate::tree_sitter::Parser,
+ pub cursors: Vec<crate::tree_sitter::InactiveQueryCursor>,
+}
+
+// could also just use a pool, or a single instance?
+thread_local! {
+ static PARSER: RefCell<TsParser> = RefCell::new(TsParser {
+ parser: Parser::new(),
+ cursors: Vec::new(),
+ })
+}
+
+pub fn with_cursor<T>(f: impl FnOnce(&mut InactiveQueryCursor) -> T) -> T {
+ PARSER.with(|parser| {
+ let mut parser = parser.borrow_mut();
+ let mut cursor = parser
+ .cursors
+ .pop()
+ .unwrap_or_else(InactiveQueryCursor::new);
+ let res = f(&mut cursor);
+ parser.cursors.push(cursor);
+ res
+ })
+}
+
+pub fn get_cursor() -> InactiveQueryCursor {
+ PARSER.with(|parser| {
+ let mut parser = parser.borrow_mut();
+ parser
+ .cursors
+ .pop()
+ .unwrap_or_else(InactiveQueryCursor::new)
+ })
+}
-pub struct QueryCaptures<'a> {
- active_layers: HashMap<LayerId, ActiveLayer<'a, ()>>,
- active_injections: Vec<ActiveInjection<'a>>,
+pub fn reuse_cursor(cursor: InactiveQueryCursor) {
+ PARSER.with(|parser| {
+ let mut parser = parser.borrow_mut();
+ parser.cursors.push(cursor)
+ })
}
diff --git a/helix-syntax/src/lib.rs b/helix-syntax/src/lib.rs
index 074f8727..b7331a3a 100644
--- a/helix-syntax/src/lib.rs
+++ b/helix-syntax/src/lib.rs
@@ -337,7 +337,7 @@ thread_local! {
pub fn with_cursor<T>(f: impl FnOnce(&mut QueryCursor) -> T) -> T {
PARSER.with(|parser| {
let mut parser = parser.borrow_mut();
- let mut cursor = parser.cursors.pop().unwrap_or_else(QueryCursor::new);
+ let mut cursor = parser.cursors.pop().unwrap_or_default();
let res = f(&mut cursor);
parser.cursors.push(cursor);
res
diff --git a/helix-syntax/src/tree_sitter.rs b/helix-syntax/src/tree_sitter.rs
index c2aa6fad..bb188d12 100644
--- a/helix-syntax/src/tree_sitter.rs
+++ b/helix-syntax/src/tree_sitter.rs
@@ -1,13 +1,19 @@
mod grammar;
mod parser;
mod query;
-mod query_captures;
+mod query_cursor;
+mod query_match;
mod ropey;
mod syntax_tree;
mod syntax_tree_node;
+use std::ops;
+
pub use grammar::Grammar;
pub use parser::{Parser, ParserInputRaw};
+pub use query::{Capture, ParserErrorLocation, Pattern, Query, QueryStr};
+pub use query_cursor::{InactiveQueryCursor, MatchedNode, MatchedNodeIdx, QueryCursor, QueryMatch};
+pub use ropey::RopeTsInput;
pub use syntax_tree::{InputEdit, SyntaxTree};
pub use syntax_tree_node::SyntaxTreeNode;
@@ -26,3 +32,14 @@ pub struct Range {
pub start_byte: u32,
pub end_byte: u32,
}
+
+pub trait TsInput {
+ type Cursor: regex_cursor::Cursor;
+ fn cursor_at(&mut self, offset: usize) -> &mut Self::Cursor;
+ fn eq(&mut self, range1: ops::Range<usize>, range2: ops::Range<usize>) -> bool;
+}
+
+pub trait IntoTsInput {
+ type TsInput: TsInput;
+ fn into_ts_input(self) -> Self::TsInput;
+}
diff --git a/helix-syntax/src/tree_sitter/parser.rs b/helix-syntax/src/tree_sitter/parser.rs
index 61163739..bcd5fa79 100644
--- a/helix-syntax/src/tree_sitter/parser.rs
+++ b/helix-syntax/src/tree_sitter/parser.rs
@@ -1,10 +1,12 @@
use std::os::raw::c_void;
-use std::panic::catch_unwind;
+use std::panic::{catch_unwind, AssertUnwindSafe};
use std::ptr::NonNull;
use std::{fmt, ptr};
+use regex_cursor::Cursor;
+
use crate::tree_sitter::syntax_tree::{SyntaxTree, SyntaxTreeData};
-use crate::tree_sitter::{Grammar, Point, Range};
+use crate::tree_sitter::{Grammar, IntoTsInput, Point, Range, TsInput};
// opaque data
enum ParserData {}
@@ -51,25 +53,28 @@ impl Parser {
}
#[must_use]
- pub fn parse<I: ParserInput>(
+ pub fn parse<I: TsInput>(
&mut self,
- input: impl IntoParserInput<ParserInput = I>,
+ input: impl IntoTsInput<TsInput = I>,
old_tree: Option<&SyntaxTree>,
) -> Option<SyntaxTree> {
- let mut input = input.into_parser_input();
- unsafe extern "C" fn read<C: ParserInput>(
+ let mut input = input.into_ts_input();
+ unsafe extern "C" fn read<C: TsInput>(
payload: NonNull<c_void>,
byte_index: u32,
_position: Point,
- bytes_read: &mut u32,
+ bytes_read: *mut u32,
) -> *const u8 {
- match catch_unwind(|| {
- let cursor: &mut C = payload.cast().as_mut();
- cursor.read(byte_index as usize)
- }) {
- Ok(slice) => {
- *bytes_read = slice.len() as u32;
- slice.as_ptr()
+ let cursor = catch_unwind(AssertUnwindSafe(move || {
+ let input: &mut C = payload.cast().as_mut();
+ let cursor = input.cursor_at(byte_index as usize);
+ let slice = cursor.chunk();
+ (slice.as_ptr(), slice.len().try_into().unwrap())
+ }));
+ match cursor {
+ Ok((ptr, len)) => {
+ *bytes_read = len;
+ ptr
}
Err(_) => {
*bytes_read = 0;
@@ -121,7 +126,7 @@ type TreeSitterReadFn = unsafe extern "C" fn(
payload: NonNull<c_void>,
byte_index: u32,
position: Point,
- bytes_read: &mut u32,
+ bytes_read: *mut u32,
) -> *const u8;
#[repr(C)]
@@ -132,15 +137,6 @@ pub struct ParserInputRaw {
pub encoding: u32,
}
-pub trait ParserInput {
- fn read(&mut self, offset: usize) -> &[u8];
-}
-
-pub trait IntoParserInput {
- type ParserInput;
- fn into_parser_input(self) -> Self::ParserInput;
-}
-
extern "C" {
/// Create a new parser
fn ts_parser_new() -> NonNull<ParserData>;
diff --git a/helix-syntax/src/tree_sitter/query.rs b/helix-syntax/src/tree_sitter/query.rs
index 514b8990..3fb1fc18 100644
--- a/helix-syntax/src/tree_sitter/query.rs
+++ b/helix-syntax/src/tree_sitter/query.rs
@@ -4,8 +4,6 @@ use std::path::{Path, PathBuf};
use std::ptr::NonNull;
use std::{slice, str};
-use regex_cursor::Cursor;
-
use crate::tree_sitter::query::predicate::{InvalidPredicateError, Predicate, TextPredicate};
use crate::tree_sitter::query::property::QueryProperty;
use crate::tree_sitter::Grammar;
@@ -13,20 +11,23 @@ use crate::tree_sitter::Grammar;
mod predicate;
mod property;
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct Pattern(pub(crate) u32);
+
pub enum QueryData {}
-pub(super) struct Pattern {
+pub(super) struct PatternData {
text_predicates: Range<u32>,
properties: Range<u32>,
}
pub struct Query {
- raw: NonNull<QueryData>,
+ pub(crate) raw: NonNull<QueryData>,
num_captures: u32,
num_strings: u32,
text_predicates: Vec<TextPredicate>,
properties: Vec<QueryProperty>,
- patterns: Box<[Pattern]>,
+ patterns: Box<[PatternData]>,
}
impl Query {
@@ -40,7 +41,7 @@ impl Query {
grammar: Grammar,
source: &str,
path: impl AsRef<Path>,
- mut custom_predicate: impl FnMut(Predicate) -> Result<(), InvalidPredicateError>,
+ mut custom_predicate: impl FnMut(Pattern, Predicate) -> Result<(), InvalidPredicateError>,
) -> Result<Self, ParseError> {
assert!(
source.len() <= i32::MAX as usize,
@@ -121,7 +122,7 @@ impl Query {
}
RawQueryError::Language => unreachable!("should be handled at grammar load"),
};
- return Err(err)
+ return Err(err);
};
// I am not going to bother with safety comments here, all of these are
@@ -141,7 +142,7 @@ impl Query {
let patterns: Result<_, ParseError> = (0..num_patterns)
.map(|pattern| {
query
- .parse_pattern_predicates(pattern, &mut custom_predicate)
+ .parse_pattern_predicates(Pattern(pattern), &mut custom_predicate)
.map_err(|err| ParseError::InvalidPredicate {
message: err.msg.into(),
location: ParserErrorLocation::new(
@@ -157,35 +158,6 @@ impl Query {
Ok(query)
}
- pub fn satsifies_text_predicate<C: Cursor>(
- &self,
- cursor: &mut regex_cursor::Input<C>,
- pattern: u32,
- ) {
- let text_predicates = self.patterns[pattern as usize].text_predicates;
- let text_predicates =
- &self.text_predicates[text_predicates.start as usize..text_predicates.end as usize];
- for predicate in text_predicates {
- match predicate.kind {
- predicate::TextPredicateKind::EqString(_) => todo!(),
- predicate::TextPredicateKind::EqCapture(_) => todo!(),
- predicate::TextPredicateKind::MatchString(_) => todo!(),
- predicate::TextPredicateKind::AnyString(_) => todo!(),
- }
- }
- }
-
- // fn parse_predicates(&mut self) {
- // let pattern_count = unsafe { ts_query_pattern_count(self.raw) };
-
- // let mut text_predicates = Vec::with_capacity(pattern_count as usize);
- // let mut property_predicates = Vec::with_capacity(pattern_count as usize);
- // let mut property_settings = Vec::with_capacity(pattern_count as usize);
- // let mut general_predicates = Vec::with_capacity(pattern_count as usize);
-
- // for i in 0..pattern_count {}
- // }
-
#[inline]
fn get_string(&self, str: QueryStr) -> &str {
let value_id = str.0;
@@ -218,10 +190,15 @@ impl Query {
}
}
- pub fn pattern_properies(&self, pattern_idx: u32) -> &[QueryProperty] {
- let range = self.patterns[pattern_idx as usize].properties.clone();
+ pub fn pattern_properies(&self, pattern_idx: Pattern) -> &[QueryProperty] {
+ let range = self.patterns[pattern_idx.0 as usize].properties.clone();
&self.properties[range.start as usize..range.end as usize]
}
+
+ pub(crate) fn pattern_text_predicates(&self, pattern_idx: u16) -> &[TextPredicate] {
+ let range = self.patterns[pattern_idx as usize].text_predicates.clone();
+ &self.text_predicates[range.start as usize..range.end as usize]
+ }
}
impl Drop for Query {
@@ -231,6 +208,7 @@ impl Drop for Query {
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[repr(transparent)]
pub struct Capture(u32);
impl Capture {
diff --git a/helix-syntax/src/tree_sitter/query/predicate.rs b/helix-syntax/src/tree_sitter/query/predicate.rs
index 8fac6cf7..7a2f858e 100644
--- a/helix-syntax/src/tree_sitter/query/predicate.rs
+++ b/helix-syntax/src/tree_sitter/query/predicate.rs
@@ -1,11 +1,16 @@
use std::error::Error;
+use std::iter::zip;
+use std::ops::Range;
use std::ptr::NonNull;
use std::{fmt, slice};
use crate::tree_sitter::query::property::QueryProperty;
-use crate::tree_sitter::query::{Capture, Pattern, Query, QueryData, QueryStr};
+use crate::tree_sitter::query::{Capture, Pattern, PatternData, Query, QueryData, QueryStr};
+use crate::tree_sitter::query_cursor::MatchedNode;
+use crate::tree_sitter::TsInput;
use regex_cursor::engines::meta::Regex;
+use regex_cursor::Cursor;
macro_rules! bail {
($($args:tt)*) => {{
@@ -29,25 +34,141 @@ pub(super) enum TextPredicateKind {
AnyString(Box<[QueryStr]>),
}
-pub(super) struct TextPredicate {
+pub(crate) struct TextPredicate {
capture: Capture,
kind: TextPredicateKind,
negated: bool,
match_all: bool,
}
+fn input_matches_str<I: TsInput>(str: &str, range: Range<usize>, input: &mut I) -> bool {
+ if str.len() != range.len() {
+ return false;
+ }
+ let mut str = str.as_bytes();
+ let cursor = input.cursor_at(range.start);
+ let start_in_chunk = range.start - cursor.offset();
+ if range.end - cursor.offset() <= cursor.chunk().len() {
+ // hotpath
+ return &cursor.chunk()[start_in_chunk..range.end - cursor.offset()] == str;
+ }
+ if cursor.chunk()[start_in_chunk..] != str[..cursor.chunk().len() - start_in_chunk] {
+ return false;
+ }
+ str = &str[..cursor.chunk().len() - start_in_chunk];
+ while cursor.advance() {
+ if str.len() <= cursor.chunk().len() {
+ return &cursor.chunk()[..range.end - cursor.offset()] == str;
+ }
+ if &str[..cursor.chunk().len()] != cursor.chunk() {
+ return false;
+ }
+ str = &str[cursor.chunk().len()..]
+ }
+ // buggy cursor/invalid range
+ false
+}
+
+fn inputs_match<I: TsInput>(str: &str, range: Range<usize>, input: &mut I) -> bool {
+ if str.len() != range.len() {
+ return false;
+ }
+ let mut str = str.as_bytes();
+ let cursor = input.cursor_at(range.start);
+ let start_in_chunk = range.start - cursor.offset();
+ if range.end - cursor.offset() <= cursor.chunk().len() {
+ // hotpath
+ return &cursor.chunk()[start_in_chunk..range.end - cursor.offset()] == str;
+ }
+ if cursor.chunk()[start_in_chunk..] != str[..cursor.chunk().len() - start_in_chunk] {
+ return false;
+ }
+ str = &str[..cursor.chunk().len() - start_in_chunk];
+ while cursor.advance() {
+ if str.len() <= cursor.chunk().len() {
+ return &cursor.chunk()[..range.end - cursor.offset()] == str;
+ }
+ if &str[..cursor.chunk().len()] != cursor.chunk() {
+ return false;
+ }
+ str = &str[cursor.chunk().len()..]
+ }
+ // buggy cursor/invalid range
+ false
+}
+
+impl TextPredicate {
+ /// Handles `match_all` and `negated`.
+ fn satisfied_helper(&self, mut nodes: impl Iterator<Item = bool>) -> bool {
+ if self.match_all {
+ nodes.all(|matched| matched != self.negated)
+ } else {
+ nodes.any(|matched| matched != self.negated)
+ }
+ }
+
+ pub fn satsified<I: TsInput>(
+ &self,
+ input: &mut I,
+ matched_nodes: &[MatchedNode],
+ query: &Query,
+ ) -> bool {
+ let mut capture_nodes = matched_nodes
+ .iter()
+ .filter(|matched_node| matched_node.capture == self.capture);
+ match self.kind {
+ TextPredicateKind::EqString(str) => self.satisfied_helper(capture_nodes.map(|node| {
+ let range = node.syntax_node.byte_range();
+ input_matches_str(query.get_string(str), range.clone(), input)
+ })),
+ TextPredicateKind::EqCapture(other_capture) => {
+ let mut other_nodes = matched_nodes
+ .iter()
+ .filter(|matched_node| matched_node.capture == other_capture);
+
+ let res = self.satisfied_helper(zip(&mut capture_nodes, &mut other_nodes).map(
+ |(node1, node2)| {
+ let range1 = node1.syntax_node.byte_range();
+ let range2 = node2.syntax_node.byte_range();
+ input.eq(range1, range2)
+ },
+ ));
+ let consumed_all = capture_nodes.next().is_none() && other_nodes.next().is_none();
+ res && (!self.match_all || consumed_all)
+ }
+ TextPredicateKind::MatchString(ref regex) => {
+ self.satisfied_helper(capture_nodes.map(|node| {
+ let range = node.syntax_node.byte_range();
+ let input = regex_cursor::Input::new(input.cursor_at(range.start)).range(range);
+ regex.is_match(input)
+ }))
+ }
+ TextPredicateKind::AnyString(ref strings) => {
+ let strings = strings.iter().map(|&str| query.get_string(str));
+ self.satisfied_helper(capture_nodes.map(|node| {
+ let range = node.syntax_node.byte_range();
+ strings
+ .clone()
+ .filter(|str| str.len() == range.len())
+ .any(|str| input_matches_str(str, range.clone(), input))
+ }))
+ }
+ }
+ }
+}
+
impl Query {
pub(super) fn parse_pattern_predicates(
&mut self,
- pattern_index: u32,
- mut custom_predicate: impl FnMut(Predicate) -> Result<(), InvalidPredicateError>,
- ) -> Result<Pattern, InvalidPredicateError> {
+ pattern: Pattern,
+ mut custom_predicate: impl FnMut(Pattern, Predicate) -> Result<(), InvalidPredicateError>,
+ ) -> Result<PatternData, InvalidPredicateError> {
let text_predicate_start = self.text_predicates.len() as u32;
let property_start = self.properties.len() as u32;
let predicate_steps = unsafe {
let mut len = 0u32;
- let raw_predicates = ts_query_predicates_for_pattern(self.raw, pattern_index, &mut len);
+ let raw_predicates = ts_query_predicates_for_pattern(self.raw, pattern.0, &mut len);
(len != 0)
.then(|| slice::from_raw_parts(raw_predicates, len as usize))
.unwrap_or_default()
@@ -118,10 +239,10 @@ impl Query {
// is and is-not are better handeled as custom predicates since interpreting is context dependent
// "is?" => property_predicates.push((QueryProperty::parse(&predicate), false)),
// "is-not?" => property_predicates.push((QueryProperty::parse(&predicate), true)),
- _ => custom_predicate(predicate)?,
+ _ => custom_predicate(pattern, predicate)?,
}
}
- Ok(Pattern {
+ Ok(PatternData {
text_predicates: text_predicate_start..self.text_predicates.len() as u32,
properties: property_start..self.properties.len() as u32,
})
diff --git a/helix-syntax/src/tree_sitter/query_captures.rs b/helix-syntax/src/tree_sitter/query_captures.rs
deleted file mode 100644
index 41a2c093..00000000
--- a/helix-syntax/src/tree_sitter/query_captures.rs
+++ /dev/null
@@ -1,66 +0,0 @@
-use std::ptr::{self, NonNull};
-
-use regex_cursor::Cursor;
-
-use crate::tree_sitter::query::Query;
-use crate::tree_sitter::syntax_tree_node::SyntaxTreeNodeRaw;
-
-enum QueryCursorData {}
-
-pub struct QueryCaptures<'a> {
- query: &'a Query,
- query_cursor: &'a mut QueryCursorData,
- text_cursor: regex_cursor::RopeyCursor<'a>,
-}
-
-impl<C: Cursor> QueryCaptures<'_, C> {
- fn next(&mut self) {
- let mut query_match = TSQueryMatch {
- id: 0,
- pattern_index: 0,
- capture_count: 0,
- captures: ptr::null(),
- };
- let mut capture_idx = 0;
- loop {
- let success = unsafe {
- ts_query_cursor_next_capture(
- &mut self.query_cursor,
- &mut query_match,
- &mut capture_idx,
- )
- };
- if !success {
- break;
- }
- }
- let mut input = regex_cursor::Input::new(self.text_cursor.clone());
- }
-}
-
-#[repr(C)]
-#[derive(Debug)]
-struct TSQueryCapture {
- node: SyntaxTreeNodeRaw,
- index: u32,
-}
-
-#[repr(C)]
-#[derive(Debug)]
-struct TSQueryMatch {
- id: u32,
- pattern_index: u16,
- capture_count: u16,
- captures: *const TSQueryCapture,
-}
-
-extern "C" {
- /// Advance to the next capture of the currently running query.
- /// If there is a capture, write its match to `*match` and its index within
- /// the matche's capture list to `*capture_index`. Otherwise, return `false`.
- fn ts_query_cursor_next_capture(
- self_: &mut QueryCursorData,
- match_: &mut TSQueryMatch,
- capture_index: &mut u32,
- ) -> bool;
-}
diff --git a/helix-syntax/src/tree_sitter/query_cursor.rs b/helix-syntax/src/tree_sitter/query_cursor.rs
new file mode 100644
index 00000000..368aeadf
--- /dev/null
+++ b/helix-syntax/src/tree_sitter/query_cursor.rs
@@ -0,0 +1,319 @@
+use core::slice;
+use std::marker::PhantomData;
+use std::mem::replace;
+use std::ops::Range;
+use std::ptr::{self, NonNull};
+
+use crate::tree_sitter::query::{Capture, Pattern, Query, QueryData};
+use crate::tree_sitter::syntax_tree_node::SyntaxTreeNodeRaw;
+use crate::tree_sitter::{SyntaxTree, SyntaxTreeNode, TsInput};
+
+enum QueryCursorData {}
+
+pub struct QueryCursor<'a, 'tree, I: TsInput> {
+ query: &'a Query,
+ ptr: *mut QueryCursorData,
+ tree: PhantomData<&'tree SyntaxTree>,
+ input: I,
+}
+
+impl<'tree, I: TsInput> QueryCursor<'_, 'tree, I> {
+ pub fn next_match(&mut self) -> Option<QueryMatch<'_, 'tree>> {
+ let mut query_match = TSQueryMatch {
+ id: 0,
+ pattern_index: 0,
+ capture_count: 0,
+ captures: ptr::null(),
+ };
+ loop {
+ let success = unsafe { ts_query_cursor_next_match(self.ptr, &mut query_match) };
+ if !success {
+ return None;
+ }
+ let matched_nodes = unsafe {
+ slice::from_raw_parts(
+ query_match.captures.cast(),
+ query_match.capture_count as usize,
+ )
+ };
+ let satisfies_predicates = self
+ .query
+ .pattern_text_predicates(query_match.pattern_index)
+ .iter()
+ .all(|predicate| predicate.satsified(&mut self.input, matched_nodes, self.query));
+ if satisfies_predicates {
+ let res = QueryMatch {
+ id: query_match.id,
+ pattern: Pattern(query_match.pattern_index as u32),
+ matched_nodes,
+ query_cursor: unsafe { &mut *self.ptr },
+ _tree: PhantomData,
+ };
+ return Some(res);
+ }
+ }
+ }
+
+ pub fn next_matched_node(&mut self) -> Option<(QueryMatch<'_, 'tree>, MatchedNodeIdx)> {
+ let mut query_match = TSQueryMatch {
+ id: 0,
+ pattern_index: 0,
+ capture_count: 0,
+ captures: ptr::null(),
+ };
+ let mut capture_idx = 0;
+ loop {
+ let success = unsafe {
+ ts_query_cursor_next_capture(self.ptr, &mut query_match, &mut capture_idx)
+ };
+ if !success {
+ return None;
+ }
+ let matched_nodes = unsafe {
+ slice::from_raw_parts(
+ query_match.captures.cast(),
+ query_match.capture_count as usize,
+ )
+ };
+ let satisfies_predicates = self
+ .query
+ .pattern_text_predicates(query_match.pattern_index)
+ .iter()
+ .all(|predicate| predicate.satsified(&mut self.input, matched_nodes, self.query));
+ if satisfies_predicates {
+ let res = QueryMatch {
+ id: query_match.id,
+ pattern: Pattern(query_match.pattern_index as u32),
+ matched_nodes,
+ query_cursor: unsafe { &mut *self.ptr },
+ _tree: PhantomData,
+ };
+ return Some((res, capture_idx));
+ } else {
+ unsafe {
+ ts_query_cursor_remove_match(self.ptr, query_match.id);
+ }
+ }
+ }
+ }
+
+ pub fn set_byte_range(&mut self, range: Range<usize>) {
+ unsafe {
+ ts_query_cursor_set_byte_range(self.ptr, range.start as u32, range.end as u32);
+ }
+ }
+
+ pub fn reuse(mut self) -> InactiveQueryCursor {
+ let ptr = replace(&mut self.ptr, ptr::null_mut());
+ InactiveQueryCursor {
+ ptr: unsafe { NonNull::new_unchecked(ptr) },
+ }
+ }
+}
+
+impl<I: TsInput> Drop for QueryCursor<'_, '_, I> {
+ fn drop(&mut self) {
+ // we allow moving the cursor data out so we need the null check here
+ // would be cleaner with a subtype but doesn't really matter at the end of the day
+ if !self.ptr.is_null() {
+ unsafe { ts_query_cursor_delete(self.ptr) }
+ }
+ }
+}
+
+/// A query cursor that is not actively associated with a query
+pub struct InactiveQueryCursor {
+ ptr: NonNull<QueryCursorData>,
+}
+
+impl InactiveQueryCursor {
+ pub fn new() -> Self {
+ InactiveQueryCursor {
+ ptr: unsafe { NonNull::new_unchecked(ts_query_cursor_new()) },
+ }
+ }
+
+ /// Return the maximum number of in-progress matches for this cursor.
+ #[doc(alias = "ts_query_cursor_match_limit")]
+ #[must_use]
+ pub fn match_limit(&self) -> u32 {
+ unsafe { ts_query_cursor_match_limit(self.ptr.as_ptr()) }
+ }
+
+ /// Set the maximum number of in-progress matches for this cursor. The
+ /// limit must be > 0 and <= 65536.
+ #[doc(alias = "ts_query_cursor_set_match_limit")]
+ pub fn set_match_limit(&mut self, limit: u32) {
+ unsafe {
+ ts_query_cursor_set_match_limit(self.ptr.as_ptr(), limit);
+ }
+ }
+
+ /// Check if, on its last execution, this cursor exceeded its maximum number
+ /// of in-progress matches.
+ #[doc(alias = "ts_query_cursor_did_exceed_match_limit")]
+ #[must_use]
+ pub fn did_exceed_match_limit(&self) -> bool {
+ unsafe { ts_query_cursor_did_exceed_match_limit(self.ptr.as_ptr()) }
+ }
+
+ pub fn set_byte_range(&mut self, range: Range<usize>) {
+ unsafe {
+ ts_query_cursor_set_byte_range(self.ptr.as_ptr(), range.start as u32, range.end as u32);
+ }
+ }
+
+ pub fn execute_query<'a, 'tree, I: TsInput>(
+ self,
+ query: &'a Query,
+ node: &SyntaxTreeNode<'tree>,
+ input: I,
+ ) -> QueryCursor<'a, 'tree, I> {
+ let ptr = self.ptr.as_ptr();
+ unsafe { ts_query_cursor_exec(self.ptr.as_ptr(), query.raw.as_ref(), node.as_raw()) };
+ QueryCursor {
+ query,
+ ptr,
+ tree: PhantomData,
+ input,
+ }
+ }
+}
+
+impl Drop for InactiveQueryCursor {
+ fn drop(&mut self) {
+ unsafe { ts_query_cursor_delete(self.ptr.as_ptr()) }
+ }
+}
+
+pub type MatchedNodeIdx = u32;
+
+#[repr(C)]
+#[derive(Clone)]
+pub struct MatchedNode<'tree> {
+ pub syntax_node: SyntaxTreeNode<'tree>,
+ pub capture: Capture,
+}
+
+pub struct QueryMatch<'cursor, 'tree> {
+ id: u32,
+ pattern: Pattern,
+ matched_nodes: &'cursor [MatchedNode<'tree>],
+ query_cursor: &'cursor mut QueryCursorData,
+ _tree: PhantomData<&'tree super::SyntaxTree>,
+}
+
+impl<'tree> QueryMatch<'_, 'tree> {
+ pub fn matched_nodes(&self) -> impl Iterator<Item = &MatchedNode<'tree>> {
+ self.matched_nodes.iter()
+ }
+
+ pub fn matched_node(&self, i: MatchedNodeIdx) -> &MatchedNode {
+ &self.matched_nodes[i as usize]
+ }
+
+ #[must_use]
+ pub const fn id(&self) -> u32 {
+ self.id
+ }
+
+ #[must_use]
+ pub const fn pattern(&self) -> Pattern {
+ self.pattern
+ }
+
+ #[doc(alias = "ts_query_cursor_remove_match")]
+ /// Removes this match from the cursor so that future captures
+ /// that belong to this match are no longer returned by capture
+ /// iterators.
+ pub fn remove(self) {
+ unsafe {
+ ts_query_cursor_remove_match(self.query_cursor, self.id);
+ }
+ }
+}
+
+#[repr(C)]
+#[derive(Debug)]
+struct TSQueryCapture {
+ node: SyntaxTreeNodeRaw,
+ index: u32,
+}
+
+#[repr(C)]
+#[derive(Debug)]
+struct TSQueryMatch {
+ id: u32,
+ pattern_index: u16,
+ capture_count: u16,
+ captures: *const TSQueryCapture,
+}
+
+extern "C" {
+ /// Advance to the next capture of the currently running query.
+ /// If there is a capture, write its match to `*match` and its index within
+ /// the match's capture list to `*capture_index`. Otherwise, return `false`.
+ fn ts_query_cursor_next_capture(
+ self_: *mut QueryCursorData,
+ match_: &mut TSQueryMatch,
+ capture_index: &mut u32,
+ ) -> bool;
+
+ /// Advance to the next match of the currently running query.
+ ///
+ /// If there is a match, write it to `*match` and return `true`.
+ /// Otherwise, return `false`.
+ pub fn ts_query_cursor_next_match(
+ self_: *mut QueryCursorData,
+ match_: &mut TSQueryMatch,
+ ) -> bool;
+ fn ts_query_cursor_remove_match(self_: *mut QueryCursorData, match_id: u32);
+ /// Delete a query cursor, freeing all of the memory that it used
+ pub fn ts_query_cursor_delete(self_: *mut QueryCursorData);
+ /// Create a new cursor for executing a given query.
+ /// The cursor stores the state that is needed to iteratively search
+ /// for matches. To use the query cursor, first call [`ts_query_cursor_exec`]
+ /// to start running a given query on a given syntax node. Then, there are
+ /// two options for consuming the results of the query:
+ /// 1. Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of the
+ /// *matches* in the order that they were found. Each match contains the
+ /// index of the pattern that matched, and an array of captures. Because
+ /// multiple patterns can match the same set of nodes, one match may contain
+ /// captures that appear *before* some of the captures from a previous match.
+ /// 2. Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all of the
+ /// individual *captures* in the order that they appear. This is useful if
+ /// don't care about which pattern matched, and just want a single ordered
+ /// sequence of captures.
+ /// If you don't care about consuming all of the results, you can stop calling
+ /// [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any point.
+ /// You can then start executing another query on another node by calling
+ /// [`ts_query_cursor_exec`] again.
+ pub fn ts_query_cursor_new() -> *mut QueryCursorData;
+
+ /// Start running a given query on a given node.
+ pub fn ts_query_cursor_exec(
+ self_: *mut QueryCursorData,
+ query: &QueryData,
+ node: SyntaxTreeNodeRaw,
+ );
+ /// Manage the maximum number of in-progress matches allowed by this query
+ /// cursor.
+ ///
+ /// Query cursors have an optional maximum capacity for storing lists of
+ /// in-progress captures. If this capacity is exceeded, then the
+ /// earliest-starting match will silently be dropped to make room for further
+ /// matches. This maximum capacity is optional — by default, query cursors allow
+ /// any number of pending matches, dynamically allocating new space for them as
+ /// needed as the query is executed.
+ pub fn ts_query_cursor_did_exceed_match_limit(self_: *const QueryCursorData) -> bool;
+ pub fn ts_query_cursor_match_limit(self_: *const QueryCursorData) -> u32;
+ pub fn ts_query_cursor_set_match_limit(self_: *mut QueryCursorData, limit: u32);
+ /// Set the range of bytes or (row, column) positions in which the query
+ /// will be executed.
+ pub fn ts_query_cursor_set_byte_range(
+ self_: *mut QueryCursorData,
+ start_byte: u32,
+ end_byte: u32,
+ );
+
+}
diff --git a/helix-syntax/src/tree_sitter/query_match.rs b/helix-syntax/src/tree_sitter/query_match.rs
new file mode 100644
index 00000000..b0f1eaaa
--- /dev/null
+++ b/helix-syntax/src/tree_sitter/query_match.rs
@@ -0,0 +1 @@
+pub struct QueryMatch {}
diff --git a/helix-syntax/src/tree_sitter/ropey.rs b/helix-syntax/src/tree_sitter/ropey.rs
index aa59b2f2..3e2354da 100644
--- a/helix-syntax/src/tree_sitter/ropey.rs
+++ b/helix-syntax/src/tree_sitter/ropey.rs
@@ -1,38 +1,54 @@
+use std::ops;
+
use regex_cursor::{Cursor, RopeyCursor};
use ropey::RopeSlice;
-use crate::tree_sitter::parser::{IntoParserInput, ParserInput};
+use crate::tree_sitter::{IntoTsInput, TsInput};
-pub struct RopeParserInput<'a> {
+pub struct RopeTsInput<'a> {
src: RopeSlice<'a>,
cursor: regex_cursor::RopeyCursor<'a>,
}
-impl<'a> IntoParserInput for RopeSlice<'a> {
- type ParserInput = RopeParserInput<'a>;
+impl<'a> RopeTsInput<'a> {
+ pub fn new(src: RopeSlice<'a>) -> Self {
+ RopeTsInput {
+ src,
+ cursor: regex_cursor::RopeyCursor::new(src),
+ }
+ }
+}
- fn into_parser_input(self) -> Self::ParserInput {
- RopeParserInput {
+impl<'a> IntoTsInput for RopeSlice<'a> {
+ type TsInput = RopeTsInput<'a>;
+
+ fn into_ts_input(self) -> Self::TsInput {
+ RopeTsInput {
src: self,
cursor: RopeyCursor::new(self),
}
}
}
-impl ParserInput for RopeParserInput<'_> {
- fn read(&mut self, offset: usize) -> &[u8] {
+impl<'a> TsInput for RopeTsInput<'a> {
+ type Cursor = RopeyCursor<'a>;
+ fn cursor_at(&mut self, offset: usize) -> &mut RopeyCursor<'a> {
// this cursor is optimized for contigous reads which are by far the most common during parsing
// very far jumps (like injections at the other end of the document) are handelde
- // by restarting a new cursor (new chunks iterator)
- if offset < self.cursor.offset() && self.cursor.offset() - offset > 4906 {
+ // by starting a new cursor (new chunks iterator). NOTE(review): in the condition below, the second operand only runs when offset >= self.cursor.offset(), so `self.cursor.offset() - offset` underflows usize on any forward jump (panics in debug builds) — presumably `offset - self.cursor.offset() > 4906` was intended; also confirm whether 4906 should be 4096.
+ if offset < self.cursor.offset() || self.cursor.offset() - offset > 4906 {
self.cursor = regex_cursor::RopeyCursor::at(self.src, offset);
} else {
while self.cursor.offset() + self.cursor.chunk().len() >= offset {
if !self.cursor.advance() {
- return &[];
+ break;
}
}
}
- self.cursor.chunk()
+ &mut self.cursor
+ }
+
+ fn eq(&mut self, range1: ops::Range<usize>, range2: ops::Range<usize>) -> bool {
+ self.src.byte_slice(range1) == self.src.byte_slice(range2)
}
}
diff --git a/helix-syntax/src/tree_sitter/syntax_tree_node.rs b/helix-syntax/src/tree_sitter/syntax_tree_node.rs
index b50c7969..1afca0e3 100644
--- a/helix-syntax/src/tree_sitter/syntax_tree_node.rs
+++ b/helix-syntax/src/tree_sitter/syntax_tree_node.rs
@@ -25,6 +25,7 @@ impl From<SyntaxTreeNode<'_>> for SyntaxTreeNodeRaw {
}
#[derive(Debug, Clone)]
+#[repr(C)]
pub struct SyntaxTreeNode<'tree> {
context: [u32; 4],
id: NonNull<c_void>,
@@ -44,7 +45,7 @@ impl<'tree> SyntaxTreeNode<'tree> {
}
#[inline]
- fn as_raw(&self) -> SyntaxTreeNodeRaw {
+ pub(crate) fn as_raw(&self) -> SyntaxTreeNodeRaw {
SyntaxTreeNodeRaw {
context: self.context,
id: self.id.as_ptr(),