Unnamed repository; edit this file 'description' to name the repository.
-rw-r--r-- Cargo.lock 18
-rw-r--r-- Cargo.toml 1
-rw-r--r-- helix-core/Cargo.toml 1
-rw-r--r-- helix-core/src/indent.rs 15
-rw-r--r-- helix-core/src/syntax.rs 1972
-rw-r--r-- helix-core/tests/indent.rs 10
-rw-r--r-- helix-syntax/Cargo.toml 28
-rw-r--r-- helix-syntax/src/config.rs 331
-rw-r--r-- helix-syntax/src/highlighter.rs 439
-rw-r--r-- helix-syntax/src/lib.rs 342
-rw-r--r-- helix-syntax/src/merge.rs 135
-rw-r--r-- helix-syntax/src/parse.rs 429
-rw-r--r-- helix-syntax/src/pretty_print.rs 65
-rw-r--r-- helix-syntax/src/ropey.rs 29
-rw-r--r-- helix-syntax/src/tree_cursor.rs 264
-rw-r--r-- helix-term/src/ui/markdown.rs 9
-rw-r--r-- helix-term/src/ui/picker/handlers.rs 7
-rw-r--r-- helix-view/src/document.rs 21
18 files changed, 2245 insertions, 1871 deletions
diff --git a/Cargo.lock b/Cargo.lock
index f1cd1632..aa8b613c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1311,6 +1311,7 @@ dependencies = [
"hashbrown 0.14.5",
"helix-loader",
"helix-stdx",
+ "helix-syntax",
"imara-diff",
"indoc",
"log",
@@ -1426,6 +1427,23 @@ dependencies = [
]
[[package]]
+name = "helix-syntax"
+version = "24.7.0"
+dependencies = [
+ "ahash",
+ "arc-swap",
+ "bitflags 2.6.0",
+ "hashbrown 0.14.5",
+ "helix-stdx",
+ "log",
+ "once_cell",
+ "regex",
+ "ropey",
+ "slotmap",
+ "tree-sitter",
+]
+
+[[package]]
name = "helix-term"
version = "24.7.0"
dependencies = [
diff --git a/Cargo.toml b/Cargo.toml
index e7f78442..c04e4ca2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,6 +12,7 @@ members = [
"helix-vcs",
"helix-parsec",
"helix-stdx",
+ "helix-syntax",
"xtask",
]
diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml
index 392b4a4c..d2eda758 100644
--- a/helix-core/Cargo.toml
+++ b/helix-core/Cargo.toml
@@ -17,6 +17,7 @@ integration = []
[dependencies]
helix-stdx = { path = "../helix-stdx" }
+helix-syntax = { path = "../helix-syntax" }
helix-loader = { path = "../helix-loader" }
ropey = { version = "1.6.1", default-features = false, features = ["simd"] }
diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs
index fd2b6c95..55685d0a 100644
--- a/helix-core/src/indent.rs
+++ b/helix-core/src/indent.rs
@@ -738,18 +738,9 @@ fn init_indent_query<'a, 'b>(
.map(|prec| prec.byte_range().end - 1..byte_pos + 1)
.unwrap_or(byte_pos..byte_pos + 1);
- crate::syntax::PARSER.with(|ts_parser| {
- let mut ts_parser = ts_parser.borrow_mut();
- let mut cursor = ts_parser.cursors.pop().unwrap_or_default();
- let query_result = query_indents(
- query,
- syntax,
- &mut cursor,
- text,
- query_range,
- new_line_byte_pos,
- );
- ts_parser.cursors.push(cursor);
+ crate::syntax::with_cursor(|cursor| {
+ let query_result =
+ query_indents(query, syntax, cursor, text, query_range, new_line_byte_pos);
(query_result, deepest_preceding)
})
};
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index 93f618c0..e9451ed4 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -1,41 +1,32 @@
-mod tree_cursor;
-
-use crate::{
- auto_pairs::AutoPairs,
- chars::char_is_line_ending,
- diagnostic::Severity,
- regex::Regex,
- transaction::{ChangeSet, Operation},
- RopeSlice, Tendril,
-};
+use std::borrow::Cow;
+use std::fmt::{self, Display};
+use std::path::{Path, PathBuf};
+use std::str::FromStr;
+use std::sync::Arc;
-use ahash::RandomState;
use arc_swap::{ArcSwap, Guard};
-use bitflags::bitflags;
use globset::GlobSet;
-use hashbrown::raw::RawTable;
-use helix_stdx::rope::{self, RopeSliceExt};
-use slotmap::{DefaultKey as LayerId, HopSlotMap};
-
-use std::{
- borrow::Cow,
- cell::RefCell,
- collections::{HashMap, HashSet, VecDeque},
- fmt::{self, Display, Write},
- hash::{Hash, Hasher},
- mem::replace,
- path::{Path, PathBuf},
- str::FromStr,
- sync::Arc,
+pub use helix_syntax::highlighter::{Highlight, HighlightEvent};
+pub use helix_syntax::{
+ merge, pretty_print_tree, HighlightConfiguration, InjectionLanguageMarker, RopeProvider,
+ TextObjectQuery, TreeCursor,
};
-
+pub use helix_syntax::{with_cursor, Syntax};
use once_cell::sync::{Lazy, OnceCell};
-use serde::{ser::SerializeSeq, Deserialize, Serialize};
+use regex::Regex;
+use ropey::RopeSlice;
+use serde::ser::SerializeSeq;
+use serde::{Deserialize, Serialize};
+use std::collections::{HashMap, HashSet};
+use tree_sitter::{Point, Query};
+
+use crate::auto_pairs::AutoPairs;
+use crate::chars::char_is_line_ending;
+use crate::diagnostic::Severity;
+use crate::{ChangeSet, Operation, Tendril};
use helix_loader::grammar::{get_language, load_runtime_file};
-pub use tree_cursor::TreeCursor;
-
fn deserialize_regex<'de, D>(deserializer: D) -> Result<Option<Regex>, D::Error>
where
D: serde::Deserializer<'de>,
@@ -171,6 +162,104 @@ pub struct LanguageConfiguration {
pub persistent_diagnostic_sources: Vec<String>,
}
+fn read_query(language: &str, filename: &str) -> String {
+ helix_syntax::read_query(language, filename, |lang, filename| {
+ load_runtime_file(lang, filename).unwrap_or_default()
+ })
+}
+impl LanguageConfiguration {
+ fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> {
+ let highlights_query = read_query(&self.language_id, "highlights.scm");
+ // always highlight syntax errors
+ // highlights_query += "\n(ERROR) @error";
+
+ let injections_query = read_query(&self.language_id, "injections.scm");
+ let locals_query = read_query(&self.language_id, "locals.scm");
+
+ if highlights_query.is_empty() {
+ None
+ } else {
+ let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id))
+ .map_err(|err| {
+ log::error!(
+ "Failed to load tree-sitter parser for language {:?}: {}",
+ self.language_id,
+ err
+ )
+ })
+ .ok()?;
+ let config = HighlightConfiguration::new(
+ language,
+ &highlights_query,
+ &injections_query,
+ &locals_query,
+ )
+ .map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?}", self.language_id, err))
+ .ok()?;
+
+ config.configure(scopes);
+ Some(Arc::new(config))
+ }
+ }
+
+ pub fn reconfigure(&self, scopes: &[String]) {
+ if let Some(Some(config)) = self.highlight_config.get() {
+ config.configure(scopes);
+ }
+ }
+
+ pub fn get_highlight_config(&self) -> Option<Arc<HighlightConfiguration>> {
+ self.highlight_config.get().cloned().flatten()
+ }
+
+ pub fn highlight_config(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> {
+ self.highlight_config
+ .get_or_init(|| self.initialize_highlight(scopes))
+ .clone()
+ }
+
+ pub fn is_highlight_initialized(&self) -> bool {
+ self.highlight_config.get().is_some()
+ }
+
+ pub fn indent_query(&self) -> Option<&Query> {
+ self.indent_query
+ .get_or_init(|| self.load_query("indents.scm"))
+ .as_ref()
+ }
+
+ pub fn textobject_query(&self) -> Option<&TextObjectQuery> {
+ self.textobject_query
+ .get_or_init(|| {
+ self.load_query("textobjects.scm")
+ .map(|query| TextObjectQuery { query })
+ })
+ .as_ref()
+ }
+
+ pub fn scope(&self) -> &str {
+ &self.scope
+ }
+
+ fn load_query(&self, kind: &str) -> Option<Query> {
+ let query_text = read_query(&self.language_id, kind);
+ if query_text.is_empty() {
+ return None;
+ }
+ let lang = &self.highlight_config.get()?.as_ref()?.language;
+ Query::new(lang, &query_text)
+ .map_err(|e| {
+ log::error!(
+ "Failed to parse {} queries for {}: {}",
+ kind,
+ self.language_id,
+ e
+ )
+ })
+ .ok()
+ }
+}
+
#[derive(Debug, PartialEq, Eq, Hash)]
pub enum FileType {
/// The extension of the file, either the `Path::extension` or the full
@@ -604,228 +693,6 @@ impl FromStr for AutoPairConfig {
}
}
-#[derive(Debug)]
-pub struct TextObjectQuery {
- pub query: Query,
-}
-
-#[derive(Debug)]
-pub enum CapturedNode<'a> {
- Single(Node<'a>),
- /// Guaranteed to be not empty
- Grouped(Vec<Node<'a>>),
-}
-
-impl<'a> CapturedNode<'a> {
- pub fn start_byte(&self) -> usize {
- match self {
- Self::Single(n) => n.start_byte(),
- Self::Grouped(ns) => ns[0].start_byte(),
- }
- }
-
- pub fn end_byte(&self) -> usize {
- match self {
- Self::Single(n) => n.end_byte(),
- Self::Grouped(ns) => ns.last().unwrap().end_byte(),
- }
- }
-
- pub fn byte_range(&self) -> std::ops::Range<usize> {
- self.start_byte()..self.end_byte()
- }
-}
-
-/// The maximum number of in-progress matches a TS cursor can consider at once.
-/// This is set to a constant in order to avoid performance problems for medium to large files. Set with `set_match_limit`.
-/// Using such a limit means that we lose valid captures, so there is fundamentally a tradeoff here.
-///
-///
-/// Old tree sitter versions used a limit of 32 by default until this limit was removed in version `0.19.5` (must now be set manually).
-/// However, this causes performance issues for medium to large files.
-/// In helix, this problem caused treesitter motions to take multiple seconds to complete in medium-sized rust files (3k loc).
-///
-///
-/// Neovim also encountered this problem and reintroduced this limit after it was removed upstream
-/// (see <https://github.com/neovim/neovim/issues/14897> and <https://github.com/neovim/neovim/pull/14915>).
-/// The number used here is fundamentally a tradeoff between breaking some obscure edge cases and performance.
-///
-///
-/// Neovim chose 64 for this value somewhat arbitrarily (<https://github.com/neovim/neovim/pull/18397>).
-/// 64 is too low for some languages though. In particular, it breaks some highlighting for record fields in Erlang record definitions.
-/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high.
-const TREE_SITTER_MATCH_LIMIT: u32 = 256;
-
-impl TextObjectQuery {
- /// Run the query on the given node and return sub nodes which match given
- /// capture ("function.inside", "class.around", etc).
- ///
- /// Captures may contain multiple nodes by using quantifiers (+, *, etc),
- /// and support for this is partial and could use improvement.
- ///
- /// ```query
- /// (comment)+ @capture
- ///
- /// ; OR
- /// (
- /// (comment)*
- /// .
- /// (function)
- /// ) @capture
- /// ```
- pub fn capture_nodes<'a>(
- &'a self,
- capture_name: &str,
- node: Node<'a>,
- slice: RopeSlice<'a>,
- cursor: &'a mut QueryCursor,
- ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
- self.capture_nodes_any(&[capture_name], node, slice, cursor)
- }
-
- /// Find the first capture that exists out of all given `capture_names`
- /// and return sub nodes that match this capture.
- pub fn capture_nodes_any<'a>(
- &'a self,
- capture_names: &[&str],
- node: Node<'a>,
- slice: RopeSlice<'a>,
- cursor: &'a mut QueryCursor,
- ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
- let capture_idx = capture_names
- .iter()
- .find_map(|cap| self.query.capture_index_for_name(cap))?;
-
- cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
-
- let nodes = cursor
- .captures(&self.query, node, RopeProvider(slice))
- .filter_map(move |(mat, _)| {
- let nodes: Vec<_> = mat
- .captures
- .iter()
- .filter_map(|cap| (cap.index == capture_idx).then_some(cap.node))
- .collect();
-
- if nodes.len() > 1 {
- Some(CapturedNode::Grouped(nodes))
- } else {
- nodes.into_iter().map(CapturedNode::Single).next()
- }
- });
-
- Some(nodes)
- }
-}
-
-pub fn read_query(language: &str, filename: &str) -> String {
- static INHERITS_REGEX: Lazy<Regex> =
- Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap());
-
- let query = load_runtime_file(language, filename).unwrap_or_default();
-
- // replaces all "; inherits <language>(,<language>)*" with the queries of the given language(s)
- INHERITS_REGEX
- .replace_all(&query, |captures: &regex::Captures| {
- captures[1]
- .split(',')
- .fold(String::new(), |mut output, language| {
- // `write!` to a String cannot fail.
- write!(output, "\n{}\n", read_query(language, filename)).unwrap();
- output
- })
- })
- .to_string()
-}
-
-impl LanguageConfiguration {
- fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> {
- let highlights_query = read_query(&self.language_id, "highlights.scm");
- // always highlight syntax errors
- // highlights_query += "\n(ERROR) @error";
-
- let injections_query = read_query(&self.language_id, "injections.scm");
- let locals_query = read_query(&self.language_id, "locals.scm");
-
- if highlights_query.is_empty() {
- None
- } else {
- let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id))
- .map_err(|err| {
- log::error!(
- "Failed to load tree-sitter parser for language {:?}: {}",
- self.language_id,
- err
- )
- })
- .ok()?;
- let config = HighlightConfiguration::new(
- language,
- &highlights_query,
- &injections_query,
- &locals_query,
- )
- .map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?}", self.language_id, err))
- .ok()?;
-
- config.configure(scopes);
- Some(Arc::new(config))
- }
- }
-
- pub fn reconfigure(&self, scopes: &[String]) {
- if let Some(Some(config)) = self.highlight_config.get() {
- config.configure(scopes);
- }
- }
-
- pub fn highlight_config(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> {
- self.highlight_config
- .get_or_init(|| self.initialize_highlight(scopes))
- .clone()
- }
-
- pub fn is_highlight_initialized(&self) -> bool {
- self.highlight_config.get().is_some()
- }
-
- pub fn indent_query(&self) -> Option<&Query> {
- self.indent_query
- .get_or_init(|| self.load_query("indents.scm"))
- .as_ref()
- }
-
- pub fn textobject_query(&self) -> Option<&TextObjectQuery> {
- self.textobject_query
- .get_or_init(|| {
- self.load_query("textobjects.scm")
- .map(|query| TextObjectQuery { query })
- })
- .as_ref()
- }
-
- pub fn scope(&self) -> &str {
- &self.scope
- }
-
- fn load_query(&self, kind: &str) -> Option<Query> {
- let query_text = read_query(&self.language_id, kind);
- if query_text.is_empty() {
- return None;
- }
- let lang = &self.highlight_config.get()?.as_ref()?.language;
- Query::new(lang, &query_text)
- .map_err(|e| {
- log::error!(
- "Failed to parse {} queries for {}: {}",
- kind,
- self.language_id,
- e
- )
- })
- .ok()
- }
-}
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(default, rename_all = "kebab-case", deny_unknown_fields)]
pub struct SoftWrap {
@@ -975,6 +842,8 @@ impl Loader {
source: RopeSlice,
) -> Option<Arc<LanguageConfiguration>> {
let line = Cow::from(source.line(0));
+ // TODO: reuse detection from helix-syntax
+ const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
static SHEBANG_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(&["^", SHEBANG].concat()).unwrap());
let configuration_id = SHEBANG_REGEX
@@ -1057,583 +926,7 @@ impl Loader {
}
}
-pub struct TsParser {
- parser: tree_sitter::Parser,
- pub cursors: Vec<QueryCursor>,
-}
-
-// could also just use a pool, or a single instance?
-thread_local! {
- pub static PARSER: RefCell<TsParser> = RefCell::new(TsParser {
- parser: Parser::new(),
- cursors: Vec::new(),
- })
-}
-
-#[derive(Debug)]
-pub struct Syntax {
- layers: HopSlotMap<LayerId, LanguageLayer>,
- root: LayerId,
- loader: Arc<ArcSwap<Loader>>,
-}
-
-fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<str> {
- Cow::from(source.byte_slice(range))
-}
-
-impl Syntax {
- pub fn new(
- source: RopeSlice,
- config: Arc<HighlightConfiguration>,
- loader: Arc<ArcSwap<Loader>>,
- ) -> Option<Self> {
- let root_layer = LanguageLayer {
- tree: None,
- config,
- depth: 0,
- flags: LayerUpdateFlags::empty(),
- ranges: vec![Range {
- start_byte: 0,
- end_byte: usize::MAX,
- start_point: Point::new(0, 0),
- end_point: Point::new(usize::MAX, usize::MAX),
- }],
- parent: None,
- };
-
- // track scope_descriptor: a Vec of scopes for item in tree
-
- let mut layers = HopSlotMap::default();
- let root = layers.insert(root_layer);
-
- let mut syntax = Self {
- root,
- layers,
- loader,
- };
-
- let res = syntax.update(source, source, &ChangeSet::new(source));
-
- if res.is_err() {
- log::error!("TS parser failed, disabling TS for the current buffer: {res:?}");
- return None;
- }
- Some(syntax)
- }
-
- pub fn update(
- &mut self,
- old_source: RopeSlice,
- source: RopeSlice,
- changeset: &ChangeSet,
- ) -> Result<(), Error> {
- let mut queue = VecDeque::new();
- queue.push_back(self.root);
-
- let loader = self.loader.load();
- let scopes = loader.scopes.load();
- let injection_callback = |language: &InjectionLanguageMarker| {
- loader
- .language_configuration_for_injection_string(language)
- .and_then(|language_config| language_config.highlight_config(&scopes))
- };
-
- // Convert the changeset into tree sitter edits.
- let edits = generate_edits(old_source, changeset);
-
- // This table allows inverse indexing of `layers`.
- // That is by hashing a `Layer` you can find
- // the `LayerId` of an existing equivalent `Layer` in `layers`.
- //
- // It is used to determine if a new layer exists for an injection
- // or if an existing layer needs to be updated.
- let mut layers_table = RawTable::with_capacity(self.layers.len());
- let layers_hasher = RandomState::new();
- // Use the edits to update all layers markers
- fn point_add(a: Point, b: Point) -> Point {
- if b.row > 0 {
- Point::new(a.row.saturating_add(b.row), b.column)
- } else {
- Point::new(0, a.column.saturating_add(b.column))
- }
- }
- fn point_sub(a: Point, b: Point) -> Point {
- if a.row > b.row {
- Point::new(a.row.saturating_sub(b.row), a.column)
- } else {
- Point::new(0, a.column.saturating_sub(b.column))
- }
- }
-
- for (layer_id, layer) in self.layers.iter_mut() {
- // The root layer always covers the whole range (0..usize::MAX)
- if layer.depth == 0 {
- layer.flags = LayerUpdateFlags::MODIFIED;
- continue;
- }
-
- if !edits.is_empty() {
- for range in &mut layer.ranges {
- // Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720
- for edit in edits.iter().rev() {
- let is_pure_insertion = edit.old_end_byte == edit.start_byte;
-
- // if edit is after range, skip
- if edit.start_byte > range.end_byte {
- // TODO: || (is_noop && edit.start_byte == range.end_byte)
- continue;
- }
-
- // if edit is before range, shift entire range by len
- if edit.old_end_byte < range.start_byte {
- range.start_byte =
- edit.new_end_byte + (range.start_byte - edit.old_end_byte);
- range.start_point = point_add(
- edit.new_end_position,
- point_sub(range.start_point, edit.old_end_position),
- );
-
- range.end_byte = edit
- .new_end_byte
- .saturating_add(range.end_byte - edit.old_end_byte);
- range.end_point = point_add(
- edit.new_end_position,
- point_sub(range.end_point, edit.old_end_position),
- );
-
- layer.flags |= LayerUpdateFlags::MOVED;
- }
- // if the edit starts in the space before and extends into the range
- else if edit.start_byte < range.start_byte {
- range.start_byte = edit.new_end_byte;
- range.start_point = edit.new_end_position;
-
- range.end_byte = range
- .end_byte
- .saturating_sub(edit.old_end_byte)
- .saturating_add(edit.new_end_byte);
- range.end_point = point_add(
- edit.new_end_position,
- point_sub(range.end_point, edit.old_end_position),
- );
- layer.flags = LayerUpdateFlags::MODIFIED;
- }
- // If the edit is an insertion at the start of the tree, shift
- else if edit.start_byte == range.start_byte && is_pure_insertion {
- range.start_byte = edit.new_end_byte;
- range.start_point = edit.new_end_position;
- layer.flags |= LayerUpdateFlags::MOVED;
- } else {
- range.end_byte = range
- .end_byte
- .saturating_sub(edit.old_end_byte)
- .saturating_add(edit.new_end_byte);
- range.end_point = point_add(
- edit.new_end_position,
- point_sub(range.end_point, edit.old_end_position),
- );
- layer.flags = LayerUpdateFlags::MODIFIED;
- }
- }
- }
- }
-
- let hash = layers_hasher.hash_one(layer);
- // Safety: insert_no_grow is unsafe because it assumes that the table
- // has enough capacity to hold additional elements.
- // This is always the case as we reserved enough capacity above.
- unsafe { layers_table.insert_no_grow(hash, layer_id) };
- }
-
- PARSER.with(|ts_parser| {
- let ts_parser = &mut ts_parser.borrow_mut();
- ts_parser.parser.set_timeout_micros(1000 * 500); // half a second is pretty generours
- let mut cursor = ts_parser.cursors.pop().unwrap_or_default();
- // TODO: might need to set cursor range
- cursor.set_byte_range(0..usize::MAX);
- cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
-
- let source_slice = source.slice(..);
-
- while let Some(layer_id) = queue.pop_front() {
- let layer = &mut self.layers[layer_id];
-
- // Mark the layer as touched
- layer.flags |= LayerUpdateFlags::TOUCHED;
-
- // If a tree already exists, notify it of changes.
- if let Some(tree) = &mut layer.tree {
- if layer
- .flags
- .intersects(LayerUpdateFlags::MODIFIED | LayerUpdateFlags::MOVED)
- {
- for edit in edits.iter().rev() {
- // Apply the edits in reverse.
- // If we applied them in order then edit 1 would disrupt the positioning of edit 2.
- tree.edit(edit);
- }
- }
-
- if layer.flags.contains(LayerUpdateFlags::MODIFIED) {
- // Re-parse the tree.
- layer.parse(&mut ts_parser.parser, source)?;
- }
- } else {
- // always parse if this layer has never been parsed before
- layer.parse(&mut ts_parser.parser, source)?;
- }
-
- // Switch to an immutable borrow.
- let layer = &self.layers[layer_id];
-
- // Process injections.
- let matches = cursor.matches(
- &layer.config.injections_query,
- layer.tree().root_node(),
- RopeProvider(source_slice),
- );
- let mut combined_injections = vec![
- (None, Vec::new(), IncludedChildren::default());
- layer.config.combined_injections_patterns.len()
- ];
- let mut injections = Vec::new();
- let mut last_injection_end = 0;
- for mat in matches {
- let (injection_capture, content_node, included_children) = layer
- .config
- .injection_for_match(&layer.config.injections_query, &mat, source_slice);
-
- // in case this is a combined injection save it for more processing later
- if let Some(combined_injection_idx) = layer
- .config
- .combined_injections_patterns
- .iter()
- .position(|&pattern| pattern == mat.pattern_index)
- {
- let entry = &mut combined_injections[combined_injection_idx];
- if injection_capture.is_some() {
- entry.0 = injection_capture;
- }
- if let Some(content_node) = content_node {
- if content_node.start_byte() >= last_injection_end {
- entry.1.push(content_node);
- last_injection_end = content_node.end_byte();
- }
- }
- entry.2 = included_children;
- continue;
- }
-
- // Explicitly remove this match so that none of its other captures will remain
- // in the stream of captures.
- mat.remove();
-
- // If a language is found with the given name, then add a new language layer
- // to the highlighted document.
- if let (Some(injection_capture), Some(content_node)) =
- (injection_capture, content_node)
- {
- if let Some(config) = (injection_callback)(&injection_capture) {
- let ranges =
- intersect_ranges(&layer.ranges, &[content_node], included_children);
-
- if !ranges.is_empty() {
- if content_node.start_byte() < last_injection_end {
- continue;
- }
- last_injection_end = content_node.end_byte();
- injections.push((config, ranges));
- }
- }
- }
- }
-
- for (lang_name, content_nodes, included_children) in combined_injections {
- if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
- if let Some(config) = (injection_callback)(&lang_name) {
- let ranges =
- intersect_ranges(&layer.ranges, &content_nodes, included_children);
- if !ranges.is_empty() {
- injections.push((config, ranges));
- }
- }
- }
- }
-
- let depth = layer.depth + 1;
- // TODO: can't inline this since matches borrows self.layers
- for (config, ranges) in injections {
- let parent = Some(layer_id);
- let new_layer = LanguageLayer {
- tree: None,
- config,
- depth,
- ranges,
- flags: LayerUpdateFlags::empty(),
- parent: None,
- };
-
- // Find an identical existing layer
- let layer = layers_table
- .get(layers_hasher.hash_one(&new_layer), |&it| {
- self.layers[it] == new_layer
- })
- .copied();
-
- // ...or insert a new one.
- let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer));
- self.layers[layer_id].parent = parent;
-
- queue.push_back(layer_id);
- }
-
- // TODO: pre-process local scopes at this time, rather than highlight?
- // would solve problems with locals not working across boundaries
- }
-
- // Return the cursor back in the pool.
- ts_parser.cursors.push(cursor);
-
- // Reset all `LayerUpdateFlags` and remove all untouched layers
- self.layers.retain(|_, layer| {
- replace(&mut layer.flags, LayerUpdateFlags::empty())
- .contains(LayerUpdateFlags::TOUCHED)
- });
-
- Ok(())
- })
- }
-
- pub fn tree(&self) -> &Tree {
- self.layers[self.root].tree()
- }
-
- /// Iterate over the highlighted regions for a given slice of source code.
- pub fn highlight_iter<'a>(
- &'a self,
- source: RopeSlice<'a>,
- range: Option<std::ops::Range<usize>>,
- cancellation_flag: Option<&'a AtomicUsize>,
- ) -> impl Iterator<Item = Result<HighlightEvent, Error>> + 'a {
- let mut layers = self
- .layers
- .iter()
- .filter_map(|(_, layer)| {
- // TODO: if range doesn't overlap layer range, skip it
-
- // Reuse a cursor from the pool if available.
- let mut cursor = PARSER.with(|ts_parser| {
- let highlighter = &mut ts_parser.borrow_mut();
- highlighter.cursors.pop().unwrap_or_default()
- });
-
- // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
- // prevents them from being moved. But both of these values are really just
- // pointers, so it's actually ok to move them.
- let cursor_ref =
- unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
-
- // if reusing cursors & no range this resets to whole range
- cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX));
- cursor_ref.set_match_limit(TREE_SITTER_MATCH_LIMIT);
-
- let mut captures = cursor_ref
- .captures(
- &layer.config.query,
- layer.tree().root_node(),
- RopeProvider(source),
- )
- .peekable();
-
- // If there's no captures, skip the layer
- captures.peek()?;
-
- Some(HighlightIterLayer {
- highlight_end_stack: Vec::new(),
- scope_stack: vec![LocalScope {
- inherits: false,
- range: 0..usize::MAX,
- local_defs: Vec::new(),
- }],
- cursor,
- _tree: None,
- captures: RefCell::new(captures),
- config: layer.config.as_ref(), // TODO: just reuse `layer`
- depth: layer.depth, // TODO: just reuse `layer`
- })
- })
- .collect::<Vec<_>>();
-
- layers.sort_unstable_by_key(|layer| layer.sort_key());
-
- let mut result = HighlightIter {
- source,
- byte_offset: range.map_or(0, |r| r.start),
- cancellation_flag,
- iter_count: 0,
- layers,
- next_event: None,
- last_highlight_range: None,
- };
- result.sort_layers();
- result
- }
-
- pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree {
- let mut container_id = self.root;
-
- for (layer_id, layer) in self.layers.iter() {
- if layer.depth > self.layers[container_id].depth
- && layer.contains_byte_range(start, end)
- {
- container_id = layer_id;
- }
- }
-
- self.layers[container_id].tree()
- }
-
- pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Node<'_>> {
- self.tree_for_byte_range(start, end)
- .root_node()
- .named_descendant_for_byte_range(start, end)
- }
-
- pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Node<'_>> {
- self.tree_for_byte_range(start, end)
- .root_node()
- .descendant_for_byte_range(start, end)
- }
-
- pub fn walk(&self) -> TreeCursor<'_> {
- // data structure to find the smallest range that contains a point
- // when some of the ranges in the structure can overlap.
- TreeCursor::new(&self.layers, self.root)
- }
-
- // Commenting
- // comment_strings_for_pos
- // is_commented
-
- // Indentation
- // suggested_indent_for_line_at_buffer_row
- // suggested_indent_for_buffer_row
- // indent_level_for_line
-
- // TODO: Folding
-}
-
-bitflags! {
- /// Flags that track the status of a layer
- /// in the `Sytaxn::update` function
- #[derive(Debug)]
- struct LayerUpdateFlags : u32{
- const MODIFIED = 0b001;
- const MOVED = 0b010;
- const TOUCHED = 0b100;
- }
-}
-
-#[derive(Debug)]
-pub struct LanguageLayer {
- // mode
- // grammar
- pub config: Arc<HighlightConfiguration>,
- pub(crate) tree: Option<Tree>,
- pub ranges: Vec<Range>,
- pub depth: u32,
- flags: LayerUpdateFlags,
- parent: Option<LayerId>,
-}
-
-/// This PartialEq implementation only checks if that
-/// two layers are theoretically identical (meaning they highlight the same text range with the same language).
-/// It does not check whether the layers have the same internal treesitter
-/// state.
-impl PartialEq for LanguageLayer {
- fn eq(&self, other: &Self) -> bool {
- self.depth == other.depth
- && self.config.language == other.config.language
- && self.ranges == other.ranges
- }
-}
-
-/// Hash implementation belongs to PartialEq implementation above.
-/// See its documentation for details.
-impl Hash for LanguageLayer {
- fn hash<H: Hasher>(&self, state: &mut H) {
- self.depth.hash(state);
- self.config.language.hash(state);
- self.ranges.hash(state);
- }
-}
-
-impl LanguageLayer {
- pub fn tree(&self) -> &Tree {
- // TODO: no unwrap
- self.tree.as_ref().unwrap()
- }
-
- fn parse(&mut self, parser: &mut Parser, source: RopeSlice) -> Result<(), Error> {
- parser
- .set_included_ranges(&self.ranges)
- .map_err(|_| Error::InvalidRanges)?;
-
- parser
- .set_language(&self.config.language)
- .map_err(|_| Error::InvalidLanguage)?;
-
- // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) };
- let tree = parser
- .parse_with(
- &mut |byte, _| {
- if byte <= source.len_bytes() {
- let (chunk, start_byte, _, _) = source.chunk_at_byte(byte);
- &chunk.as_bytes()[byte - start_byte..]
- } else {
- // out of range
- &[]
- }
- },
- self.tree.as_ref(),
- )
- .ok_or(Error::Cancelled)?;
- // unsafe { ts_parser.parser.set_cancellation_flag(None) };
- self.tree = Some(tree);
- Ok(())
- }
-
- /// Whether the layer contains the given byte range.
- ///
- /// If the layer has multiple ranges (i.e. combined injections), the
- /// given range is considered contained if it is within the start and
- /// end bytes of the first and last ranges **and** if the given range
- /// starts or ends within any of the layer's ranges.
- fn contains_byte_range(&self, start: usize, end: usize) -> bool {
- let layer_start = self
- .ranges
- .first()
- .expect("ranges should not be empty")
- .start_byte;
- let layer_end = self
- .ranges
- .last()
- .expect("ranges should not be empty")
- .end_byte;
-
- layer_start <= start
- && layer_end >= end
- && self.ranges.iter().any(|range| {
- let byte_range = range.start_byte..range.end_byte;
- byte_range.contains(&start) || byte_range.contains(&end)
- })
- }
-}
-
-pub(crate) fn generate_edits(
- old_text: RopeSlice,
- changeset: &ChangeSet,
-) -> Vec<tree_sitter::InputEdit> {
+pub fn generate_edits(old_text: RopeSlice, changeset: &ChangeSet) -> Vec<tree_sitter::InputEdit> {
use Operation::*;
let mut old_pos = 0;
@@ -1735,990 +1028,10 @@ pub(crate) fn generate_edits(
edits
}
-use std::sync::atomic::{AtomicUsize, Ordering};
-use std::{iter, mem, ops, str, usize};
-use tree_sitter::{
- Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError,
- QueryMatch, Range, TextProvider, Tree,
-};
-
-const CANCELLATION_CHECK_INTERVAL: usize = 100;
-
-/// Indicates which highlight should be applied to a region of source code.
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub struct Highlight(pub usize);
-
-/// Represents the reason why syntax highlighting failed.
-#[derive(Debug, PartialEq, Eq)]
-pub enum Error {
- Cancelled,
- InvalidLanguage,
- InvalidRanges,
- Unknown,
-}
-
-/// Represents a single step in rendering a syntax-highlighted document.
-#[derive(Copy, Clone, Debug)]
-pub enum HighlightEvent {
- Source { start: usize, end: usize },
- HighlightStart(Highlight),
- HighlightEnd,
-}
-
-/// Contains the data needed to highlight code written in a particular language.
-///
-/// This struct is immutable and can be shared between threads.
-#[derive(Debug)]
-pub struct HighlightConfiguration {
- pub language: Grammar,
- pub query: Query,
- injections_query: Query,
- combined_injections_patterns: Vec<usize>,
- highlights_pattern_index: usize,
- highlight_indices: ArcSwap<Vec<Option<Highlight>>>,
- non_local_variable_patterns: Vec<bool>,
- injection_content_capture_index: Option<u32>,
- injection_language_capture_index: Option<u32>,
- injection_filename_capture_index: Option<u32>,
- injection_shebang_capture_index: Option<u32>,
- local_scope_capture_index: Option<u32>,
- local_def_capture_index: Option<u32>,
- local_def_value_capture_index: Option<u32>,
- local_ref_capture_index: Option<u32>,
-}
-
-#[derive(Debug)]
-struct LocalDef<'a> {
- name: Cow<'a, str>,
- value_range: ops::Range<usize>,
- highlight: Option<Highlight>,
-}
-
-#[derive(Debug)]
-struct LocalScope<'a> {
- inherits: bool,
- range: ops::Range<usize>,
- local_defs: Vec<LocalDef<'a>>,
-}
-
-#[derive(Debug)]
-struct HighlightIter<'a> {
- source: RopeSlice<'a>,
- byte_offset: usize,
- cancellation_flag: Option<&'a AtomicUsize>,
- layers: Vec<HighlightIterLayer<'a>>,
- iter_count: usize,
- next_event: Option<HighlightEvent>,
- last_highlight_range: Option<(usize, usize, u32)>,
-}
-
-// Adapter to convert rope chunks to bytes
-pub struct ChunksBytes<'a> {
- chunks: ropey::iter::Chunks<'a>,
-}
-impl<'a> Iterator for ChunksBytes<'a> {
- type Item = &'a [u8];
- fn next(&mut self) -> Option<Self::Item> {
- self.chunks.next().map(str::as_bytes)
- }
-}
-
-pub struct RopeProvider<'a>(pub RopeSlice<'a>);
-impl<'a> TextProvider<&'a [u8]> for RopeProvider<'a> {
- type I = ChunksBytes<'a>;
-
- fn text(&mut self, node: Node) -> Self::I {
- let fragment = self.0.byte_slice(node.start_byte()..node.end_byte());
- ChunksBytes {
- chunks: fragment.chunks(),
- }
- }
-}
-
-struct HighlightIterLayer<'a> {
- _tree: Option<Tree>,
- cursor: QueryCursor,
- captures: RefCell<iter::Peekable<QueryCaptures<'a, 'a, RopeProvider<'a>, &'a [u8]>>>,
- config: &'a HighlightConfiguration,
- highlight_end_stack: Vec<usize>,
- scope_stack: Vec<LocalScope<'a>>,
- depth: u32,
-}
-
-impl<'a> fmt::Debug for HighlightIterLayer<'a> {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- f.debug_struct("HighlightIterLayer").finish()
- }
-}
-
-impl HighlightConfiguration {
- /// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting
- /// queries.
- ///
- /// # Parameters
- ///
- /// * `language` - The Tree-sitter `Grammar` that should be used for parsing.
- /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
- /// should be non-empty, otherwise no syntax highlights will be added.
- /// * `injections_query` - A string containing tree patterns for injecting other languages
- /// into the document. This can be empty if no injections are desired.
- /// * `locals_query` - A string containing tree patterns for tracking local variable
- /// definitions and references. This can be empty if local variable tracking is not needed.
- ///
- /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
- pub fn new(
- language: Grammar,
- highlights_query: &str,
- injection_query: &str,
- locals_query: &str,
- ) -> Result<Self, QueryError> {
- // Concatenate the query strings, keeping track of the start offset of each section.
- let mut query_source = String::new();
- query_source.push_str(locals_query);
- let highlights_query_offset = query_source.len();
- query_source.push_str(highlights_query);
-
- // Construct a single query by concatenating the three query strings, but record the
- // range of pattern indices that belong to each individual string.
- let query = Query::new(&language, &query_source)?;
- let mut highlights_pattern_index = 0;
- for i in 0..(query.pattern_count()) {
- let pattern_offset = query.start_byte_for_pattern(i);
- if pattern_offset < highlights_query_offset {
- highlights_pattern_index += 1;
- }
- }
-
- let injections_query = Query::new(&language, injection_query)?;
- let combined_injections_patterns = (0..injections_query.pattern_count())
- .filter(|&i| {
- injections_query
- .property_settings(i)
- .iter()
- .any(|s| &*s.key == "injection.combined")
- })
- .collect();
-
- // Find all of the highlighting patterns that are disabled for nodes that
- // have been identified as local variables.
- let non_local_variable_patterns = (0..query.pattern_count())
- .map(|i| {
- query
- .property_predicates(i)
- .iter()
- .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local")
- })
- .collect();
-
- // Store the numeric ids for all of the special captures.
- let mut injection_content_capture_index = None;
- let mut injection_language_capture_index = None;
- let mut injection_filename_capture_index = None;
- let mut injection_shebang_capture_index = None;
- let mut local_def_capture_index = None;
- let mut local_def_value_capture_index = None;
- let mut local_ref_capture_index = None;
- let mut local_scope_capture_index = None;
- for (i, name) in query.capture_names().iter().enumerate() {
- let i = Some(i as u32);
- match *name {
- "local.definition" => local_def_capture_index = i,
- "local.definition-value" => local_def_value_capture_index = i,
- "local.reference" => local_ref_capture_index = i,
- "local.scope" => local_scope_capture_index = i,
- _ => {}
- }
- }
-
- for (i, name) in injections_query.capture_names().iter().enumerate() {
- let i = Some(i as u32);
- match *name {
- "injection.content" => injection_content_capture_index = i,
- "injection.language" => injection_language_capture_index = i,
- "injection.filename" => injection_filename_capture_index = i,
- "injection.shebang" => injection_shebang_capture_index = i,
- _ => {}
- }
- }
-
- let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]);
- Ok(Self {
- language,
- query,
- injections_query,
- combined_injections_patterns,
- highlights_pattern_index,
- highlight_indices,
- non_local_variable_patterns,
- injection_content_capture_index,
- injection_language_capture_index,
- injection_filename_capture_index,
- injection_shebang_capture_index,
- local_scope_capture_index,
- local_def_capture_index,
- local_def_value_capture_index,
- local_ref_capture_index,
- })
- }
-
- /// Get a slice containing all of the highlight names used in the configuration.
- pub fn names(&self) -> &[&str] {
- self.query.capture_names()
- }
-
- /// Set the list of recognized highlight names.
- ///
- /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
- /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of
- /// these queries can choose to recognize highlights with different levels of specificity.
- /// For example, the string `function.builtin` will match against `function.builtin.constructor`
- /// but will not match `function.method.builtin` and `function.method`.
- ///
- /// When highlighting, results are returned as `Highlight` values, which contain the index
- /// of the matched highlight this list of highlight names.
- pub fn configure(&self, recognized_names: &[String]) {
- let mut capture_parts = Vec::new();
- let indices: Vec<_> = self
- .query
- .capture_names()
- .iter()
- .map(move |capture_name| {
- capture_parts.clear();
- capture_parts.extend(capture_name.split('.'));
-
- let mut best_index = None;
- let mut best_match_len = 0;
- for (i, recognized_name) in recognized_names.iter().enumerate() {
- let mut len = 0;
- let mut matches = true;
- for (i, part) in recognized_name.split('.').enumerate() {
- match capture_parts.get(i) {
- Some(capture_part) if *capture_part == part => len += 1,
- _ => {
- matches = false;
- break;
- }
- }
- }
- if matches && len > best_match_len {
- best_index = Some(i);
- best_match_len = len;
- }
- }
- best_index.map(Highlight)
- })
- .collect();
-
- self.highlight_indices.store(Arc::new(indices));
- }
-
- fn injection_pair<'a>(
- &self,
- query_match: &QueryMatch<'a, 'a>,
- source: RopeSlice<'a>,
- ) -> (Option<InjectionLanguageMarker<'a>>, Option<Node<'a>>) {
- let mut injection_capture = None;
- let mut content_node = None;
-
- for capture in query_match.captures {
- let index = Some(capture.index);
- if index == self.injection_language_capture_index {
- let name = byte_range_to_str(capture.node.byte_range(), source);
- injection_capture = Some(InjectionLanguageMarker::Name(name));
- } else if index == self.injection_filename_capture_index {
- let name = byte_range_to_str(capture.node.byte_range(), source);
- let path = Path::new(name.as_ref()).to_path_buf();
- injection_capture = Some(InjectionLanguageMarker::Filename(path.into()));
- } else if index == self.injection_shebang_capture_index {
- let node_slice = source.byte_slice(capture.node.byte_range());
-
- // some languages allow space and newlines before the actual string content
- // so a shebang could be on either the first or second line
- let lines = if let Ok(end) = node_slice.try_line_to_byte(2) {
- node_slice.byte_slice(..end)
- } else {
- node_slice
- };
-
- static SHEBANG_REGEX: Lazy<rope::Regex> =
- Lazy::new(|| rope::Regex::new(SHEBANG).unwrap());
-
- injection_capture = SHEBANG_REGEX
- .captures_iter(lines.regex_input())
- .map(|cap| {
- let cap = lines.byte_slice(cap.get_group(1).unwrap().range());
- InjectionLanguageMarker::Shebang(cap.into())
- })
- .next()
- } else if index == self.injection_content_capture_index {
- content_node = Some(capture.node);
- }
- }
- (injection_capture, content_node)
- }
-
- fn injection_for_match<'a>(
- &self,
- query: &'a Query,
- query_match: &QueryMatch<'a, 'a>,
- source: RopeSlice<'a>,
- ) -> (
- Option<InjectionLanguageMarker<'a>>,
- Option<Node<'a>>,
- IncludedChildren,
- ) {
- let (mut injection_capture, content_node) = self.injection_pair(query_match, source);
-
- let mut included_children = IncludedChildren::default();
- for prop in query.property_settings(query_match.pattern_index) {
- match prop.key.as_ref() {
- // In addition to specifying the language name via the text of a
- // captured node, it can also be hard-coded via a `#set!` predicate
- // that sets the injection.language key.
- "injection.language" if injection_capture.is_none() => {
- injection_capture = prop
- .value
- .as_ref()
- .map(|s| InjectionLanguageMarker::Name(s.as_ref().into()));
- }
-
- // By default, injections do not include the *children* of an
- // `injection.content` node - only the ranges that belong to the
- // node itself. This can be changed using a `#set!` predicate that
- // sets the `injection.include-children` key.
- "injection.include-children" => included_children = IncludedChildren::All,
-
- // Some queries might only exclude named children but include unnamed
- // children in their `injection.content` node. This can be enabled using
- // a `#set!` predicate that sets the `injection.include-unnamed-children` key.
- "injection.include-unnamed-children" => {
- included_children = IncludedChildren::Unnamed
- }
- _ => {}
- }
- }
-
- (injection_capture, content_node, included_children)
- }
-}
-
-impl<'a> HighlightIterLayer<'a> {
- // First, sort scope boundaries by their byte offset in the document. At a
- // given position, emit scope endings before scope beginnings. Finally, emit
- // scope boundaries from deeper layers first.
- fn sort_key(&self) -> Option<(usize, bool, isize)> {
- let depth = -(self.depth as isize);
- let next_start = self
- .captures
- .borrow_mut()
- .peek()
- .map(|(m, i)| m.captures[*i].node.start_byte());
- let next_end = self.highlight_end_stack.last().cloned();
- match (next_start, next_end) {
- (Some(start), Some(end)) => {
- if start < end {
- Some((start, true, depth))
- } else {
- Some((end, false, depth))
- }
- }
- (Some(i), None) => Some((i, true, depth)),
- (None, Some(j)) => Some((j, false, depth)),
- _ => None,
- }
- }
-}
-
-#[derive(Clone)]
-enum IncludedChildren {
- None,
- All,
- Unnamed,
-}
-
-impl Default for IncludedChildren {
- fn default() -> Self {
- Self::None
- }
-}
-
-// Compute the ranges that should be included when parsing an injection.
-// This takes into account three things:
-// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
-// * `nodes` - Every injection takes place within a set of nodes. The injection ranges
-// are the ranges of those nodes.
-// * `includes_children` - For some injections, the content nodes' children should be
-// excluded from the nested document, so that only the content nodes' *own* content
-// is reparsed. For other injections, the content nodes' entire ranges should be
-// reparsed, including the ranges of their children.
-fn intersect_ranges(
- parent_ranges: &[Range],
- nodes: &[Node],
- included_children: IncludedChildren,
-) -> Vec<Range> {
- let mut cursor = nodes[0].walk();
- let mut result = Vec::new();
- let mut parent_range_iter = parent_ranges.iter();
- let mut parent_range = parent_range_iter
- .next()
- .expect("Layers should only be constructed with non-empty ranges vectors");
- for node in nodes.iter() {
- let mut preceding_range = Range {
- start_byte: 0,
- start_point: Point::new(0, 0),
- end_byte: node.start_byte(),
- end_point: node.start_position(),
- };
- let following_range = Range {
- start_byte: node.end_byte(),
- start_point: node.end_position(),
- end_byte: usize::MAX,
- end_point: Point::new(usize::MAX, usize::MAX),
- };
-
- for excluded_range in node
- .children(&mut cursor)
- .filter_map(|child| match included_children {
- IncludedChildren::None => Some(child.range()),
- IncludedChildren::All => None,
- IncludedChildren::Unnamed => {
- if child.is_named() {
- Some(child.range())
- } else {
- None
- }
- }
- })
- .chain([following_range].iter().cloned())
- {
- let mut range = Range {
- start_byte: preceding_range.end_byte,
- start_point: preceding_range.end_point,
- end_byte: excluded_range.start_byte,
- end_point: excluded_range.start_point,
- };
- preceding_range = excluded_range;
-
- if range.end_byte < parent_range.start_byte {
- continue;
- }
-
- while parent_range.start_byte <= range.end_byte {
- if parent_range.end_byte > range.start_byte {
- if range.start_byte < parent_range.start_byte {
- range.start_byte = parent_range.start_byte;
- range.start_point = parent_range.start_point;
- }
-
- if parent_range.end_byte < range.end_byte {
- if range.start_byte < parent_range.end_byte {
- result.push(Range {
- start_byte: range.start_byte,
- start_point: range.start_point,
- end_byte: parent_range.end_byte,
- end_point: parent_range.end_point,
- });
- }
- range.start_byte = parent_range.end_byte;
- range.start_point = parent_range.end_point;
- } else {
- if range.start_byte < range.end_byte {
- result.push(range);
- }
- break;
- }
- }
-
- if let Some(next_range) = parent_range_iter.next() {
- parent_range = next_range;
- } else {
- return result;
- }
- }
- }
- }
- result
-}
-
-impl<'a> HighlightIter<'a> {
- fn emit_event(
- &mut self,
- offset: usize,
- event: Option<HighlightEvent>,
- ) -> Option<Result<HighlightEvent, Error>> {
- let result;
- if self.byte_offset < offset {
- result = Some(Ok(HighlightEvent::Source {
- start: self.byte_offset,
- end: offset,
- }));
- self.byte_offset = offset;
- self.next_event = event;
- } else {
- result = event.map(Ok);
- }
- self.sort_layers();
- result
- }
-
- fn sort_layers(&mut self) {
- while !self.layers.is_empty() {
- if let Some(sort_key) = self.layers[0].sort_key() {
- let mut i = 0;
- while i + 1 < self.layers.len() {
- if let Some(next_offset) = self.layers[i + 1].sort_key() {
- if next_offset < sort_key {
- i += 1;
- continue;
- }
- } else {
- let layer = self.layers.remove(i + 1);
- PARSER.with(|ts_parser| {
- let highlighter = &mut ts_parser.borrow_mut();
- highlighter.cursors.push(layer.cursor);
- });
- }
- break;
- }
- if i > 0 {
- self.layers[0..(i + 1)].rotate_left(1);
- }
- break;
- } else {
- let layer = self.layers.remove(0);
- PARSER.with(|ts_parser| {
- let highlighter = &mut ts_parser.borrow_mut();
- highlighter.cursors.push(layer.cursor);
- });
- }
- }
- }
-}
-
-impl<'a> Iterator for HighlightIter<'a> {
- type Item = Result<HighlightEvent, Error>;
-
- fn next(&mut self) -> Option<Self::Item> {
- 'main: loop {
- // If we've already determined the next highlight boundary, just return it.
- if let Some(e) = self.next_event.take() {
- return Some(Ok(e));
- }
-
- // Periodically check for cancellation, returning `Cancelled` error if the
- // cancellation flag was flipped.
- if let Some(cancellation_flag) = self.cancellation_flag {
- self.iter_count += 1;
- if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
- self.iter_count = 0;
- if cancellation_flag.load(Ordering::Relaxed) != 0 {
- return Some(Err(Error::Cancelled));
- }
- }
- }
-
- // If none of the layers have any more highlight boundaries, terminate.
- if self.layers.is_empty() {
- let len = self.source.len_bytes();
- return if self.byte_offset < len {
- let result = Some(Ok(HighlightEvent::Source {
- start: self.byte_offset,
- end: len,
- }));
- self.byte_offset = len;
- result
- } else {
- None
- };
- }
-
- // Get the next capture from whichever layer has the earliest highlight boundary.
- let range;
- let layer = &mut self.layers[0];
- let captures = layer.captures.get_mut();
- if let Some((next_match, capture_index)) = captures.peek() {
- let next_capture = next_match.captures[*capture_index];
- range = next_capture.node.byte_range();
-
- // If any previous highlight ends before this node starts, then before
- // processing this capture, emit the source code up until the end of the
- // previous highlight, and an end event for that highlight.
- if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
- if end_byte <= range.start {
- layer.highlight_end_stack.pop();
- return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
- }
- }
- }
- // If there are no more captures, then emit any remaining highlight end events.
- // And if there are none of those, then just advance to the end of the document.
- else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
- layer.highlight_end_stack.pop();
- return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
- } else {
- return self.emit_event(self.source.len_bytes(), None);
- };
-
- let (mut match_, capture_index) = captures.next().unwrap();
- let mut capture = match_.captures[capture_index];
-
- // Remove from the local scope stack any local scopes that have already ended.
- while range.start > layer.scope_stack.last().unwrap().range.end {
- layer.scope_stack.pop();
- }
-
- // If this capture is for tracking local variables, then process the
- // local variable info.
- let mut reference_highlight = None;
- let mut definition_highlight = None;
- while match_.pattern_index < layer.config.highlights_pattern_index {
- // If the node represents a local scope, push a new local scope onto
- // the scope stack.
- if Some(capture.index) == layer.config.local_scope_capture_index {
- definition_highlight = None;
- let mut scope = LocalScope {
- inherits: true,
- range: range.clone(),
- local_defs: Vec::new(),
- };
- for prop in layer.config.query.property_settings(match_.pattern_index) {
- if let "local.scope-inherits" = prop.key.as_ref() {
- scope.inherits =
- prop.value.as_ref().map_or(true, |r| r.as_ref() == "true");
- }
- }
- layer.scope_stack.push(scope);
- }
- // If the node represents a definition, add a new definition to the
- // local scope at the top of the scope stack.
- else if Some(capture.index) == layer.config.local_def_capture_index {
- reference_highlight = None;
- let scope = layer.scope_stack.last_mut().unwrap();
-
- let mut value_range = 0..0;
- for capture in match_.captures {
- if Some(capture.index) == layer.config.local_def_value_capture_index {
- value_range = capture.node.byte_range();
- }
- }
-
- let name = byte_range_to_str(range.clone(), self.source);
- scope.local_defs.push(LocalDef {
- name,
- value_range,
- highlight: None,
- });
- definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight);
- }
- // If the node represents a reference, then try to find the corresponding
- // definition in the scope stack.
- else if Some(capture.index) == layer.config.local_ref_capture_index
- && definition_highlight.is_none()
- {
- definition_highlight = None;
- let name = byte_range_to_str(range.clone(), self.source);
- for scope in layer.scope_stack.iter().rev() {
- if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
- if def.name == name && range.start >= def.value_range.end {
- Some(def.highlight)
- } else {
- None
- }
- }) {
- reference_highlight = highlight;
- break;
- }
- if !scope.inherits {
- break;
- }
- }
- }
-
- // Continue processing any additional matches for the same node.
- if let Some((next_match, next_capture_index)) = captures.peek() {
- let next_capture = next_match.captures[*next_capture_index];
- if next_capture.node == capture.node {
- capture = next_capture;
- match_ = captures.next().unwrap().0;
- continue;
- }
- }
-
- self.sort_layers();
- continue 'main;
- }
-
- // Otherwise, this capture must represent a highlight.
- // If this exact range has already been highlighted by an earlier pattern, or by
- // a different layer, then skip over this one.
- if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
- if range.start == last_start && range.end == last_end && layer.depth < last_depth {
- self.sort_layers();
- continue 'main;
- }
- }
-
- // If the current node was found to be a local variable, then skip over any
- // highlighting patterns that are disabled for local variables.
- if definition_highlight.is_some() || reference_highlight.is_some() {
- while layer.config.non_local_variable_patterns[match_.pattern_index] {
- match_.remove();
- if let Some((next_match, next_capture_index)) = captures.peek() {
- let next_capture = next_match.captures[*next_capture_index];
- if next_capture.node == capture.node {
- capture = next_capture;
- match_ = captures.next().unwrap().0;
- continue;
- }
- }
-
- self.sort_layers();
- continue 'main;
- }
- }
-
- // Once a highlighting pattern is found for the current node, skip over
- // any later highlighting patterns that also match this node. Captures
- // for a given node are ordered by pattern index, so these subsequent
- // captures are guaranteed to be for highlighting, not injections or
- // local variables.
- while let Some((next_match, next_capture_index)) = captures.peek() {
- let next_capture = next_match.captures[*next_capture_index];
- if next_capture.node == capture.node {
- captures.next();
- } else {
- break;
- }
- }
-
- let current_highlight = layer.config.highlight_indices.load()[capture.index as usize];
-
- // If this node represents a local definition, then store the current
- // highlight value on the local scope entry representing this node.
- if let Some(definition_highlight) = definition_highlight {
- *definition_highlight = current_highlight;
- }
-
- // Emit a scope start event and push the node's end position to the stack.
- if let Some(highlight) = reference_highlight.or(current_highlight) {
- self.last_highlight_range = Some((range.start, range.end, layer.depth));
- layer.highlight_end_stack.push(range.end);
- return self
- .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
- }
-
- self.sort_layers();
- }
- }
-}
-
-#[derive(Debug, Clone)]
-pub enum InjectionLanguageMarker<'a> {
- Name(Cow<'a, str>),
- Filename(Cow<'a, Path>),
- Shebang(String),
-}
-
-const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
-
-pub struct Merge<I> {
- iter: I,
- spans: Box<dyn Iterator<Item = (usize, std::ops::Range<usize>)>>,
-
- next_event: Option<HighlightEvent>,
- next_span: Option<(usize, std::ops::Range<usize>)>,
-
- queue: Vec<HighlightEvent>,
-}
-
-/// Merge a list of spans into the highlight event stream.
-pub fn merge<I: Iterator<Item = HighlightEvent>>(
- iter: I,
- spans: Vec<(usize, std::ops::Range<usize>)>,
-) -> Merge<I> {
- let spans = Box::new(spans.into_iter());
- let mut merge = Merge {
- iter,
- spans,
- next_event: None,
- next_span: None,
- queue: Vec::new(),
- };
- merge.next_event = merge.iter.next();
- merge.next_span = merge.spans.next();
- merge
-}
-
-impl<I: Iterator<Item = HighlightEvent>> Iterator for Merge<I> {
- type Item = HighlightEvent;
- fn next(&mut self) -> Option<Self::Item> {
- use HighlightEvent::*;
- if let Some(event) = self.queue.pop() {
- return Some(event);
- }
-
- loop {
- match (self.next_event, &self.next_span) {
- // this happens when range is partially or fully offscreen
- (Some(Source { start, .. }), Some((span, range))) if start > range.start => {
- if start > range.end {
- self.next_span = self.spans.next();
- } else {
- self.next_span = Some((*span, start..range.end));
- };
- }
- _ => break,
- }
- }
-
- match (self.next_event, &self.next_span) {
- (Some(HighlightStart(i)), _) => {
- self.next_event = self.iter.next();
- Some(HighlightStart(i))
- }
- (Some(HighlightEnd), _) => {
- self.next_event = self.iter.next();
- Some(HighlightEnd)
- }
- (Some(Source { start, end }), Some((_, range))) if start < range.start => {
- let intersect = range.start.min(end);
- let event = Source {
- start,
- end: intersect,
- };
-
- if end == intersect {
- // the event is complete
- self.next_event = self.iter.next();
- } else {
- // subslice the event
- self.next_event = Some(Source {
- start: intersect,
- end,
- });
- };
-
- Some(event)
- }
- (Some(Source { start, end }), Some((span, range))) if start == range.start => {
- let intersect = range.end.min(end);
- let event = HighlightStart(Highlight(*span));
-
- // enqueue in reverse order
- self.queue.push(HighlightEnd);
- self.queue.push(Source {
- start,
- end: intersect,
- });
-
- if end == intersect {
- // the event is complete
- self.next_event = self.iter.next();
- } else {
- // subslice the event
- self.next_event = Some(Source {
- start: intersect,
- end,
- });
- };
-
- if intersect == range.end {
- self.next_span = self.spans.next();
- } else {
- self.next_span = Some((*span, intersect..range.end));
- }
-
- Some(event)
- }
- (Some(event), None) => {
- self.next_event = self.iter.next();
- Some(event)
- }
- // Can happen if cursor at EOF and/or diagnostic reaches past the end.
- // We need to actually emit events for the cursor-at-EOF situation,
- // even though the range is past the end of the text. This needs to be
- // handled appropriately by the drawing code by not assuming that
- // all `Source` events point to valid indices in the rope.
- (None, Some((span, range))) => {
- let event = HighlightStart(Highlight(*span));
- self.queue.push(HighlightEnd);
- self.queue.push(Source {
- start: range.start,
- end: range.end,
- });
- self.next_span = self.spans.next();
- Some(event)
- }
- (None, None) => None,
- e => unreachable!("{:?}", e),
- }
- }
-}
-
-fn node_is_visible(node: &Node) -> bool {
- node.is_missing() || (node.is_named() && node.language().node_kind_is_visible(node.kind_id()))
-}
-
-pub fn pretty_print_tree<W: fmt::Write>(fmt: &mut W, node: Node) -> fmt::Result {
- if node.child_count() == 0 {
- if node_is_visible(&node) {
- write!(fmt, "({})", node.kind())
- } else {
- write!(fmt, "\"{}\"", node.kind())
- }
- } else {
- pretty_print_tree_impl(fmt, &mut node.walk(), 0)
- }
-}
-
-fn pretty_print_tree_impl<W: fmt::Write>(
- fmt: &mut W,
- cursor: &mut tree_sitter::TreeCursor,
- depth: usize,
-) -> fmt::Result {
- let node = cursor.node();
- let visible = node_is_visible(&node);
-
- if visible {
- let indentation_columns = depth * 2;
- write!(fmt, "{:indentation_columns$}", "")?;
-
- if let Some(field_name) = cursor.field_name() {
- write!(fmt, "{}: ", field_name)?;
- }
-
- write!(fmt, "({}", node.kind())?;
- }
-
- // Handle children.
- if cursor.goto_first_child() {
- loop {
- if node_is_visible(&cursor.node()) {
- fmt.write_char('\n')?;
- }
-
- pretty_print_tree_impl(fmt, cursor, depth + 1)?;
-
- if !cursor.goto_next_sibling() {
- break;
- }
- }
-
- let moved = cursor.goto_parent();
- // The parent of the first child must exist, and must be `node`.
- debug_assert!(moved);
- debug_assert!(cursor.node() == node);
- }
-
- if visible {
- fmt.write_char(')')?;
- }
-
- Ok(())
-}
-
#[cfg(test)]
mod test {
+ use tree_sitter::QueryCursor;
+
use super::*;
use crate::{Rope, Transaction};
@@ -2736,11 +1049,6 @@ mod test {
"#,
);
- let loader = Loader::new(Configuration {
- language: vec![],
- language_server: HashMap::new(),
- })
- .unwrap();
let language = get_language("rust").unwrap();
let query = Query::new(&language, query_str).unwrap();
@@ -2748,12 +1056,7 @@ mod test {
let mut cursor = QueryCursor::new();
let config = HighlightConfiguration::new(language, "", "", "").unwrap();
- let syntax = Syntax::new(
- source.slice(..),
- Arc::new(config),
- Arc::new(ArcSwap::from_pointee(loader)),
- )
- .unwrap();
+ let syntax = Syntax::new(source.slice(..), Arc::new(config), |_| None).unwrap();
let root = syntax.tree().root_node();
let mut test = |capture, range| {
@@ -2804,12 +1107,6 @@ mod test {
.map(String::from)
.collect();
- let loader = Loader::new(Configuration {
- language: vec![],
- language_server: HashMap::new(),
- })
- .unwrap();
-
let language = get_language("rust").unwrap();
let config = HighlightConfiguration::new(
language,
@@ -2828,12 +1125,7 @@ mod test {
fn main() {}
",
);
- let syntax = Syntax::new(
- source.slice(..),
- Arc::new(config),
- Arc::new(ArcSwap::from_pointee(loader)),
- )
- .unwrap();
+ let syntax = Syntax::new(source.slice(..), Arc::new(config), |_| None).unwrap();
let tree = syntax.tree();
let root = tree.root_node();
assert_eq!(root.kind(), "source_file");
@@ -2916,20 +1208,10 @@ mod test {
) {
let source = Rope::from_str(source);
- let loader = Loader::new(Configuration {
- language: vec![],
- language_server: HashMap::new(),
- })
- .unwrap();
let language = get_language(language_name).unwrap();
let config = HighlightConfiguration::new(language, "", "", "").unwrap();
- let syntax = Syntax::new(
- source.slice(..),
- Arc::new(config),
- Arc::new(ArcSwap::from_pointee(loader)),
- )
- .unwrap();
+ let syntax = Syntax::new(source.slice(..), Arc::new(config), |_| None).unwrap();
let root = syntax
.tree()
diff --git a/helix-core/tests/indent.rs b/helix-core/tests/indent.rs
index 56b4d2ba..87a847a1 100644
--- a/helix-core/tests/indent.rs
+++ b/helix-core/tests/indent.rs
@@ -1,4 +1,3 @@
-use arc_swap::ArcSwap;
use helix_core::{
indent::{indent_level_for_line, treesitter_indent_for_pos, IndentStyle},
syntax::{Configuration, Loader},
@@ -6,7 +5,7 @@ use helix_core::{
};
use helix_stdx::rope::RopeSliceExt;
use ropey::Rope;
-use std::{ops::Range, path::PathBuf, process::Command, sync::Arc};
+use std::{ops::Range, path::PathBuf, process::Command};
#[test]
fn test_treesitter_indent_rust() {
@@ -200,12 +199,7 @@ fn test_treesitter_indent(
let indent_style = IndentStyle::from_str(&language_config.indent.as_ref().unwrap().unit);
let highlight_config = language_config.highlight_config(&[]).unwrap();
let text = doc.slice(..);
- let syntax = Syntax::new(
- text,
- highlight_config,
- Arc::new(ArcSwap::from_pointee(loader)),
- )
- .unwrap();
+ let syntax = Syntax::new(text, highlight_config, |_| None).unwrap();
let indent_query = language_config.indent_query().unwrap();
for i in 0..doc.len_lines() {
diff --git a/helix-syntax/Cargo.toml b/helix-syntax/Cargo.toml
new file mode 100644
index 00000000..3ba12ddd
--- /dev/null
+++ b/helix-syntax/Cargo.toml
@@ -0,0 +1,28 @@
+[package]
+name = "helix-syntax"
+description = "Helix syntax highlighting "
+include = ["src/**/*", "README.md"]
+version.workspace = true
+authors.workspace = true
+edition.workspace = true
+license.workspace = true
+rust-version.workspace = true
+categories.workspace = true
+repository.workspace = true
+homepage.workspace = true
+
+[features]
+
+[dependencies]
+helix-stdx = { path = "../helix-stdx" }
+
+ropey = { version = "1.6.1", default-features = false, features = ["simd"] }
+slotmap = "1.0"
+tree-sitter.workspace = true
+once_cell = "1.19"
+arc-swap = "1"
+regex = "1"
+bitflags = "2.4"
+ahash = "0.8.9"
+hashbrown = { version = "0.14.3", features = ["raw"] }
+log = "0.4"
diff --git a/helix-syntax/src/config.rs b/helix-syntax/src/config.rs
new file mode 100644
index 00000000..a983d293
--- /dev/null
+++ b/helix-syntax/src/config.rs
@@ -0,0 +1,331 @@
+use std::path::Path;
+use std::sync::Arc;
+
+use arc_swap::ArcSwap;
+use helix_stdx::rope::{self, RopeSliceExt};
+use once_cell::sync::Lazy;
+use regex::Regex;
+use ropey::RopeSlice;
+use tree_sitter::{Language as Grammar, Node, Query, QueryError, QueryMatch};
+
+use crate::highlighter::Highlight;
+use crate::{byte_range_to_str, IncludedChildren, InjectionLanguageMarker, SHEBANG};
+
+/// Contains the data needed to highlight code written in a particular language.
+///
+/// This struct is immutable and can be shared between threads.
+#[derive(Debug)]
+pub struct HighlightConfiguration {
+ pub language: Grammar,
+ pub query: Query,
+ pub(crate) injections_query: Query,
+ pub(crate) combined_injections_patterns: Vec<usize>,
+ pub(crate) highlights_pattern_index: usize,
+ pub(crate) highlight_indices: ArcSwap<Vec<Option<Highlight>>>,
+ pub(crate) non_local_variable_patterns: Vec<bool>,
+ pub(crate) injection_content_capture_index: Option<u32>,
+ pub(crate) injection_language_capture_index: Option<u32>,
+ pub(crate) injection_filename_capture_index: Option<u32>,
+ pub(crate) injection_shebang_capture_index: Option<u32>,
+ pub(crate) local_scope_capture_index: Option<u32>,
+ pub(crate) local_def_capture_index: Option<u32>,
+ pub(crate) local_def_value_capture_index: Option<u32>,
+ pub(crate) local_ref_capture_index: Option<u32>,
+}
+
+impl HighlightConfiguration {
+ /// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting
+ /// queries.
+ ///
+ /// # Parameters
+ ///
+ /// * `language` - The Tree-sitter `Grammar` that should be used for parsing.
+ /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
+ /// should be non-empty, otherwise no syntax highlights will be added.
+ /// * `injection_query` - A string containing tree patterns for injecting other languages
+ /// into the document. This can be empty if no injections are desired.
+ /// * `locals_query` - A string containing tree patterns for tracking local variable
+ /// definitions and references. This can be empty if local variable tracking is not needed.
+ ///
+ /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
+ ///
+ /// # Errors
+ ///
+ /// Returns a `QueryError` if either the combined locals + highlights query or the
+ /// injection query fails to compile against `language`.
+ pub fn new(
+ language: Grammar,
+ highlights_query: &str,
+ injection_query: &str,
+ locals_query: &str,
+ ) -> Result<Self, QueryError> {
+ // Concatenate the locals and highlights query strings, remembering the byte offset at
+ // which the highlights section starts.
+ let mut query_source = String::new();
+ query_source.push_str(locals_query);
+ let highlights_query_offset = query_source.len();
+ query_source.push_str(highlights_query);
+
+ // Compile the concatenated source as a single query, then count the patterns that start
+ // before the highlights section: those are the patterns that came from `locals_query`.
+ let query = Query::new(&language, &query_source)?;
+ let mut highlights_pattern_index = 0;
+ for i in 0..(query.pattern_count()) {
+ let pattern_offset = query.start_byte_for_pattern(i);
+ if pattern_offset < highlights_query_offset {
+ highlights_pattern_index += 1;
+ }
+ }
+
+ // The injection query is compiled on its own; remember which of its patterns set the
+ // `injection.combined` property.
+ let injections_query = Query::new(&language, injection_query)?;
+ let combined_injections_patterns = (0..injections_query.pattern_count())
+ .filter(|&i| {
+ injections_query
+ .property_settings(i)
+ .iter()
+ .any(|s| &*s.key == "injection.combined")
+ })
+ .collect();
+
+ // Find all of the highlighting patterns that are disabled for nodes that
+ // have been identified as local variables.
+ let non_local_variable_patterns = (0..query.pattern_count())
+ .map(|i| {
+ query
+ .property_predicates(i)
+ .iter()
+ .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local")
+ })
+ .collect();
+
+ // Store the numeric ids for all of the special captures.
+ let mut injection_content_capture_index = None;
+ let mut injection_language_capture_index = None;
+ let mut injection_filename_capture_index = None;
+ let mut injection_shebang_capture_index = None;
+ let mut local_def_capture_index = None;
+ let mut local_def_value_capture_index = None;
+ let mut local_ref_capture_index = None;
+ let mut local_scope_capture_index = None;
+ // The `local.*` captures are looked up in the combined locals + highlights query.
+ for (i, name) in query.capture_names().iter().enumerate() {
+ let i = Some(i as u32);
+ match *name {
+ "local.definition" => local_def_capture_index = i,
+ "local.definition-value" => local_def_value_capture_index = i,
+ "local.reference" => local_ref_capture_index = i,
+ "local.scope" => local_scope_capture_index = i,
+ _ => {}
+ }
+ }
+
+ // The `injection.*` captures are looked up in the separate injections query.
+ for (i, name) in injections_query.capture_names().iter().enumerate() {
+ let i = Some(i as u32);
+ match *name {
+ "injection.content" => injection_content_capture_index = i,
+ "injection.language" => injection_language_capture_index = i,
+ "injection.filename" => injection_filename_capture_index = i,
+ "injection.shebang" => injection_shebang_capture_index = i,
+ _ => {}
+ }
+ }
+
+ // One slot per capture name of the combined query, all `None` until `configure` stores
+ // a new table.
+ let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]);
+ Ok(Self {
+ language,
+ query,
+ injections_query,
+ combined_injections_patterns,
+ highlights_pattern_index,
+ highlight_indices,
+ non_local_variable_patterns,
+ injection_content_capture_index,
+ injection_language_capture_index,
+ injection_filename_capture_index,
+ injection_shebang_capture_index,
+ local_scope_capture_index,
+ local_def_capture_index,
+ local_def_value_capture_index,
+ local_ref_capture_index,
+ })
+ }
+
+ /// Get a slice containing all of the highlight names used in the configuration.
+ pub fn names(&self) -> &[&str] {
+ self.query.capture_names()
+ }
+
+ /// Set the list of recognized highlight names.
+ ///
+ /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
+ /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of
+ /// these queries can choose to recognize highlights with different levels of specificity.
+ /// For example, the string `function.builtin` will match against `function.builtin.constructor`
+ /// but will not match `function.method.builtin` and `function.method`.
+ ///
+ /// When highlighting, results are returned as `Highlight` values, which contain the index
+ /// of the matched highlight in this list of highlight names.
+ ///
+ /// Captures for which no recognized name matches are stored as `None`. The computed table
+ /// replaces the one previously stored in `highlight_indices`.
+ pub fn configure(&self, recognized_names: &[String]) {
+ let mut capture_parts = Vec::new();
+ let indices: Vec<_> = self
+ .query
+ .capture_names()
+ .iter()
+ .map(move |capture_name| {
+ capture_parts.clear();
+ capture_parts.extend(capture_name.split('.'));
+
+ let mut best_index = None;
+ let mut best_match_len = 0;
+ for (i, recognized_name) in recognized_names.iter().enumerate() {
+ let mut len = 0;
+ let mut matches = true;
+ // Every part of the recognized name must equal the capture's part at the same
+ // position (this inner `i` shadows the outer index only inside the loop).
+ for (i, part) in recognized_name.split('.').enumerate() {
+ match capture_parts.get(i) {
+ Some(capture_part) if *capture_part == part => len += 1,
+ _ => {
+ matches = false;
+ break;
+ }
+ }
+ }
+ // Keep the recognized name that matches the most leading parts; the comparison
+ // is strict, so on a tie the earlier entry of `recognized_names` wins.
+ if matches && len > best_match_len {
+ best_index = Some(i);
+ best_match_len = len;
+ }
+ }
+ best_index.map(Highlight)
+ })
+ .collect();
+
+ self.highlight_indices.store(Arc::new(indices));
+ }
+
+ /// Extracts the injection language marker and the content node from one match of the
+ /// injections query.
+ ///
+ /// Each capture of `query_match` is compared against the stored `injection.*` capture
+ /// indices:
+ ///
+ /// * `injection.language` yields `InjectionLanguageMarker::Name` with the node's text,
+ /// * `injection.filename` yields `InjectionLanguageMarker::Filename` with the node's text
+ /// as a path,
+ /// * `injection.shebang` yields `InjectionLanguageMarker::Shebang` with the first capture
+ /// group of the `SHEBANG` regex,
+ /// * `injection.content` records the node as the content node.
+ ///
+ /// A later marker capture overwrites an earlier one. Note that a shebang capture whose
+ /// text does not match the regex resets the marker to `None`.
+ fn injection_pair<'a>(
+ &self,
+ query_match: &QueryMatch<'a, 'a>,
+ source: RopeSlice<'a>,
+ ) -> (Option<InjectionLanguageMarker<'a>>, Option<Node<'a>>) {
+ let mut injection_capture = None;
+ let mut content_node = None;
+
+ for capture in query_match.captures {
+ let index = Some(capture.index);
+ if index == self.injection_language_capture_index {
+ let name = byte_range_to_str(capture.node.byte_range(), source);
+ injection_capture = Some(InjectionLanguageMarker::Name(name));
+ } else if index == self.injection_filename_capture_index {
+ let name = byte_range_to_str(capture.node.byte_range(), source);
+ let path = Path::new(name.as_ref()).to_path_buf();
+ injection_capture = Some(InjectionLanguageMarker::Filename(path.into()));
+ } else if index == self.injection_shebang_capture_index {
+ let node_slice = source.byte_slice(capture.node.byte_range());
+
+ // some languages allow space and newlines before the actual string content
+ // so a shebang could be on either the first or second line
+ let lines = if let Ok(end) = node_slice.try_line_to_byte(2) {
+ node_slice.byte_slice(..end)
+ } else {
+ node_slice
+ };
+
+ // Compiled once on first use and shared by all later calls.
+ static SHEBANG_REGEX: Lazy<rope::Regex> =
+ Lazy::new(|| rope::Regex::new(SHEBANG).unwrap());
+
+ // Only the first regex match within the inspected lines is used.
+ injection_capture = SHEBANG_REGEX
+ .captures_iter(lines.regex_input())
+ .map(|cap| {
+ let cap = lines.byte_slice(cap.get_group(1).unwrap().range());
+ InjectionLanguageMarker::Shebang(cap.into())
+ })
+ .next()
+ } else if index == self.injection_content_capture_index {
+ content_node = Some(capture.node);
+ }
+ }
+ (injection_capture, content_node)
+ }
+
+ pub(super) fn injection_for_match<'a>(
+ &self,
+ query: &'a Query,
+ query_match: &QueryMatch<'a, 'a>,
+ source: RopeSlice<'a>,
+ ) -> (
+ Option<InjectionLanguageMarker<'a>>,
+ Option<Node<'a>>,
+ IncludedChildren,
+ ) {
+ let (mut injection_capture, content_node) = self.injection_pair(query_match, source);
+
+ let mut included_children = IncludedChildren::default();
+ for prop in query.property_settings(query_match.pattern_index) {
+ match prop.key.as_ref() {
+ // In addition to specifying the language name via the text of a
+ // captured node, it can also be hard-coded via a `#set!` predicate
+ // that sets the injection.language key.
+ "injection.language" if injection_capture.is_none() => {
+ injection_capture = prop
+ .value
+ .as_ref()
+ .map(|s| InjectionLanguageMarker::Name(s.as_ref().into()));
+ }
+
+ // By default, injections do not include the *children* of an
+ // `injection.content` node - only the ranges that belong to the
+ // node itself. This can be changed using a `#set!` predicate that
+ // sets the `injection.include-children` key.
+ "injection.include-children" => included_children = IncludedChildren::All,
+
+ // Some queries might only exclude named children but include unnamed
+ // children in their `injection.content` node. This can be enabled using
+ // a `#set!` predicate that sets the `injection.include-unnamed-children` key.
+ "injection.include-unnamed-children" => {
+ included_children = IncludedChildren::Unnamed
+ }
+ _ => {}
+ }
+ }
+
+ (injection_capture, content_node, included_children)
+ }
+ /// Reads the query text for `language`/`filename` through `read_query` and compiles it
+ /// against this configuration's grammar.
+ ///
+ /// Returns `Ok(None)` when the resulting query text is empty.
+ ///
+ /// # Errors
+ ///
+ /// Returns a `QueryError` if the non-empty query text fails to compile.
+ pub fn load_query(
+ &self,
+ language: &str,
+ filename: &str,
+ read_query_text: impl FnMut(&str, &str) -> String,
+ ) -> Result<Option<Query>, QueryError> {
+ let query_text = read_query(language, filename, read_query_text);
+ if query_text.is_empty() {
+ return Ok(None);
+ }
+ Query::new(&self.language, &query_text).map(Some)
+ }
+}
+
+/// Reads a query by invoking `read_query_text`, handling any `inherits` directives.
+///
+/// `read_query_text` is called with `(language, filename)` and returns the query source.
+/// Every `; inherits: <language>(,<language>)*` comment in that source is replaced by the
+/// queries of the listed languages, each read with the same `filename` and expanded
+/// recursively in the same way.
+///
+/// NOTE(review): nothing visible here guards against cyclic `inherits` chains — confirm
+/// that the available query files cannot inherit from each other.
+pub fn read_query(
+ language: &str,
+ filename: &str,
+ mut read_query_text: impl FnMut(&str, &str) -> String,
+) -> String {
+ // Recursive worker; takes the reader by `&mut` so the same closure can be reused for
+ // every inherited language.
+ fn read_query_impl(
+ language: &str,
+ filename: &str,
+ read_query_text: &mut impl FnMut(&str, &str) -> String,
+ ) -> String {
+ static INHERITS_REGEX: Lazy<Regex> =
+ Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap());
+
+ let query = read_query_text(language, filename);
+
+ // replaces all "; inherits <language>(,<language>)*" with the queries of the given language(s)
+ INHERITS_REGEX
+ .replace_all(&query, |captures: &regex::Captures| {
+ captures[1]
+ .split(',')
+ .map(|language| {
+ // Surround each inherited query with newlines so it cannot run into the
+ // neighbouring text.
+ format!(
+ "\n{}\n",
+ read_query_impl(language, filename, &mut *read_query_text)
+ )
+ })
+ .collect::<String>()
+ })
+ .to_string()
+ }
+ read_query_impl(language, filename, &mut read_query_text)
+}
diff --git a/helix-syntax/src/highlighter.rs b/helix-syntax/src/highlighter.rs
new file mode 100644
index 00000000..1b53672f
--- /dev/null
+++ b/helix-syntax/src/highlighter.rs
@@ -0,0 +1,439 @@
+use std::borrow::Cow;
+use std::cell::RefCell;
+use std::sync::atomic::{self, AtomicUsize};
+use std::{fmt, iter, mem, ops};
+
+use ropey::RopeSlice;
+use tree_sitter::{QueryCaptures, QueryCursor, Tree};
+
+use crate::ropey::RopeProvider;
+use crate::{
+ byte_range_to_str, Error, HighlightConfiguration, Syntax, PARSER, TREE_SITTER_MATCH_LIMIT,
+};
+
+const CANCELLATION_CHECK_INTERVAL: usize = 100;
+
+/// Indicates which highlight should be applied to a region of source code.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct Highlight(pub usize);
+
+/// Represents a single step in rendering a syntax-highlighted document.
+#[derive(Copy, Clone, Debug)]
+pub enum HighlightEvent {
+ Source { start: usize, end: usize },
+ HighlightStart(Highlight),
+ HighlightEnd,
+}
+
+#[derive(Debug)]
+struct LocalDef<'a> {
+ name: Cow<'a, str>,
+ value_range: ops::Range<usize>,
+ highlight: Option<Highlight>,
+}
+
+#[derive(Debug)]
+struct LocalScope<'a> {
+ inherits: bool,
+ range: ops::Range<usize>,
+ local_defs: Vec<LocalDef<'a>>,
+}
+
+#[derive(Debug)]
+struct HighlightIter<'a> {
+ source: RopeSlice<'a>,
+ byte_offset: usize,
+ cancellation_flag: Option<&'a AtomicUsize>,
+ layers: Vec<HighlightIterLayer<'a>>,
+ iter_count: usize,
+ next_event: Option<HighlightEvent>,
+ last_highlight_range: Option<(usize, usize, u32)>,
+}
+
+struct HighlightIterLayer<'a> {
+ _tree: Option<Tree>,
+ cursor: QueryCursor,
+ captures: RefCell<iter::Peekable<QueryCaptures<'a, 'a, RopeProvider<'a>, &'a [u8]>>>,
+ config: &'a HighlightConfiguration,
+ highlight_end_stack: Vec<usize>,
+ scope_stack: Vec<LocalScope<'a>>,
+ depth: u32,
+}
+
+impl<'a> fmt::Debug for HighlightIterLayer<'a> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_struct("HighlightIterLayer").finish()
+ }
+}
+
+impl<'a> HighlightIterLayer<'a> {
+ // First, sort scope boundaries by their byte offset in the document. At a
+ // given position, emit scope endings before scope beginnings. Finally, emit
+ // scope boundaries from deeper layers first.
+ fn sort_key(&self) -> Option<(usize, bool, isize)> {
+ let depth = -(self.depth as isize);
+ let next_start = self
+ .captures
+ .borrow_mut()
+ .peek()
+ .map(|(m, i)| m.captures[*i].node.start_byte());
+ let next_end = self.highlight_end_stack.last().cloned();
+ match (next_start, next_end) {
+ (Some(start), Some(end)) => {
+ if start < end {
+ Some((start, true, depth))
+ } else {
+ Some((end, false, depth))
+ }
+ }
+ (Some(i), None) => Some((i, true, depth)),
+ (None, Some(j)) => Some((j, false, depth)),
+ _ => None,
+ }
+ }
+}
+
+impl<'a> HighlightIter<'a> {
+ /// Produces the next iterator item, first flushing any source text not yet reported.
+ ///
+ /// If `offset` lies beyond `self.byte_offset`, a `Source` event covering
+ /// `self.byte_offset..offset` is returned, `self.byte_offset` advances to `offset`, and
+ /// `event` is parked in `self.next_event` so that it is returned by a later call.
+ /// Otherwise `event` itself is returned (`None` when there is no event).
+ /// The layers are re-sorted in both cases.
+ fn emit_event(
+ &mut self,
+ offset: usize,
+ event: Option<HighlightEvent>,
+ ) -> Option<Result<HighlightEvent, Error>> {
+ let result;
+ if self.byte_offset < offset {
+ result = Some(Ok(HighlightEvent::Source {
+ start: self.byte_offset,
+ end: offset,
+ }));
+ self.byte_offset = offset;
+ self.next_event = event;
+ } else {
+ result = event.map(Ok);
+ }
+ self.sort_layers();
+ result
+ }
+
+ fn sort_layers(&mut self) {
+ while !self.layers.is_empty() {
+ if let Some(sort_key) = self.layers[0].sort_key() {
+ let mut i = 0;
+ while i + 1 < self.layers.len() {
+ if let Some(next_offset) = self.layers[i + 1].sort_key() {
+ if next_offset < sort_key {
+ i += 1;
+ continue;
+ }
+ } else {
+ let layer = self.layers.remove(i + 1);
+ PARSER.with(|ts_parser| {
+ let highlighter = &mut ts_parser.borrow_mut();
+ highlighter.cursors.push(layer.cursor);
+ });
+ }
+ break;
+ }
+ if i > 0 {
+ self.layers[0..(i + 1)].rotate_left(1);
+ }
+ break;
+ } else {
+ let layer = self.layers.remove(0);
+ PARSER.with(|ts_parser| {
+ let highlighter = &mut ts_parser.borrow_mut();
+ highlighter.cursors.push(layer.cursor);
+ });
+ }
+ }
+ }
+}
+
+impl<'a> Iterator for HighlightIter<'a> {
+ type Item = Result<HighlightEvent, Error>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ 'main: loop {
+ // If we've already determined the next highlight boundary, just return it.
+ if let Some(e) = self.next_event.take() {
+ return Some(Ok(e));
+ }
+
+ // Periodically check for cancellation, returning `Cancelled` error if the
+ // cancellation flag was flipped.
+ if let Some(cancellation_flag) = self.cancellation_flag {
+ self.iter_count += 1;
+ if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
+ self.iter_count = 0;
+ if cancellation_flag.load(atomic::Ordering::Relaxed) != 0 {
+ return Some(Err(Error::Cancelled));
+ }
+ }
+ }
+
+ // If none of the layers have any more highlight boundaries, terminate.
+ if self.layers.is_empty() {
+ let len = self.source.len_bytes();
+ return if self.byte_offset < len {
+ let result = Some(Ok(HighlightEvent::Source {
+ start: self.byte_offset,
+ end: len,
+ }));
+ self.byte_offset = len;
+ result
+ } else {
+ None
+ };
+ }
+
+ // Get the next capture from whichever layer has the earliest highlight boundary.
+ let range;
+ let layer = &mut self.layers[0];
+ let captures = layer.captures.get_mut();
+ if let Some((next_match, capture_index)) = captures.peek() {
+ let next_capture = next_match.captures[*capture_index];
+ range = next_capture.node.byte_range();
+
+ // If any previous highlight ends before this node starts, then before
+ // processing this capture, emit the source code up until the end of the
+ // previous highlight, and an end event for that highlight.
+ if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
+ if end_byte <= range.start {
+ layer.highlight_end_stack.pop();
+ return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
+ }
+ }
+ }
+ // If there are no more captures, then emit any remaining highlight end events.
+ // And if there are none of those, then just advance to the end of the document.
+ else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
+ layer.highlight_end_stack.pop();
+ return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
+ } else {
+ return self.emit_event(self.source.len_bytes(), None);
+ };
+
+ let (mut match_, capture_index) = captures.next().unwrap();
+ let mut capture = match_.captures[capture_index];
+
+ // Remove from the local scope stack any local scopes that have already ended.
+ while range.start > layer.scope_stack.last().unwrap().range.end {
+ layer.scope_stack.pop();
+ }
+
+ // If this capture is for tracking local variables, then process the
+ // local variable info.
+ let mut reference_highlight = None;
+ let mut definition_highlight = None;
+ while match_.pattern_index < layer.config.highlights_pattern_index {
+ // If the node represents a local scope, push a new local scope onto
+ // the scope stack.
+ if Some(capture.index) == layer.config.local_scope_capture_index {
+ definition_highlight = None;
+ let mut scope = LocalScope {
+ inherits: true,
+ range: range.clone(),
+ local_defs: Vec::new(),
+ };
+ for prop in layer.config.query.property_settings(match_.pattern_index) {
+ if let "local.scope-inherits" = prop.key.as_ref() {
+ scope.inherits =
+ prop.value.as_ref().map_or(true, |r| r.as_ref() == "true");
+ }
+ }
+ layer.scope_stack.push(scope);
+ }
+ // If the node represents a definition, add a new definition to the
+ // local scope at the top of the scope stack.
+ else if Some(capture.index) == layer.config.local_def_capture_index {
+ reference_highlight = None;
+ let scope = layer.scope_stack.last_mut().unwrap();
+
+ let mut value_range = 0..0;
+ for capture in match_.captures {
+ if Some(capture.index) == layer.config.local_def_value_capture_index {
+ value_range = capture.node.byte_range();
+ }
+ }
+
+ let name = byte_range_to_str(range.clone(), self.source);
+ scope.local_defs.push(LocalDef {
+ name,
+ value_range,
+ highlight: None,
+ });
+ definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight);
+ }
+ // If the node represents a reference, then try to find the corresponding
+ // definition in the scope stack.
+ else if Some(capture.index) == layer.config.local_ref_capture_index
+ && definition_highlight.is_none()
+ {
+ definition_highlight = None;
+ let name = byte_range_to_str(range.clone(), self.source);
+ for scope in layer.scope_stack.iter().rev() {
+ if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
+ if def.name == name && range.start >= def.value_range.end {
+ Some(def.highlight)
+ } else {
+ None
+ }
+ }) {
+ reference_highlight = highlight;
+ break;
+ }
+ if !scope.inherits {
+ break;
+ }
+ }
+ }
+
+ // Continue processing any additional matches for the same node.
+ if let Some((next_match, next_capture_index)) = captures.peek() {
+ let next_capture = next_match.captures[*next_capture_index];
+ if next_capture.node == capture.node {
+ capture = next_capture;
+ match_ = captures.next().unwrap().0;
+ continue;
+ }
+ }
+
+ self.sort_layers();
+ continue 'main;
+ }
+
+ // Otherwise, this capture must represent a highlight.
+ // If this exact range has already been highlighted by an earlier pattern, or by
+ // a different layer, then skip over this one.
+ if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
+ if range.start == last_start && range.end == last_end && layer.depth < last_depth {
+ self.sort_layers();
+ continue 'main;
+ }
+ }
+
+ // If the current node was found to be a local variable, then skip over any
+ // highlighting patterns that are disabled for local variables.
+ if definition_highlight.is_some() || reference_highlight.is_some() {
+ while layer.config.non_local_variable_patterns[match_.pattern_index] {
+ match_.remove();
+ if let Some((next_match, next_capture_index)) = captures.peek() {
+ let next_capture = next_match.captures[*next_capture_index];
+ if next_capture.node == capture.node {
+ capture = next_capture;
+ match_ = captures.next().unwrap().0;
+ continue;
+ }
+ }
+
+ self.sort_layers();
+ continue 'main;
+ }
+ }
+
+ // Once a highlighting pattern is found for the current node, skip over
+ // any later highlighting patterns that also match this node. Captures
+ // for a given node are ordered by pattern index, so these subsequent
+ // captures are guaranteed to be for highlighting, not injections or
+ // local variables.
+ while let Some((next_match, next_capture_index)) = captures.peek() {
+ let next_capture = next_match.captures[*next_capture_index];
+ if next_capture.node == capture.node {
+ captures.next();
+ } else {
+ break;
+ }
+ }
+
+ let current_highlight = layer.config.highlight_indices.load()[capture.index as usize];
+
+ // If this node represents a local definition, then store the current
+ // highlight value on the local scope entry representing this node.
+ if let Some(definition_highlight) = definition_highlight {
+ *definition_highlight = current_highlight;
+ }
+
+ // Emit a scope start event and push the node's end position to the stack.
+ if let Some(highlight) = reference_highlight.or(current_highlight) {
+ self.last_highlight_range = Some((range.start, range.end, layer.depth));
+ layer.highlight_end_stack.push(range.end);
+ return self
+ .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
+ }
+
+ self.sort_layers();
+ }
+ }
+}
+
+impl Syntax {
+ /// Iterate over the highlighted regions for a given slice of source code.
+ pub fn highlight_iter<'a>(
+ &'a self,
+ source: RopeSlice<'a>,
+ range: Option<std::ops::Range<usize>>,
+ cancellation_flag: Option<&'a AtomicUsize>,
+ ) -> impl Iterator<Item = Result<HighlightEvent, Error>> + 'a {
+ let mut layers = self
+ .layers
+ .iter()
+ .filter_map(|(_, layer)| {
+ // TODO: if range doesn't overlap layer range, skip it
+
+ // Reuse a cursor from the pool if available.
+ let mut cursor = PARSER.with(|ts_parser| {
+ let highlighter = &mut ts_parser.borrow_mut();
+ highlighter.cursors.pop().unwrap_or_else(QueryCursor::new)
+ });
+
+ // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
+ // prevents them from being moved. But both of these values are really just
+ // pointers, so it's actually ok to move them.
+ let cursor_ref =
+ unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
+
+ // if reusing cursors & no range this resets to whole range
+ cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX));
+ cursor_ref.set_match_limit(TREE_SITTER_MATCH_LIMIT);
+
+ let mut captures = cursor_ref
+ .captures(
+ &layer.config.query,
+ layer.tree().root_node(),
+ RopeProvider(source),
+ )
+ .peekable();
+
+ // If there's no captures, skip the layer
+ captures.peek()?;
+
+ Some(HighlightIterLayer {
+ highlight_end_stack: Vec::new(),
+ scope_stack: vec![LocalScope {
+ inherits: false,
+ range: 0..usize::MAX,
+ local_defs: Vec::new(),
+ }],
+ cursor,
+ _tree: None,
+ captures: RefCell::new(captures),
+ config: layer.config.as_ref(), // TODO: just reuse `layer`
+ depth: layer.depth, // TODO: just reuse `layer`
+ })
+ })
+ .collect::<Vec<_>>();
+
+ layers.sort_unstable_by_key(|layer| layer.sort_key());
+
+ let mut result = HighlightIter {
+ source,
+ byte_offset: range.map_or(0, |r| r.start),
+ cancellation_flag,
+ iter_count: 0,
+ layers,
+ next_event: None,
+ last_highlight_range: None,
+ };
+ result.sort_layers();
+ result
+ }
+}
diff --git a/helix-syntax/src/lib.rs b/helix-syntax/src/lib.rs
new file mode 100644
index 00000000..04a2d27d
--- /dev/null
+++ b/helix-syntax/src/lib.rs
@@ -0,0 +1,342 @@
+use ::ropey::RopeSlice;
+use slotmap::{DefaultKey as LayerId, HopSlotMap};
+use tree_sitter::{Node, Parser, Point, Query, QueryCursor, Range, Tree};
+
+use std::borrow::Cow;
+use std::cell::RefCell;
+use std::hash::{Hash, Hasher};
+use std::path::Path;
+use std::str;
+use std::sync::Arc;
+
+use crate::parse::LayerUpdateFlags;
+
+pub use crate::config::{read_query, HighlightConfiguration};
+pub use crate::ropey::RopeProvider;
+pub use merge::merge;
+pub use pretty_print::pretty_print_tree;
+pub use tree_cursor::TreeCursor;
+
+mod config;
+pub mod highlighter;
+mod merge;
+mod parse;
+mod pretty_print;
+mod ropey;
+mod tree_cursor;
+
+#[derive(Debug)]
+pub struct Syntax {
+ layers: HopSlotMap<LayerId, LanguageLayer>,
+ root: LayerId,
+}
+
+impl Syntax {
+ /// Builds the syntax state for `source`, with `config` as the root language.
+ ///
+ /// A single root layer at depth 0, whose range runs from byte 0 to `usize::MAX`, is
+ /// created and `update` is run once with an empty `Vec` and `injection_callback`.
+ /// `injection_callback` presumably maps an injection marker to the configuration of the
+ /// injected language, with `None` meaning the language is unavailable — confirm against
+ /// `update`.
+ ///
+ /// Returns `None`, after logging the error, if that initial update fails.
+ pub fn new(
+ source: RopeSlice,
+ config: Arc<HighlightConfiguration>,
+ injection_callback: impl Fn(&InjectionLanguageMarker) -> Option<Arc<HighlightConfiguration>>,
+ ) -> Option<Self> {
+ let root_layer = LanguageLayer {
+ tree: None,
+ config,
+ depth: 0,
+ flags: LayerUpdateFlags::empty(),
+ ranges: vec![Range {
+ start_byte: 0,
+ end_byte: usize::MAX,
+ start_point: Point::new(0, 0),
+ end_point: Point::new(usize::MAX, usize::MAX),
+ }],
+ parent: None,
+ };
+
+ // track scope_descriptor: a Vec of scopes for item in tree
+
+ let mut layers = HopSlotMap::default();
+ let root = layers.insert(root_layer);
+
+ let mut syntax = Self { root, layers };
+
+ let res = syntax.update(source, Vec::new(), injection_callback);
+
+ if res.is_err() {
+ log::error!("TS parser failed, disabling TS for the current buffer: {res:?}");
+ return None;
+ }
+ Some(syntax)
+ }
+
+ pub fn tree(&self) -> &Tree {
+ self.layers[self.root].tree()
+ }
+
+ /// Returns the tree of the deepest layer that contains the byte range given by `start`
+ /// and `end`.
+ ///
+ /// The search starts with the root layer as the candidate and switches to any layer that
+ /// is deeper than the current candidate and contains the range, so the root tree is
+ /// returned when no deeper layer contains it.
+ pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree {
+ let mut container_id = self.root;
+
+ for (layer_id, layer) in self.layers.iter() {
+ if layer.depth > self.layers[container_id].depth
+ && layer.contains_byte_range(start, end)
+ {
+ container_id = layer_id;
+ }
+ }
+
+ self.layers[container_id].tree()
+ }
+
+ pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Node<'_>> {
+ self.tree_for_byte_range(start, end)
+ .root_node()
+ .named_descendant_for_byte_range(start, end)
+ }
+
+ pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Node<'_>> {
+ self.tree_for_byte_range(start, end)
+ .root_node()
+ .descendant_for_byte_range(start, end)
+ }
+
+ pub fn walk(&self) -> TreeCursor<'_> {
+ TreeCursor::new(&self.layers, self.root)
+ }
+}
+
+#[derive(Debug)]
+pub struct LanguageLayer {
+ // mode
+ // grammar
+ pub config: Arc<HighlightConfiguration>,
+ pub(crate) tree: Option<Tree>,
+ pub ranges: Vec<Range>,
+ pub depth: u32,
+ flags: LayerUpdateFlags,
+ parent: Option<LayerId>,
+}
+
+/// This PartialEq implementation only checks whether
+/// two layers are theoretically identical (meaning they highlight the same text range with the same language).
+/// It does not check whether the layers have the same internal treesitter
+/// state.
+impl PartialEq for LanguageLayer {
+ fn eq(&self, other: &Self) -> bool {
+ // Only depth, grammar and ranges take part; `tree`, `flags` and `parent` are ignored.
+ self.depth == other.depth
+ && self.config.language == other.config.language
+ && self.ranges == other.ranges
+ }
+}
+
+/// Hash implementation belongs to PartialEq implementation above.
+/// See its documentation for details.
+impl Hash for LanguageLayer {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ self.depth.hash(state);
+ self.config.language.hash(state);
+ self.ranges.hash(state);
+ }
+}
+
+impl LanguageLayer {
+ /// Returns the tree stored for this layer.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `self.tree` is `None`.
+ pub fn tree(&self) -> &Tree {
+ // TODO: no unwrap
+ self.tree.as_ref().unwrap()
+ }
+
+ /// Whether the layer contains the given byte range.
+ ///
+ /// If the layer has multiple ranges (i.e. combined injections), the
+ /// given range is considered contained if it is within the start and
+ /// end bytes of the first and last ranges **and** if the given range
+ /// starts or ends within any of the layer's ranges.
+ fn contains_byte_range(&self, start: usize, end: usize) -> bool {
+ let layer_start = self
+ .ranges
+ .first()
+ .expect("ranges should not be empty")
+ .start_byte;
+ let layer_end = self
+ .ranges
+ .last()
+ .expect("ranges should not be empty")
+ .end_byte;
+
+ layer_start <= start
+ && layer_end >= end
+ && self.ranges.iter().any(|range| {
+ let byte_range = range.start_byte..range.end_byte;
+ byte_range.contains(&start) || byte_range.contains(&end)
+ })
+ }
+}
+
+#[derive(Debug, Clone)]
+pub enum InjectionLanguageMarker<'a> {
+ Name(Cow<'a, str>),
+ Filename(Cow<'a, Path>),
+ Shebang(String),
+}
+
+const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
+
+#[derive(Debug)]
+pub enum CapturedNode<'a> {
+ Single(Node<'a>),
+ /// Guaranteed to be not empty
+ Grouped(Vec<Node<'a>>),
+}
+
+impl<'a> CapturedNode<'a> {
+ pub fn start_byte(&self) -> usize {
+ match self {
+ Self::Single(n) => n.start_byte(),
+ Self::Grouped(ns) => ns[0].start_byte(),
+ }
+ }
+
+ pub fn end_byte(&self) -> usize {
+ match self {
+ Self::Single(n) => n.end_byte(),
+ Self::Grouped(ns) => ns.last().unwrap().end_byte(),
+ }
+ }
+
+ pub fn byte_range(&self) -> std::ops::Range<usize> {
+ self.start_byte()..self.end_byte()
+ }
+}
+
+/// The maximum number of in-progress matches a TS cursor can consider at once.
+/// This is set to a constant in order to avoid performance problems for medium to large files. Set with `set_match_limit`.
+/// Using such a limit means that we lose valid captures, so there is fundamentally a tradeoff here.
+///
+///
+/// Old tree sitter versions used a limit of 32 by default until this limit was removed in version `0.19.5` (must now be set manually).
+/// However, this causes performance issues for medium to large files.
+/// In helix, this problem caused treesitter motions to take multiple seconds to complete in medium-sized rust files (3k loc).
+///
+///
+/// Neovim also encountered this problem and reintroduced this limit after it was removed upstream
+/// (see <https://github.com/neovim/neovim/issues/14897> and <https://github.com/neovim/neovim/pull/14915>).
+/// The number used here is fundamentally a tradeoff between breaking some obscure edge cases and performance.
+///
+///
+/// Neovim chose 64 for this value somewhat arbitrarily (<https://github.com/neovim/neovim/pull/18397>).
+/// 64 is too low for some languages though. In particular, it breaks some highlighting for record fields in Erlang record definitions.
+/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high.
+const TREE_SITTER_MATCH_LIMIT: u32 = 256;
+
+#[derive(Debug)]
+pub struct TextObjectQuery {
+ pub query: Query,
+}
+
+impl TextObjectQuery {
+ /// Run the query on the given node and return sub nodes which match given
+ /// capture ("function.inside", "class.around", etc).
+ ///
+ /// Captures may contain multiple nodes by using quantifiers (+, *, etc),
+ /// and support for this is partial and could use improvement.
+ ///
+ /// ```query
+ /// (comment)+ @capture
+ ///
+ /// ; OR
+ /// (
+ /// (comment)*
+ /// .
+ /// (function)
+ /// ) @capture
+ /// ```
+ pub fn capture_nodes<'a>(
+ &'a self,
+ capture_name: &str,
+ node: Node<'a>,
+ slice: RopeSlice<'a>,
+ cursor: &'a mut QueryCursor,
+ ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
+ self.capture_nodes_any(&[capture_name], node, slice, cursor)
+ }
+
+ /// Find the first capture that exists out of all given `capture_names`
+ /// and return sub nodes that match this capture.
+ pub fn capture_nodes_any<'a>(
+ &'a self,
+ capture_names: &[&str],
+ node: Node<'a>,
+ slice: RopeSlice<'a>,
+ cursor: &'a mut QueryCursor,
+ ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
+ let capture_idx = capture_names
+ .iter()
+ .find_map(|cap| self.query.capture_index_for_name(cap))?;
+
+ cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
+
+ let nodes = cursor
+ .captures(&self.query, node, RopeProvider(slice))
+ .filter_map(move |(mat, _)| {
+ let nodes: Vec<_> = mat
+ .captures
+ .iter()
+ .filter_map(|cap| (cap.index == capture_idx).then_some(cap.node))
+ .collect();
+
+ if nodes.len() > 1 {
+ Some(CapturedNode::Grouped(nodes))
+ } else {
+ nodes.into_iter().map(CapturedNode::Single).next()
+ }
+ });
+
+ Some(nodes)
+ }
+}
+
+/// Represents the reason why syntax highlighting failed.
+#[derive(Debug, PartialEq, Eq)]
+pub enum Error {
+ Cancelled,
+ InvalidLanguage,
+ InvalidRanges,
+ Unknown,
+}
+
+#[derive(Clone)]
+enum IncludedChildren {
+ None,
+ All,
+ Unnamed,
+}
+
+impl Default for IncludedChildren {
+ fn default() -> Self {
+ Self::None
+ }
+}
+
+fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<str> {
+ Cow::from(source.byte_slice(range))
+}
+
+struct TsParser {
+ parser: tree_sitter::Parser,
+ pub cursors: Vec<QueryCursor>,
+}
+
+// could also just use a pool, or a single instance?
+thread_local! {
+ static PARSER: RefCell<TsParser> = RefCell::new(TsParser {
+ parser: Parser::new(),
+ cursors: Vec::new(),
+ })
+}
+
+/// Runs `f` with a `QueryCursor` taken from the thread-local cursor pool.
+///
+/// A pooled cursor is reused when one is available, otherwise a new one is created. The
+/// cursor is put back into the pool once `f` returns, and the value returned by `f` is
+/// passed through to the caller.
+///
+/// The pool is only borrowed while a cursor is taken out or put back, never while `f`
+/// runs, so `f` may itself use the pool (for example through a nested `with_cursor` call)
+/// without triggering a `RefCell` double-borrow panic.
+pub fn with_cursor<T>(f: impl FnOnce(&mut QueryCursor) -> T) -> T {
+    // Take a cursor and release the `RefCell` borrow before calling into user code.
+    let mut cursor = PARSER
+        .with(|parser| parser.borrow_mut().cursors.pop())
+        .unwrap_or_else(QueryCursor::new);
+    let res = f(&mut cursor);
+    // Re-borrow only to hand the cursor back for reuse.
+    PARSER.with(|parser| parser.borrow_mut().cursors.push(cursor));
+    res
+}
diff --git a/helix-syntax/src/merge.rs b/helix-syntax/src/merge.rs
new file mode 100644
index 00000000..2b9063a0
--- /dev/null
+++ b/helix-syntax/src/merge.rs
@@ -0,0 +1,135 @@
+use crate::highlighter::{Highlight, HighlightEvent};
+
+pub struct Merge<I> {
+ iter: I, // underlying highlight event stream
+ spans: Box<dyn Iterator<Item = (usize, std::ops::Range<usize>)>>, // remaining `(highlight index, range)` spans
+
+ next_event: Option<HighlightEvent>, // lookahead taken from `iter`, possibly the unconsumed rest of a split event
+ next_span: Option<(usize, std::ops::Range<usize>)>, // lookahead taken from `spans`, possibly the unconsumed rest of a span
+
+ queue: Vec<HighlightEvent>, // pending events; `next` pops from the back before doing anything else
+}
+
+/// Weaves `spans` into the highlight event stream `iter`.
+///
+/// Each span is a `(highlight index, range)` pair. The first event and the first span
+/// are fetched eagerly, so the returned iterator starts with its lookahead filled in.
+pub fn merge<I: Iterator<Item = HighlightEvent>>(
+    mut iter: I,
+    spans: Vec<(usize, std::ops::Range<usize>)>,
+) -> Merge<I> {
+    let mut spans: Box<dyn Iterator<Item = (usize, std::ops::Range<usize>)>> =
+        Box::new(spans.into_iter());
+    let next_event = iter.next();
+    let next_span = spans.next();
+
+    Merge {
+        iter,
+        spans,
+        next_event,
+        next_span,
+        queue: Vec::new(),
+    }
+}
+
+impl<I: Iterator<Item = HighlightEvent>> Iterator for Merge<I> {
+ type Item = HighlightEvent;
+ fn next(&mut self) -> Option<Self::Item> { // yields the next event of the merged stream
+ use HighlightEvent::*;
+ if let Some(event) = self.queue.pop() { // events queued by an earlier call go out first
+ return Some(event);
+ }
+
+ loop { // drop or trim spans that start before the next source event
+ match (self.next_event, &self.next_span) {
+ // this happens when range is partially or fully offscreen
+ (Some(Source { start, .. }), Some((span, range))) if start > range.start => {
+ if start > range.end {
+ self.next_span = self.spans.next(); // span ends before the event: discard it
+ } else {
+ self.next_span = Some((*span, start..range.end)); // keep only the part from `start` on
+ };
+ }
+ _ => break,
+ }
+ }
+
+ match (self.next_event, &self.next_span) {
+ (Some(HighlightStart(i)), _) => { // highlight boundaries of `iter` pass through unchanged
+ self.next_event = self.iter.next();
+ Some(HighlightStart(i))
+ }
+ (Some(HighlightEnd), _) => {
+ self.next_event = self.iter.next();
+ Some(HighlightEnd)
+ }
+ (Some(Source { start, end }), Some((_, range))) if start < range.start => { // text in front of the span
+ let intersect = range.start.min(end);
+ let event = Source {
+ start,
+ end: intersect,
+ };
+
+ if end == intersect {
+ // the event is complete
+ self.next_event = self.iter.next();
+ } else {
+ // subslice the event
+ self.next_event = Some(Source {
+ start: intersect,
+ end,
+ });
+ };
+
+ Some(event)
+ }
+ (Some(Source { start, end }), Some((span, range))) if start == range.start => { // span begins exactly here
+ let intersect = range.end.min(end);
+ let event = HighlightStart(Highlight(*span));
+
+ // enqueue in reverse order
+ self.queue.push(HighlightEnd);
+ self.queue.push(Source {
+ start,
+ end: intersect,
+ });
+
+ if end == intersect {
+ // the event is complete
+ self.next_event = self.iter.next();
+ } else {
+ // subslice the event
+ self.next_event = Some(Source {
+ start: intersect,
+ end,
+ });
+ };
+
+ if intersect == range.end {
+ self.next_span = self.spans.next(); // the span is fully consumed
+ } else {
+ self.next_span = Some((*span, intersect..range.end)); // rest of the span continues with the next event
+ }
+
+ Some(event)
+ }
+ (Some(event), None) => { // no spans left: forward the remaining events as they are
+ self.next_event = self.iter.next();
+ Some(event)
+ }
+ // Can happen if cursor at EOF and/or diagnostic reaches past the end.
+ // We need to actually emit events for the cursor-at-EOF situation,
+ // even though the range is past the end of the text. This needs to be
+ // handled appropriately by the drawing code by not assuming that
+ // all `Source` events point to valid indices in the rope.
+ (None, Some((span, range))) => {
+ let event = HighlightStart(Highlight(*span));
+ self.queue.push(HighlightEnd);
+ self.queue.push(Source {
+ start: range.start,
+ end: range.end,
+ });
+ self.next_span = self.spans.next();
+ Some(event)
+ }
+ (None, None) => None,
+ e => unreachable!("{:?}", e), // a source event starting after the span start was ruled out by the loop above
+ }
+ }
+}
diff --git a/helix-syntax/src/parse.rs b/helix-syntax/src/parse.rs
new file mode 100644
index 00000000..de70f2a1
--- /dev/null
+++ b/helix-syntax/src/parse.rs
@@ -0,0 +1,429 @@
+use std::collections::VecDeque;
+use std::mem::replace;
+use std::sync::Arc;
+
+use ahash::RandomState;
+use bitflags::bitflags;
+use hashbrown::raw::RawTable;
+use ropey::RopeSlice;
+use tree_sitter::{Node, Parser, Point, QueryCursor, Range};
+
+use crate::ropey::RopeProvider;
+use crate::{
+ Error, HighlightConfiguration, IncludedChildren, InjectionLanguageMarker, LanguageLayer,
+ Syntax, PARSER, TREE_SITTER_MATCH_LIMIT,
+};
+
+bitflags! {
+ /// Flags that track the status of a layer
+ /// in the `Syntax::update` function
+ #[derive(Debug)]
+ pub(crate) struct LayerUpdateFlags : u32{
+ const MODIFIED = 0b001; // an edit reached into the layer's ranges; its tree is edited and re-parsed
+ const MOVED = 0b010; // the layer's ranges were only shifted; its tree is edited but not re-parsed
+ const TOUCHED = 0b100; // the layer was visited during this update; layers without it are removed
+ }
+}
+
+impl Syntax {
+    /// Brings every language layer up to date with `source` after `edits` were applied.
+    ///
+    /// The work happens in three phases:
+    /// 1. The ranges of all injection layers are shifted or resized according to `edits`
+    ///    and each layer is flagged as `MOVED` or `MODIFIED` accordingly. The root layer
+    ///    is always flagged as `MODIFIED`.
+    /// 2. Starting at the root, layers are visited breadth-first: existing trees are told
+    ///    about the edits, modified or never-parsed layers are (re-)parsed, and the
+    ///    layer's injection query is run to discover its child layers. A discovered
+    ///    injection reuses an equal existing layer when there is one.
+    /// 3. Layers that were not visited in phase 2 are removed and all flags are reset.
+    ///
+    /// `injection_callback` maps an injection marker to the highlight configuration of
+    /// the injected language; injections for which it returns `None` are ignored.
+    /// `PARSER` stays mutably borrowed for the whole traversal, so the callback must not
+    /// call `with_cursor` or borrow `PARSER` in any other way.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error of `LanguageLayer::parse` for the first layer that fails to
+    /// parse. The remaining layers are not processed in that case.
+    pub fn update(
+        &mut self,
+        source: RopeSlice,
+        edits: Vec<tree_sitter::InputEdit>,
+        injection_callback: impl Fn(&InjectionLanguageMarker) -> Option<Arc<HighlightConfiguration>>,
+    ) -> Result<(), Error> {
+        let mut queue = VecDeque::new();
+        queue.push_back(self.root);
+
+        // This table allows inverse indexing of `layers`.
+        // That is by hashing a `Layer` you can find
+        // the `LayerId` of an existing equivalent `Layer` in `layers`.
+        //
+        // It is used to determine if a new layer exists for an injection
+        // or if an existing layer needs to be updated.
+        let mut layers_table = RawTable::with_capacity(self.layers.len());
+        let layers_hasher = RandomState::new();
+        // Use the edits to update all layers markers
+        //
+        // `point_add` and `point_sub` mirror the helpers of the same name in tree-sitter:
+        // a column is only relative to the other point while both share a row.
+        fn point_add(a: Point, b: Point) -> Point {
+            if b.row > 0 {
+                Point::new(a.row.saturating_add(b.row), b.column)
+            } else {
+                // `b` does not leave the row of `a`, so that row has to be kept.
+                Point::new(a.row, a.column.saturating_add(b.column))
+            }
+        }
+        fn point_sub(a: Point, b: Point) -> Point {
+            if a.row > b.row {
+                Point::new(a.row.saturating_sub(b.row), a.column)
+            } else {
+                Point::new(0, a.column.saturating_sub(b.column))
+            }
+        }
+
+        for (layer_id, layer) in self.layers.iter_mut() {
+            // The root layer always covers the whole range (0..usize::MAX)
+            if layer.depth == 0 {
+                layer.flags = LayerUpdateFlags::MODIFIED;
+                continue;
+            }
+
+            if !edits.is_empty() {
+                for range in &mut layer.ranges {
+                    // Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720
+                    for edit in edits.iter().rev() {
+                        let is_pure_insertion = edit.old_end_byte == edit.start_byte;
+
+                        // if edit is after range, skip
+                        if edit.start_byte > range.end_byte {
+                            // TODO: || (is_noop && edit.start_byte == range.end_byte)
+                            continue;
+                        }
+
+                        // if edit is before range, shift entire range by len
+                        if edit.old_end_byte < range.start_byte {
+                            range.start_byte =
+                                edit.new_end_byte + (range.start_byte - edit.old_end_byte);
+                            range.start_point = point_add(
+                                edit.new_end_position,
+                                point_sub(range.start_point, edit.old_end_position),
+                            );
+
+                            range.end_byte = edit
+                                .new_end_byte
+                                .saturating_add(range.end_byte - edit.old_end_byte);
+                            range.end_point = point_add(
+                                edit.new_end_position,
+                                point_sub(range.end_point, edit.old_end_position),
+                            );
+
+                            layer.flags |= LayerUpdateFlags::MOVED;
+                        }
+                        // if the edit starts in the space before and extends into the range
+                        else if edit.start_byte < range.start_byte {
+                            range.start_byte = edit.new_end_byte;
+                            range.start_point = edit.new_end_position;
+
+                            range.end_byte = range
+                                .end_byte
+                                .saturating_sub(edit.old_end_byte)
+                                .saturating_add(edit.new_end_byte);
+                            range.end_point = point_add(
+                                edit.new_end_position,
+                                point_sub(range.end_point, edit.old_end_position),
+                            );
+                            layer.flags = LayerUpdateFlags::MODIFIED;
+                        }
+                        // If the edit is an insertion at the start of the tree, shift
+                        else if edit.start_byte == range.start_byte && is_pure_insertion {
+                            range.start_byte = edit.new_end_byte;
+                            range.start_point = edit.new_end_position;
+                            layer.flags |= LayerUpdateFlags::MOVED;
+                        } else {
+                            // the edit starts inside the range: only the end moves
+                            range.end_byte = range
+                                .end_byte
+                                .saturating_sub(edit.old_end_byte)
+                                .saturating_add(edit.new_end_byte);
+                            range.end_point = point_add(
+                                edit.new_end_position,
+                                point_sub(range.end_point, edit.old_end_position),
+                            );
+                            layer.flags = LayerUpdateFlags::MODIFIED;
+                        }
+                    }
+                }
+            }
+
+            let hash = layers_hasher.hash_one(layer);
+            // Safety: insert_no_grow is unsafe because it assumes that the table
+            // has enough capacity to hold additional elements.
+            // This is always the case as we reserved enough capacity above.
+            unsafe { layers_table.insert_no_grow(hash, layer_id) };
+        }
+
+        PARSER.with(|ts_parser| {
+            let ts_parser = &mut ts_parser.borrow_mut();
+            ts_parser.parser.set_timeout_micros(1000 * 500); // half a second is pretty generous
+            let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new);
+            // TODO: might need to set cursor range
+            cursor.set_byte_range(0..usize::MAX);
+            cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
+
+            let source_slice = source.slice(..);
+
+            while let Some(layer_id) = queue.pop_front() {
+                let layer = &mut self.layers[layer_id];
+
+                // Mark the layer as touched
+                layer.flags |= LayerUpdateFlags::TOUCHED;
+
+                // If a tree already exists, notify it of changes.
+                if let Some(tree) = &mut layer.tree {
+                    if layer
+                        .flags
+                        .intersects(LayerUpdateFlags::MODIFIED | LayerUpdateFlags::MOVED)
+                    {
+                        for edit in edits.iter().rev() {
+                            // Apply the edits in reverse.
+                            // If we applied them in order then edit 1 would disrupt the positioning of edit 2.
+                            tree.edit(edit);
+                        }
+                    }
+
+                    if layer.flags.contains(LayerUpdateFlags::MODIFIED) {
+                        // Re-parse the tree.
+                        layer.parse(&mut ts_parser.parser, source)?;
+                    }
+                } else {
+                    // always parse if this layer has never been parsed before
+                    layer.parse(&mut ts_parser.parser, source)?;
+                }
+
+                // Switch to an immutable borrow.
+                let layer = &self.layers[layer_id];
+
+                // Process injections.
+                let matches = cursor.matches(
+                    &layer.config.injections_query,
+                    layer.tree().root_node(),
+                    RopeProvider(source_slice),
+                );
+                // One `(language, content nodes, included children)` slot per combined pattern.
+                let mut combined_injections = vec![
+                    (None, Vec::new(), IncludedChildren::default());
+                    layer.config.combined_injections_patterns.len()
+                ];
+                let mut injections = Vec::new();
+                // Content nodes that start before this offset are skipped.
+                let mut last_injection_end = 0;
+                for mat in matches {
+                    let (injection_capture, content_node, included_children) = layer
+                        .config
+                        .injection_for_match(&layer.config.injections_query, &mat, source_slice);
+
+                    // in case this is a combined injection save it for more processing later
+                    if let Some(combined_injection_idx) = layer
+                        .config
+                        .combined_injections_patterns
+                        .iter()
+                        .position(|&pattern| pattern == mat.pattern_index)
+                    {
+                        let entry = &mut combined_injections[combined_injection_idx];
+                        if injection_capture.is_some() {
+                            entry.0 = injection_capture;
+                        }
+                        if let Some(content_node) = content_node {
+                            if content_node.start_byte() >= last_injection_end {
+                                entry.1.push(content_node);
+                                last_injection_end = content_node.end_byte();
+                            }
+                        }
+                        entry.2 = included_children;
+                        continue;
+                    }
+
+                    // Explicitly remove this match so that none of its other captures will remain
+                    // in the stream of captures.
+                    mat.remove();
+
+                    // If a language is found with the given name, then add a new language layer
+                    // to the highlighted document.
+                    if let (Some(injection_capture), Some(content_node)) =
+                        (injection_capture, content_node)
+                    {
+                        if let Some(config) = (injection_callback)(&injection_capture) {
+                            let ranges =
+                                intersect_ranges(&layer.ranges, &[content_node], included_children);
+
+                            if !ranges.is_empty() {
+                                if content_node.start_byte() < last_injection_end {
+                                    continue;
+                                }
+                                last_injection_end = content_node.end_byte();
+                                injections.push((config, ranges));
+                            }
+                        }
+                    }
+                }
+
+                for (lang_name, content_nodes, included_children) in combined_injections {
+                    if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
+                        if let Some(config) = (injection_callback)(&lang_name) {
+                            let ranges =
+                                intersect_ranges(&layer.ranges, &content_nodes, included_children);
+                            if !ranges.is_empty() {
+                                injections.push((config, ranges));
+                            }
+                        }
+                    }
+                }
+
+                let depth = layer.depth + 1;
+                // TODO: can't inline this since matches borrows self.layers
+                for (config, ranges) in injections {
+                    let parent = Some(layer_id);
+                    // The parent is left out of the probe layer and assigned after the lookup.
+                    let new_layer = LanguageLayer {
+                        tree: None,
+                        config,
+                        depth,
+                        ranges,
+                        flags: LayerUpdateFlags::empty(),
+                        parent: None,
+                    };
+
+                    // Find an identical existing layer
+                    let layer = layers_table
+                        .get(layers_hasher.hash_one(&new_layer), |&it| {
+                            self.layers[it] == new_layer
+                        })
+                        .copied();
+
+                    // ...or insert a new one.
+                    let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer));
+                    self.layers[layer_id].parent = parent;
+
+                    queue.push_back(layer_id);
+                }
+
+                // TODO: pre-process local scopes at this time, rather than highlight?
+                // would solve problems with locals not working across boundaries
+            }
+
+            // Return the cursor back in the pool.
+            ts_parser.cursors.push(cursor);
+
+            // Reset all `LayerUpdateFlags` and remove all untouched layers
+            self.layers.retain(|_, layer| {
+                replace(&mut layer.flags, LayerUpdateFlags::empty())
+                    .contains(LayerUpdateFlags::TOUCHED)
+            });
+
+            Ok(())
+        })
+    }
+}
+
+/// Compute the ranges that should be included when parsing an injection.
+/// This takes into account three things:
+/// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
+/// * `nodes` - Every injection takes place within a set of nodes. The injection ranges
+/// are the ranges of those nodes.
+/// * `included_children` - Selects which children of the content nodes are cut out of the
+/// nested document: all of them (`None`, so only the content nodes' *own* content is
+/// reparsed), none of them (`All`, so the content nodes' entire ranges are reparsed),
+/// or only the named ones (`Unnamed`).
+fn intersect_ranges(
+ parent_ranges: &[Range],
+ nodes: &[Node],
+ included_children: IncludedChildren,
+) -> Vec<Range> {
+ let mut cursor = nodes[0].walk(); // panics on an empty `nodes` slice; the callers in `Syntax::update` never pass one
+ let mut result = Vec::new();
+ let mut parent_range_iter = parent_ranges.iter();
+ let mut parent_range = parent_range_iter
+ .next()
+ .expect("Layers should only be constructed with non-empty ranges vectors");
+ for node in nodes.iter() {
+ let mut preceding_range = Range { // sentinel "excluded" range that ends where the node starts
+ start_byte: 0,
+ start_point: Point::new(0, 0),
+ end_byte: node.start_byte(),
+ end_point: node.start_position(),
+ };
+ let following_range = Range { // sentinel "excluded" range that starts where the node ends
+ start_byte: node.end_byte(),
+ start_point: node.end_position(),
+ end_byte: usize::MAX,
+ end_point: Point::new(usize::MAX, usize::MAX),
+ };
+
+ for excluded_range in node // excluded children in order, closed off by the trailing sentinel
+ .children(&mut cursor)
+ .filter_map(|child| match included_children {
+ IncludedChildren::None => Some(child.range()),
+ IncludedChildren::All => None,
+ IncludedChildren::Unnamed => {
+ if child.is_named() {
+ Some(child.range())
+ } else {
+ None
+ }
+ }
+ })
+ .chain([following_range].iter().cloned())
+ {
+ let mut range = Range { // the gap between two consecutive excluded ranges is a candidate
+ start_byte: preceding_range.end_byte,
+ start_point: preceding_range.end_point,
+ end_byte: excluded_range.start_byte,
+ end_point: excluded_range.start_point,
+ };
+ preceding_range = excluded_range;
+
+ if range.end_byte < parent_range.start_byte { // gap ends before the current parent range begins
+ continue;
+ }
+
+ while parent_range.start_byte <= range.end_byte { // clip the gap against each parent range it reaches
+ if parent_range.end_byte > range.start_byte {
+ if range.start_byte < parent_range.start_byte {
+ range.start_byte = parent_range.start_byte;
+ range.start_point = parent_range.start_point;
+ }
+
+ if parent_range.end_byte < range.end_byte { // gap runs past this parent range: emit the clipped part, keep the rest
+ if range.start_byte < parent_range.end_byte {
+ result.push(Range {
+ start_byte: range.start_byte,
+ start_point: range.start_point,
+ end_byte: parent_range.end_byte,
+ end_point: parent_range.end_point,
+ });
+ }
+ range.start_byte = parent_range.end_byte;
+ range.start_point = parent_range.end_point;
+ } else { // gap ends inside this parent range: emit it unless it is empty
+ if range.start_byte < range.end_byte {
+ result.push(range);
+ }
+ break;
+ }
+ }
+
+ if let Some(next_range) = parent_range_iter.next() {
+ parent_range = next_range;
+ } else {
+ return result; // no parent ranges left, so nothing further can be included
+ }
+ }
+ }
+ }
+ result
+}
+
+impl LanguageLayer {
+    /// Parses `source` for this layer and stores the resulting tree in `self.tree`.
+    ///
+    /// `parser` is first configured with the layer's included ranges and language. The
+    /// layer's previous tree, if any, is handed to the parser as the old tree.
+    ///
+    /// # Errors
+    ///
+    /// * `Error::InvalidRanges` if the parser rejects `self.ranges`.
+    /// * `Error::InvalidLanguage` if the parser rejects the layer's language.
+    /// * `Error::Cancelled` if the parser returns no tree.
+    fn parse(&mut self, parser: &mut Parser, source: RopeSlice) -> Result<(), Error> {
+        if parser.set_included_ranges(&self.ranges).is_err() {
+            return Err(Error::InvalidRanges);
+        }
+        if parser.set_language(&self.config.language).is_err() {
+            return Err(Error::InvalidLanguage);
+        }
+
+        // NOTE: no cancellation flag is installed on the parser here.
+        let previous_tree = self.tree.as_ref();
+        let parsed = parser.parse_with(
+            &mut |offset, _| {
+                if offset <= source.len_bytes() {
+                    // hand out the rest of the chunk that contains `offset`
+                    let (chunk, chunk_start, _, _) = source.chunk_at_byte(offset);
+                    &chunk.as_bytes()[offset - chunk_start..]
+                } else {
+                    // out of range
+                    &[]
+                }
+            },
+            previous_tree,
+        );
+
+        match parsed {
+            Some(tree) => {
+                self.tree = Some(tree);
+                Ok(())
+            }
+            None => Err(Error::Cancelled),
+        }
+    }
+}
diff --git a/helix-syntax/src/pretty_print.rs b/helix-syntax/src/pretty_print.rs
new file mode 100644
index 00000000..bd108229
--- /dev/null
+++ b/helix-syntax/src/pretty_print.rs
@@ -0,0 +1,65 @@
+use std::fmt;
+
+use tree_sitter::{Node, TreeCursor};
+
+/// Writes a textual rendering of `node` and its descendants to `fmt`.
+///
+/// A node without children is written on its own: as `(kind)` when it is visible and
+/// as `"kind"` otherwise. Any other node is rendered recursively by
+/// `pretty_print_tree_impl`, starting at depth 0.
+pub fn pretty_print_tree<W: fmt::Write>(fmt: &mut W, node: Node) -> fmt::Result {
+    if node.child_count() != 0 {
+        return pretty_print_tree_impl(fmt, &mut node.walk(), 0);
+    }
+
+    match node_is_visible(&node) {
+        true => write!(fmt, "({})", node.kind()),
+        false => write!(fmt, "\"{}\"", node.kind()),
+    }
+}
+
+/// Recursive worker of `pretty_print_tree`: renders the node under `cursor` at `depth`.
+///
+/// A visible node is written as `(kind ...)`, indented by two columns per level and
+/// prefixed with `field: ` when the cursor reports a field name. An invisible node
+/// produces no output of its own, but its children are still rendered. Every visible
+/// child starts on a new line. On success the cursor is back on the node it started on.
+fn pretty_print_tree_impl<W: fmt::Write>(
+    fmt: &mut W,
+    cursor: &mut TreeCursor,
+    depth: usize,
+) -> fmt::Result {
+    let node = cursor.node();
+    let visible = node_is_visible(&node);
+
+    if visible {
+        let indentation_columns = depth * 2;
+        write!(fmt, "{:indentation_columns$}", "")?;
+
+        match cursor.field_name() {
+            Some(field_name) => write!(fmt, "{}: ", field_name)?,
+            None => {}
+        }
+
+        write!(fmt, "({}", node.kind())?;
+    }
+
+    // Render the children, if there are any.
+    if cursor.goto_first_child() {
+        let mut has_more = true;
+        while has_more {
+            if node_is_visible(&cursor.node()) {
+                fmt.write_char('\n')?;
+            }
+
+            pretty_print_tree_impl(fmt, cursor, depth + 1)?;
+            has_more = cursor.goto_next_sibling();
+        }
+
+        // The parent of the first child must exist, and must be `node`.
+        let moved = cursor.goto_parent();
+        debug_assert!(moved);
+        debug_assert!(cursor.node() == node);
+    }
+
+    if visible {
+        fmt.write_char(')')
+    } else {
+        Ok(())
+    }
+}
+
+/// Reports whether `node` is rendered as a parenthesized node by the pretty printer:
+/// it is either missing, or named with a kind its language marks as visible.
+fn node_is_visible(node: &Node) -> bool {
+    if node.is_missing() {
+        return true;
+    }
+
+    node.is_named() && node.language().node_kind_is_visible(node.kind_id())
+}
diff --git a/helix-syntax/src/ropey.rs b/helix-syntax/src/ropey.rs
new file mode 100644
index 00000000..650fcfb9
--- /dev/null
+++ b/helix-syntax/src/ropey.rs
@@ -0,0 +1,29 @@
+// glue code for using TS with ropey, this should be put behind a feature flag
+// in the future (and potentially be partially removed)
+
+use ropey::RopeSlice;
+use tree_sitter::{Node, TextProvider};
+
+/// Iterator adapter that yields every chunk of a rope as a byte slice.
+pub struct ChunksBytes<'a> {
+    chunks: ropey::iter::Chunks<'a>,
+}
+
+impl<'a> Iterator for ChunksBytes<'a> {
+    type Item = &'a [u8];
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let chunk = self.chunks.next()?;
+        Some(chunk.as_bytes())
+    }
+}
+
+/// Text provider that serves node text to tree-sitter from the wrapped rope slice.
+pub struct RopeProvider<'a>(pub RopeSlice<'a>);
+
+impl<'a> TextProvider<&'a [u8]> for RopeProvider<'a> {
+    type I = ChunksBytes<'a>;
+
+    /// Returns the rope chunks that cover the byte range of `node`.
+    fn text(&mut self, node: Node) -> Self::I {
+        let (from, to) = (node.start_byte(), node.end_byte());
+        let chunks = self.0.byte_slice(from..to).chunks();
+        ChunksBytes { chunks }
+    }
+}
diff --git a/helix-syntax/src/tree_cursor.rs b/helix-syntax/src/tree_cursor.rs
new file mode 100644
index 00000000..692d5890
--- /dev/null
+++ b/helix-syntax/src/tree_cursor.rs
@@ -0,0 +1,264 @@
+use std::{cmp::Reverse, ops::Range};
+
+use super::{LanguageLayer, LayerId};
+
+use slotmap::HopSlotMap;
+use tree_sitter::Node;
+
+/// The byte range of an injection layer.
+///
+/// Injection ranges may overlap, but all overlapping parts are subsets of their parent ranges.
+/// This allows us to sort the ranges ahead of time in order to efficiently find a range that
+/// contains a point with maximum depth.
+#[derive(Debug)]
+struct InjectionRange {
+ start: usize, // `start_byte` of one of the layer's ranges
+ end: usize, // `end_byte` of that same range
+ layer_id: LayerId, // layer the range belongs to
+ depth: u32, // copied from `LanguageLayer::depth` of that layer
+}
+
+pub struct TreeCursor<'a> {
+ layers: &'a HopSlotMap<LayerId, LanguageLayer>, // all language layers the cursor can move between
+ root: LayerId, // id of the root layer
+ current: LayerId, // id of the layer that `cursor` currently belongs to
+ injection_ranges: Vec<InjectionRange>, // ranges of all layers with a parent, sorted by end, then by descending depth
+ // TODO: Ideally this would be a `tree_sitter::TreeCursor<'a>` but
+ // that returns very surprising results in testing.
+ cursor: Node<'a>, // node the cursor is currently positioned on
+}
+
+impl<'a> TreeCursor<'a> {
+    /// Creates a cursor positioned on the root node of the `root` layer.
+    ///
+    /// The ranges of all layers that have a parent are collected up front and sorted by
+    /// end byte, deeper layers first among equal ends, so that the layer lookups below
+    /// can start from a binary search.
+    pub(super) fn new(layers: &'a HopSlotMap<LayerId, LanguageLayer>, root: LayerId) -> Self {
+        let mut injection_ranges: Vec<InjectionRange> = layers
+            .iter()
+            // The root layer has no parent and is not an injection.
+            .filter(|(_, layer)| layer.parent.is_some())
+            .flat_map(|(layer_id, layer)| {
+                layer.ranges.iter().map(move |byte_range| InjectionRange {
+                    start: byte_range.start_byte,
+                    end: byte_range.end_byte,
+                    layer_id,
+                    depth: layer.depth,
+                })
+            })
+            .collect();
+        injection_ranges.sort_unstable_by_key(|range| (range.end, Reverse(range.depth)));
+
+        Self {
+            layers,
+            root,
+            current: root,
+            injection_ranges,
+            cursor: layers[root].tree().root_node(),
+        }
+    }
+
+    /// Returns the node the cursor is currently positioned on.
+    pub fn node(&self) -> Node<'a> {
+        self.cursor
+    }
+
+    /// Moves the cursor onto `target` if there is one and reports whether it moved.
+    fn move_to(&mut self, target: Option<Node<'a>>) -> bool {
+        match target {
+            Some(node) => {
+                self.cursor = node;
+                true
+            }
+            None => false,
+        }
+    }
+
+    /// Moves to the parent of the current node and reports whether the cursor moved.
+    ///
+    /// When the node has no parent in its own tree and the cursor is not on the root
+    /// layer, the cursor ascends to the parent layer and lands on the node that
+    /// `descendant_for_byte_range` reports there for the old node's byte range, or on
+    /// that layer's root node if there is none.
+    pub fn goto_parent(&mut self) -> bool {
+        if let Some(parent) = self.cursor.parent() {
+            self.cursor = parent;
+            return true;
+        }
+
+        // Already at the top of the root layer: nowhere left to go.
+        if self.current == self.root {
+            return false;
+        }
+
+        // Continue in the layer this one was injected into.
+        let span = self.cursor.byte_range();
+        self.current = self.layers[self.current]
+            .parent
+            .expect("non-root layers have a parent");
+        let layer_root = self.layers[self.current].tree().root_node();
+        self.cursor = layer_root
+            .descendant_for_byte_range(span.start, span.end)
+            .unwrap_or(layer_root);
+
+        true
+    }
+
+    /// Ascends until `predicate` accepts the current node.
+    ///
+    /// Returns `false` when the cursor ran out of parents first; the cursor then stays
+    /// on the last node it reached.
+    pub fn goto_parent_with<P>(&mut self, predicate: P) -> bool
+    where
+        P: Fn(&Node) -> bool,
+    {
+        loop {
+            if !self.goto_parent() {
+                return false;
+            }
+            if predicate(&self.node()) {
+                return true;
+            }
+        }
+    }
+
+    /// Finds the injection layer that has exactly the same range as the given `range`.
+    fn layer_id_of_byte_range(&self, search_range: Range<usize>) -> Option<LayerId> {
+        let first_candidate = self
+            .injection_ranges
+            .partition_point(|range| range.end < search_range.end);
+
+        self.injection_ranges[first_candidate..]
+            .iter()
+            .take_while(|range| range.end == search_range.end)
+            .find(|range| range.start == search_range.start)
+            .map(|range| range.layer_id)
+    }
+
+    /// Descends to the first (named) child, entering an injection layer when a layer
+    /// other than the current one covers exactly the current node's range.
+    fn goto_first_child_impl(&mut self, named: bool) -> bool {
+        let injected_layer = self
+            .layer_id_of_byte_range(self.node().byte_range())
+            .filter(|&layer_id| layer_id != self.current);
+
+        if let Some(layer_id) = injected_layer {
+            // Continue at the root of the injected layer's tree.
+            self.current = layer_id;
+            self.cursor = self.layers[self.current].tree().root_node();
+            return true;
+        }
+
+        // Otherwise descend within the current tree.
+        let child = if named {
+            self.cursor.named_child(0)
+        } else {
+            self.cursor.child(0)
+        };
+        self.move_to(child)
+    }
+
+    /// Moves to the first child of the current node, if it has one.
+    pub fn goto_first_child(&mut self) -> bool {
+        self.goto_first_child_impl(false)
+    }
+
+    /// Moves to the first named child of the current node, if it has one.
+    pub fn goto_first_named_child(&mut self) -> bool {
+        self.goto_first_child_impl(true)
+    }
+
+    /// Moves to the next (named) sibling within the current tree.
+    fn goto_next_sibling_impl(&mut self, named: bool) -> bool {
+        let sibling = if named {
+            self.cursor.next_named_sibling()
+        } else {
+            self.cursor.next_sibling()
+        };
+        self.move_to(sibling)
+    }
+
+    /// Moves to the next sibling of the current node, if it has one.
+    pub fn goto_next_sibling(&mut self) -> bool {
+        self.goto_next_sibling_impl(false)
+    }
+
+    /// Moves to the next named sibling of the current node, if it has one.
+    pub fn goto_next_named_sibling(&mut self) -> bool {
+        self.goto_next_sibling_impl(true)
+    }
+
+    /// Moves to the previous (named) sibling within the current tree.
+    fn goto_prev_sibling_impl(&mut self, named: bool) -> bool {
+        let sibling = if named {
+            self.cursor.prev_named_sibling()
+        } else {
+            self.cursor.prev_sibling()
+        };
+        self.move_to(sibling)
+    }
+
+    /// Moves to the previous sibling of the current node, if it has one.
+    pub fn goto_prev_sibling(&mut self) -> bool {
+        self.goto_prev_sibling_impl(false)
+    }
+
+    /// Moves to the previous named sibling of the current node, if it has one.
+    pub fn goto_prev_named_sibling(&mut self) -> bool {
+        self.goto_prev_sibling_impl(true)
+    }
+
+    /// Finds the injection layer that contains the given start-end range, falling back
+    /// to the root layer when no injection range contains it.
+    fn layer_id_containing_byte_range(&self, start: usize, end: usize) -> LayerId {
+        let first_candidate = self
+            .injection_ranges
+            .partition_point(|range| range.end < end);
+
+        self.injection_ranges[first_candidate..]
+            .iter()
+            .take_while(|range| range.start < end)
+            .find(|range| range.start <= start)
+            .map_or(self.root, |range| range.layer_id)
+    }
+
+    /// Repositions the cursor for the byte range `start..end`: it switches to the layer
+    /// found for that range and lands on the node `descendant_for_byte_range` reports
+    /// there, or on the layer's root node if there is none.
+    pub fn reset_to_byte_range(&mut self, start: usize, end: usize) {
+        self.current = self.layer_id_containing_byte_range(start, end);
+        let layer_root = self.layers[self.current].tree().root_node();
+        self.cursor = layer_root
+            .descendant_for_byte_range(start, end)
+            .unwrap_or(layer_root);
+    }
+
+    /// Returns an iterator over the children of the node the TreeCursor is on
+    /// at the time this is called.
+    pub fn children(&'a mut self) -> ChildIter {
+        ChildIter {
+            parent: self.node(),
+            cursor: self,
+            named: false,
+        }
+    }
+
+    /// Returns an iterator over the named children of the node the TreeCursor is on
+    /// at the time this is called.
+    pub fn named_children(&'a mut self) -> ChildIter {
+        ChildIter {
+            parent: self.node(),
+            cursor: self,
+            named: true,
+        }
+    }
+}
+
+/// Iterator over the (named) children of the node a `TreeCursor` was positioned on
+/// when the iterator was created. It drives the borrowed cursor while iterating.
+pub struct ChildIter<'n> {
+    cursor: &'n mut TreeCursor<'n>,
+    parent: Node<'n>,
+    named: bool,
+}
+
+impl<'n> Iterator for ChildIter<'n> {
+    type Item = Node<'n>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // While the cursor still sits on the parent, nothing has been yielded yet and the
+        // first child is visited; afterwards iteration continues with the next sibling.
+        let moved = if self.cursor.node() == self.parent {
+            self.cursor.goto_first_child_impl(self.named)
+        } else {
+            self.cursor.goto_next_sibling_impl(self.named)
+        };
+
+        if moved {
+            Some(self.cursor.node())
+        } else {
+            None
+        }
+    }
+}
diff --git a/helix-term/src/ui/markdown.rs b/helix-term/src/ui/markdown.rs
index 96614443..d3fdec02 100644
--- a/helix-term/src/ui/markdown.rs
+++ b/helix-term/src/ui/markdown.rs
@@ -54,7 +54,14 @@ pub fn highlighted_code_block<'a>(
language.into(),
))
.and_then(|config| config.highlight_config(theme.scopes()))
- .and_then(|config| Syntax::new(ropeslice, config, Arc::clone(&config_loader)));
+ .and_then(|config| {
+ Syntax::new(ropeslice, config, |injection| {
+ config_loader
+ .load()
+ .language_configuration_for_injection_string(injection)
+ .and_then(|config| config.get_highlight_config())
+ })
+ });
let syntax = match syntax {
Some(s) => s,
diff --git a/helix-term/src/ui/picker/handlers.rs b/helix-term/src/ui/picker/handlers.rs
index 4896ccbc..ac34e893 100644
--- a/helix-term/src/ui/picker/handlers.rs
+++ b/helix-term/src/ui/picker/handlers.rs
@@ -82,7 +82,12 @@ impl<T: 'static + Send + Sync, D: 'static + Send + Sync> AsyncHook
let Some(syntax) = language_config
.highlight_config(&loader.load().scopes())
.and_then(|highlight_config| {
- helix_core::Syntax::new(text.slice(..), highlight_config, loader)
+ helix_core::Syntax::new(text.slice(..), highlight_config, |injection| {
+ loader
+ .load()
+ .language_configuration_for_injection_string(injection)
+ .and_then(|config| config.get_highlight_config())
+ })
})
else {
log::info!("highlighting picker item failed");
diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs
index 3314a243..b45f80ca 100644
--- a/helix-view/src/document.rs
+++ b/helix-view/src/document.rs
@@ -7,7 +7,7 @@ use helix_core::auto_pairs::AutoPairs;
use helix_core::chars::char_is_word;
use helix_core::doc_formatter::TextFormat;
use helix_core::encoding::Encoding;
-use helix_core::syntax::{Highlight, LanguageServerFeature};
+use helix_core::syntax::{generate_edits, Highlight, LanguageServerFeature};
use helix_core::text_annotations::{InlineAnnotation, Overlay};
use helix_lsp::util::lsp_pos_to_pos;
use helix_stdx::faccess::{copy_metadata, readonly};
@@ -156,6 +156,7 @@ pub struct Document {
pub syntax: Option<Syntax>,
/// Corresponding language scope name. Usually `source.<lang>`.
pub language: Option<Arc<LanguageConfiguration>>,
+ loader: Option<Arc<ArcSwap<helix_core::syntax::Loader>>>,
/// Pending changes since last history commit.
changes: ChangeSet,
@@ -678,6 +679,7 @@ impl Document {
focused_at: std::time::Instant::now(),
readonly: false,
jump_labels: HashMap::new(),
+ loader: None,
}
}
@@ -1131,9 +1133,15 @@ impl Document {
if let Some(highlight_config) =
language_config.highlight_config(&(*loader).load().scopes())
{
- self.syntax = Syntax::new(self.text.slice(..), highlight_config, loader);
+ let loader_ = loader.load_full();
+ self.syntax = Syntax::new(self.text.slice(..), highlight_config, |injection| {
+ loader_
+ .language_configuration_for_injection_string(injection)
+ .and_then(|config| config.get_highlight_config())
+ });
}
+ self.loader = Some(loader);
self.language = Some(language_config);
} else {
self.syntax = None;
@@ -1275,11 +1283,16 @@ impl Document {
// update tree-sitter syntax tree
if let Some(syntax) = &mut self.syntax {
+ let loader = self.loader.as_ref().unwrap().load_full();
// TODO: no unwrap
let res = syntax.update(
- old_doc.slice(..),
self.text.slice(..),
- transaction.changes(),
+ generate_edits(old_doc.slice(..), transaction.changes()),
+ |injection| {
+ loader
+ .language_configuration_for_injection_string(injection)
+ .and_then(|config| config.get_highlight_config())
+ },
);
if res.is_err() {
log::error!("TS parser failed, disabling TS for the current buffer: {res:?}");