Unnamed repository; edit this file 'description' to name the repository.
-rw-r--r--Cargo.lock37
-rw-r--r--Cargo.toml2
-rw-r--r--helix-core/Cargo.toml5
-rw-r--r--helix-core/src/indent.rs444
-rw-r--r--helix-core/src/lib.rs4
-rw-r--r--helix-core/src/match_brackets.rs14
-rw-r--r--helix-core/src/movement.rs36
-rw-r--r--helix-core/src/object.rs39
-rw-r--r--helix-core/src/position.rs5
-rw-r--r--helix-core/src/selection.rs6
-rw-r--r--helix-core/src/snippets/active.rs4
-rw-r--r--helix-core/src/syntax.rs2582
-rw-r--r--helix-core/src/syntax/config.rs21
-rw-r--r--helix-core/src/syntax/tree_cursor.rs264
-rw-r--r--helix-core/src/text_annotations.rs11
-rw-r--r--helix-core/src/textobject.rs15
-rw-r--r--helix-core/tests/indent.rs16
-rw-r--r--helix-loader/Cargo.toml4
-rw-r--r--helix-loader/src/grammar.rs23
-rw-r--r--helix-term/src/commands.rs35
-rw-r--r--helix-term/src/commands/typed.rs43
-rw-r--r--helix-term/src/ui/document.rs207
-rw-r--r--helix-term/src/ui/editor.rs310
-rw-r--r--helix-term/src/ui/lsp/signature_help.rs19
-rw-r--r--helix-term/src/ui/markdown.rs133
-rw-r--r--helix-term/src/ui/picker.rs21
-rw-r--r--helix-term/src/ui/picker/handlers.rs22
-rw-r--r--helix-term/src/ui/prompt.rs2
-rw-r--r--helix-view/src/document.rs57
-rw-r--r--helix-view/src/editor.rs6
-rw-r--r--helix-view/src/theme.rs73
-rw-r--r--helix-view/src/view.rs18
-rw-r--r--xtask/src/helpers.rs6
-rw-r--r--xtask/src/main.rs44
34 files changed, 1449 insertions, 3079 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 1cce4bc0..3a52e24d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -30,7 +30,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
dependencies = [
"cfg-if",
- "getrandom 0.3.1",
"once_cell",
"version_check",
"zerocopy",
@@ -1400,14 +1399,13 @@ dependencies = [
name = "helix-core"
version = "25.1.1"
dependencies = [
- "ahash",
"anyhow",
"arc-swap",
"bitflags",
"chrono",
"encoding_rs",
+ "foldhash",
"globset",
- "hashbrown 0.14.5",
"helix-loader",
"helix-parsec",
"helix-stdx",
@@ -1428,7 +1426,7 @@ dependencies = [
"smartstring",
"textwrap",
"toml",
- "tree-sitter",
+ "tree-house",
"unicode-general-category",
"unicode-segmentation",
"unicode-width 0.1.12",
@@ -1472,14 +1470,13 @@ dependencies = [
"cc",
"etcetera",
"helix-stdx",
- "libloading",
"log",
"once_cell",
"serde",
"tempfile",
"threadpool",
"toml",
- "tree-sitter",
+ "tree-house",
]
[[package]]
@@ -2802,13 +2799,33 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfb942dfe1d8e29a7ee7fcbde5bd2b9a25fb89aa70caea2eba3bee836ff41076"
[[package]]
-name = "tree-sitter"
-version = "0.22.6"
+name = "tree-house"
+version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df7cc499ceadd4dcdf7ec6d4cbc34ece92c3fa07821e287aedecd4416c516dca"
+checksum = "803311306ba3279e87699f7fa16ea18fbcc8889d0ff0c20dc0652317f8b58117"
dependencies = [
- "cc",
+ "arc-swap",
+ "hashbrown 0.15.2",
+ "kstring",
+ "once_cell",
"regex",
+ "regex-cursor",
+ "ropey",
+ "slab",
+ "tree-house-bindings",
+]
+
+[[package]]
+name = "tree-house-bindings"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2f6894df414648c56f1f5b129830447140ff1017867773694ba882d093aa140"
+dependencies = [
+ "cc",
+ "libloading",
+ "regex-cursor",
+ "ropey",
+ "thiserror 2.0.12",
]
[[package]]
diff --git a/Cargo.toml b/Cargo.toml
index e01d8f44..1daf1b03 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -37,7 +37,7 @@ package.helix-tui.opt-level = 2
package.helix-term.opt-level = 2
[workspace.dependencies]
-tree-sitter = { version = "0.22" }
+tree-house = { version = "0.1.0", default-features = false }
nucleo = "0.5.0"
slotmap = "1.0.7"
thiserror = "2.0"
diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml
index 48bc635d..b0520761 100644
--- a/helix-core/Cargo.toml
+++ b/helix-core/Cargo.toml
@@ -32,13 +32,12 @@ unicode-segmentation.workspace = true
unicode-width = "=0.1.12"
unicode-general-category = "1.0"
slotmap.workspace = true
-tree-sitter.workspace = true
+tree-house.workspace = true
once_cell = "1.21"
arc-swap = "1"
regex = "1"
bitflags.workspace = true
-ahash = "0.8.12"
-hashbrown = { version = "0.14.5", features = ["raw"] }
+foldhash.workspace = true
url = "2.5.4"
log = "0.4"
diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs
index 52369bb7..a1e2c864 100644
--- a/helix-core/src/indent.rs
+++ b/helix-core/src/indent.rs
@@ -1,17 +1,17 @@
use std::{borrow::Cow, collections::HashMap, iter};
use helix_stdx::rope::RopeSliceExt;
-use tree_sitter::{Query, QueryCursor, QueryPredicateArg};
use crate::{
chars::{char_is_line_ending, char_is_whitespace},
graphemes::{grapheme_width, tab_width_at},
- syntax::{
- config::{IndentationHeuristic, LanguageConfiguration},
- RopeProvider, Syntax,
+ syntax::{self, config::IndentationHeuristic},
+ tree_sitter::{
+ self,
+ query::{InvalidPredicateError, UserPredicate},
+ Capture, Grammar, InactiveQueryCursor, Node, Pattern, Query, QueryMatch, RopeInput,
},
- tree_sitter::Node,
- Position, Rope, RopeSlice, Tendril,
+ Position, Rope, RopeSlice, Syntax, Tendril,
};
/// Enum representing indentation style.
@@ -282,18 +282,164 @@ fn add_indent_level(
/// Return true if only whitespace comes before the node on its line.
/// If given, new_line_byte_pos is treated the same way as any existing newline.
-fn is_first_in_line(node: Node, text: RopeSlice, new_line_byte_pos: Option<usize>) -> bool {
- let mut line_start_byte_pos = text.line_to_byte(node.start_position().row);
+fn is_first_in_line(node: &Node, text: RopeSlice, new_line_byte_pos: Option<u32>) -> bool {
+ let line = text.byte_to_line(node.start_byte() as usize);
+ let mut line_start_byte_pos = text.line_to_byte(line) as u32;
if let Some(pos) = new_line_byte_pos {
if line_start_byte_pos < pos && pos <= node.start_byte() {
line_start_byte_pos = pos;
}
}
- text.byte_slice(line_start_byte_pos..node.start_byte())
+ text.byte_slice(line_start_byte_pos as usize..node.start_byte() as usize)
.chars()
.all(|c| c.is_whitespace())
}
+#[derive(Debug, Default)]
+pub struct IndentQueryPredicates {
+ not_kind_eq: Option<(Capture, Box<str>)>,
+ same_line: Option<(Capture, Capture, bool)>,
+ one_line: Option<(Capture, bool)>,
+}
+
+impl IndentQueryPredicates {
+ fn are_satisfied(
+ &self,
+ match_: &QueryMatch,
+ text: RopeSlice,
+ new_line_byte_pos: Option<u32>,
+ ) -> bool {
+ if let Some((capture, not_expected_kind)) = self.not_kind_eq.as_ref() {
+ if !match_
+ .nodes_for_capture(*capture)
+ .next()
+ .is_some_and(|node| node.kind() != not_expected_kind.as_ref())
+ {
+ return false;
+ }
+ }
+
+ if let Some((capture1, capture2, negated)) = self.same_line {
+ let n1 = match_.nodes_for_capture(capture1).next();
+ let n2 = match_.nodes_for_capture(capture2).next();
+ let satisfied = n1.zip(n2).is_some_and(|(n1, n2)| {
+ let n1_line = get_node_start_line(text, n1, new_line_byte_pos);
+ let n2_line = get_node_start_line(text, n2, new_line_byte_pos);
+ let same_line = n1_line == n2_line;
+ same_line != negated
+ });
+
+ if !satisfied {
+ return false;
+ }
+ }
+
+ if let Some((capture, negated)) = self.one_line {
+ let node = match_.nodes_for_capture(capture).next();
+ let satisfied = node.is_some_and(|node| {
+ let start_line = get_node_start_line(text, node, new_line_byte_pos);
+ let end_line = get_node_end_line(text, node, new_line_byte_pos);
+ let one_line = end_line == start_line;
+ one_line != negated
+ });
+
+ if !satisfied {
+ return false;
+ }
+ }
+
+ true
+ }
+}
+
+#[derive(Debug)]
+pub struct IndentQuery {
+ query: Query,
+ properties: HashMap<Pattern, IndentScope>,
+ predicates: HashMap<Pattern, IndentQueryPredicates>,
+ indent_capture: Option<Capture>,
+ indent_always_capture: Option<Capture>,
+ outdent_capture: Option<Capture>,
+ outdent_always_capture: Option<Capture>,
+ align_capture: Option<Capture>,
+ anchor_capture: Option<Capture>,
+ extend_capture: Option<Capture>,
+ extend_prevent_once_capture: Option<Capture>,
+}
+
+impl IndentQuery {
+ pub fn new(grammar: Grammar, source: &str) -> Result<Self, tree_sitter::query::ParseError> {
+ let mut properties = HashMap::new();
+ let mut predicates: HashMap<Pattern, IndentQueryPredicates> = HashMap::new();
+ let query = Query::new(grammar, source, |pattern, predicate| match predicate {
+ UserPredicate::SetProperty { key: "scope", val } => {
+ let scope = match val {
+ Some("all") => IndentScope::All,
+ Some("tail") => IndentScope::Tail,
+ Some(other) => {
+ return Err(format!("unknown scope (#set! scope \"{other}\")").into())
+ }
+ None => return Err("missing scope value (#set! scope ...)".into()),
+ };
+
+ properties.insert(pattern, scope);
+
+ Ok(())
+ }
+ UserPredicate::Other(predicate) => {
+ let name = predicate.name();
+ match name {
+ "not-kind-eq?" => {
+ predicate.check_arg_count(2)?;
+ let capture = predicate.capture_arg(0)?;
+ let not_expected_kind = predicate.str_arg(1)?;
+
+ predicates.entry(pattern).or_default().not_kind_eq =
+ Some((capture, not_expected_kind.to_string().into_boxed_str()));
+ Ok(())
+ }
+ "same-line?" | "not-same-line?" => {
+ predicate.check_arg_count(2)?;
+ let capture1 = predicate.capture_arg(0)?;
+ let capture2 = predicate.capture_arg(1)?;
+ let negated = name == "not-same-line?";
+
+ predicates.entry(pattern).or_default().same_line =
+ Some((capture1, capture2, negated));
+ Ok(())
+ }
+ "one-line?" | "not-one-line?" => {
+ predicate.check_arg_count(1)?;
+ let capture = predicate.capture_arg(0)?;
+ let negated = name == "not-one-line?";
+
+ predicates.entry(pattern).or_default().one_line = Some((capture, negated));
+ Ok(())
+ }
+ _ => Err(InvalidPredicateError::unknown(UserPredicate::Other(
+ predicate,
+ ))),
+ }
+ }
+ _ => Err(InvalidPredicateError::unknown(predicate)),
+ })?;
+
+ Ok(Self {
+ properties,
+ predicates,
+ indent_capture: query.get_capture("indent"),
+ indent_always_capture: query.get_capture("indent.always"),
+ outdent_capture: query.get_capture("outdent"),
+ outdent_always_capture: query.get_capture("outdent.always"),
+ align_capture: query.get_capture("align"),
+ anchor_capture: query.get_capture("anchor"),
+ extend_capture: query.get_capture("extend"),
+ extend_prevent_once_capture: query.get_capture("extend.prevent-once"),
+ query,
+ })
+ }
+}
+
/// The total indent for some line of code.
/// This is usually constructed in one of 2 ways:
/// - Successively add indent captures to get the (added) indent from a single line
@@ -456,16 +602,16 @@ struct IndentQueryResult<'a> {
extend_captures: HashMap<usize, Vec<ExtendCapture>>,
}
-fn get_node_start_line(node: Node, new_line_byte_pos: Option<usize>) -> usize {
- let mut node_line = node.start_position().row;
+fn get_node_start_line(text: RopeSlice, node: &Node, new_line_byte_pos: Option<u32>) -> usize {
+ let mut node_line = text.byte_to_line(node.start_byte() as usize);
// Adjust for the new line that will be inserted
if new_line_byte_pos.is_some_and(|pos| node.start_byte() >= pos) {
node_line += 1;
}
node_line
}
-fn get_node_end_line(node: Node, new_line_byte_pos: Option<usize>) -> usize {
- let mut node_line = node.end_position().row;
+fn get_node_end_line(text: RopeSlice, node: &Node, new_line_byte_pos: Option<u32>) -> usize {
+ let mut node_line = text.byte_to_line(node.end_byte() as usize);
// Adjust for the new line that will be inserted (with a strict inequality since end_byte is exclusive)
if new_line_byte_pos.is_some_and(|pos| node.end_byte() > pos) {
node_line += 1;
@@ -474,175 +620,98 @@ fn get_node_end_line(node: Node, new_line_byte_pos: Option<usize>) -> usize {
}
fn query_indents<'a>(
- query: &Query,
+ query: &IndentQuery,
syntax: &Syntax,
- cursor: &mut QueryCursor,
text: RopeSlice<'a>,
- range: std::ops::Range<usize>,
- new_line_byte_pos: Option<usize>,
+ range: std::ops::Range<u32>,
+ new_line_byte_pos: Option<u32>,
) -> IndentQueryResult<'a> {
let mut indent_captures: HashMap<usize, Vec<IndentCapture>> = HashMap::new();
let mut extend_captures: HashMap<usize, Vec<ExtendCapture>> = HashMap::new();
+
+ let mut cursor = InactiveQueryCursor::new();
cursor.set_byte_range(range);
+ let mut cursor = cursor.execute_query(
+ &query.query,
+ &syntax.tree().root_node(),
+ RopeInput::new(text),
+ );
// Iterate over all captures from the query
- for m in cursor.matches(query, syntax.tree().root_node(), RopeProvider(text)) {
+ while let Some(m) = cursor.next_match() {
// Skip matches where not all custom predicates are fulfilled
- if !query.general_predicates(m.pattern_index).iter().all(|pred| {
- match pred.operator.as_ref() {
- "not-kind-eq?" => match (pred.args.first(), pred.args.get(1)) {
- (
- Some(QueryPredicateArg::Capture(capture_idx)),
- Some(QueryPredicateArg::String(kind)),
- ) => {
- let node = m.nodes_for_capture_index(*capture_idx).next();
- match node {
- Some(node) => node.kind()!=kind.as_ref(),
- _ => true,
- }
- }
- _ => {
- panic!("Invalid indent query: Arguments to \"not-kind-eq?\" must be a capture and a string");
- }
- },
- "same-line?" | "not-same-line?" => {
- match (pred.args.first(), pred.args.get(1)) {
- (
- Some(QueryPredicateArg::Capture(capt1)),
- Some(QueryPredicateArg::Capture(capt2))
- ) => {
- let n1 = m.nodes_for_capture_index(*capt1).next();
- let n2 = m.nodes_for_capture_index(*capt2).next();
- match (n1, n2) {
- (Some(n1), Some(n2)) => {
- let n1_line = get_node_start_line(n1, new_line_byte_pos);
- let n2_line = get_node_start_line(n2, new_line_byte_pos);
- let same_line = n1_line == n2_line;
- same_line==(pred.operator.as_ref()=="same-line?")
- }
- _ => true,
- }
- }
- _ => {
- panic!("Invalid indent query: Arguments to \"{}\" must be 2 captures", pred.operator);
- }
- }
- }
- "one-line?" | "not-one-line?" => match pred.args.first() {
- Some(QueryPredicateArg::Capture(capture_idx)) => {
- let node = m.nodes_for_capture_index(*capture_idx).next();
-
- match node {
- Some(node) => {
- let (start_line, end_line) = (get_node_start_line(node,new_line_byte_pos), get_node_end_line(node, new_line_byte_pos));
- let one_line = end_line == start_line;
- one_line != (pred.operator.as_ref() == "not-one-line?")
- },
- _ => true,
- }
- }
- _ => {
- panic!("Invalid indent query: Arguments to \"not-kind-eq?\" must be a capture and a string");
- }
- },
- _ => {
- panic!(
- "Invalid indent query: Unknown predicate (\"{}\")",
- pred.operator
- );
- }
- }
- }) {
+ if query
+ .predicates
+ .get(&m.pattern())
+ .is_some_and(|preds| !preds.are_satisfied(&m, text, new_line_byte_pos))
+ {
continue;
}
// A list of pairs (node_id, indent_capture) that are added by this match.
// They cannot be added to indent_captures immediately since they may depend on other captures (such as an @anchor).
let mut added_indent_captures: Vec<(usize, IndentCapture)> = Vec::new();
// The row/column position of the optional anchor in this query
- let mut anchor: Option<tree_sitter::Node> = None;
- for capture in m.captures {
- let capture_name = query.capture_names()[capture.index as usize];
- let capture_type = match capture_name {
- "indent" => IndentCaptureType::Indent,
- "indent.always" => IndentCaptureType::IndentAlways,
- "outdent" => IndentCaptureType::Outdent,
- "outdent.always" => IndentCaptureType::OutdentAlways,
- // The alignment will be updated to the correct value at the end, when the anchor is known.
- "align" => IndentCaptureType::Align(RopeSlice::from("")),
- "anchor" => {
- if anchor.is_some() {
- log::error!("Invalid indent query: Encountered more than one @anchor in the same match.")
- } else {
- anchor = Some(capture.node);
- }
- continue;
- }
- "extend" => {
- extend_captures
- .entry(capture.node.id())
- .or_insert_with(|| Vec::with_capacity(1))
- .push(ExtendCapture::Extend);
- continue;
- }
- "extend.prevent-once" => {
- extend_captures
- .entry(capture.node.id())
- .or_insert_with(|| Vec::with_capacity(1))
- .push(ExtendCapture::PreventOnce);
- continue;
- }
- _ => {
- // Ignore any unknown captures (these may be needed for predicates such as #match?)
- continue;
+ let mut anchor: Option<&Node> = None;
+ for matched_node in m.matched_nodes() {
+ let node_id = matched_node.node.id();
+ let capture = Some(matched_node.capture);
+ let capture_type = if capture == query.indent_capture {
+ IndentCaptureType::Indent
+ } else if capture == query.indent_always_capture {
+ IndentCaptureType::IndentAlways
+ } else if capture == query.outdent_capture {
+ IndentCaptureType::Outdent
+ } else if capture == query.outdent_always_capture {
+ IndentCaptureType::OutdentAlways
+ } else if capture == query.align_capture {
+ IndentCaptureType::Align(RopeSlice::from(""))
+ } else if capture == query.anchor_capture {
+ if anchor.is_some() {
+ log::error!("Invalid indent query: Encountered more than one @anchor in the same match.")
+ } else {
+ anchor = Some(&matched_node.node);
}
+ continue;
+ } else if capture == query.extend_capture {
+ extend_captures
+ .entry(node_id)
+ .or_insert_with(|| Vec::with_capacity(1))
+ .push(ExtendCapture::Extend);
+ continue;
+ } else if capture == query.extend_prevent_once_capture {
+ extend_captures
+ .entry(node_id)
+ .or_insert_with(|| Vec::with_capacity(1))
+ .push(ExtendCapture::PreventOnce);
+ continue;
+ } else {
+ // Ignore any unknown captures (these may be needed for predicates such as #match?)
+ continue;
};
- let scope = capture_type.default_scope();
- let mut indent_capture = IndentCapture {
+
+ // Apply additional settings for this capture
+ let scope = query
+ .properties
+ .get(&m.pattern())
+ .copied()
+ .unwrap_or_else(|| capture_type.default_scope());
+ let indent_capture = IndentCapture {
capture_type,
scope,
};
- // Apply additional settings for this capture
- for property in query.property_settings(m.pattern_index) {
- match property.key.as_ref() {
- "scope" => {
- indent_capture.scope = match property.value.as_deref() {
- Some("all") => IndentScope::All,
- Some("tail") => IndentScope::Tail,
- Some(s) => {
- panic!("Invalid indent query: Unknown value for \"scope\" property (\"{}\")", s);
- }
- None => {
- panic!(
- "Invalid indent query: Missing value for \"scope\" property"
- );
- }
- }
- }
- _ => {
- panic!(
- "Invalid indent query: Unknown property \"{}\"",
- property.key
- );
- }
- }
- }
- added_indent_captures.push((capture.node.id(), indent_capture))
+ added_indent_captures.push((node_id, indent_capture))
}
for (node_id, mut capture) in added_indent_captures {
// Set the anchor for all align queries.
if let IndentCaptureType::Align(_) = capture.capture_type {
- let anchor = match anchor {
- None => {
- log::error!(
- "Invalid indent query: @align requires an accompanying @anchor."
- );
- continue;
- }
- Some(anchor) => anchor,
+ let Some(anchor) = anchor else {
+ log::error!("Invalid indent query: @align requires an accompanying @anchor.");
+ continue;
};
+ let line = text.byte_to_line(anchor.start_byte() as usize);
+ let line_start = text.line_to_byte(line);
capture.capture_type = IndentCaptureType::Align(
- text.line(anchor.start_position().row)
- .byte_slice(0..anchor.start_position().column),
+ text.byte_slice(line_start..anchor.start_byte() as usize),
);
}
indent_captures
@@ -694,13 +763,15 @@ fn extend_nodes<'a>(
// - the cursor is on the same line as the end of the node OR
// - the line that the cursor is on is more indented than the
// first line of the node
- if deepest_preceding.end_position().row == line {
+ if text.byte_to_line(deepest_preceding.end_byte() as usize) == line {
extend_node = true;
} else {
let cursor_indent =
indent_level_for_line(text.line(line), tab_width, indent_width);
let node_indent = indent_level_for_line(
- text.line(deepest_preceding.start_position().row),
+ text.line(
+ text.byte_to_line(deepest_preceding.start_byte() as usize),
+ ),
tab_width,
indent_width,
);
@@ -717,7 +788,7 @@ fn extend_nodes<'a>(
if node_captured && stop_extend {
stop_extend = false;
} else if extend_node && !stop_extend {
- *node = deepest_preceding;
+ *node = deepest_preceding.clone();
break;
}
// If the tree contains a syntax error, `deepest_preceding` may not
@@ -734,17 +805,17 @@ fn extend_nodes<'a>(
/// - The indent captures for all relevant nodes.
#[allow(clippy::too_many_arguments)]
fn init_indent_query<'a, 'b>(
- query: &Query,
+ query: &IndentQuery,
syntax: &'a Syntax,
text: RopeSlice<'b>,
tab_width: usize,
indent_width: usize,
line: usize,
- byte_pos: usize,
- new_line_byte_pos: Option<usize>,
+ byte_pos: u32,
+ new_line_byte_pos: Option<u32>,
) -> Option<(Node<'a>, HashMap<usize, Vec<IndentCapture<'b>>>)> {
// The innermost tree-sitter node which is considered for the indent
- // computation. It may change if some predeceding node is extended
+ // computation. It may change if some preceding node is extended
let mut node = syntax
.tree()
.root_node()
@@ -754,37 +825,25 @@ fn init_indent_query<'a, 'b>(
// The query range should intersect with all nodes directly preceding
// the position of the indent query in case one of them is extended.
let mut deepest_preceding = None; // The deepest node preceding the indent query position
- let mut tree_cursor = node.walk();
- for child in node.children(&mut tree_cursor) {
+ for child in node.children() {
if child.byte_range().end <= byte_pos {
- deepest_preceding = Some(child);
+ deepest_preceding = Some(child.clone());
}
}
deepest_preceding = deepest_preceding.map(|mut prec| {
// Get the deepest directly preceding node
while prec.child_count() > 0 {
- prec = prec.child(prec.child_count() - 1).unwrap();
+ prec = prec.child(prec.child_count() - 1).unwrap().clone();
}
prec
});
let query_range = deepest_preceding
+ .as_ref()
.map(|prec| prec.byte_range().end - 1..byte_pos + 1)
.unwrap_or(byte_pos..byte_pos + 1);
- crate::syntax::PARSER.with(|ts_parser| {
- let mut ts_parser = ts_parser.borrow_mut();
- let mut cursor = ts_parser.cursors.pop().unwrap_or_default();
- let query_result = query_indents(
- query,
- syntax,
- &mut cursor,
- text,
- query_range,
- new_line_byte_pos,
- );
- ts_parser.cursors.push(cursor);
- (query_result, deepest_preceding)
- })
+ let query_result = query_indents(query, syntax, text, query_range, new_line_byte_pos);
+ (query_result, deepest_preceding)
};
let extend_captures = query_result.extend_captures;
@@ -842,7 +901,7 @@ fn init_indent_query<'a, 'b>(
/// ```
#[allow(clippy::too_many_arguments)]
pub fn treesitter_indent_for_pos<'a>(
- query: &Query,
+ query: &IndentQuery,
syntax: &Syntax,
tab_width: usize,
indent_width: usize,
@@ -851,7 +910,7 @@ pub fn treesitter_indent_for_pos<'a>(
pos: usize,
new_line: bool,
) -> Option<Indentation<'a>> {
- let byte_pos = text.char_to_byte(pos);
+ let byte_pos = text.char_to_byte(pos) as u32;
let new_line_byte_pos = new_line.then_some(byte_pos);
let (mut node, mut indent_captures) = init_indent_query(
query,
@@ -871,7 +930,7 @@ pub fn treesitter_indent_for_pos<'a>(
let mut indent_for_line_below = Indentation::default();
loop {
- let is_first = is_first_in_line(node, text, new_line_byte_pos);
+ let is_first = is_first_in_line(&node, text, new_line_byte_pos);
// Apply all indent definitions for this node.
// Since we only iterate over each node once, we can remove the
@@ -894,8 +953,8 @@ pub fn treesitter_indent_for_pos<'a>(
}
if let Some(parent) = node.parent() {
- let node_line = get_node_start_line(node, new_line_byte_pos);
- let parent_line = get_node_start_line(parent, new_line_byte_pos);
+ let node_line = get_node_start_line(text, &node, new_line_byte_pos);
+ let parent_line = get_node_start_line(text, &parent, new_line_byte_pos);
if node_line != parent_line {
// Don't add indent for the line below the line of the query
@@ -917,8 +976,9 @@ pub fn treesitter_indent_for_pos<'a>(
} else {
// Only add the indentation for the line below if that line
// is not after the line that the indentation is calculated for.
- if (node.start_position().row < line)
- || (new_line && node.start_position().row == line && node.start_byte() < byte_pos)
+ let node_start_line = text.byte_to_line(node.start_byte() as usize);
+ if node_start_line < line
+ || (new_line && node_start_line == line && node.start_byte() < byte_pos)
{
result.add_line(indent_for_line_below);
}
@@ -933,7 +993,7 @@ pub fn treesitter_indent_for_pos<'a>(
/// This is done either using treesitter, or if that's not available by copying the indentation from the current line
#[allow(clippy::too_many_arguments)]
pub fn indent_for_newline(
- language_config: Option<&LanguageConfiguration>,
+ loader: &syntax::Loader,
syntax: Option<&Syntax>,
indent_heuristic: &IndentationHeuristic,
indent_style: &IndentStyle,
@@ -950,7 +1010,7 @@ pub fn indent_for_newline(
Some(syntax),
) = (
indent_heuristic,
- language_config.and_then(|config| config.indent_query()),
+ syntax.and_then(|syntax| loader.indent_query(syntax.root_language())),
syntax,
) {
if let Some(indent) = treesitter_indent_for_pos(
@@ -1018,10 +1078,10 @@ pub fn indent_for_newline(
indent_style.as_str().repeat(indent_level)
}
-pub fn get_scopes(syntax: Option<&Syntax>, text: RopeSlice, pos: usize) -> Vec<&'static str> {
+pub fn get_scopes<'a>(syntax: Option<&'a Syntax>, text: RopeSlice, pos: usize) -> Vec<&'a str> {
let mut scopes = Vec::new();
if let Some(syntax) = syntax {
- let pos = text.char_to_byte(pos);
+ let pos = text.char_to_byte(pos) as u32;
let mut node = match syntax
.tree()
.root_node()
diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs
index 3fcddfcd..09865ca4 100644
--- a/helix-core/src/lib.rs
+++ b/helix-core/src/lib.rs
@@ -53,7 +53,7 @@ pub use smartstring::SmartString;
pub type Tendril = SmartString<smartstring::LazyCompact>;
#[doc(inline)]
-pub use {regex, tree_sitter};
+pub use {regex, tree_house::tree_sitter};
pub use position::{
char_idx_at_visual_offset, coords_at_pos, pos_at_coords, softwrapped_dimensions,
@@ -73,3 +73,5 @@ pub use line_ending::{LineEnding, NATIVE_LINE_ENDING};
pub use transaction::{Assoc, Change, ChangeSet, Deletion, Operation, Transaction};
pub use uri::Uri;
+
+pub use tree_house::Language;
diff --git a/helix-core/src/match_brackets.rs b/helix-core/src/match_brackets.rs
index 7520d3e4..7f2891f3 100644
--- a/helix-core/src/match_brackets.rs
+++ b/helix-core/src/match_brackets.rs
@@ -1,7 +1,7 @@
use std::iter;
+use crate::tree_sitter::Node;
use ropey::RopeSlice;
-use tree_sitter::Node;
use crate::movement::Direction::{self, Backward, Forward};
use crate::Syntax;
@@ -75,7 +75,7 @@ fn find_pair(
pos_: usize,
traverse_parents: bool,
) -> Option<usize> {
- let pos = doc.char_to_byte(pos_);
+ let pos = doc.char_to_byte(pos_) as u32;
let root = syntax.tree_for_byte_range(pos, pos).root_node();
let mut node = root.descendant_for_byte_range(pos, pos)?;
@@ -128,7 +128,7 @@ fn find_pair(
if find_pair_end(doc, sibling.prev_sibling(), start_char, end_char, Backward)
.is_some()
{
- return doc.try_byte_to_char(sibling.start_byte()).ok();
+ return doc.try_byte_to_char(sibling.start_byte() as usize).ok();
}
}
} else if node.is_named() {
@@ -144,9 +144,9 @@ fn find_pair(
if node.child_count() != 0 {
return None;
}
- let node_start = doc.byte_to_char(node.start_byte());
- find_matching_bracket_plaintext(doc.byte_slice(node.byte_range()), pos_ - node_start)
- .map(|pos| pos + node_start)
+ let node_start = doc.byte_to_char(node.start_byte() as usize);
+ let node_text = doc.byte_slice(node.start_byte() as usize..node.end_byte() as usize);
+ find_matching_bracket_plaintext(node_text, pos_ - node_start).map(|pos| pos + node_start)
}
/// Returns the position of the matching bracket under cursor.
@@ -304,7 +304,7 @@ fn as_char(doc: RopeSlice, node: &Node) -> Option<(usize, char)> {
if node.byte_range().len() != 1 {
return None;
}
- let pos = doc.try_byte_to_char(node.start_byte()).ok()?;
+ let pos = doc.try_byte_to_char(node.start_byte() as usize).ok()?;
Some((pos, doc.char(pos)))
}
diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs
index 2a1fa94f..09a99db2 100644
--- a/helix-core/src/movement.rs
+++ b/helix-core/src/movement.rs
@@ -1,7 +1,6 @@
-use std::{cmp::Reverse, iter};
+use std::{borrow::Cow, cmp::Reverse, iter};
use ropey::iter::Chars;
-use tree_sitter::{Node, QueryCursor};
use crate::{
char_idx_at_visual_offset,
@@ -13,9 +12,10 @@ use crate::{
},
line_ending::rope_is_line_ending,
position::char_idx_at_visual_block_offset,
- syntax::config::LanguageConfiguration,
+ syntax,
text_annotations::TextAnnotations,
textobject::TextObject,
+ tree_sitter::Node,
visual_offset_from_block, Range, RopeSlice, Selection, Syntax,
};
@@ -560,21 +560,23 @@ fn reached_target(target: WordMotionTarget, prev_ch: char, next_ch: char) -> boo
/// Finds the range of the next or previous textobject in the syntax sub-tree of `node`.
/// Returns the range in the forwards direction.
+#[allow(clippy::too_many_arguments)]
pub fn goto_treesitter_object(
slice: RopeSlice,
range: Range,
object_name: &str,
dir: Direction,
- slice_tree: Node,
- lang_config: &LanguageConfiguration,
+ slice_tree: &Node,
+ syntax: &Syntax,
+ loader: &syntax::Loader,
count: usize,
) -> Range {
+ let textobject_query = loader.textobject_query(syntax.root_language());
let get_range = move |range: Range| -> Option<Range> {
let byte_pos = slice.char_to_byte(range.cursor(slice));
let cap_name = |t: TextObject| format!("{}.{}", object_name, t);
- let mut cursor = QueryCursor::new();
- let nodes = lang_config.textobject_query()?.capture_nodes_any(
+ let nodes = textobject_query?.capture_nodes_any(
&[
&cap_name(TextObject::Movement),
&cap_name(TextObject::Around),
@@ -582,7 +584,6 @@ pub fn goto_treesitter_object(
],
slice_tree,
slice,
- &mut cursor,
)?;
let node = match dir {
@@ -617,14 +618,15 @@ pub fn goto_treesitter_object(
last_range
}
-fn find_parent_start(mut node: Node) -> Option<Node> {
+fn find_parent_start<'tree>(node: &Node<'tree>) -> Option<Node<'tree>> {
let start = node.start_byte();
+ let mut node = Cow::Borrowed(node);
while node.start_byte() >= start || !node.is_named() {
- node = node.parent()?;
+ node = Cow::Owned(node.parent()?);
}
- Some(node)
+ Some(node.into_owned())
}
pub fn move_parent_node_end(
@@ -635,8 +637,8 @@ pub fn move_parent_node_end(
movement: Movement,
) -> Selection {
selection.transform(|range| {
- let start_from = text.char_to_byte(range.from());
- let start_to = text.char_to_byte(range.to());
+ let start_from = text.char_to_byte(range.from()) as u32;
+ let start_to = text.char_to_byte(range.to()) as u32;
let mut node = match syntax.named_descendant_for_byte_range(start_from, start_to) {
Some(node) => node,
@@ -654,18 +656,18 @@ pub fn move_parent_node_end(
// moving forward, we always want to move one past the end of the
// current node, so use the end byte of the current node, which is an exclusive
// end of the range
- Direction::Forward => text.byte_to_char(node.end_byte()),
+ Direction::Forward => text.byte_to_char(node.end_byte() as usize),
// moving backward, we want the cursor to land on the start char of
// the current node, or if it is already at the start of a node, to traverse up to
// the parent
Direction::Backward => {
- let end_head = text.byte_to_char(node.start_byte());
+ let end_head = text.byte_to_char(node.start_byte() as usize);
// if we're already on the beginning, look up to the parent
if end_head == range.cursor(text) {
- node = find_parent_start(node).unwrap_or(node);
- text.byte_to_char(node.start_byte())
+ node = find_parent_start(&node).unwrap_or(node);
+ text.byte_to_char(node.start_byte() as usize)
} else {
end_head
}
diff --git a/helix-core/src/object.rs b/helix-core/src/object.rs
index 17a393ca..e0c02d0a 100644
--- a/helix-core/src/object.rs
+++ b/helix-core/src/object.rs
@@ -4,8 +4,8 @@ pub fn expand_selection(syntax: &Syntax, text: RopeSlice, selection: Selection)
let cursor = &mut syntax.walk();
selection.transform(|range| {
- let from = text.char_to_byte(range.from());
- let to = text.char_to_byte(range.to());
+ let from = text.char_to_byte(range.from()) as u32;
+ let to = text.char_to_byte(range.to()) as u32;
let byte_range = from..to;
cursor.reset_to_byte_range(from, to);
@@ -17,8 +17,8 @@ pub fn expand_selection(syntax: &Syntax, text: RopeSlice, selection: Selection)
}
let node = cursor.node();
- let from = text.byte_to_char(node.start_byte());
- let to = text.byte_to_char(node.end_byte());
+ let from = text.byte_to_char(node.start_byte() as usize);
+ let to = text.byte_to_char(node.end_byte() as usize);
Range::new(to, from).with_direction(range.direction())
})
@@ -53,10 +53,10 @@ pub fn select_next_sibling(syntax: &Syntax, text: RopeSlice, selection: Selectio
}
pub fn select_all_siblings(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection {
- selection.transform_iter(|range| {
- let mut cursor = syntax.walk();
+ let mut cursor = syntax.walk();
+ selection.transform_iter(move |range| {
let (from, to) = range.into_byte_range(text);
- cursor.reset_to_byte_range(from, to);
+ cursor.reset_to_byte_range(from as u32, to as u32);
if !cursor.goto_parent_with(|parent| parent.child_count() > 1) {
return vec![range].into_iter();
@@ -67,21 +67,18 @@ pub fn select_all_siblings(syntax: &Syntax, text: RopeSlice, selection: Selectio
}
pub fn select_all_children(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection {
- selection.transform_iter(|range| {
- let mut cursor = syntax.walk();
+ let mut cursor = syntax.walk();
+ selection.transform_iter(move |range| {
let (from, to) = range.into_byte_range(text);
- cursor.reset_to_byte_range(from, to);
+ cursor.reset_to_byte_range(from as u32, to as u32);
select_children(&mut cursor, text, range).into_iter()
})
}
-fn select_children<'n>(
- cursor: &'n mut TreeCursor<'n>,
- text: RopeSlice,
- range: Range,
-) -> Vec<Range> {
+fn select_children(cursor: &mut TreeCursor, text: RopeSlice, range: Range) -> Vec<Range> {
let children = cursor
- .named_children()
+ .children()
+ .filter(|child| child.is_named())
.map(|child| Range::from_node(child, text, range.direction()))
.collect::<Vec<_>>();
@@ -98,7 +95,7 @@ pub fn select_prev_sibling(syntax: &Syntax, text: RopeSlice, selection: Selectio
text,
selection,
|cursor| {
- while !cursor.goto_prev_sibling() {
+ while !cursor.goto_previous_sibling() {
if !cursor.goto_parent() {
break;
}
@@ -121,16 +118,16 @@ where
let cursor = &mut syntax.walk();
selection.transform(|range| {
- let from = text.char_to_byte(range.from());
- let to = text.char_to_byte(range.to());
+ let from = text.char_to_byte(range.from()) as u32;
+ let to = text.char_to_byte(range.to()) as u32;
cursor.reset_to_byte_range(from, to);
motion(cursor);
let node = cursor.node();
- let from = text.byte_to_char(node.start_byte());
- let to = text.byte_to_char(node.end_byte());
+ let from = text.byte_to_char(node.start_byte() as usize);
+ let to = text.byte_to_char(node.end_byte() as usize);
Range::new(from, to).with_direction(direction.unwrap_or_else(|| range.direction()))
})
diff --git a/helix-core/src/position.rs b/helix-core/src/position.rs
index cea0b607..3f888c57 100644
--- a/helix-core/src/position.rs
+++ b/helix-core/src/position.rs
@@ -89,11 +89,6 @@ impl From<(usize, usize)> for Position {
}
}
-impl From<Position> for tree_sitter::Point {
- fn from(pos: Position) -> Self {
- Self::new(pos.row, pos.col)
- }
-}
/// Convert a character index to (line, column) coordinates.
///
/// column in `char` count which can be used for row:column display in
diff --git a/helix-core/src/selection.rs b/helix-core/src/selection.rs
index 1db2d619..5bde08e3 100644
--- a/helix-core/src/selection.rs
+++ b/helix-core/src/selection.rs
@@ -9,13 +9,13 @@ use crate::{
},
line_ending::get_line_ending,
movement::Direction,
+ tree_sitter::Node,
Assoc, ChangeSet, RopeSlice,
};
use helix_stdx::range::is_subset;
use helix_stdx::rope::{self, RopeSliceExt};
use smallvec::{smallvec, SmallVec};
use std::{borrow::Cow, iter, slice};
-use tree_sitter::Node;
/// A single selection range.
///
@@ -76,8 +76,8 @@ impl Range {
}
pub fn from_node(node: Node, text: RopeSlice, direction: Direction) -> Self {
- let from = text.byte_to_char(node.start_byte());
- let to = text.byte_to_char(node.end_byte());
+ let from = text.byte_to_char(node.start_byte() as usize);
+ let to = text.byte_to_char(node.end_byte() as usize);
Range::new(from, to).with_direction(direction)
}
diff --git a/helix-core/src/snippets/active.rs b/helix-core/src/snippets/active.rs
index 98007ab6..1c10b76d 100644
--- a/helix-core/src/snippets/active.rs
+++ b/helix-core/src/snippets/active.rs
@@ -1,6 +1,6 @@
use std::ops::{Index, IndexMut};
-use hashbrown::HashSet;
+use foldhash::HashSet;
use helix_stdx::range::{is_exact_subset, is_subset};
use helix_stdx::Range;
use ropey::Rope;
@@ -35,7 +35,7 @@ impl ActiveSnippet {
let snippet = Self {
ranges: snippet.ranges,
tabstops: snippet.tabstops,
- active_tabstops: HashSet::new(),
+ active_tabstops: HashSet::default(),
current_tabstop: TabstopIdx(0),
};
(snippet.tabstops.len() != 1).then_some(snippet)
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index dfc32342..e232ee69 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -1,323 +1,206 @@
pub mod config;
-mod tree_cursor;
-
-use crate::{
- chars::char_is_line_ending,
- regex::Regex,
- transaction::{ChangeSet, Operation},
- RopeSlice, Tendril,
-};
-
-use ahash::RandomState;
-use arc_swap::{ArcSwap, Guard};
-use bitflags::bitflags;
-use config::{Configuration, FileType, LanguageConfiguration, LanguageServerConfiguration};
-use hashbrown::raw::RawTable;
-use helix_stdx::rope::{self, RopeSliceExt};
-use slotmap::{DefaultKey as LayerId, HopSlotMap};
use std::{
borrow::Cow,
- cell::RefCell,
- collections::{HashMap, VecDeque},
- fmt::{self, Write},
- hash::{Hash, Hasher},
- mem::replace,
+ collections::HashMap,
+ fmt, iter,
+ ops::{self, RangeBounds},
path::Path,
sync::Arc,
+ time::Duration,
};
-use once_cell::sync::Lazy;
-
-use helix_loader::grammar::{get_language, load_runtime_file};
+use anyhow::{Context, Result};
+use arc_swap::{ArcSwap, Guard};
+use config::{Configuration, FileType, LanguageConfiguration, LanguageServerConfiguration};
+use helix_loader::grammar::get_language;
+use helix_stdx::rope::RopeSliceExt as _;
+use once_cell::sync::OnceCell;
+use ropey::RopeSlice;
+use tree_house::{
+ highlighter,
+ query_iter::QueryIter,
+ tree_sitter::{Grammar, InactiveQueryCursor, InputEdit, Node, Query, RopeInput, Tree},
+ Error, InjectionLanguageMarker, LanguageConfig as SyntaxConfig, Layer,
+};
-pub use tree_cursor::TreeCursor;
+use crate::{indent::IndentQuery, tree_sitter, ChangeSet, Language};
-#[derive(Debug)]
-pub struct TextObjectQuery {
- pub query: Query,
-}
+pub use tree_house::{
+ highlighter::{Highlight, HighlightEvent},
+ Error as HighlighterError, LanguageLoader, TreeCursor, TREE_SITTER_MATCH_LIMIT,
+};
#[derive(Debug)]
-pub enum CapturedNode<'a> {
- Single(Node<'a>),
- /// Guaranteed to be not empty
- Grouped(Vec<Node<'a>>),
+pub struct LanguageData {
+ config: Arc<LanguageConfiguration>,
+ syntax: OnceCell<Option<SyntaxConfig>>,
+ indent_query: OnceCell<Option<IndentQuery>>,
+ textobject_query: OnceCell<Option<TextObjectQuery>>,
}
-impl CapturedNode<'_> {
- pub fn start_byte(&self) -> usize {
- match self {
- Self::Single(n) => n.start_byte(),
- Self::Grouped(ns) => ns[0].start_byte(),
- }
- }
-
- pub fn end_byte(&self) -> usize {
- match self {
- Self::Single(n) => n.end_byte(),
- Self::Grouped(ns) => ns.last().unwrap().end_byte(),
+impl LanguageData {
+ fn new(config: LanguageConfiguration) -> Self {
+ Self {
+ config: Arc::new(config),
+ syntax: OnceCell::new(),
+ indent_query: OnceCell::new(),
+ textobject_query: OnceCell::new(),
}
}
- pub fn byte_range(&self) -> std::ops::Range<usize> {
- self.start_byte()..self.end_byte()
- }
-}
-
-/// The maximum number of in-progress matches a TS cursor can consider at once.
-/// This is set to a constant in order to avoid performance problems for medium to large files. Set with `set_match_limit`.
-/// Using such a limit means that we lose valid captures, so there is fundamentally a tradeoff here.
-///
-///
-/// Old tree sitter versions used a limit of 32 by default until this limit was removed in version `0.19.5` (must now be set manually).
-/// However, this causes performance issues for medium to large files.
-/// In helix, this problem caused treesitter motions to take multiple seconds to complete in medium-sized rust files (3k loc).
-///
-///
-/// Neovim also encountered this problem and reintroduced this limit after it was removed upstream
-/// (see <https://github.com/neovim/neovim/issues/14897> and <https://github.com/neovim/neovim/pull/14915>).
-/// The number used here is fundamentally a tradeoff between breaking some obscure edge cases and performance.
-///
-///
-/// Neovim chose 64 for this value somewhat arbitrarily (<https://github.com/neovim/neovim/pull/18397>).
-/// 64 is too low for some languages though. In particular, it breaks some highlighting for record fields in Erlang record definitions.
-/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high.
-const TREE_SITTER_MATCH_LIMIT: u32 = 256;
-
-impl TextObjectQuery {
- /// Run the query on the given node and return sub nodes which match given
- /// capture ("function.inside", "class.around", etc).
- ///
- /// Captures may contain multiple nodes by using quantifiers (+, *, etc),
- /// and support for this is partial and could use improvement.
- ///
- /// ```query
- /// (comment)+ @capture
- ///
- /// ; OR
- /// (
- /// (comment)*
- /// .
- /// (function)
- /// ) @capture
- /// ```
- pub fn capture_nodes<'a>(
- &'a self,
- capture_name: &str,
- node: Node<'a>,
- slice: RopeSlice<'a>,
- cursor: &'a mut QueryCursor,
- ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
- self.capture_nodes_any(&[capture_name], node, slice, cursor)
+ pub fn config(&self) -> &Arc<LanguageConfiguration> {
+ &self.config
}
- /// Find the first capture that exists out of all given `capture_names`
- /// and return sub nodes that match this capture.
- pub fn capture_nodes_any<'a>(
- &'a self,
- capture_names: &[&str],
- node: Node<'a>,
- slice: RopeSlice<'a>,
- cursor: &'a mut QueryCursor,
- ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
- let capture_idx = capture_names
- .iter()
- .find_map(|cap| self.query.capture_index_for_name(cap))?;
-
- cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
+ /// Loads the grammar and compiles the highlights, injections and locals for the language.
+ /// This function should only be used by this module or the xtask crate.
+ pub fn compile_syntax_config(
+ config: &LanguageConfiguration,
+ loader: &Loader,
+ ) -> Result<Option<SyntaxConfig>> {
+ let name = &config.language_id;
+ let parser_name = config.grammar.as_deref().unwrap_or(name);
+ let Some(grammar) = get_language(parser_name)? else {
+ log::info!("Skipping syntax config for '{name}' because the parser's shared library does not exist");
+ return Ok(None);
+ };
+ let highlight_query_text = read_query(name, "highlights.scm");
+ let injection_query_text = read_query(name, "injections.scm");
+ let local_query_text = read_query(name, "locals.scm");
+ let config = SyntaxConfig::new(
+ grammar,
+ &highlight_query_text,
+ &injection_query_text,
+ &local_query_text,
+ )
+ .with_context(|| format!("Failed to compile highlights for '{name}'"))?;
- let nodes = cursor
- .captures(&self.query, node, RopeProvider(slice))
- .filter_map(move |(mat, _)| {
- let nodes: Vec<_> = mat
- .captures
- .iter()
- .filter_map(|cap| (cap.index == capture_idx).then_some(cap.node))
- .collect();
-
- if nodes.len() > 1 {
- Some(CapturedNode::Grouped(nodes))
- } else {
- nodes.into_iter().map(CapturedNode::Single).next()
- }
- });
+ reconfigure_highlights(&config, &loader.scopes());
- Some(nodes)
+ Ok(Some(config))
}
-}
-
-pub fn read_query(language: &str, filename: &str) -> String {
- static INHERITS_REGEX: Lazy<Regex> =
- Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap());
-
- let query = load_runtime_file(language, filename).unwrap_or_default();
-
- // replaces all "; inherits <language>(,<language>)*" with the queries of the given language(s)
- INHERITS_REGEX
- .replace_all(&query, |captures: &regex::Captures| {
- captures[1]
- .split(',')
- .fold(String::new(), |mut output, language| {
- // `write!` to a String cannot fail.
- write!(output, "\n{}\n", read_query(language, filename)).unwrap();
- output
- })
- })
- .to_string()
-}
-
-impl config::LanguageConfiguration {
- fn initialize_highlight(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> {
- let highlights_query = read_query(&self.language_id, "highlights.scm");
- // always highlight syntax errors
- // highlights_query += "\n(ERROR) @error";
- let injections_query = read_query(&self.language_id, "injections.scm");
- let locals_query = read_query(&self.language_id, "locals.scm");
-
- if highlights_query.is_empty() {
- None
- } else {
- let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id))
- .map_err(|err| {
- log::error!(
- "Failed to load tree-sitter parser for language {:?}: {:#}",
- self.language_id,
- err
- )
- })
- .ok()?;
- let config = HighlightConfiguration::new(
- language,
- &highlights_query,
- &injections_query,
- &locals_query,
- )
- .map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?}", self.language_id, err))
- .ok()?;
-
- config.configure(scopes);
- Some(Arc::new(config))
- }
+ fn syntax_config(&self, loader: &Loader) -> Option<&SyntaxConfig> {
+ self.syntax
+ .get_or_init(|| {
+ Self::compile_syntax_config(&self.config, loader)
+ .map_err(|err| {
+ log::error!("{err:#}");
+ })
+ .ok()
+ .flatten()
+ })
+ .as_ref()
}
- pub fn reconfigure(&self, scopes: &[String]) {
- if let Some(Some(config)) = self.highlight_config.get() {
- config.configure(scopes);
+ /// Compiles the indents.scm query for a language.
+ /// This function should only be used by this module or the xtask crate.
+ pub fn compile_indent_query(
+ grammar: Grammar,
+ config: &LanguageConfiguration,
+ ) -> Result<Option<IndentQuery>> {
+ let name = &config.language_id;
+ let text = read_query(name, "indents.scm");
+ if text.is_empty() {
+ return Ok(None);
}
+ let indent_query = IndentQuery::new(grammar, &text)
+ .with_context(|| format!("Failed to compile indents.scm query for '{name}'"))?;
+ Ok(Some(indent_query))
}
- pub fn highlight_config(&self, scopes: &[String]) -> Option<Arc<HighlightConfiguration>> {
- self.highlight_config
- .get_or_init(|| self.initialize_highlight(scopes))
- .clone()
- }
-
- pub fn is_highlight_initialized(&self) -> bool {
- self.highlight_config.get().is_some()
- }
-
- pub fn indent_query(&self) -> Option<&Query> {
+ fn indent_query(&self, loader: &Loader) -> Option<&IndentQuery> {
self.indent_query
- .get_or_init(|| self.load_query("indents.scm"))
- .as_ref()
- }
-
- pub fn textobject_query(&self) -> Option<&TextObjectQuery> {
- self.textobject_query
.get_or_init(|| {
- self.load_query("textobjects.scm")
- .map(|query| TextObjectQuery { query })
+ let grammar = self.syntax_config(loader)?.grammar;
+ Self::compile_indent_query(grammar, &self.config)
+ .map_err(|err| {
+ log::error!("{err}");
+ })
+ .ok()
+ .flatten()
})
.as_ref()
}
- pub fn scope(&self) -> &str {
- &self.scope
- }
-
- fn load_query(&self, kind: &str) -> Option<Query> {
- let query_text = read_query(&self.language_id, kind);
- if query_text.is_empty() {
- return None;
+ /// Compiles the textobjects.scm query for a language.
+ /// This function should only be used by this module or the xtask crate.
+ pub fn compile_textobject_query(
+ grammar: Grammar,
+ config: &LanguageConfiguration,
+ ) -> Result<Option<TextObjectQuery>> {
+ let name = &config.language_id;
+ let text = read_query(name, "textobjects.scm");
+ if text.is_empty() {
+ return Ok(None);
}
- let lang = &self.highlight_config.get()?.as_ref()?.language;
- Query::new(lang, &query_text)
- .map_err(|e| {
- log::error!(
- "Failed to parse {} queries for {}: {}",
- kind,
- self.language_id,
- e
- )
- })
- .ok()
+ let query = Query::new(grammar, &text, |_, _| Ok(()))
+ .with_context(|| format!("Failed to compile textobjects.scm queries for '{name}'"))?;
+ Ok(Some(TextObjectQuery::new(query)))
}
-}
-
-#[derive(Debug)]
-struct FileTypeGlob {
- glob: globset::Glob,
- language_id: usize,
-}
-impl FileTypeGlob {
- fn new(glob: globset::Glob, language_id: usize) -> Self {
- Self { glob, language_id }
+ fn textobject_query(&self, loader: &Loader) -> Option<&TextObjectQuery> {
+ self.textobject_query
+ .get_or_init(|| {
+ let grammar = self.syntax_config(loader)?.grammar;
+ Self::compile_textobject_query(grammar, &self.config)
+ .map_err(|err| {
+ log::error!("{err}");
+ })
+ .ok()
+ .flatten()
+ })
+ .as_ref()
}
-}
-#[derive(Debug)]
-struct FileTypeGlobMatcher {
- matcher: globset::GlobSet,
- file_types: Vec<FileTypeGlob>,
-}
-
-impl Default for FileTypeGlobMatcher {
- fn default() -> Self {
- Self {
- matcher: globset::GlobSet::empty(),
- file_types: Default::default(),
+ fn reconfigure(&self, scopes: &[String]) {
+ if let Some(Some(config)) = self.syntax.get() {
+ reconfigure_highlights(config, scopes);
}
}
}
-impl FileTypeGlobMatcher {
- fn new(file_types: Vec<FileTypeGlob>) -> Result<Self, globset::Error> {
- let mut builder = globset::GlobSetBuilder::new();
- for file_type in &file_types {
- builder.add(file_type.glob.clone());
+fn reconfigure_highlights(config: &SyntaxConfig, recognized_names: &[String]) {
+ config.configure(move |capture_name| {
+ let capture_parts: Vec<_> = capture_name.split('.').collect();
+
+ let mut best_index = None;
+ let mut best_match_len = 0;
+ for (i, recognized_name) in recognized_names.iter().enumerate() {
+ let mut len = 0;
+ let mut matches = true;
+ for (i, part) in recognized_name.split('.').enumerate() {
+ match capture_parts.get(i) {
+ Some(capture_part) if *capture_part == part => len += 1,
+ _ => {
+ matches = false;
+ break;
+ }
+ }
+ }
+ if matches && len > best_match_len {
+ best_index = Some(i);
+ best_match_len = len;
+ }
}
-
- Ok(Self {
- matcher: builder.build()?,
- file_types,
- })
- }
-
- fn language_id_for_path(&self, path: &Path) -> Option<&usize> {
- self.matcher
- .matches(path)
- .iter()
- .filter_map(|idx| self.file_types.get(*idx))
- .max_by_key(|file_type| file_type.glob.glob().len())
- .map(|file_type| &file_type.language_id)
- }
+ best_index.map(|idx| Highlight::new(idx as u32))
+ });
}
-// Expose loader as Lazy<> global since it's always static?
+pub fn read_query(lang: &str, query_filename: &str) -> String {
+ tree_house::read_query(lang, |language| {
+ helix_loader::grammar::load_runtime_file(language, query_filename).unwrap_or_default()
+ })
+}
#[derive(Debug, Default)]
pub struct Loader {
- // highlight_names ?
- language_configs: Vec<Arc<LanguageConfiguration>>,
- language_config_ids_by_extension: HashMap<String, usize>, // Vec<usize>
- language_config_ids_glob_matcher: FileTypeGlobMatcher,
- language_config_ids_by_shebang: HashMap<String, usize>,
-
+ languages: Vec<LanguageData>,
+ languages_by_extension: HashMap<String, Language>,
+ languages_by_shebang: HashMap<String, Language>,
+ languages_glob_matcher: FileTypeGlobMatcher,
language_server_configs: HashMap<String, LanguageServerConfiguration>,
-
scopes: ArcSwap<Vec<String>>,
}
@@ -325,96 +208,72 @@ pub type LoaderError = globset::Error;
impl Loader {
pub fn new(config: Configuration) -> Result<Self, LoaderError> {
- let mut language_configs = Vec::new();
- let mut language_config_ids_by_extension = HashMap::new();
- let mut language_config_ids_by_shebang = HashMap::new();
+ let mut languages = Vec::with_capacity(config.language.len());
+ let mut languages_by_extension = HashMap::new();
+ let mut languages_by_shebang = HashMap::new();
let mut file_type_globs = Vec::new();
- for config in config.language {
- // get the next id
- let language_id = language_configs.len();
+ for mut config in config.language {
+ let language = Language(languages.len() as u32);
+ config.language = Some(language);
for file_type in &config.file_types {
- // entry().or_insert(Vec::new).push(language_id);
match file_type {
FileType::Extension(extension) => {
- language_config_ids_by_extension.insert(extension.clone(), language_id);
+ languages_by_extension.insert(extension.clone(), language);
}
FileType::Glob(glob) => {
- file_type_globs.push(FileTypeGlob::new(glob.to_owned(), language_id));
+ file_type_globs.push(FileTypeGlob::new(glob.to_owned(), language));
}
};
}
for shebang in &config.shebangs {
- language_config_ids_by_shebang.insert(shebang.clone(), language_id);
+ languages_by_shebang.insert(shebang.clone(), language);
}
- language_configs.push(Arc::new(config));
+ languages.push(LanguageData::new(config));
}
Ok(Self {
- language_configs,
- language_config_ids_by_extension,
- language_config_ids_glob_matcher: FileTypeGlobMatcher::new(file_type_globs)?,
- language_config_ids_by_shebang,
+ languages,
+ languages_by_extension,
+ languages_by_shebang,
+ languages_glob_matcher: FileTypeGlobMatcher::new(file_type_globs)?,
language_server_configs: config.language_server,
scopes: ArcSwap::from_pointee(Vec::new()),
})
}
- pub fn language_config_for_file_name(&self, path: &Path) -> Option<Arc<LanguageConfiguration>> {
- // Find all the language configurations that match this file name
- // or a suffix of the file name.
- let configuration_id = self
- .language_config_ids_glob_matcher
- .language_id_for_path(path)
- .or_else(|| {
- path.extension()
- .and_then(|extension| extension.to_str())
- .and_then(|extension| self.language_config_ids_by_extension.get(extension))
- });
-
- configuration_id.and_then(|&id| self.language_configs.get(id).cloned())
-
- // TODO: content_regex handling conflict resolution
+ pub fn languages(&self) -> impl ExactSizeIterator<Item = (Language, &LanguageData)> {
+ self.languages
+ .iter()
+ .enumerate()
+ .map(|(idx, data)| (Language(idx as u32), data))
}
- pub fn language_config_for_shebang(
- &self,
- source: RopeSlice,
- ) -> Option<Arc<LanguageConfiguration>> {
- let line = Cow::from(source.line(0));
- static SHEBANG_REGEX: Lazy<Regex> =
- Lazy::new(|| Regex::new(&["^", SHEBANG].concat()).unwrap());
- let configuration_id = SHEBANG_REGEX
- .captures(&line)
- .and_then(|cap| self.language_config_ids_by_shebang.get(&cap[1]));
+ pub fn language_configs(&self) -> impl ExactSizeIterator<Item = &LanguageConfiguration> {
+ self.languages.iter().map(|language| &*language.config)
+ }
- configuration_id.and_then(|&id| self.language_configs.get(id).cloned())
+ pub fn language(&self, lang: Language) -> &LanguageData {
+ &self.languages[lang.idx()]
}
- pub fn language_config_for_scope(&self, scope: &str) -> Option<Arc<LanguageConfiguration>> {
- self.language_configs
- .iter()
- .find(|config| config.scope == scope)
- .cloned()
+ pub fn language_for_name(&self, name: impl PartialEq<String>) -> Option<Language> {
+ self.languages.iter().enumerate().find_map(|(idx, config)| {
+ (name == config.config.language_id).then_some(Language(idx as u32))
+ })
}
- pub fn language_config_for_language_id(
- &self,
- id: impl PartialEq<String>,
- ) -> Option<Arc<LanguageConfiguration>> {
- self.language_configs
- .iter()
- .find(|config| id.eq(&config.language_id))
- .cloned()
+ pub fn language_for_scope(&self, scope: &str) -> Option<Language> {
+ self.languages.iter().enumerate().find_map(|(idx, config)| {
+ (scope == config.config.scope).then_some(Language(idx as u32))
+ })
}
- /// Unlike `language_config_for_language_id`, which only returns Some for an exact id, this
- /// function will perform a regex match on the given string to find the closest language match.
- pub fn language_config_for_name(&self, slice: RopeSlice) -> Option<Arc<LanguageConfiguration>> {
+ pub fn language_for_match(&self, text: RopeSlice) -> Option<Language> {
// PERF: If the name matches up with the id, then this saves the need to do expensive regex.
- let shortcircuit = self.language_config_for_language_id(slice);
+ let shortcircuit = self.language_for_name(text);
if shortcircuit.is_some() {
return shortcircuit;
}
@@ -423,129 +282,145 @@ impl Loader {
let mut best_match_length = 0;
let mut best_match_position = None;
- for (i, configuration) in self.language_configs.iter().enumerate() {
- if let Some(injection_regex) = &configuration.injection_regex {
- if let Some(mat) = injection_regex.find(slice.regex_input()) {
+ for (idx, data) in self.languages.iter().enumerate() {
+ if let Some(injection_regex) = &data.config.injection_regex {
+ if let Some(mat) = injection_regex.find(text.regex_input()) {
let length = mat.end() - mat.start();
if length > best_match_length {
- best_match_position = Some(i);
+ best_match_position = Some(idx);
best_match_length = length;
}
}
}
}
- best_match_position.map(|i| self.language_configs[i].clone())
+ best_match_position.map(|i| Language(i as u32))
}
- pub fn language_configuration_for_injection_string(
- &self,
- capture: &InjectionLanguageMarker,
- ) -> Option<Arc<LanguageConfiguration>> {
- match capture {
- InjectionLanguageMarker::LanguageId(id) => self.language_config_for_language_id(*id),
- InjectionLanguageMarker::Name(name) => self.language_config_for_name(*name),
- InjectionLanguageMarker::Filename(file) => {
- let path_str: Cow<str> = (*file).into();
- self.language_config_for_file_name(Path::new(path_str.as_ref()))
- }
- InjectionLanguageMarker::Shebang(shebang) => {
- let shebang_str: Cow<str> = (*shebang).into();
- self.language_config_ids_by_shebang
- .get(shebang_str.as_ref())
- .and_then(|&id| self.language_configs.get(id).cloned())
- }
- }
+ pub fn language_for_filename(&self, path: &Path) -> Option<Language> {
+ // Find all the language configurations that match this file name
+ // or a suffix of the file name.
+
+ // TODO: content_regex handling conflict resolution
+ self.languages_glob_matcher
+ .language_for_path(path)
+ .or_else(|| {
+ path.extension()
+ .and_then(|extension| extension.to_str())
+ .and_then(|extension| self.languages_by_extension.get(extension).copied())
+ })
+ }
+
+ pub fn language_for_shebang(&self, text: RopeSlice) -> Option<Language> {
+ let shebang: Cow<str> = text.into();
+ self.languages_by_shebang.get(shebang.as_ref()).copied()
+ }
+
+ pub fn indent_query(&self, lang: Language) -> Option<&IndentQuery> {
+ self.language(lang).indent_query(self)
}
- pub fn language_configs(&self) -> impl Iterator<Item = &Arc<LanguageConfiguration>> {
- self.language_configs.iter()
+ pub fn textobject_query(&self, lang: Language) -> Option<&TextObjectQuery> {
+ self.language(lang).textobject_query(self)
}
pub fn language_server_configs(&self) -> &HashMap<String, LanguageServerConfiguration> {
&self.language_server_configs
}
+ pub fn scopes(&self) -> Guard<Arc<Vec<String>>> {
+ self.scopes.load()
+ }
+
pub fn set_scopes(&self, scopes: Vec<String>) {
self.scopes.store(Arc::new(scopes));
// Reconfigure existing grammars
- for config in self
- .language_configs
- .iter()
- .filter(|cfg| cfg.is_highlight_initialized())
- {
- config.reconfigure(&self.scopes());
+ for data in &self.languages {
+ data.reconfigure(&self.scopes());
}
}
+}
- pub fn scopes(&self) -> Guard<Arc<Vec<String>>> {
- self.scopes.load()
+impl LanguageLoader for Loader {
+ fn language_for_marker(&self, marker: InjectionLanguageMarker) -> Option<Language> {
+ match marker {
+ InjectionLanguageMarker::Name(name) => self.language_for_name(name),
+ InjectionLanguageMarker::Match(text) => self.language_for_match(text),
+ InjectionLanguageMarker::Filename(text) => {
+ let path: Cow<str> = text.into();
+ self.language_for_filename(Path::new(path.as_ref()))
+ }
+ InjectionLanguageMarker::Shebang(text) => self.language_for_shebang(text),
+ }
+ }
+
+ fn get_config(&self, lang: Language) -> Option<&SyntaxConfig> {
+ self.languages[lang.idx()].syntax_config(self)
}
}
-pub struct TsParser {
- parser: tree_sitter::Parser,
- pub cursors: Vec<QueryCursor>,
+#[derive(Debug)]
+struct FileTypeGlob {
+ glob: globset::Glob,
+ language: Language,
}
-// could also just use a pool, or a single instance?
-thread_local! {
- pub static PARSER: RefCell<TsParser> = RefCell::new(TsParser {
- parser: Parser::new(),
- cursors: Vec::new(),
- })
+impl FileTypeGlob {
+ pub fn new(glob: globset::Glob, language: Language) -> Self {
+ Self { glob, language }
+ }
}
#[derive(Debug)]
-pub struct Syntax {
- layers: HopSlotMap<LayerId, LanguageLayer>,
- root: LayerId,
- loader: Arc<ArcSwap<Loader>>,
+struct FileTypeGlobMatcher {
+ matcher: globset::GlobSet,
+ file_types: Vec<FileTypeGlob>,
}
-fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<str> {
- Cow::from(source.byte_slice(range))
+impl Default for FileTypeGlobMatcher {
+ fn default() -> Self {
+ Self {
+ matcher: globset::GlobSet::empty(),
+ file_types: Default::default(),
+ }
+ }
}
-impl Syntax {
- pub fn new(
- source: RopeSlice,
- config: Arc<HighlightConfiguration>,
- loader: Arc<ArcSwap<Loader>>,
- ) -> Option<Self> {
- let root_layer = LanguageLayer {
- tree: None,
- config,
- depth: 0,
- flags: LayerUpdateFlags::empty(),
- ranges: vec![Range {
- start_byte: 0,
- end_byte: usize::MAX,
- start_point: Point::new(0, 0),
- end_point: Point::new(usize::MAX, usize::MAX),
- }],
- parent: None,
- };
+impl FileTypeGlobMatcher {
+ fn new(file_types: Vec<FileTypeGlob>) -> Result<Self, globset::Error> {
+ let mut builder = globset::GlobSetBuilder::new();
+ for file_type in &file_types {
+ builder.add(file_type.glob.clone());
+ }
- // track scope_descriptor: a Vec of scopes for item in tree
+ Ok(Self {
+ matcher: builder.build()?,
+ file_types,
+ })
+ }
- let mut layers = HopSlotMap::default();
- let root = layers.insert(root_layer);
+ fn language_for_path(&self, path: &Path) -> Option<Language> {
+ self.matcher
+ .matches(path)
+ .iter()
+ .filter_map(|idx| self.file_types.get(*idx))
+ .max_by_key(|file_type| file_type.glob.glob().len())
+ .map(|file_type| file_type.language)
+ }
+}
- let mut syntax = Self {
- root,
- layers,
- loader,
- };
+#[derive(Debug)]
+pub struct Syntax {
+ inner: tree_house::Syntax,
+}
- let res = syntax.update(source, source, &ChangeSet::new(source));
+const PARSE_TIMEOUT: Duration = Duration::from_millis(500); // half a second is pretty generous
- if res.is_err() {
- log::error!("TS parser failed, disabling TS for the current buffer: {res:?}");
- return None;
- }
- Some(syntax)
+impl Syntax {
+ pub fn new(source: RopeSlice, language: Language, loader: &Loader) -> Result<Self, Error> {
+ let inner = tree_house::Syntax::new(source, language, PARSE_TIMEOUT, loader)?;
+ Ok(Self { inner })
}
pub fn update(
@@ -553,518 +428,82 @@ impl Syntax {
old_source: RopeSlice,
source: RopeSlice,
changeset: &ChangeSet,
+ loader: &Loader,
) -> Result<(), Error> {
- let mut queue = VecDeque::new();
- queue.push_back(self.root);
-
- let loader = self.loader.load();
- let scopes = loader.scopes.load();
- let injection_callback = |language: &InjectionLanguageMarker| {
- loader
- .language_configuration_for_injection_string(language)
- .and_then(|language_config| language_config.highlight_config(&scopes))
- };
-
- // Convert the changeset into tree sitter edits.
let edits = generate_edits(old_source, changeset);
-
- // This table allows inverse indexing of `layers`.
- // That is by hashing a `Layer` you can find
- // the `LayerId` of an existing equivalent `Layer` in `layers`.
- //
- // It is used to determine if a new layer exists for an injection
- // or if an existing layer needs to be updated.
- let mut layers_table = RawTable::with_capacity(self.layers.len());
- let layers_hasher = RandomState::new();
- // Use the edits to update all layers markers
- fn point_add(a: Point, b: Point) -> Point {
- if b.row > 0 {
- Point::new(a.row.saturating_add(b.row), b.column)
- } else {
- Point::new(0, a.column.saturating_add(b.column))
- }
- }
- fn point_sub(a: Point, b: Point) -> Point {
- if a.row > b.row {
- Point::new(a.row.saturating_sub(b.row), a.column)
- } else {
- Point::new(0, a.column.saturating_sub(b.column))
- }
- }
-
- for (layer_id, layer) in self.layers.iter_mut() {
- // The root layer always covers the whole range (0..usize::MAX)
- if layer.depth == 0 {
- layer.flags = LayerUpdateFlags::MODIFIED;
- continue;
- }
-
- if !edits.is_empty() {
- for range in &mut layer.ranges {
- // Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720
- for edit in edits.iter().rev() {
- let is_pure_insertion = edit.old_end_byte == edit.start_byte;
-
- // if edit is after range, skip
- if edit.start_byte > range.end_byte {
- // TODO: || (is_noop && edit.start_byte == range.end_byte)
- continue;
- }
-
- // if edit is before range, shift entire range by len
- if edit.old_end_byte < range.start_byte {
- range.start_byte =
- edit.new_end_byte + (range.start_byte - edit.old_end_byte);
- range.start_point = point_add(
- edit.new_end_position,
- point_sub(range.start_point, edit.old_end_position),
- );
-
- range.end_byte = edit
- .new_end_byte
- .saturating_add(range.end_byte - edit.old_end_byte);
- range.end_point = point_add(
- edit.new_end_position,
- point_sub(range.end_point, edit.old_end_position),
- );
-
- layer.flags |= LayerUpdateFlags::MOVED;
- }
- // if the edit starts in the space before and extends into the range
- else if edit.start_byte < range.start_byte {
- range.start_byte = edit.new_end_byte;
- range.start_point = edit.new_end_position;
-
- range.end_byte = range
- .end_byte
- .saturating_sub(edit.old_end_byte)
- .saturating_add(edit.new_end_byte);
- range.end_point = point_add(
- edit.new_end_position,
- point_sub(range.end_point, edit.old_end_position),
- );
- layer.flags = LayerUpdateFlags::MODIFIED;
- }
- // If the edit is an insertion at the start of the tree, shift
- else if edit.start_byte == range.start_byte && is_pure_insertion {
- range.start_byte = edit.new_end_byte;
- range.start_point = edit.new_end_position;
- layer.flags |= LayerUpdateFlags::MOVED;
- } else {
- range.end_byte = range
- .end_byte
- .saturating_sub(edit.old_end_byte)
- .saturating_add(edit.new_end_byte);
- range.end_point = point_add(
- edit.new_end_position,
- point_sub(range.end_point, edit.old_end_position),
- );
- layer.flags = LayerUpdateFlags::MODIFIED;
- }
- }
- }
- }
-
- let hash = layers_hasher.hash_one(layer);
- // Safety: insert_no_grow is unsafe because it assumes that the table
- // has enough capacity to hold additional elements.
- // This is always the case as we reserved enough capacity above.
- unsafe { layers_table.insert_no_grow(hash, layer_id) };
- }
-
- PARSER.with(|ts_parser| {
- let ts_parser = &mut ts_parser.borrow_mut();
- ts_parser.parser.set_timeout_micros(1000 * 500); // half a second is pretty generours
- let mut cursor = ts_parser.cursors.pop().unwrap_or_default();
- // TODO: might need to set cursor range
- cursor.set_byte_range(0..usize::MAX);
- cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
-
- let source_slice = source.slice(..);
-
- while let Some(layer_id) = queue.pop_front() {
- let layer = &mut self.layers[layer_id];
-
- // Mark the layer as touched
- layer.flags |= LayerUpdateFlags::TOUCHED;
-
- // If a tree already exists, notify it of changes.
- if let Some(tree) = &mut layer.tree {
- if layer
- .flags
- .intersects(LayerUpdateFlags::MODIFIED | LayerUpdateFlags::MOVED)
- {
- for edit in edits.iter().rev() {
- // Apply the edits in reverse.
- // If we applied them in order then edit 1 would disrupt the positioning of edit 2.
- tree.edit(edit);
- }
- }
-
- if layer.flags.contains(LayerUpdateFlags::MODIFIED) {
- // Re-parse the tree.
- layer.parse(&mut ts_parser.parser, source)?;
- }
- } else {
- // always parse if this layer has never been parsed before
- layer.parse(&mut ts_parser.parser, source)?;
- }
-
- // Switch to an immutable borrow.
- let layer = &self.layers[layer_id];
-
- // Process injections.
- let matches = cursor.matches(
- &layer.config.injections_query,
- layer.tree().root_node(),
- RopeProvider(source_slice),
- );
- let mut combined_injections = vec![
- (None, Vec::new(), IncludedChildren::default());
- layer.config.combined_injections_patterns.len()
- ];
- let mut injections = Vec::new();
- let mut last_injection_end = 0;
- for mat in matches {
- let (injection_capture, content_node, included_children) = layer
- .config
- .injection_for_match(&layer.config.injections_query, &mat, source_slice);
-
- // in case this is a combined injection save it for more processing later
- if let Some(combined_injection_idx) = layer
- .config
- .combined_injections_patterns
- .iter()
- .position(|&pattern| pattern == mat.pattern_index)
- {
- let entry = &mut combined_injections[combined_injection_idx];
- if injection_capture.is_some() {
- entry.0 = injection_capture;
- }
- if let Some(content_node) = content_node {
- if content_node.start_byte() >= last_injection_end {
- entry.1.push(content_node);
- last_injection_end = content_node.end_byte();
- }
- }
- entry.2 = included_children;
- continue;
- }
-
- // Explicitly remove this match so that none of its other captures will remain
- // in the stream of captures.
- mat.remove();
-
- // If a language is found with the given name, then add a new language layer
- // to the highlighted document.
- if let (Some(injection_capture), Some(content_node)) =
- (injection_capture, content_node)
- {
- if let Some(config) = (injection_callback)(&injection_capture) {
- let ranges =
- intersect_ranges(&layer.ranges, &[content_node], included_children);
-
- if !ranges.is_empty() {
- if content_node.start_byte() < last_injection_end {
- continue;
- }
- last_injection_end = content_node.end_byte();
- injections.push((config, ranges));
- }
- }
- }
- }
-
- for (lang_name, content_nodes, included_children) in combined_injections {
- if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
- if let Some(config) = (injection_callback)(&lang_name) {
- let ranges =
- intersect_ranges(&layer.ranges, &content_nodes, included_children);
- if !ranges.is_empty() {
- injections.push((config, ranges));
- }
- }
- }
- }
-
- let depth = layer.depth + 1;
- // TODO: can't inline this since matches borrows self.layers
- for (config, ranges) in injections {
- let parent = Some(layer_id);
- let new_layer = LanguageLayer {
- tree: None,
- config,
- depth,
- ranges,
- flags: LayerUpdateFlags::empty(),
- parent: None,
- };
-
- // Find an identical existing layer
- let layer = layers_table
- .get(layers_hasher.hash_one(&new_layer), |&it| {
- self.layers[it] == new_layer
- })
- .copied();
-
- // ...or insert a new one.
- let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer));
- self.layers[layer_id].parent = parent;
-
- queue.push_back(layer_id);
- }
-
- // TODO: pre-process local scopes at this time, rather than highlight?
- // would solve problems with locals not working across boundaries
- }
-
- // Return the cursor back in the pool.
- ts_parser.cursors.push(cursor);
-
- // Reset all `LayerUpdateFlags` and remove all untouched layers
- self.layers.retain(|_, layer| {
- replace(&mut layer.flags, LayerUpdateFlags::empty())
- .contains(LayerUpdateFlags::TOUCHED)
- });
-
+ if edits.is_empty() {
Ok(())
- })
+ } else {
+ self.inner.update(source, PARSE_TIMEOUT, &edits, loader)
+ }
}
- pub fn tree(&self) -> &Tree {
- self.layers[self.root].tree()
+ pub fn layer(&self, layer: Layer) -> &tree_house::LayerData {
+ self.inner.layer(layer)
}
- /// Iterate over the highlighted regions for a given slice of source code.
- pub fn highlight_iter<'a>(
- &'a self,
- source: RopeSlice<'a>,
- range: Option<std::ops::Range<usize>>,
- cancellation_flag: Option<&'a AtomicUsize>,
- ) -> impl Iterator<Item = Result<HighlightEvent, Error>> + 'a {
- let mut layers = self
- .layers
- .iter()
- .filter_map(|(_, layer)| {
- // TODO: if range doesn't overlap layer range, skip it
-
- // Reuse a cursor from the pool if available.
- let mut cursor = PARSER.with(|ts_parser| {
- let highlighter = &mut ts_parser.borrow_mut();
- highlighter.cursors.pop().unwrap_or_default()
- });
-
- // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
- // prevents them from being moved. But both of these values are really just
- // pointers, so it's actually ok to move them.
- let cursor_ref = unsafe {
- mem::transmute::<&mut tree_sitter::QueryCursor, &mut tree_sitter::QueryCursor>(
- &mut cursor,
- )
- };
-
- // if reusing cursors & no range this resets to whole range
- cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX));
- cursor_ref.set_match_limit(TREE_SITTER_MATCH_LIMIT);
-
- let mut captures = cursor_ref
- .captures(
- &layer.config.query,
- layer.tree().root_node(),
- RopeProvider(source),
- )
- .peekable();
-
- // If there's no captures, skip the layer
- captures.peek()?;
-
- Some(HighlightIterLayer {
- highlight_end_stack: Vec::new(),
- scope_stack: vec![LocalScope {
- inherits: false,
- range: 0..usize::MAX,
- local_defs: Vec::new(),
- }],
- cursor,
- _tree: None,
- captures: RefCell::new(captures),
- config: layer.config.as_ref(), // TODO: just reuse `layer`
- depth: layer.depth, // TODO: just reuse `layer`
- })
- })
- .collect::<Vec<_>>();
-
- layers.sort_unstable_by_key(|layer| layer.sort_key());
-
- let mut result = HighlightIter {
- source,
- byte_offset: range.map_or(0, |r| r.start),
- cancellation_flag,
- iter_count: 0,
- layers,
- next_event: None,
- last_highlight_range: None,
- };
- result.sort_layers();
- result
+ pub fn root_layer(&self) -> Layer {
+ self.inner.root()
}
- pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree {
- let mut container_id = self.root;
-
- for (layer_id, layer) in self.layers.iter() {
- if layer.depth > self.layers[container_id].depth
- && layer.contains_byte_range(start, end)
- {
- container_id = layer_id;
- }
- }
-
- self.layers[container_id].tree()
+ pub fn layer_for_byte_range(&self, start: u32, end: u32) -> Layer {
+ self.inner.layer_for_byte_range(start, end)
}
- pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Node<'_>> {
- self.tree_for_byte_range(start, end)
- .root_node()
- .named_descendant_for_byte_range(start, end)
+ pub fn root_language(&self) -> Language {
+ self.layer(self.root_layer()).language
}
- pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Node<'_>> {
- self.tree_for_byte_range(start, end)
- .root_node()
- .descendant_for_byte_range(start, end)
+ pub fn tree(&self) -> &Tree {
+ self.inner.tree()
}
- pub fn walk(&self) -> TreeCursor<'_> {
- // data structure to find the smallest range that contains a point
- // when some of the ranges in the structure can overlap.
- TreeCursor::new(&self.layers, self.root)
+ pub fn tree_for_byte_range(&self, start: u32, end: u32) -> &Tree {
+ self.inner.tree_for_byte_range(start, end)
}
- // Commenting
- // comment_strings_for_pos
- // is_commented
-
- // Indentation
- // suggested_indent_for_line_at_buffer_row
- // suggested_indent_for_buffer_row
- // indent_level_for_line
-
- // TODO: Folding
-}
-
-bitflags! {
- /// Flags that track the status of a layer
- /// in the `Sytaxn::update` function
- #[derive(Debug)]
- struct LayerUpdateFlags : u32{
- const MODIFIED = 0b001;
- const MOVED = 0b010;
- const TOUCHED = 0b100;
+ pub fn named_descendant_for_byte_range(&self, start: u32, end: u32) -> Option<Node> {
+ self.inner.named_descendant_for_byte_range(start, end)
}
-}
-
-#[derive(Debug)]
-pub struct LanguageLayer {
- // mode
- // grammar
- pub config: Arc<HighlightConfiguration>,
- pub(crate) tree: Option<Tree>,
- pub ranges: Vec<Range>,
- pub depth: u32,
- flags: LayerUpdateFlags,
- parent: Option<LayerId>,
-}
-/// This PartialEq implementation only checks if that
-/// two layers are theoretically identical (meaning they highlight the same text range with the same language).
-/// It does not check whether the layers have the same internal treesitter
-/// state.
-impl PartialEq for LanguageLayer {
- fn eq(&self, other: &Self) -> bool {
- self.depth == other.depth
- && self.config.language == other.config.language
- && self.ranges == other.ranges
+ pub fn descendant_for_byte_range(&self, start: u32, end: u32) -> Option<Node> {
+ self.inner.descendant_for_byte_range(start, end)
}
-}
-/// Hash implementation belongs to PartialEq implementation above.
-/// See its documentation for details.
-impl Hash for LanguageLayer {
- fn hash<H: Hasher>(&self, state: &mut H) {
- self.depth.hash(state);
- self.config.language.hash(state);
- self.ranges.hash(state);
+ pub fn walk(&self) -> TreeCursor {
+ self.inner.walk()
}
-}
-impl LanguageLayer {
- pub fn tree(&self) -> &Tree {
- // TODO: no unwrap
- self.tree.as_ref().unwrap()
- }
-
- fn parse(&mut self, parser: &mut Parser, source: RopeSlice) -> Result<(), Error> {
- parser
- .set_included_ranges(&self.ranges)
- .map_err(|_| Error::InvalidRanges)?;
-
- parser
- .set_language(&self.config.language)
- .map_err(|_| Error::InvalidLanguage)?;
-
- // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) };
- let tree = parser
- .parse_with(
- &mut |byte, _| {
- if byte <= source.len_bytes() {
- let (chunk, start_byte, _, _) = source.chunk_at_byte(byte);
- &chunk.as_bytes()[byte - start_byte..]
- } else {
- // out of range
- &[]
- }
- },
- self.tree.as_ref(),
- )
- .ok_or(Error::Cancelled)?;
- // unsafe { ts_parser.parser.set_cancellation_flag(None) };
- self.tree = Some(tree);
- Ok(())
+ pub fn highlighter<'a>(
+ &'a self,
+ source: RopeSlice<'a>,
+ loader: &'a Loader,
+ range: impl RangeBounds<u32>,
+ ) -> Highlighter<'a> {
+ Highlighter::new(&self.inner, source, loader, range)
}
- /// Whether the layer contains the given byte range.
- ///
- /// If the layer has multiple ranges (i.e. combined injections), the
- /// given range is considered contained if it is within the start and
- /// end bytes of the first and last ranges **and** if the given range
- /// starts or ends within any of the layer's ranges.
- fn contains_byte_range(&self, start: usize, end: usize) -> bool {
- let layer_start = self
- .ranges
- .first()
- .expect("ranges should not be empty")
- .start_byte;
- let layer_end = self
- .ranges
- .last()
- .expect("ranges should not be empty")
- .end_byte;
-
- layer_start <= start
- && layer_end >= end
- && self.ranges.iter().any(|range| {
- let byte_range = range.start_byte..range.end_byte;
- byte_range.contains(&start) || byte_range.contains(&end)
- })
+ pub fn query_iter<'a, QueryLoader, LayerState, Range>(
+ &'a self,
+ source: RopeSlice<'a>,
+ loader: QueryLoader,
+ range: Range,
+ ) -> QueryIter<'a, 'a, QueryLoader, LayerState>
+ where
+ QueryLoader: FnMut(Language) -> Option<&'a Query> + 'a,
+ LayerState: Default,
+ Range: RangeBounds<u32>,
+ {
+ QueryIter::new(&self.inner, source, loader, range)
}
}
-pub(crate) fn generate_edits(
- old_text: RopeSlice,
- changeset: &ChangeSet,
-) -> Vec<tree_sitter::InputEdit> {
- use Operation::*;
+pub type Highlighter<'a> = highlighter::Highlighter<'a, 'a, Loader>;
+
+fn generate_edits(old_text: RopeSlice, changeset: &ChangeSet) -> Vec<InputEdit> {
+ use crate::Operation::*;
+ use tree_sitter::Point;
+
let mut old_pos = 0;
let mut edits = Vec::new();
@@ -1076,35 +515,6 @@ pub(crate) fn generate_edits(
let mut iter = changeset.changes.iter().peekable();
// TODO; this is a lot easier with Change instead of Operation.
-
- fn point_at_pos(text: RopeSlice, pos: usize) -> (usize, Point) {
- let byte = text.char_to_byte(pos); // <- attempted to index past end
- let line = text.char_to_line(pos);
- let line_start_byte = text.line_to_byte(line);
- let col = byte - line_start_byte;
-
- (byte, Point::new(line, col))
- }
-
- fn traverse(point: Point, text: &Tendril) -> Point {
- let Point {
- mut row,
- mut column,
- } = point;
-
- // TODO: there should be a better way here.
- let mut chars = text.chars().peekable();
- while let Some(ch) = chars.next() {
- if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
- row += 1;
- column = 0;
- } else {
- column += 1;
- }
- }
- Point { row, column }
- }
-
while let Some(change) = iter.next() {
let len = match change {
Delete(i) | Retain(i) => *i,
@@ -1115,47 +525,47 @@ pub(crate) fn generate_edits(
match change {
Retain(_) => {}
Delete(_) => {
- let (start_byte, start_position) = point_at_pos(old_text, old_pos);
- let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end);
+ let start_byte = old_text.char_to_byte(old_pos) as u32;
+ let old_end_byte = old_text.char_to_byte(old_end) as u32;
// deletion
- edits.push(tree_sitter::InputEdit {
- start_byte, // old_pos to byte
- old_end_byte, // old_end to byte
- new_end_byte: start_byte, // old_pos to byte
- start_position, // old pos to coords
- old_end_position, // old_end to coords
- new_end_position: start_position, // old pos to coords
+ edits.push(InputEdit {
+ start_byte, // old_pos to byte
+ old_end_byte, // old_end to byte
+ new_end_byte: start_byte, // old_pos to byte
+ start_point: Point::ZERO,
+ old_end_point: Point::ZERO,
+ new_end_point: Point::ZERO,
});
}
Insert(s) => {
- let (start_byte, start_position) = point_at_pos(old_text, old_pos);
+ let start_byte = old_text.char_to_byte(old_pos) as u32;
// a subsequent delete means a replace, consume it
if let Some(Delete(len)) = iter.peek() {
old_end = old_pos + len;
- let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end);
+ let old_end_byte = old_text.char_to_byte(old_end) as u32;
iter.next();
// replacement
- edits.push(tree_sitter::InputEdit {
- start_byte, // old_pos to byte
- old_end_byte, // old_end to byte
- new_end_byte: start_byte + s.len(), // old_pos to byte + s.len()
- start_position, // old pos to coords
- old_end_position, // old_end to coords
- new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over)
+ edits.push(InputEdit {
+ start_byte, // old_pos to byte
+ old_end_byte, // old_end to byte
+ new_end_byte: start_byte + s.len() as u32, // old_pos to byte + s.len()
+ start_point: Point::ZERO,
+ old_end_point: Point::ZERO,
+ new_end_point: Point::ZERO,
});
} else {
// insert
- edits.push(tree_sitter::InputEdit {
- start_byte, // old_pos to byte
- old_end_byte: start_byte, // same
- new_end_byte: start_byte + s.len(), // old_pos + s.len()
- start_position, // old pos to coords
- old_end_position: start_position, // same
- new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over)
+ edits.push(InputEdit {
+ start_byte, // old_pos to byte
+ old_end_byte: start_byte, // same
+ new_end_byte: start_byte + s.len() as u32, // old_pos + s.len()
+ start_point: Point::ZERO,
+ old_end_point: Point::ZERO,
+ new_end_point: Point::ZERO,
});
}
}
@@ -1165,949 +575,295 @@ pub(crate) fn generate_edits(
edits
}
-use std::sync::atomic::{AtomicUsize, Ordering};
-use std::{iter, mem, ops, str};
-use tree_sitter::{
- Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError,
- QueryMatch, Range, TextProvider, Tree,
-};
-
-const CANCELLATION_CHECK_INTERVAL: usize = 100;
-
-/// Indicates which highlight should be applied to a region of source code.
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub struct Highlight(pub usize);
-
-/// Represents the reason why syntax highlighting failed.
-#[derive(Debug, PartialEq, Eq)]
-pub enum Error {
- Cancelled,
- InvalidLanguage,
- InvalidRanges,
- Unknown,
-}
-
-/// Represents a single step in rendering a syntax-highlighted document.
-#[derive(Copy, Clone, Debug)]
-pub enum HighlightEvent {
- Source { start: usize, end: usize },
- HighlightStart(Highlight),
- HighlightEnd,
-}
-
-/// Contains the data needed to highlight code written in a particular language.
+/// A set of "overlay" highlights and ranges they apply to.
///
-/// This struct is immutable and can be shared between threads.
+/// As overlays, the styles for the given `Highlight`s are merged on top of the syntax highlights.
#[derive(Debug)]
-pub struct HighlightConfiguration {
- pub language: Grammar,
- pub query: Query,
- injections_query: Query,
- combined_injections_patterns: Vec<usize>,
- highlights_pattern_index: usize,
- highlight_indices: ArcSwap<Vec<Option<Highlight>>>,
- non_local_variable_patterns: Vec<bool>,
- injection_content_capture_index: Option<u32>,
- injection_language_capture_index: Option<u32>,
- injection_filename_capture_index: Option<u32>,
- injection_shebang_capture_index: Option<u32>,
- local_scope_capture_index: Option<u32>,
- local_def_capture_index: Option<u32>,
- local_def_value_capture_index: Option<u32>,
- local_ref_capture_index: Option<u32>,
-}
-
-#[derive(Debug)]
-struct LocalDef<'a> {
- name: Cow<'a, str>,
- value_range: ops::Range<usize>,
- highlight: Option<Highlight>,
-}
-
-#[derive(Debug)]
-struct LocalScope<'a> {
- inherits: bool,
- range: ops::Range<usize>,
- local_defs: Vec<LocalDef<'a>>,
-}
-
-#[derive(Debug)]
-struct HighlightIter<'a> {
- source: RopeSlice<'a>,
- byte_offset: usize,
- cancellation_flag: Option<&'a AtomicUsize>,
- layers: Vec<HighlightIterLayer<'a>>,
- iter_count: usize,
- next_event: Option<HighlightEvent>,
- last_highlight_range: Option<(usize, usize, u32)>,
-}
-
-// Adapter to convert rope chunks to bytes
-pub struct ChunksBytes<'a> {
- chunks: ropey::iter::Chunks<'a>,
-}
-impl<'a> Iterator for ChunksBytes<'a> {
- type Item = &'a [u8];
- fn next(&mut self) -> Option<Self::Item> {
- self.chunks.next().map(str::as_bytes)
- }
+pub enum OverlayHighlights {
+ /// All highlights use a single `Highlight`.
+ ///
+ /// Note that, currently, all ranges are assumed to be non-overlapping. This could change in
+ /// the future though.
+ Homogeneous {
+ highlight: Highlight,
+ ranges: Vec<ops::Range<usize>>,
+ },
+ /// A collection of different highlights for given ranges.
+ ///
+ /// Note that the ranges **must be non-overlapping**.
+ Heterogenous {
+ highlights: Vec<(Highlight, ops::Range<usize>)>,
+ },
}
-pub struct RopeProvider<'a>(pub RopeSlice<'a>);
-impl<'a> TextProvider<&'a [u8]> for RopeProvider<'a> {
- type I = ChunksBytes<'a>;
-
- fn text(&mut self, node: Node) -> Self::I {
- let fragment = self.0.byte_slice(node.start_byte()..node.end_byte());
- ChunksBytes {
- chunks: fragment.chunks(),
+impl OverlayHighlights {
+ pub fn single(highlight: Highlight, range: ops::Range<usize>) -> Self {
+ Self::Homogeneous {
+ highlight,
+ ranges: vec![range],
}
}
-}
-
-struct HighlightIterLayer<'a> {
- _tree: Option<Tree>,
- cursor: QueryCursor,
- captures: RefCell<iter::Peekable<QueryCaptures<'a, 'a, RopeProvider<'a>, &'a [u8]>>>,
- config: &'a HighlightConfiguration,
- highlight_end_stack: Vec<usize>,
- scope_stack: Vec<LocalScope<'a>>,
- depth: u32,
-}
-impl fmt::Debug for HighlightIterLayer<'_> {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- f.debug_struct("HighlightIterLayer").finish()
+ fn is_empty(&self) -> bool {
+ match self {
+ Self::Homogeneous { ranges, .. } => ranges.is_empty(),
+ Self::Heterogenous { highlights } => highlights.is_empty(),
+ }
}
}
-impl HighlightConfiguration {
- /// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting
- /// queries.
- ///
- /// # Parameters
+#[derive(Debug)]
+struct Overlay {
+ highlights: OverlayHighlights,
+ /// The position of the highlighter into the Vec of ranges of the overlays.
///
- /// * `language` - The Tree-sitter `Grammar` that should be used for parsing.
- /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
- /// should be non-empty, otherwise no syntax highlights will be added.
- /// * `injections_query` - A string containing tree patterns for injecting other languages
- /// into the document. This can be empty if no injections are desired.
- /// * `locals_query` - A string containing tree patterns for tracking local variable
- /// definitions and references. This can be empty if local variable tracking is not needed.
+ /// Used by the `OverlayHighlighter`.
+ idx: usize,
+ /// The currently active highlight (and the ending character index) for this overlay.
///
- /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
- pub fn new(
- language: Grammar,
- highlights_query: &str,
- injection_query: &str,
- locals_query: &str,
- ) -> Result<Self, QueryError> {
- // Concatenate the query strings, keeping track of the start offset of each section.
- let mut query_source = String::new();
- query_source.push_str(locals_query);
- let highlights_query_offset = query_source.len();
- query_source.push_str(highlights_query);
-
- // Construct a single query by concatenating the three query strings, but record the
- // range of pattern indices that belong to each individual string.
- let query = Query::new(&language, &query_source)?;
- let mut highlights_pattern_index = 0;
- for i in 0..(query.pattern_count()) {
- let pattern_offset = query.start_byte_for_pattern(i);
- if pattern_offset < highlights_query_offset {
- highlights_pattern_index += 1;
- }
- }
-
- let injections_query = Query::new(&language, injection_query)?;
- let combined_injections_patterns = (0..injections_query.pattern_count())
- .filter(|&i| {
- injections_query
- .property_settings(i)
- .iter()
- .any(|s| &*s.key == "injection.combined")
- })
- .collect();
-
- // Find all of the highlighting patterns that are disabled for nodes that
- // have been identified as local variables.
- let non_local_variable_patterns = (0..query.pattern_count())
- .map(|i| {
- query
- .property_predicates(i)
- .iter()
- .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local")
- })
- .collect();
-
- // Store the numeric ids for all of the special captures.
- let mut injection_content_capture_index = None;
- let mut injection_language_capture_index = None;
- let mut injection_filename_capture_index = None;
- let mut injection_shebang_capture_index = None;
- let mut local_def_capture_index = None;
- let mut local_def_value_capture_index = None;
- let mut local_ref_capture_index = None;
- let mut local_scope_capture_index = None;
- for (i, name) in query.capture_names().iter().enumerate() {
- let i = Some(i as u32);
- match *name {
- "local.definition" => local_def_capture_index = i,
- "local.definition-value" => local_def_value_capture_index = i,
- "local.reference" => local_ref_capture_index = i,
- "local.scope" => local_scope_capture_index = i,
- _ => {}
- }
- }
-
- for (i, name) in injections_query.capture_names().iter().enumerate() {
- let i = Some(i as u32);
- match *name {
- "injection.content" => injection_content_capture_index = i,
- "injection.language" => injection_language_capture_index = i,
- "injection.filename" => injection_filename_capture_index = i,
- "injection.shebang" => injection_shebang_capture_index = i,
- _ => {}
- }
- }
+ /// Used by the `OverlayHighlighter`.
+ active_highlight: Option<(Highlight, usize)>,
+}
- let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]);
- Ok(Self {
- language,
- query,
- injections_query,
- combined_injections_patterns,
- highlights_pattern_index,
- highlight_indices,
- non_local_variable_patterns,
- injection_content_capture_index,
- injection_language_capture_index,
- injection_filename_capture_index,
- injection_shebang_capture_index,
- local_scope_capture_index,
- local_def_capture_index,
- local_def_value_capture_index,
- local_ref_capture_index,
+impl Overlay {
+ fn new(highlights: OverlayHighlights) -> Option<Self> {
+ (!highlights.is_empty()).then_some(Self {
+ highlights,
+ idx: 0,
+ active_highlight: None,
})
}
- /// Get a slice containing all of the highlight names used in the configuration.
- pub fn names(&self) -> &[&str] {
- self.query.capture_names()
- }
-
- /// Set the list of recognized highlight names.
- ///
- /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
- /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of
- /// these queries can choose to recognize highlights with different levels of specificity.
- /// For example, the string `function.builtin` will match against `function.builtin.constructor`
- /// but will not match `function.method.builtin` and `function.method`.
- ///
- /// When highlighting, results are returned as `Highlight` values, which contain the index
- /// of the matched highlight this list of highlight names.
- pub fn configure(&self, recognized_names: &[String]) {
- let mut capture_parts = Vec::new();
- let indices: Vec<_> = self
- .query
- .capture_names()
- .iter()
- .map(move |capture_name| {
- capture_parts.clear();
- capture_parts.extend(capture_name.split('.'));
-
- let mut best_index = None;
- let mut best_match_len = 0;
- for (i, recognized_name) in recognized_names.iter().enumerate() {
- let mut len = 0;
- let mut matches = true;
- for (i, part) in recognized_name.split('.').enumerate() {
- match capture_parts.get(i) {
- Some(capture_part) if *capture_part == part => len += 1,
- _ => {
- matches = false;
- break;
- }
- }
- }
- if matches && len > best_match_len {
- best_index = Some(i);
- best_match_len = len;
- }
- }
- best_index.map(Highlight)
- })
- .collect();
-
- self.highlight_indices.store(Arc::new(indices));
- }
-
- fn injection_pair<'a>(
- &self,
- query_match: &QueryMatch<'a, 'a>,
- source: RopeSlice<'a>,
- ) -> (Option<InjectionLanguageMarker<'a>>, Option<Node<'a>>) {
- let mut injection_capture = None;
- let mut content_node = None;
-
- for capture in query_match.captures {
- let index = Some(capture.index);
- if index == self.injection_language_capture_index {
- injection_capture = Some(InjectionLanguageMarker::Name(
- source.byte_slice(capture.node.byte_range()),
- ));
- } else if index == self.injection_filename_capture_index {
- injection_capture = Some(InjectionLanguageMarker::Filename(
- source.byte_slice(capture.node.byte_range()),
- ));
- } else if index == self.injection_shebang_capture_index {
- let node_slice = source.byte_slice(capture.node.byte_range());
-
- // some languages allow space and newlines before the actual string content
- // so a shebang could be on either the first or second line
- let lines = if let Ok(end) = node_slice.try_line_to_byte(2) {
- node_slice.byte_slice(..end)
- } else {
- node_slice
- };
-
- static SHEBANG_REGEX: Lazy<rope::Regex> =
- Lazy::new(|| rope::Regex::new(SHEBANG).unwrap());
-
- injection_capture = SHEBANG_REGEX
- .captures_iter(lines.regex_input())
- .map(|cap| {
- let cap = lines.byte_slice(cap.get_group(1).unwrap().range());
- InjectionLanguageMarker::Shebang(cap)
- })
- .next()
- } else if index == self.injection_content_capture_index {
- content_node = Some(capture.node);
- }
- }
- (injection_capture, content_node)
- }
-
- fn injection_for_match<'a>(
- &self,
- query: &'a Query,
- query_match: &QueryMatch<'a, 'a>,
- source: RopeSlice<'a>,
- ) -> (
- Option<InjectionLanguageMarker<'a>>,
- Option<Node<'a>>,
- IncludedChildren,
- ) {
- let (mut injection_capture, content_node) = self.injection_pair(query_match, source);
-
- let mut included_children = IncludedChildren::default();
- for prop in query.property_settings(query_match.pattern_index) {
- match prop.key.as_ref() {
- // In addition to specifying the language name via the text of a
- // captured node, it can also be hard-coded via a `#set!` predicate
- // that sets the injection.language key.
- "injection.language" if injection_capture.is_none() => {
- injection_capture = prop
- .value
- .as_deref()
- .map(InjectionLanguageMarker::LanguageId);
- }
-
- // By default, injections do not include the *children* of an
- // `injection.content` node - only the ranges that belong to the
- // node itself. This can be changed using a `#set!` predicate that
- // sets the `injection.include-children` key.
- "injection.include-children" => included_children = IncludedChildren::All,
-
- // Some queries might only exclude named children but include unnamed
- // children in their `injection.content` node. This can be enabled using
- // a `#set!` predicate that sets the `injection.include-unnamed-children` key.
- "injection.include-unnamed-children" => {
- included_children = IncludedChildren::Unnamed
- }
- _ => {}
- }
+ fn current(&self) -> Option<(Highlight, ops::Range<usize>)> {
+ match &self.highlights {
+ OverlayHighlights::Homogeneous { highlight, ranges } => ranges
+ .get(self.idx)
+ .map(|range| (*highlight, range.clone())),
+ OverlayHighlights::Heterogenous { highlights } => highlights.get(self.idx).cloned(),
}
-
- (injection_capture, content_node, included_children)
}
-}
-impl HighlightIterLayer<'_> {
- // First, sort scope boundaries by their byte offset in the document. At a
- // given position, emit scope endings before scope beginnings. Finally, emit
- // scope boundaries from deeper layers first.
- fn sort_key(&self) -> Option<(usize, bool, isize)> {
- let depth = -(self.depth as isize);
- let next_start = self
- .captures
- .borrow_mut()
- .peek()
- .map(|(m, i)| m.captures[*i].node.start_byte());
- let next_end = self.highlight_end_stack.last().cloned();
- match (next_start, next_end) {
- (Some(start), Some(end)) => {
- if start < end {
- Some((start, true, depth))
- } else {
- Some((end, false, depth))
- }
+ fn start(&self) -> Option<usize> {
+ match &self.highlights {
+ OverlayHighlights::Homogeneous { ranges, .. } => {
+ ranges.get(self.idx).map(|range| range.start)
}
- (Some(i), None) => Some((i, true, depth)),
- (None, Some(j)) => Some((j, false, depth)),
- _ => None,
+ OverlayHighlights::Heterogenous { highlights } => highlights
+ .get(self.idx)
+ .map(|(_highlight, range)| range.start),
}
}
}
-#[derive(Clone)]
-enum IncludedChildren {
- None,
- All,
- Unnamed,
-}
-
-impl Default for IncludedChildren {
- fn default() -> Self {
- Self::None
- }
+/// Applies a collection of overlay highlights, merged on top of syntax highlights, when rendering.
+#[derive(Debug)]
+pub struct OverlayHighlighter {
+ overlays: Vec<Overlay>,
+ next_highlight_start: usize,
+ next_highlight_end: usize,
}
-// Compute the ranges that should be included when parsing an injection.
-// This takes into account three things:
-// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
-// * `nodes` - Every injection takes place within a set of nodes. The injection ranges
-// are the ranges of those nodes.
-// * `includes_children` - For some injections, the content nodes' children should be
-// excluded from the nested document, so that only the content nodes' *own* content
-// is reparsed. For other injections, the content nodes' entire ranges should be
-// reparsed, including the ranges of their children.
-fn intersect_ranges(
- parent_ranges: &[Range],
- nodes: &[Node],
- included_children: IncludedChildren,
-) -> Vec<Range> {
- let mut cursor = nodes[0].walk();
- let mut result = Vec::new();
- let mut parent_range_iter = parent_ranges.iter();
- let mut parent_range = parent_range_iter
- .next()
- .expect("Layers should only be constructed with non-empty ranges vectors");
- for node in nodes.iter() {
- let mut preceding_range = Range {
- start_byte: 0,
- start_point: Point::new(0, 0),
- end_byte: node.start_byte(),
- end_point: node.start_position(),
- };
- let following_range = Range {
- start_byte: node.end_byte(),
- start_point: node.end_position(),
- end_byte: usize::MAX,
- end_point: Point::new(usize::MAX, usize::MAX),
- };
-
- for excluded_range in node
- .children(&mut cursor)
- .filter_map(|child| match included_children {
- IncludedChildren::None => Some(child.range()),
- IncludedChildren::All => None,
- IncludedChildren::Unnamed => {
- if child.is_named() {
- Some(child.range())
- } else {
- None
- }
- }
- })
- .chain([following_range].iter().cloned())
- {
- let mut range = Range {
- start_byte: preceding_range.end_byte,
- start_point: preceding_range.end_point,
- end_byte: excluded_range.start_byte,
- end_point: excluded_range.start_point,
- };
- preceding_range = excluded_range;
-
- if range.end_byte < parent_range.start_byte {
- continue;
- }
-
- while parent_range.start_byte <= range.end_byte {
- if parent_range.end_byte > range.start_byte {
- if range.start_byte < parent_range.start_byte {
- range.start_byte = parent_range.start_byte;
- range.start_point = parent_range.start_point;
- }
-
- if parent_range.end_byte < range.end_byte {
- if range.start_byte < parent_range.end_byte {
- result.push(Range {
- start_byte: range.start_byte,
- start_point: range.start_point,
- end_byte: parent_range.end_byte,
- end_point: parent_range.end_point,
- });
- }
- range.start_byte = parent_range.end_byte;
- range.start_point = parent_range.end_point;
- } else {
- if range.start_byte < range.end_byte {
- result.push(range);
- }
- break;
- }
- }
+impl OverlayHighlighter {
+ pub fn new(overlays: impl IntoIterator<Item = OverlayHighlights>) -> Self {
+ let overlays: Vec<_> = overlays.into_iter().filter_map(Overlay::new).collect();
+ let next_highlight_start = overlays
+ .iter()
+ .filter_map(|overlay| overlay.start())
+ .min()
+ .unwrap_or(usize::MAX);
- if let Some(next_range) = parent_range_iter.next() {
- parent_range = next_range;
- } else {
- return result;
- }
- }
+ Self {
+ overlays,
+ next_highlight_start,
+ next_highlight_end: usize::MAX,
}
}
- result
-}
-impl HighlightIter<'_> {
- fn emit_event(
- &mut self,
- offset: usize,
- event: Option<HighlightEvent>,
- ) -> Option<Result<HighlightEvent, Error>> {
- let result;
- if self.byte_offset < offset {
- result = Some(Ok(HighlightEvent::Source {
- start: self.byte_offset,
- end: offset,
- }));
- self.byte_offset = offset;
- self.next_event = event;
- } else {
- result = event.map(Ok);
- }
- self.sort_layers();
- result
- }
-
- fn sort_layers(&mut self) {
- while !self.layers.is_empty() {
- if let Some(sort_key) = self.layers[0].sort_key() {
- let mut i = 0;
- while i + 1 < self.layers.len() {
- if let Some(next_offset) = self.layers[i + 1].sort_key() {
- if next_offset < sort_key {
- i += 1;
- continue;
- }
- } else {
- let layer = self.layers.remove(i + 1);
- PARSER.with(|ts_parser| {
- let highlighter = &mut ts_parser.borrow_mut();
- highlighter.cursors.push(layer.cursor);
- });
- }
- break;
- }
- if i > 0 {
- self.layers[0..(i + 1)].rotate_left(1);
- }
- break;
- } else {
- let layer = self.layers.remove(0);
- PARSER.with(|ts_parser| {
- let highlighter = &mut ts_parser.borrow_mut();
- highlighter.cursors.push(layer.cursor);
- });
- }
- }
+ /// The current position in the overlay highlights.
+ ///
+ /// This method is meant to be used when treating this type as a cursor over the overlay
+ /// highlights.
+ ///
+ /// `usize::MAX` is returned when there are no more overlay highlights.
+ pub fn next_event_offset(&self) -> usize {
+ self.next_highlight_start.min(self.next_highlight_end)
}
-}
-
-impl Iterator for HighlightIter<'_> {
- type Item = Result<HighlightEvent, Error>;
-
- fn next(&mut self) -> Option<Self::Item> {
- 'main: loop {
- // If we've already determined the next highlight boundary, just return it.
- if let Some(e) = self.next_event.take() {
- return Some(Ok(e));
- }
-
- // Periodically check for cancellation, returning `Cancelled` error if the
- // cancellation flag was flipped.
- if let Some(cancellation_flag) = self.cancellation_flag {
- self.iter_count += 1;
- if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
- self.iter_count = 0;
- if cancellation_flag.load(Ordering::Relaxed) != 0 {
- return Some(Err(Error::Cancelled));
- }
- }
- }
-
- // If none of the layers have any more highlight boundaries, terminate.
- if self.layers.is_empty() {
- let len = self.source.len_bytes();
- return if self.byte_offset < len {
- let result = Some(Ok(HighlightEvent::Source {
- start: self.byte_offset,
- end: len,
- }));
- self.byte_offset = len;
- result
- } else {
- None
- };
- }
-
- // Get the next capture from whichever layer has the earliest highlight boundary.
- let range;
- let layer = &mut self.layers[0];
- let captures = layer.captures.get_mut();
- if let Some((next_match, capture_index)) = captures.peek() {
- let next_capture = next_match.captures[*capture_index];
- range = next_capture.node.byte_range();
-
- // If any previous highlight ends before this node starts, then before
- // processing this capture, emit the source code up until the end of the
- // previous highlight, and an end event for that highlight.
- if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
- if end_byte <= range.start {
- layer.highlight_end_stack.pop();
- return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
- }
- }
- }
- // If there are no more captures, then emit any remaining highlight end events.
- // And if there are none of those, then just advance to the end of the document.
- else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
- layer.highlight_end_stack.pop();
- return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
- } else {
- return self.emit_event(self.source.len_bytes(), None);
- };
-
- let (mut match_, capture_index) = captures.next().unwrap();
- let mut capture = match_.captures[capture_index];
-
- // Remove from the local scope stack any local scopes that have already ended.
- while range.start > layer.scope_stack.last().unwrap().range.end {
- layer.scope_stack.pop();
- }
-
- // If this capture is for tracking local variables, then process the
- // local variable info.
- let mut reference_highlight = None;
- let mut definition_highlight = None;
- while match_.pattern_index < layer.config.highlights_pattern_index {
- // If the node represents a local scope, push a new local scope onto
- // the scope stack.
- if Some(capture.index) == layer.config.local_scope_capture_index {
- definition_highlight = None;
- let mut scope = LocalScope {
- inherits: true,
- range: range.clone(),
- local_defs: Vec::new(),
- };
- for prop in layer.config.query.property_settings(match_.pattern_index) {
- if let "local.scope-inherits" = prop.key.as_ref() {
- scope.inherits =
- prop.value.as_ref().map_or(true, |r| r.as_ref() == "true");
- }
- }
- layer.scope_stack.push(scope);
- }
- // If the node represents a definition, add a new definition to the
- // local scope at the top of the scope stack.
- else if Some(capture.index) == layer.config.local_def_capture_index {
- reference_highlight = None;
- let scope = layer.scope_stack.last_mut().unwrap();
-
- let mut value_range = 0..0;
- for capture in match_.captures {
- if Some(capture.index) == layer.config.local_def_value_capture_index {
- value_range = capture.node.byte_range();
- }
- }
- let name = byte_range_to_str(range.clone(), self.source);
- scope.local_defs.push(LocalDef {
- name,
- value_range,
- highlight: None,
- });
- definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight);
- }
- // If the node represents a reference, then try to find the corresponding
- // definition in the scope stack.
- else if Some(capture.index) == layer.config.local_ref_capture_index
- && definition_highlight.is_none()
+ pub fn advance(&mut self) -> (HighlightEvent, impl Iterator<Item = Highlight> + '_) {
+ let mut refresh = false;
+ let prev_stack_size = self
+ .overlays
+ .iter()
+ .filter(|overlay| overlay.active_highlight.is_some())
+ .count();
+ let pos = self.next_event_offset();
+
+ if self.next_highlight_end == pos {
+ for overlay in self.overlays.iter_mut() {
+ if overlay
+ .active_highlight
+ .is_some_and(|(_highlight, end)| end == pos)
{
- definition_highlight = None;
- let name = byte_range_to_str(range.clone(), self.source);
- for scope in layer.scope_stack.iter().rev() {
- if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
- if def.name == name && range.start >= def.value_range.end {
- Some(def.highlight)
- } else {
- None
- }
- }) {
- reference_highlight = highlight;
- break;
- }
- if !scope.inherits {
- break;
- }
- }
- }
-
- // Continue processing any additional matches for the same node.
- if let Some((next_match, next_capture_index)) = captures.peek() {
- let next_capture = next_match.captures[*next_capture_index];
- if next_capture.node == capture.node {
- capture = next_capture;
- match_ = captures.next().unwrap().0;
- continue;
- }
- }
-
- self.sort_layers();
- continue 'main;
- }
-
- // Otherwise, this capture must represent a highlight.
- // If this exact range has already been highlighted by an earlier pattern, or by
- // a different layer, then skip over this one.
- if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
- if range.start == last_start && range.end == last_end && layer.depth < last_depth {
- self.sort_layers();
- continue 'main;
+ overlay.active_highlight.take();
}
}
- // If the current node was found to be a local variable, then skip over any
- // highlighting patterns that are disabled for local variables.
- if definition_highlight.is_some() || reference_highlight.is_some() {
- while layer.config.non_local_variable_patterns[match_.pattern_index] {
- match_.remove();
- if let Some((next_match, next_capture_index)) = captures.peek() {
- let next_capture = next_match.captures[*next_capture_index];
- if next_capture.node == capture.node {
- capture = next_capture;
- match_ = captures.next().unwrap().0;
- continue;
- }
- }
-
- self.sort_layers();
- continue 'main;
- }
- }
+ refresh = true;
+ }
- // Use the last capture found for the current node, skipping over any
- // highlight patterns that also match this node. Captures
- // for a given node are ordered by pattern index, so these subsequent
- // captures are guaranteed to be for highlighting, not injections or
- // local variables.
- while let Some((next_match, next_capture_index)) = captures.peek() {
- let next_capture = next_match.captures[*next_capture_index];
- if next_capture.node == capture.node {
- match_.remove();
- capture = next_capture;
- match_ = captures.next().unwrap().0;
- } else {
- break;
+ while self.next_highlight_start == pos {
+ let mut activated_idx = usize::MAX;
+ for (idx, overlay) in self.overlays.iter_mut().enumerate() {
+ let Some((highlight, range)) = overlay.current() else {
+ continue;
+ };
+ if range.start != self.next_highlight_start {
+ continue;
}
- }
- let current_highlight = layer.config.highlight_indices.load()[capture.index as usize];
+ // If this overlay has a highlight at this start index, set its active highlight
+ // and increment the cursor position within the overlay.
+ overlay.active_highlight = Some((highlight, range.end));
+ overlay.idx += 1;
- // If this node represents a local definition, then store the current
- // highlight value on the local scope entry representing this node.
- if let Some(definition_highlight) = definition_highlight {
- *definition_highlight = current_highlight;
+ activated_idx = activated_idx.min(idx);
}
- // Emit a scope start event and push the node's end position to the stack.
- if let Some(highlight) = reference_highlight.or(current_highlight) {
- self.last_highlight_range = Some((range.start, range.end, layer.depth));
- layer.highlight_end_stack.push(range.end);
- return self
- .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
- }
-
- self.sort_layers();
+ // If `self.next_highlight_start == pos` that means that some overlay was ready to
+ // emit a highlight, so `activated_idx` must have been set to an existing index.
+ assert!(
+ (0..self.overlays.len()).contains(&activated_idx),
+ "expected an overlay to highlight (at pos {pos}, there are {} overlays)",
+ self.overlays.len()
+ );
+
+ // If any overlays are active after the (lowest) one which was just activated, the
+ // highlights need to be refreshed.
+ refresh |= self.overlays[activated_idx..]
+ .iter()
+ .any(|overlay| overlay.active_highlight.is_some());
+
+ self.next_highlight_start = self
+ .overlays
+ .iter()
+ .filter_map(|overlay| overlay.start())
+ .min()
+ .unwrap_or(usize::MAX);
}
- }
-}
-
-#[derive(Debug, Clone)]
-pub enum InjectionLanguageMarker<'a> {
- /// The language is specified by `LanguageConfiguration`'s `language_id` field.
- ///
- /// This marker is used when a pattern sets the `injection.language` property, for example
- /// `(#set! injection.language "rust")`.
- LanguageId(&'a str),
- /// The language is specified in the document and captured by `@injection.language`.
- ///
- /// This is used for markdown code fences for example. While the `LanguageId` variant can be
- /// looked up by finding the language config that sets an `language_id`, this variant contains
- /// text from the document being highlighted, so the text is checked against each language's
- /// `injection_regex`.
- Name(RopeSlice<'a>),
- Filename(RopeSlice<'a>),
- Shebang(RopeSlice<'a>),
-}
-const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
-
-pub struct Merge<I> {
- iter: I,
- spans: Box<dyn Iterator<Item = (usize, std::ops::Range<usize>)>>,
+ self.next_highlight_end = self
+ .overlays
+ .iter()
+ .filter_map(|overlay| Some(overlay.active_highlight?.1))
+ .min()
+ .unwrap_or(usize::MAX);
- next_event: Option<HighlightEvent>,
- next_span: Option<(usize, std::ops::Range<usize>)>,
+ let (event, start) = if refresh {
+ (HighlightEvent::Refresh, 0)
+ } else {
+ (HighlightEvent::Push, prev_stack_size)
+ };
- queue: Vec<HighlightEvent>,
+ (
+ event,
+ self.overlays
+ .iter()
+ .flat_map(|overlay| overlay.active_highlight)
+ .map(|(highlight, _end)| highlight)
+ .skip(start),
+ )
+ }
}
-/// Merge a list of spans into the highlight event stream.
-pub fn merge<I: Iterator<Item = HighlightEvent>>(
- iter: I,
- spans: Vec<(usize, std::ops::Range<usize>)>,
-) -> Merge<I> {
- let spans = Box::new(spans.into_iter());
- let mut merge = Merge {
- iter,
- spans,
- next_event: None,
- next_span: None,
- queue: Vec::new(),
- };
- merge.next_event = merge.iter.next();
- merge.next_span = merge.spans.next();
- merge
+#[derive(Debug)]
+pub enum CapturedNode<'a> {
+ Single(Node<'a>),
+ /// Guaranteed to be not empty
+ Grouped(Vec<Node<'a>>),
}
-impl<I: Iterator<Item = HighlightEvent>> Iterator for Merge<I> {
- type Item = HighlightEvent;
- fn next(&mut self) -> Option<Self::Item> {
- use HighlightEvent::*;
- if let Some(event) = self.queue.pop() {
- return Some(event);
+impl CapturedNode<'_> {
+ pub fn start_byte(&self) -> usize {
+ match self {
+ Self::Single(n) => n.start_byte() as usize,
+ Self::Grouped(ns) => ns[0].start_byte() as usize,
}
+ }
- loop {
- match (self.next_event, &self.next_span) {
- // this happens when range is partially or fully offscreen
- (Some(Source { start, .. }), Some((span, range))) if start > range.start => {
- if start > range.end {
- self.next_span = self.spans.next();
- } else {
- self.next_span = Some((*span, start..range.end));
- };
- }
- _ => break,
- }
+ pub fn end_byte(&self) -> usize {
+ match self {
+ Self::Single(n) => n.end_byte() as usize,
+ Self::Grouped(ns) => ns.last().unwrap().end_byte() as usize,
}
+ }
- match (self.next_event, &self.next_span) {
- (Some(HighlightStart(i)), _) => {
- self.next_event = self.iter.next();
- Some(HighlightStart(i))
- }
- (Some(HighlightEnd), _) => {
- self.next_event = self.iter.next();
- Some(HighlightEnd)
- }
- (Some(Source { start, end }), Some((_, range))) if start < range.start => {
- let intersect = range.start.min(end);
- let event = Source {
- start,
- end: intersect,
- };
-
- if end == intersect {
- // the event is complete
- self.next_event = self.iter.next();
- } else {
- // subslice the event
- self.next_event = Some(Source {
- start: intersect,
- end,
- });
- };
-
- Some(event)
- }
- (Some(Source { start, end }), Some((span, range))) if start == range.start => {
- let intersect = range.end.min(end);
- let event = HighlightStart(Highlight(*span));
-
- // enqueue in reverse order
- self.queue.push(HighlightEnd);
- self.queue.push(Source {
- start,
- end: intersect,
- });
+ pub fn byte_range(&self) -> ops::Range<usize> {
+ self.start_byte()..self.end_byte()
+ }
+}
- if end == intersect {
- // the event is complete
- self.next_event = self.iter.next();
- } else {
- // subslice the event
- self.next_event = Some(Source {
- start: intersect,
- end,
- });
- };
+#[derive(Debug)]
+pub struct TextObjectQuery {
+ query: Query,
+}
- if intersect == range.end {
- self.next_span = self.spans.next();
- } else {
- self.next_span = Some((*span, intersect..range.end));
- }
+impl TextObjectQuery {
+ pub fn new(query: Query) -> Self {
+ Self { query }
+ }
- Some(event)
- }
- (Some(event), None) => {
- self.next_event = self.iter.next();
- Some(event)
- }
- // Can happen if cursor at EOF and/or diagnostic reaches past the end.
- // We need to actually emit events for the cursor-at-EOF situation,
- // even though the range is past the end of the text. This needs to be
- // handled appropriately by the drawing code by not assuming that
- // all `Source` events point to valid indices in the rope.
- (None, Some((span, range))) => {
- let event = HighlightStart(Highlight(*span));
- self.queue.push(HighlightEnd);
- self.queue.push(Source {
- start: range.start,
- end: range.end,
- });
- self.next_span = self.spans.next();
- Some(event)
- }
- (None, None) => None,
- e => unreachable!("{:?}", e),
- }
+ /// Run the query on the given node and return sub nodes which match given
+ /// capture ("function.inside", "class.around", etc).
+ ///
+ /// Captures may contain multiple nodes by using quantifiers (+, *, etc),
+ /// and support for this is partial and could use improvement.
+ ///
+ /// ```query
+ /// (comment)+ @capture
+ ///
+ /// ; OR
+ /// (
+ /// (comment)*
+ /// .
+ /// (function)
+ /// ) @capture
+ /// ```
+ pub fn capture_nodes<'a>(
+ &'a self,
+ capture_name: &str,
+ node: &Node<'a>,
+ slice: RopeSlice<'a>,
+ ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
+ self.capture_nodes_any(&[capture_name], node, slice)
}
-}
-fn node_is_visible(node: &Node) -> bool {
- node.is_missing() || (node.is_named() && node.language().node_kind_is_visible(node.kind_id()))
-}
+ /// Find the first capture that exists out of all given `capture_names`
+ /// and return sub nodes that match this capture.
+ pub fn capture_nodes_any<'a>(
+ &'a self,
+ capture_names: &[&str],
+ node: &Node<'a>,
+ slice: RopeSlice<'a>,
+ ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
+ let capture = capture_names
+ .iter()
+ .find_map(|cap| self.query.get_capture(cap))?;
-fn format_anonymous_node_kind(kind: &str) -> Cow<str> {
- if kind.contains('"') {
- Cow::Owned(kind.replace('"', "\\\""))
- } else {
- Cow::Borrowed(kind)
+ let mut cursor = InactiveQueryCursor::new();
+ cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
+ let mut cursor = cursor.execute_query(&self.query, node, RopeInput::new(slice));
+ let capture_node = iter::from_fn(move || {
+ let (mat, _) = cursor.next_matched_node()?;
+ Some(mat.nodes_for_capture(capture).cloned().collect())
+ })
+ .filter_map(move |nodes: Vec<_>| {
+ if nodes.len() > 1 {
+ Some(CapturedNode::Grouped(nodes))
+ } else {
+ nodes.into_iter().map(CapturedNode::Single).next()
+ }
+ });
+ Some(capture_node)
}
}
@@ -2123,6 +879,18 @@ pub fn pretty_print_tree<W: fmt::Write>(fmt: &mut W, node: Node) -> fmt::Result
}
}
+fn node_is_visible(node: &Node) -> bool {
+ node.is_missing() || (node.is_named() && node.grammar().node_kind_is_visible(node.kind_id()))
+}
+
+fn format_anonymous_node_kind(kind: &str) -> Cow<str> {
+ if kind.contains('"') {
+ Cow::Owned(kind.replace('"', "\\\""))
+ } else {
+ Cow::Borrowed(kind)
+ }
+}
+
fn pretty_print_tree_impl<W: fmt::Write>(
fmt: &mut W,
cursor: &mut tree_sitter::TreeCursor,
@@ -2173,9 +941,13 @@ fn pretty_print_tree_impl<W: fmt::Write>(
#[cfg(test)]
mod test {
+ use once_cell::sync::Lazy;
+
use super::*;
use crate::{Rope, Transaction};
+ static LOADER: Lazy<Loader> = Lazy::new(|| crate::config::user_lang_loader().unwrap());
+
#[test]
fn test_textobject_queries() {
let query_str = r#"
@@ -2190,29 +962,16 @@ mod test {
"#,
);
- let loader = Loader::new(Configuration {
- language: vec![],
- language_server: HashMap::new(),
- })
- .unwrap();
- let language = get_language("rust").unwrap();
-
- let query = Query::new(&language, query_str).unwrap();
- let textobject = TextObjectQuery { query };
- let mut cursor = QueryCursor::new();
-
- let config = HighlightConfiguration::new(language, "", "", "").unwrap();
- let syntax = Syntax::new(
- source.slice(..),
- Arc::new(config),
- Arc::new(ArcSwap::from_pointee(loader)),
- )
- .unwrap();
+ let language = LOADER.language_for_name("rust").unwrap();
+ let grammar = LOADER.get_config(language).unwrap().grammar;
+ let query = Query::new(grammar, query_str, |_, _| Ok(())).unwrap();
+ let textobject = TextObjectQuery::new(query);
+ let syntax = Syntax::new(source.slice(..), language, &LOADER).unwrap();
let root = syntax.tree().root_node();
- let mut test = |capture, range| {
+ let test = |capture, range| {
let matches: Vec<_> = textobject
- .capture_nodes(capture, root, source.slice(..), &mut cursor)
+ .capture_nodes(capture, &root, source.slice(..))
.unwrap()
.collect();
@@ -2232,82 +991,8 @@ mod test {
}
#[test]
- fn test_parser() {
- let highlight_names: Vec<String> = [
- "attribute",
- "constant",
- "function.builtin",
- "function",
- "keyword",
- "operator",
- "property",
- "punctuation",
- "punctuation.bracket",
- "punctuation.delimiter",
- "string",
- "string.special",
- "tag",
- "type",
- "type.builtin",
- "variable",
- "variable.builtin",
- "variable.parameter",
- ]
- .iter()
- .cloned()
- .map(String::from)
- .collect();
-
- let loader = Loader::new(Configuration {
- language: vec![],
- language_server: HashMap::new(),
- })
- .unwrap();
-
- let language = get_language("rust").unwrap();
- let config = HighlightConfiguration::new(
- language,
- &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm")
- .unwrap(),
- &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/injections.scm")
- .unwrap(),
- "", // locals.scm
- )
- .unwrap();
- config.configure(&highlight_names);
-
- let source = Rope::from_str(
- "
- struct Stuff {}
- fn main() {}
- ",
- );
- let syntax = Syntax::new(
- source.slice(..),
- Arc::new(config),
- Arc::new(ArcSwap::from_pointee(loader)),
- )
- .unwrap();
- let tree = syntax.tree();
- let root = tree.root_node();
- assert_eq!(root.kind(), "source_file");
-
- assert_eq!(
- root.to_sexp(),
- concat!(
- "(source_file ",
- "(struct_item name: (type_identifier) body: (field_declaration_list)) ",
- "(function_item name: (identifier) parameters: (parameters) body: (block)))"
- )
- );
-
- let struct_node = root.child(0).unwrap();
- assert_eq!(struct_node.kind(), "struct_item");
- }
-
- #[test]
fn test_input_edits() {
- use tree_sitter::InputEdit;
+ use tree_sitter::{InputEdit, Point};
let doc = Rope::from("hello world!\ntest 123");
let transaction = Transaction::change(
@@ -2324,17 +1009,17 @@ mod test {
start_byte: 6,
old_end_byte: 11,
new_end_byte: 10,
- start_position: Point { row: 0, column: 6 },
- old_end_position: Point { row: 0, column: 11 },
- new_end_position: Point { row: 0, column: 10 }
+ start_point: Point::ZERO,
+ old_end_point: Point::ZERO,
+ new_end_point: Point::ZERO
},
InputEdit {
start_byte: 12,
old_end_byte: 17,
new_end_byte: 12,
- start_position: Point { row: 0, column: 12 },
- old_end_position: Point { row: 1, column: 4 },
- new_end_position: Point { row: 0, column: 12 }
+ start_point: Point::ZERO,
+ old_end_point: Point::ZERO,
+ new_end_point: Point::ZERO
}
]
);
@@ -2353,9 +1038,9 @@ mod test {
start_byte: 8,
old_end_byte: 8,
new_end_byte: 14,
- start_position: Point { row: 0, column: 8 },
- old_end_position: Point { row: 0, column: 8 },
- new_end_position: Point { row: 0, column: 14 }
+ start_point: Point::ZERO,
+ old_end_point: Point::ZERO,
+ new_end_point: Point::ZERO
}]
);
}
@@ -2369,26 +1054,13 @@ mod test {
end: usize,
) {
let source = Rope::from_str(source);
-
- let loader = Loader::new(Configuration {
- language: vec![],
- language_server: HashMap::new(),
- })
- .unwrap();
- let language = get_language(language_name).unwrap();
-
- let config = HighlightConfiguration::new(language, "", "", "").unwrap();
- let syntax = Syntax::new(
- source.slice(..),
- Arc::new(config),
- Arc::new(ArcSwap::from_pointee(loader)),
- )
- .unwrap();
+ let language = LOADER.language_for_name(language_name).unwrap();
+ let syntax = Syntax::new(source.slice(..), language, &LOADER).unwrap();
let root = syntax
.tree()
.root_node()
- .descendant_for_byte_range(start, end)
+ .descendant_for_byte_range(start as u32, end as u32)
.unwrap();
let mut output = String::new();
@@ -2456,14 +1128,4 @@ mod test {
source.len(),
);
}
-
- #[test]
- fn test_load_runtime_file() {
- // Test to make sure we can load some data from the runtime directory.
- let contents = load_runtime_file("rust", "indents.scm").unwrap();
- assert!(!contents.is_empty());
-
- let results = load_runtime_file("rust", "does-not-exist");
- assert!(results.is_err());
- }
}
diff --git a/helix-core/src/syntax/config.rs b/helix-core/src/syntax/config.rs
index f73103c2..432611bb 100644
--- a/helix-core/src/syntax/config.rs
+++ b/helix-core/src/syntax/config.rs
@@ -1,8 +1,7 @@
-use crate::{auto_pairs::AutoPairs, diagnostic::Severity};
+use crate::{auto_pairs::AutoPairs, diagnostic::Severity, Language};
use globset::GlobSet;
use helix_stdx::rope;
-use once_cell::sync::OnceCell;
use serde::{ser::SerializeSeq as _, Deserialize, Serialize};
use std::{
@@ -10,7 +9,6 @@ use std::{
fmt::{self, Display},
path::PathBuf,
str::FromStr,
- sync::Arc,
};
#[derive(Debug, Serialize, Deserialize)]
@@ -24,6 +22,9 @@ pub struct Configuration {
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", deny_unknown_fields)]
pub struct LanguageConfiguration {
+ #[serde(skip)]
+ pub(super) language: Option<Language>,
+
#[serde(rename = "name")]
pub language_id: String, // c-sharp, rust, tsx
#[serde(rename = "language-id")]
@@ -70,9 +71,6 @@ pub struct LanguageConfiguration {
pub injection_regex: Option<rope::Regex>,
// first_line_regex
//
- #[serde(skip)]
- pub(crate) highlight_config: OnceCell<Option<Arc<super::HighlightConfiguration>>>,
- // tags_config OnceCell<> https://github.com/tree-sitter/tree-sitter/pull/583
#[serde(
default,
skip_serializing_if = "Vec::is_empty",
@@ -83,10 +81,6 @@ pub struct LanguageConfiguration {
#[serde(skip_serializing_if = "Option::is_none")]
pub indent: Option<IndentationConfiguration>,
- #[serde(skip)]
- pub(crate) indent_query: OnceCell<Option<tree_sitter::Query>>,
- #[serde(skip)]
- pub(crate) textobject_query: OnceCell<Option<super::TextObjectQuery>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub debugger: Option<DebugAdapterConfig>,
@@ -106,6 +100,13 @@ pub struct LanguageConfiguration {
pub persistent_diagnostic_sources: Vec<String>,
}
+impl LanguageConfiguration {
+ pub fn language(&self) -> Language {
+ // This value must be set by `super::Loader::new`.
+ self.language.unwrap()
+ }
+}
+
#[derive(Debug, PartialEq, Eq, Hash)]
pub enum FileType {
/// The extension of the file, either the `Path::extension` or the full
diff --git a/helix-core/src/syntax/tree_cursor.rs b/helix-core/src/syntax/tree_cursor.rs
deleted file mode 100644
index d82ea74d..00000000
--- a/helix-core/src/syntax/tree_cursor.rs
+++ /dev/null
@@ -1,264 +0,0 @@
-use std::{cmp::Reverse, ops::Range};
-
-use super::{LanguageLayer, LayerId};
-
-use slotmap::HopSlotMap;
-use tree_sitter::Node;
-
-/// The byte range of an injection layer.
-///
-/// Injection ranges may overlap, but all overlapping parts are subsets of their parent ranges.
-/// This allows us to sort the ranges ahead of time in order to efficiently find a range that
-/// contains a point with maximum depth.
-#[derive(Debug)]
-struct InjectionRange {
- start: usize,
- end: usize,
- layer_id: LayerId,
- depth: u32,
-}
-
-pub struct TreeCursor<'a> {
- layers: &'a HopSlotMap<LayerId, LanguageLayer>,
- root: LayerId,
- current: LayerId,
- injection_ranges: Vec<InjectionRange>,
- // TODO: Ideally this would be a `tree_sitter::TreeCursor<'a>` but
- // that returns very surprising results in testing.
- cursor: Node<'a>,
-}
-
-impl<'a> TreeCursor<'a> {
- pub(super) fn new(layers: &'a HopSlotMap<LayerId, LanguageLayer>, root: LayerId) -> Self {
- let mut injection_ranges = Vec::new();
-
- for (layer_id, layer) in layers.iter() {
- // Skip the root layer
- if layer.parent.is_none() {
- continue;
- }
- for byte_range in layer.ranges.iter() {
- let range = InjectionRange {
- start: byte_range.start_byte,
- end: byte_range.end_byte,
- layer_id,
- depth: layer.depth,
- };
- injection_ranges.push(range);
- }
- }
-
- injection_ranges.sort_unstable_by_key(|range| (range.end, Reverse(range.depth)));
-
- let cursor = layers[root].tree().root_node();
-
- Self {
- layers,
- root,
- current: root,
- injection_ranges,
- cursor,
- }
- }
-
- pub fn node(&self) -> Node<'a> {
- self.cursor
- }
-
- pub fn goto_parent(&mut self) -> bool {
- if let Some(parent) = self.node().parent() {
- self.cursor = parent;
- return true;
- }
-
- // If we are already on the root layer, we cannot ascend.
- if self.current == self.root {
- return false;
- }
-
- // Ascend to the parent layer.
- let range = self.node().byte_range();
- let parent_id = self.layers[self.current]
- .parent
- .expect("non-root layers have a parent");
- self.current = parent_id;
- let root = self.layers[self.current].tree().root_node();
- self.cursor = root
- .descendant_for_byte_range(range.start, range.end)
- .unwrap_or(root);
-
- true
- }
-
- pub fn goto_parent_with<P>(&mut self, predicate: P) -> bool
- where
- P: Fn(&Node) -> bool,
- {
- while self.goto_parent() {
- if predicate(&self.node()) {
- return true;
- }
- }
-
- false
- }
-
- /// Finds the injection layer that has exactly the same range as the given `range`.
- fn layer_id_of_byte_range(&self, search_range: Range<usize>) -> Option<LayerId> {
- let start_idx = self
- .injection_ranges
- .partition_point(|range| range.end < search_range.end);
-
- self.injection_ranges[start_idx..]
- .iter()
- .take_while(|range| range.end == search_range.end)
- .find_map(|range| (range.start == search_range.start).then_some(range.layer_id))
- }
-
- fn goto_first_child_impl(&mut self, named: bool) -> bool {
- // Check if the current node's range is an exact injection layer range.
- if let Some(layer_id) = self
- .layer_id_of_byte_range(self.node().byte_range())
- .filter(|&layer_id| layer_id != self.current)
- {
- // Switch to the child layer.
- self.current = layer_id;
- self.cursor = self.layers[self.current].tree().root_node();
- return true;
- }
-
- let child = if named {
- self.cursor.named_child(0)
- } else {
- self.cursor.child(0)
- };
-
- if let Some(child) = child {
- // Otherwise descend in the current tree.
- self.cursor = child;
- true
- } else {
- false
- }
- }
-
- pub fn goto_first_child(&mut self) -> bool {
- self.goto_first_child_impl(false)
- }
-
- pub fn goto_first_named_child(&mut self) -> bool {
- self.goto_first_child_impl(true)
- }
-
- fn goto_next_sibling_impl(&mut self, named: bool) -> bool {
- let sibling = if named {
- self.cursor.next_named_sibling()
- } else {
- self.cursor.next_sibling()
- };
-
- if let Some(sibling) = sibling {
- self.cursor = sibling;
- true
- } else {
- false
- }
- }
-
- pub fn goto_next_sibling(&mut self) -> bool {
- self.goto_next_sibling_impl(false)
- }
-
- pub fn goto_next_named_sibling(&mut self) -> bool {
- self.goto_next_sibling_impl(true)
- }
-
- fn goto_prev_sibling_impl(&mut self, named: bool) -> bool {
- let sibling = if named {
- self.cursor.prev_named_sibling()
- } else {
- self.cursor.prev_sibling()
- };
-
- if let Some(sibling) = sibling {
- self.cursor = sibling;
- true
- } else {
- false
- }
- }
-
- pub fn goto_prev_sibling(&mut self) -> bool {
- self.goto_prev_sibling_impl(false)
- }
-
- pub fn goto_prev_named_sibling(&mut self) -> bool {
- self.goto_prev_sibling_impl(true)
- }
-
- /// Finds the injection layer that contains the given start-end range.
- fn layer_id_containing_byte_range(&self, start: usize, end: usize) -> LayerId {
- let start_idx = self
- .injection_ranges
- .partition_point(|range| range.end < end);
-
- self.injection_ranges[start_idx..]
- .iter()
- .take_while(|range| range.start < end || range.depth > 1)
- .find_map(|range| (range.start <= start).then_some(range.layer_id))
- .unwrap_or(self.root)
- }
-
- pub fn reset_to_byte_range(&mut self, start: usize, end: usize) {
- self.current = self.layer_id_containing_byte_range(start, end);
- let root = self.layers[self.current].tree().root_node();
- self.cursor = root.descendant_for_byte_range(start, end).unwrap_or(root);
- }
-
- /// Returns an iterator over the children of the node the TreeCursor is on
- /// at the time this is called.
- pub fn children(&'a mut self) -> ChildIter<'a> {
- let parent = self.node();
-
- ChildIter {
- cursor: self,
- parent,
- named: false,
- }
- }
-
- /// Returns an iterator over the named children of the node the TreeCursor is on
- /// at the time this is called.
- pub fn named_children(&'a mut self) -> ChildIter<'a> {
- let parent = self.node();
-
- ChildIter {
- cursor: self,
- parent,
- named: true,
- }
- }
-}
-
-pub struct ChildIter<'n> {
- cursor: &'n mut TreeCursor<'n>,
- parent: Node<'n>,
- named: bool,
-}
-
-impl<'n> Iterator for ChildIter<'n> {
- type Item = Node<'n>;
-
- fn next(&mut self) -> Option<Self::Item> {
- // first iteration, just visit the first child
- if self.cursor.node() == self.parent {
- self.cursor
- .goto_first_child_impl(self.named)
- .then(|| self.cursor.node())
- } else {
- self.cursor
- .goto_next_sibling_impl(self.named)
- .then(|| self.cursor.node())
- }
- }
-}
diff --git a/helix-core/src/text_annotations.rs b/helix-core/src/text_annotations.rs
index 9704c3d6..0f492b8b 100644
--- a/helix-core/src/text_annotations.rs
+++ b/helix-core/src/text_annotations.rs
@@ -5,7 +5,7 @@ use std::ops::Range;
use std::ptr::NonNull;
use crate::doc_formatter::FormattedGrapheme;
-use crate::syntax::Highlight;
+use crate::syntax::{Highlight, OverlayHighlights};
use crate::{Position, Tendril};
/// An inline annotation is continuous text shown
@@ -300,10 +300,7 @@ impl<'a> TextAnnotations<'a> {
}
}
- pub fn collect_overlay_highlights(
- &self,
- char_range: Range<usize>,
- ) -> Vec<(usize, Range<usize>)> {
+ pub fn collect_overlay_highlights(&self, char_range: Range<usize>) -> OverlayHighlights {
let mut highlights = Vec::new();
self.reset_pos(char_range.start);
for char_idx in char_range {
@@ -311,11 +308,11 @@ impl<'a> TextAnnotations<'a> {
// we don't know the number of chars the original grapheme takes
// however it doesn't matter as highlight boundaries are automatically
// aligned to grapheme boundaries in the rendering code
- highlights.push((highlight.0, char_idx..char_idx + 1))
+ highlights.push((highlight, char_idx..char_idx + 1));
}
}
- highlights
+ OverlayHighlights::Heterogenous { highlights }
}
/// Add new inline annotations.
diff --git a/helix-core/src/textobject.rs b/helix-core/src/textobject.rs
index 9015e957..008228f4 100644
--- a/helix-core/src/textobject.rs
+++ b/helix-core/src/textobject.rs
@@ -1,13 +1,12 @@
use std::fmt::Display;
use ropey::RopeSlice;
-use tree_sitter::{Node, QueryCursor};
use crate::chars::{categorize_char, char_is_whitespace, CharCategory};
use crate::graphemes::{next_grapheme_boundary, prev_grapheme_boundary};
use crate::line_ending::rope_is_line_ending;
use crate::movement::Direction;
-use crate::syntax::config::LanguageConfiguration;
+use crate::syntax;
use crate::Range;
use crate::{surround, Syntax};
@@ -260,18 +259,18 @@ pub fn textobject_treesitter(
range: Range,
textobject: TextObject,
object_name: &str,
- slice_tree: Node,
- lang_config: &LanguageConfiguration,
+ syntax: &Syntax,
+ loader: &syntax::Loader,
_count: usize,
) -> Range {
+ let root = syntax.tree().root_node();
+ let textobject_query = loader.textobject_query(syntax.root_language());
let get_range = move || -> Option<Range> {
let byte_pos = slice.char_to_byte(range.cursor(slice));
let capture_name = format!("{}.{}", object_name, textobject); // eg. function.inner
- let mut cursor = QueryCursor::new();
- let node = lang_config
- .textobject_query()?
- .capture_nodes(&capture_name, slice_tree, slice, &mut cursor)?
+ let node = textobject_query?
+ .capture_nodes(&capture_name, &root, slice)?
.filter(|node| node.byte_range().contains(&byte_pos))
.min_by_key(|node| node.byte_range().len())?;
diff --git a/helix-core/tests/indent.rs b/helix-core/tests/indent.rs
index b41b2f64..ab733f93 100644
--- a/helix-core/tests/indent.rs
+++ b/helix-core/tests/indent.rs
@@ -1,4 +1,3 @@
-use arc_swap::ArcSwap;
use helix_core::{
indent::{indent_level_for_line, treesitter_indent_for_pos, IndentStyle},
syntax::{config::Configuration, Loader},
@@ -6,7 +5,7 @@ use helix_core::{
};
use helix_stdx::rope::RopeSliceExt;
use ropey::Rope;
-use std::{ops::Range, path::PathBuf, process::Command, sync::Arc};
+use std::{ops::Range, path::PathBuf, process::Command};
#[test]
fn test_treesitter_indent_rust() {
@@ -196,17 +195,12 @@ fn test_treesitter_indent(
runtime.push("../runtime");
std::env::set_var("HELIX_RUNTIME", runtime.to_str().unwrap());
- let language_config = loader.language_config_for_scope(lang_scope).unwrap();
+ let language = loader.language_for_scope(lang_scope).unwrap();
+ let language_config = loader.language(language).config();
let indent_style = IndentStyle::from_str(&language_config.indent.as_ref().unwrap().unit);
- let highlight_config = language_config.highlight_config(&[]).unwrap();
let text = doc.slice(..);
- let syntax = Syntax::new(
- text,
- highlight_config,
- Arc::new(ArcSwap::from_pointee(loader)),
- )
- .unwrap();
- let indent_query = language_config.indent_query().unwrap();
+ let syntax = Syntax::new(text, language, &loader).unwrap();
+ let indent_query = loader.indent_query(language).unwrap();
for i in 0..doc.len_lines() {
let line = text.line(i);
diff --git a/helix-loader/Cargo.toml b/helix-loader/Cargo.toml
index 493d8b30..dcd87e3a 100644
--- a/helix-loader/Cargo.toml
+++ b/helix-loader/Cargo.toml
@@ -21,7 +21,6 @@ anyhow = "1"
serde = { version = "1.0", features = ["derive"] }
toml = "0.8"
etcetera = "0.10"
-tree-sitter.workspace = true
once_cell = "1.21"
log = "0.4"
@@ -32,5 +31,4 @@ cc = { version = "1" }
threadpool = { version = "1.0" }
tempfile.workspace = true
-[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
-libloading = "0.8"
+tree-house.workspace = true
diff --git a/helix-loader/src/grammar.rs b/helix-loader/src/grammar.rs
index 343cc9b9..362d3ba0 100644
--- a/helix-loader/src/grammar.rs
+++ b/helix-loader/src/grammar.rs
@@ -9,7 +9,7 @@ use std::{
sync::mpsc::channel,
};
use tempfile::TempPath;
-use tree_sitter::Language;
+use tree_house::tree_sitter::Grammar;
#[cfg(unix)]
const DYLIB_EXTENSION: &str = "so";
@@ -61,28 +61,21 @@ const BUILD_TARGET: &str = env!("BUILD_TARGET");
const REMOTE_NAME: &str = "origin";
#[cfg(target_arch = "wasm32")]
-pub fn get_language(name: &str) -> Result<Language> {
+pub fn get_language(name: &str) -> Result<Option<Grammar>> {
unimplemented!()
}
#[cfg(not(target_arch = "wasm32"))]
-pub fn get_language(name: &str) -> Result<Language> {
- use libloading::{Library, Symbol};
+pub fn get_language(name: &str) -> Result<Option<Grammar>> {
let mut rel_library_path = PathBuf::new().join("grammars").join(name);
rel_library_path.set_extension(DYLIB_EXTENSION);
let library_path = crate::runtime_file(&rel_library_path);
+ if !library_path.exists() {
+ return Ok(None);
+ }
- let library = unsafe { Library::new(&library_path) }
- .with_context(|| format!("Error opening dynamic library {:?}", library_path))?;
- let language_fn_name = format!("tree_sitter_{}", name.replace('-', "_"));
- let language = unsafe {
- let language_fn: Symbol<unsafe extern "C" fn() -> Language> = library
- .get(language_fn_name.as_bytes())
- .with_context(|| format!("Failed to load symbol {}", language_fn_name))?;
- language_fn()
- };
- std::mem::forget(library);
- Ok(language)
+ let grammar = unsafe { Grammar::new(name, &library_path) }?;
+ Ok(Some(grammar))
}
fn ensure_git_is_available() -> Result<()> {
diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs
index 3a2db261..64768c0d 100644
--- a/helix-term/src/commands.rs
+++ b/helix-term/src/commands.rs
@@ -3503,12 +3503,12 @@ fn insert_with_indent(cx: &mut Context, cursor_fallback: IndentFallbackPos) {
enter_insert_mode(cx);
let (view, doc) = current!(cx.editor);
+ let loader = cx.editor.syn_loader.load();
let text = doc.text().slice(..);
let contents = doc.text();
let selection = doc.selection(view.id);
- let language_config = doc.language_config();
let syntax = doc.syntax();
let tab_width = doc.tab_width();
@@ -3524,7 +3524,7 @@ fn insert_with_indent(cx: &mut Context, cursor_fallback: IndentFallbackPos) {
let line_end_index = cursor_line_start;
let indent = indent::indent_for_newline(
- language_config,
+ &loader,
syntax,
&doc.config.load().indent_heuristic,
&doc.indent_style,
@@ -3634,6 +3634,7 @@ fn open(cx: &mut Context, open: Open, comment_continuation: CommentContinuation)
enter_insert_mode(cx);
let config = cx.editor.config();
let (view, doc) = current!(cx.editor);
+ let loader = cx.editor.syn_loader.load();
let text = doc.text().slice(..);
let contents = doc.text();
@@ -3683,7 +3684,7 @@ fn open(cx: &mut Context, open: Open, comment_continuation: CommentContinuation)
let indent = match line.first_non_whitespace_char() {
Some(pos) if continue_comment_token.is_some() => line.slice(..pos).to_string(),
_ => indent::indent_for_newline(
- doc.language_config(),
+ &loader,
doc.syntax(),
&config.indent_heuristic,
&doc.indent_style,
@@ -4185,6 +4186,7 @@ pub mod insert {
pub fn insert_newline(cx: &mut Context) {
let config = cx.editor.config();
let (view, doc) = current_ref!(cx.editor);
+ let loader = cx.editor.syn_loader.load();
let text = doc.text().slice(..);
let line_ending = doc.line_ending.as_str();
@@ -4230,7 +4232,7 @@ pub mod insert {
let indent = match line.first_non_whitespace_char() {
Some(pos) if continue_comment_token.is_some() => line.slice(..pos).to_string(),
_ => indent::indent_for_newline(
- doc.language_config(),
+ &loader,
doc.syntax(),
&config.indent_heuristic,
&doc.indent_style,
@@ -5787,19 +5789,14 @@ fn goto_ts_object_impl(cx: &mut Context, object: &'static str, direction: Direct
let count = cx.count();
let motion = move |editor: &mut Editor| {
let (view, doc) = current!(editor);
- if let Some((lang_config, syntax)) = doc.language_config().zip(doc.syntax()) {
+ let loader = editor.syn_loader.load();
+ if let Some(syntax) = doc.syntax() {
let text = doc.text().slice(..);
let root = syntax.tree().root_node();
let selection = doc.selection(view.id).clone().transform(|range| {
let new_range = movement::goto_treesitter_object(
- text,
- range,
- object,
- direction,
- root,
- lang_config,
- count,
+ text, range, object, direction, &root, syntax, &loader, count,
);
if editor.mode == Mode::Select {
@@ -5887,21 +5884,15 @@ fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) {
if let Some(ch) = event.char() {
let textobject = move |editor: &mut Editor| {
let (view, doc) = current!(editor);
+ let loader = editor.syn_loader.load();
let text = doc.text().slice(..);
let textobject_treesitter = |obj_name: &str, range: Range| -> Range {
- let (lang_config, syntax) = match doc.language_config().zip(doc.syntax()) {
- Some(t) => t,
- None => return range,
+ let Some(syntax) = doc.syntax() else {
+ return range;
};
textobject::textobject_treesitter(
- text,
- range,
- objtype,
- obj_name,
- syntax.tree().root_node(),
- lang_config,
- count,
+ text, range, objtype, obj_name, syntax, &loader, count,
)
};
diff --git a/helix-term/src/commands/typed.rs b/helix-term/src/commands/typed.rs
index db2942ea..200323e1 100644
--- a/helix-term/src/commands/typed.rs
+++ b/helix-term/src/commands/typed.rs
@@ -1672,16 +1672,14 @@ fn tree_sitter_highlight_name(
_args: Args,
event: PromptEvent,
) -> anyhow::Result<()> {
- fn find_highlight_at_cursor(
- cx: &mut compositor::Context<'_>,
- ) -> Option<helix_core::syntax::Highlight> {
- use helix_core::syntax::HighlightEvent;
+ use helix_core::syntax::Highlight;
- let (view, doc) = current!(cx.editor);
+ fn find_highlight_at_cursor(editor: &Editor) -> Option<Highlight> {
+ let (view, doc) = current_ref!(editor);
let syntax = doc.syntax()?;
let text = doc.text().slice(..);
let cursor = doc.selection(view.id).primary().cursor(text);
- let byte = text.char_to_byte(cursor);
+ let byte = text.char_to_byte(cursor) as u32;
let node = syntax.descendant_for_byte_range(byte, byte)?;
// Query the same range as the one used in syntax highlighting.
let range = {
@@ -1691,25 +1689,22 @@ fn tree_sitter_highlight_name(
let last_line = text.len_lines().saturating_sub(1);
let height = view.inner_area(doc).height;
let last_visible_line = (row + height as usize).saturating_sub(1).min(last_line);
- let start = text.line_to_byte(row.min(last_line));
- let end = text.line_to_byte(last_visible_line + 1);
+ let start = text.line_to_byte(row.min(last_line)) as u32;
+ let end = text.line_to_byte(last_visible_line + 1) as u32;
start..end
};
- let mut highlight = None;
+ let loader = editor.syn_loader.load();
+ let mut highlighter = syntax.highlighter(text, &loader, range);
- for event in syntax.highlight_iter(text, Some(range), None) {
- match event.unwrap() {
- HighlightEvent::Source { start, end }
- if start == node.start_byte() && end == node.end_byte() =>
- {
- return highlight;
- }
- HighlightEvent::HighlightStart(hl) => {
- highlight = Some(hl);
- }
- _ => (),
+ while highlighter.next_event_offset() != u32::MAX {
+ let start = highlighter.next_event_offset();
+ highlighter.advance();
+ let end = highlighter.next_event_offset();
+
+ if start <= node.start_byte() && end >= node.end_byte() {
+ return highlighter.active_highlights().next_back();
}
}
@@ -1720,11 +1715,11 @@ fn tree_sitter_highlight_name(
return Ok(());
}
- let Some(highlight) = find_highlight_at_cursor(cx) else {
+ let Some(highlight) = find_highlight_at_cursor(cx.editor) else {
return Ok(());
};
- let content = cx.editor.theme.scope(highlight.0).to_string();
+ let content = cx.editor.theme.scope(highlight).to_string();
let callback = async move {
let call: job::Callback = Callback::EditorCompositor(Box::new(
@@ -2200,8 +2195,8 @@ fn tree_sitter_subtree(
if let Some(syntax) = doc.syntax() {
let primary_selection = doc.selection(view.id).primary();
let text = doc.text();
- let from = text.char_to_byte(primary_selection.from());
- let to = text.char_to_byte(primary_selection.to());
+ let from = text.char_to_byte(primary_selection.from()) as u32;
+ let to = text.char_to_byte(primary_selection.to()) as u32;
if let Some(selected_node) = syntax.descendant_for_byte_range(from, to) {
let mut contents = String::from("```tsq\n");
helix_core::syntax::pretty_print_tree(&mut contents, selected_node)?;
diff --git a/helix-term/src/ui/document.rs b/helix-term/src/ui/document.rs
index 8423ae8e..d71c47a1 100644
--- a/helix-term/src/ui/document.rs
+++ b/helix-term/src/ui/document.rs
@@ -3,8 +3,7 @@ use std::cmp::min;
use helix_core::doc_formatter::{DocumentFormatter, GraphemeSource, TextFormat};
use helix_core::graphemes::Grapheme;
use helix_core::str_utils::char_to_byte_idx;
-use helix_core::syntax::Highlight;
-use helix_core::syntax::HighlightEvent;
+use helix_core::syntax::{self, HighlightEvent, Highlighter, OverlayHighlights};
use helix_core::text_annotations::TextAnnotations;
use helix_core::{visual_offset_from_block, Position, RopeSlice};
use helix_stdx::rope::RopeSliceExt;
@@ -17,61 +16,6 @@ use tui::buffer::Buffer as Surface;
use crate::ui::text_decorations::DecorationManager;
-#[derive(Debug, PartialEq, Eq, Clone, Copy)]
-enum StyleIterKind {
- /// base highlights (usually emitted by TS), byte indices (potentially not codepoint aligned)
- BaseHighlights,
- /// overlay highlights (emitted by custom code from selections), char indices
- Overlay,
-}
-
-/// A wrapper around a HighlightIterator
-/// that merges the layered highlights to create the final text style
-/// and yields the active text style and the char_idx where the active
-/// style will have to be recomputed.
-///
-/// TODO(ropey2): hopefully one day helix and ropey will operate entirely
-/// on byte ranges and we can remove this
-struct StyleIter<'a, H: Iterator<Item = HighlightEvent>> {
- text_style: Style,
- active_highlights: Vec<Highlight>,
- highlight_iter: H,
- kind: StyleIterKind,
- text: RopeSlice<'a>,
- theme: &'a Theme,
-}
-
-impl<H: Iterator<Item = HighlightEvent>> Iterator for StyleIter<'_, H> {
- type Item = (Style, usize);
- fn next(&mut self) -> Option<(Style, usize)> {
- while let Some(event) = self.highlight_iter.next() {
- match event {
- HighlightEvent::HighlightStart(highlights) => {
- self.active_highlights.push(highlights)
- }
- HighlightEvent::HighlightEnd => {
- self.active_highlights.pop();
- }
- HighlightEvent::Source { mut end, .. } => {
- let style = self
- .active_highlights
- .iter()
- .fold(self.text_style, |acc, span| {
- acc.patch(self.theme.highlight(span.0))
- });
- if self.kind == StyleIterKind::BaseHighlights {
- // Move the end byte index to the nearest character boundary (rounding up)
- // and convert it to a character index.
- end = self.text.byte_to_char(self.text.ceil_char_boundary(end));
- }
- return Some((style, end));
- }
- }
- }
- None
- }
-}
-
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub struct LinePos {
/// Indicates whether the given visual line
@@ -90,8 +34,8 @@ pub fn render_document(
doc: &Document,
offset: ViewPosition,
doc_annotations: &TextAnnotations,
- syntax_highlight_iter: impl Iterator<Item = HighlightEvent>,
- overlay_highlight_iter: impl Iterator<Item = HighlightEvent>,
+ syntax_highlighter: Option<Highlighter<'_>>,
+ overlay_highlights: Vec<syntax::OverlayHighlights>,
theme: &Theme,
decorations: DecorationManager,
) {
@@ -108,8 +52,8 @@ pub fn render_document(
offset.anchor,
&doc.text_format(viewport.width, Some(theme)),
doc_annotations,
- syntax_highlight_iter,
- overlay_highlight_iter,
+ syntax_highlighter,
+ overlay_highlights,
theme,
decorations,
)
@@ -122,8 +66,8 @@ pub fn render_text(
anchor: usize,
text_fmt: &TextFormat,
text_annotations: &TextAnnotations,
- syntax_highlight_iter: impl Iterator<Item = HighlightEvent>,
- overlay_highlight_iter: impl Iterator<Item = HighlightEvent>,
+ syntax_highlighter: Option<Highlighter<'_>>,
+ overlay_highlights: Vec<syntax::OverlayHighlights>,
theme: &Theme,
mut decorations: DecorationManager,
) {
@@ -133,22 +77,8 @@ pub fn render_text(
let mut formatter =
DocumentFormatter::new_at_prev_checkpoint(text, text_fmt, text_annotations, anchor);
- let mut syntax_styles = StyleIter {
- text_style: renderer.text_style,
- active_highlights: Vec::with_capacity(64),
- highlight_iter: syntax_highlight_iter,
- kind: StyleIterKind::BaseHighlights,
- theme,
- text,
- };
- let mut overlay_styles = StyleIter {
- text_style: Style::default(),
- active_highlights: Vec::with_capacity(64),
- highlight_iter: overlay_highlight_iter,
- kind: StyleIterKind::Overlay,
- theme,
- text,
- };
+ let mut syntax_highlighter = SyntaxHighlighter::new(syntax_highlighter, text, theme);
+ let mut overlay_highlighter = OverlayHighlighter::new(overlay_highlights, theme);
let mut last_line_pos = LinePos {
first_visual_line: false,
@@ -158,12 +88,6 @@ pub fn render_text(
let mut last_line_end = 0;
let mut is_in_indent_area = true;
let mut last_line_indent_level = 0;
- let mut syntax_style_span = syntax_styles
- .next()
- .unwrap_or_else(|| (Style::default(), usize::MAX));
- let mut overlay_style_span = overlay_styles
- .next()
- .unwrap_or_else(|| (Style::default(), usize::MAX));
let mut reached_view_top = false;
loop {
@@ -207,21 +131,17 @@ pub fn render_text(
}
// acquire the correct grapheme style
- while grapheme.char_idx >= syntax_style_span.1 {
- syntax_style_span = syntax_styles
- .next()
- .unwrap_or((Style::default(), usize::MAX));
+ while grapheme.char_idx >= syntax_highlighter.pos {
+ syntax_highlighter.advance();
}
- while grapheme.char_idx >= overlay_style_span.1 {
- overlay_style_span = overlay_styles
- .next()
- .unwrap_or((Style::default(), usize::MAX));
+ while grapheme.char_idx >= overlay_highlighter.pos {
+ overlay_highlighter.advance();
}
let grapheme_style = if let GraphemeSource::VirtualText { highlight } = grapheme.source {
let mut style = renderer.text_style;
if let Some(highlight) = highlight {
- style = style.patch(theme.highlight(highlight.0));
+ style = style.patch(theme.highlight(highlight));
}
GraphemeStyle {
syntax_style: style,
@@ -229,8 +149,8 @@ pub fn render_text(
}
} else {
GraphemeStyle {
- syntax_style: syntax_style_span.0,
- overlay_style: overlay_style_span.0,
+ syntax_style: syntax_highlighter.style,
+ overlay_style: overlay_highlighter.style,
}
};
decorations.decorate_grapheme(renderer, &grapheme);
@@ -549,3 +469,98 @@ impl<'a> TextRenderer<'a> {
)
}
}
+
+struct SyntaxHighlighter<'h, 'r, 't> {
+ inner: Option<Highlighter<'h>>,
+ text: RopeSlice<'r>,
+ /// The character index of the next highlight event, or `usize::MAX` if the highlighter is
+ /// finished.
+ pos: usize,
+ theme: &'t Theme,
+ style: Style,
+}
+
+impl<'h, 'r, 't> SyntaxHighlighter<'h, 'r, 't> {
+ fn new(inner: Option<Highlighter<'h>>, text: RopeSlice<'r>, theme: &'t Theme) -> Self {
+ let mut highlighter = Self {
+ inner,
+ text,
+ pos: 0,
+ theme,
+ style: Style::default(),
+ };
+ highlighter.update_pos();
+ highlighter
+ }
+
+ fn update_pos(&mut self) {
+ self.pos = self
+ .inner
+ .as_ref()
+ .and_then(|highlighter| {
+ let next_byte_idx = highlighter.next_event_offset();
+ (next_byte_idx != u32::MAX).then(|| {
+ // Move the byte index to the nearest character boundary (rounding up) and
+ // convert it to a character index.
+ self.text
+ .byte_to_char(self.text.ceil_char_boundary(next_byte_idx as usize))
+ })
+ })
+ .unwrap_or(usize::MAX);
+ }
+
+ fn advance(&mut self) {
+ let Some(highlighter) = self.inner.as_mut() else {
+ return;
+ };
+
+ let (event, highlights) = highlighter.advance();
+ let base = match event {
+ HighlightEvent::Refresh => Style::default(),
+ HighlightEvent::Push => self.style,
+ };
+
+ self.style = highlights.fold(base, |acc, highlight| {
+ acc.patch(self.theme.highlight(highlight))
+ });
+ self.update_pos();
+ }
+}
+
+struct OverlayHighlighter<'t> {
+ inner: syntax::OverlayHighlighter,
+ pos: usize,
+ theme: &'t Theme,
+ style: Style,
+}
+
+impl<'t> OverlayHighlighter<'t> {
+ fn new(overlays: Vec<OverlayHighlights>, theme: &'t Theme) -> Self {
+ let inner = syntax::OverlayHighlighter::new(overlays);
+ let mut highlighter = Self {
+ inner,
+ pos: 0,
+ theme,
+ style: Style::default(),
+ };
+ highlighter.update_pos();
+ highlighter
+ }
+
+ fn update_pos(&mut self) {
+ self.pos = self.inner.next_event_offset();
+ }
+
+ fn advance(&mut self) {
+ let (event, highlights) = self.inner.advance();
+ let base = match event {
+ HighlightEvent::Refresh => Style::default(),
+ HighlightEvent::Push => self.style,
+ };
+
+ self.style = highlights.fold(base, |acc, highlight| {
+ acc.patch(self.theme.highlight(highlight))
+ });
+ self.update_pos();
+ }
+}
diff --git a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs
index 6be56574..9343d55d 100644
--- a/helix-term/src/ui/editor.rs
+++ b/helix-term/src/ui/editor.rs
@@ -17,7 +17,7 @@ use helix_core::{
diagnostic::NumberOrString,
graphemes::{next_grapheme_boundary, prev_grapheme_boundary},
movement::Direction,
- syntax::{self, HighlightEvent},
+ syntax::{self, OverlayHighlights},
text_annotations::TextAnnotations,
unicode::width::UnicodeWidthStr,
visual_offset_from_block, Change, Position, Range, Selection, Transaction,
@@ -31,7 +31,7 @@ use helix_view::{
keyboard::{KeyCode, KeyModifiers},
Document, Editor, Theme, View,
};
-use std::{mem::take, num::NonZeroUsize, path::PathBuf, rc::Rc};
+use std::{mem::take, num::NonZeroUsize, ops, path::PathBuf, rc::Rc};
use tui::{buffer::Buffer as Surface, text::Span};
@@ -87,6 +87,7 @@ impl EditorView {
let area = view.area;
let theme = &editor.theme;
let config = editor.config();
+ let loader = editor.syn_loader.load();
let view_offset = doc.view_offset(view.id);
@@ -115,51 +116,33 @@ impl EditorView {
decorations.add_decoration(line_decoration);
}
- let syntax_highlights =
- Self::doc_syntax_highlights(doc, view_offset.anchor, inner.height, theme);
+ let syntax_highlighter =
+ Self::doc_syntax_highlighter(doc, view_offset.anchor, inner.height, &loader);
+ let mut overlays = Vec::new();
- let mut overlay_highlights =
- Self::empty_highlight_iter(doc, view_offset.anchor, inner.height);
- let overlay_syntax_highlights = Self::overlay_syntax_highlights(
+ overlays.push(Self::overlay_syntax_highlights(
doc,
view_offset.anchor,
inner.height,
&text_annotations,
- );
- if !overlay_syntax_highlights.is_empty() {
- overlay_highlights =
- Box::new(syntax::merge(overlay_highlights, overlay_syntax_highlights));
- }
+ ));
- for diagnostic in Self::doc_diagnostics_highlights(doc, theme) {
- // Most of the `diagnostic` Vecs are empty most of the time. Skipping
- // a merge for any empty Vec saves a significant amount of work.
- if diagnostic.is_empty() {
- continue;
- }
- overlay_highlights = Box::new(syntax::merge(overlay_highlights, diagnostic));
- }
+ Self::doc_diagnostics_highlights_into(doc, theme, &mut overlays);
if is_focused {
if let Some(tabstops) = Self::tabstop_highlights(doc, theme) {
- overlay_highlights = Box::new(syntax::merge(overlay_highlights, tabstops));
+ overlays.push(tabstops);
}
- let highlights = syntax::merge(
- overlay_highlights,
- Self::doc_selection_highlights(
- editor.mode(),
- doc,
- view,
- theme,
- &config.cursor_shape,
- self.terminal_focused,
- ),
- );
- let focused_view_elements = Self::highlight_focused_view_elements(view, doc, theme);
- if focused_view_elements.is_empty() {
- overlay_highlights = Box::new(highlights)
- } else {
- overlay_highlights = Box::new(syntax::merge(highlights, focused_view_elements))
+ overlays.push(Self::doc_selection_highlights(
+ editor.mode(),
+ doc,
+ view,
+ theme,
+ &config.cursor_shape,
+ self.terminal_focused,
+ ));
+ if let Some(overlay) = Self::highlight_focused_view_elements(view, doc, theme) {
+ overlays.push(overlay);
}
}
@@ -207,8 +190,8 @@ impl EditorView {
doc,
view_offset,
&text_annotations,
- syntax_highlights,
- overlay_highlights,
+ syntax_highlighter,
+ overlays,
theme,
decorations,
);
@@ -287,57 +270,23 @@ impl EditorView {
start..end
}
- pub fn empty_highlight_iter(
- doc: &Document,
- anchor: usize,
- height: u16,
- ) -> Box<dyn Iterator<Item = HighlightEvent>> {
- let text = doc.text().slice(..);
- let row = text.char_to_line(anchor.min(text.len_chars()));
-
- // Calculate viewport byte ranges:
- // Saturating subs to make it inclusive zero indexing.
- let range = Self::viewport_byte_range(text, row, height);
- Box::new(
- [HighlightEvent::Source {
- start: text.byte_to_char(range.start),
- end: text.byte_to_char(range.end),
- }]
- .into_iter(),
- )
- }
-
- /// Get syntax highlights for a document in a view represented by the first line
+ /// Get the syntax highlighter for a document in a view represented by the first line
/// and column (`offset`) and the last line. This is done instead of using a view
/// directly to enable rendering syntax highlighted docs anywhere (eg. picker preview)
- pub fn doc_syntax_highlights<'doc>(
- doc: &'doc Document,
+ pub fn doc_syntax_highlighter<'editor>(
+ doc: &'editor Document,
anchor: usize,
height: u16,
- _theme: &Theme,
- ) -> Box<dyn Iterator<Item = HighlightEvent> + 'doc> {
+ loader: &'editor syntax::Loader,
+ ) -> Option<syntax::Highlighter<'editor>> {
+ let syntax = doc.syntax()?;
let text = doc.text().slice(..);
let row = text.char_to_line(anchor.min(text.len_chars()));
-
let range = Self::viewport_byte_range(text, row, height);
+ let range = range.start as u32..range.end as u32;
- match doc.syntax() {
- Some(syntax) => {
- let iter = syntax
- // TODO: range doesn't actually restrict source, just highlight range
- .highlight_iter(text.slice(..), Some(range), None)
- .map(|event| event.unwrap());
-
- Box::new(iter)
- }
- None => Box::new(
- [HighlightEvent::Source {
- start: range.start,
- end: range.end,
- }]
- .into_iter(),
- ),
- }
+ let highlighter = syntax.highlighter(text, loader, range);
+ Some(highlighter)
}
pub fn overlay_syntax_highlights(
@@ -345,7 +294,7 @@ impl EditorView {
anchor: usize,
height: u16,
text_annotations: &TextAnnotations,
- ) -> Vec<(usize, std::ops::Range<usize>)> {
+ ) -> OverlayHighlights {
let text = doc.text().slice(..);
let row = text.char_to_line(anchor.min(text.len_chars()));
@@ -356,35 +305,29 @@ impl EditorView {
}
/// Get highlight spans for document diagnostics
- pub fn doc_diagnostics_highlights(
+ pub fn doc_diagnostics_highlights_into(
doc: &Document,
theme: &Theme,
- ) -> [Vec<(usize, std::ops::Range<usize>)>; 7] {
+ overlay_highlights: &mut Vec<OverlayHighlights>,
+ ) {
use helix_core::diagnostic::{DiagnosticTag, Range, Severity};
let get_scope_of = |scope| {
theme
- .find_scope_index_exact(scope)
- // get one of the themes below as fallback values
- .or_else(|| theme.find_scope_index_exact("diagnostic"))
- .or_else(|| theme.find_scope_index_exact("ui.cursor"))
- .or_else(|| theme.find_scope_index_exact("ui.selection"))
- .expect(
- "at least one of the following scopes must be defined in the theme: `diagnostic`, `ui.cursor`, or `ui.selection`",
- )
+ .find_highlight_exact(scope)
+ // get one of the themes below as fallback values
+ .or_else(|| theme.find_highlight_exact("diagnostic"))
+ .or_else(|| theme.find_highlight_exact("ui.cursor"))
+ .or_else(|| theme.find_highlight_exact("ui.selection"))
+ .expect(
+ "at least one of the following scopes must be defined in the theme: `diagnostic`, `ui.cursor`, or `ui.selection`",
+ )
};
- // basically just queries the theme color defined in the config
- let hint = get_scope_of("diagnostic.hint");
- let info = get_scope_of("diagnostic.info");
- let warning = get_scope_of("diagnostic.warning");
- let error = get_scope_of("diagnostic.error");
- let r#default = get_scope_of("diagnostic"); // this is a bit redundant but should be fine
-
// Diagnostic tags
- let unnecessary = theme.find_scope_index_exact("diagnostic.unnecessary");
- let deprecated = theme.find_scope_index_exact("diagnostic.deprecated");
+ let unnecessary = theme.find_highlight_exact("diagnostic.unnecessary");
+ let deprecated = theme.find_highlight_exact("diagnostic.deprecated");
- let mut default_vec: Vec<(usize, std::ops::Range<usize>)> = Vec::new();
+ let mut default_vec = Vec::new();
let mut info_vec = Vec::new();
let mut hint_vec = Vec::new();
let mut warning_vec = Vec::new();
@@ -392,31 +335,30 @@ impl EditorView {
let mut unnecessary_vec = Vec::new();
let mut deprecated_vec = Vec::new();
- let push_diagnostic =
- |vec: &mut Vec<(usize, std::ops::Range<usize>)>, scope, range: Range| {
- // If any diagnostic overlaps ranges with the prior diagnostic,
- // merge the two together. Otherwise push a new span.
- match vec.last_mut() {
- Some((_, existing_range)) if range.start <= existing_range.end => {
- // This branch merges overlapping diagnostics, assuming that the current
- // diagnostic starts on range.start or later. If this assertion fails,
- // we will discard some part of `diagnostic`. This implies that
- // `doc.diagnostics()` is not sorted by `diagnostic.range`.
- debug_assert!(existing_range.start <= range.start);
- existing_range.end = range.end.max(existing_range.end)
- }
- _ => vec.push((scope, range.start..range.end)),
+ let push_diagnostic = |vec: &mut Vec<ops::Range<usize>>, range: Range| {
+ // If any diagnostic overlaps ranges with the prior diagnostic,
+ // merge the two together. Otherwise push a new span.
+ match vec.last_mut() {
+ Some(existing_range) if range.start <= existing_range.end => {
+ // This branch merges overlapping diagnostics, assuming that the current
+ // diagnostic starts on range.start or later. If this assertion fails,
+ // we will discard some part of `diagnostic`. This implies that
+ // `doc.diagnostics()` is not sorted by `diagnostic.range`.
+ debug_assert!(existing_range.start <= range.start);
+ existing_range.end = range.end.max(existing_range.end)
}
- };
+ _ => vec.push(range.start..range.end),
+ }
+ };
for diagnostic in doc.diagnostics() {
// Separate diagnostics into different Vecs by severity.
- let (vec, scope) = match diagnostic.severity {
- Some(Severity::Info) => (&mut info_vec, info),
- Some(Severity::Hint) => (&mut hint_vec, hint),
- Some(Severity::Warning) => (&mut warning_vec, warning),
- Some(Severity::Error) => (&mut error_vec, error),
- _ => (&mut default_vec, r#default),
+ let vec = match diagnostic.severity {
+ Some(Severity::Info) => &mut info_vec,
+ Some(Severity::Hint) => &mut hint_vec,
+ Some(Severity::Warning) => &mut warning_vec,
+ Some(Severity::Error) => &mut error_vec,
+ _ => &mut default_vec,
};
// If the diagnostic has tags and a non-warning/error severity, skip rendering
@@ -429,34 +371,59 @@ impl EditorView {
Some(Severity::Warning | Severity::Error)
)
{
- push_diagnostic(vec, scope, diagnostic.range);
+ push_diagnostic(vec, diagnostic.range);
}
for tag in &diagnostic.tags {
match tag {
DiagnosticTag::Unnecessary => {
- if let Some(scope) = unnecessary {
- push_diagnostic(&mut unnecessary_vec, scope, diagnostic.range)
+ if unnecessary.is_some() {
+ push_diagnostic(&mut unnecessary_vec, diagnostic.range)
}
}
DiagnosticTag::Deprecated => {
- if let Some(scope) = deprecated {
- push_diagnostic(&mut deprecated_vec, scope, diagnostic.range)
+ if deprecated.is_some() {
+ push_diagnostic(&mut deprecated_vec, diagnostic.range)
}
}
}
}
}
- [
- default_vec,
- unnecessary_vec,
- deprecated_vec,
- info_vec,
- hint_vec,
- warning_vec,
- error_vec,
- ]
+ overlay_highlights.push(OverlayHighlights::Homogeneous {
+ highlight: get_scope_of("diagnostic"),
+ ranges: default_vec,
+ });
+ if let Some(highlight) = unnecessary {
+ overlay_highlights.push(OverlayHighlights::Homogeneous {
+ highlight,
+ ranges: unnecessary_vec,
+ });
+ }
+ if let Some(highlight) = deprecated {
+ overlay_highlights.push(OverlayHighlights::Homogeneous {
+ highlight,
+ ranges: deprecated_vec,
+ });
+ }
+ overlay_highlights.extend([
+ OverlayHighlights::Homogeneous {
+ highlight: get_scope_of("diagnostic.info"),
+ ranges: info_vec,
+ },
+ OverlayHighlights::Homogeneous {
+ highlight: get_scope_of("diagnostic.hint"),
+ ranges: hint_vec,
+ },
+ OverlayHighlights::Homogeneous {
+ highlight: get_scope_of("diagnostic.warning"),
+ ranges: warning_vec,
+ },
+ OverlayHighlights::Homogeneous {
+ highlight: get_scope_of("diagnostic.error"),
+ ranges: error_vec,
+ },
+ ]);
}
/// Get highlight spans for selections in a document view.
@@ -467,7 +434,7 @@ impl EditorView {
theme: &Theme,
cursor_shape_config: &CursorShapeConfig,
is_terminal_focused: bool,
- ) -> Vec<(usize, std::ops::Range<usize>)> {
+ ) -> OverlayHighlights {
let text = doc.text().slice(..);
let selection = doc.selection(view.id);
let primary_idx = selection.primary_index();
@@ -476,34 +443,34 @@ impl EditorView {
let cursor_is_block = cursorkind == CursorKind::Block;
let selection_scope = theme
- .find_scope_index_exact("ui.selection")
+ .find_highlight_exact("ui.selection")
.expect("could not find `ui.selection` scope in the theme!");
let primary_selection_scope = theme
- .find_scope_index_exact("ui.selection.primary")
+ .find_highlight_exact("ui.selection.primary")
.unwrap_or(selection_scope);
let base_cursor_scope = theme
- .find_scope_index_exact("ui.cursor")
+ .find_highlight_exact("ui.cursor")
.unwrap_or(selection_scope);
let base_primary_cursor_scope = theme
- .find_scope_index("ui.cursor.primary")
+ .find_highlight("ui.cursor.primary")
.unwrap_or(base_cursor_scope);
let cursor_scope = match mode {
- Mode::Insert => theme.find_scope_index_exact("ui.cursor.insert"),
- Mode::Select => theme.find_scope_index_exact("ui.cursor.select"),
- Mode::Normal => theme.find_scope_index_exact("ui.cursor.normal"),
+ Mode::Insert => theme.find_highlight_exact("ui.cursor.insert"),
+ Mode::Select => theme.find_highlight_exact("ui.cursor.select"),
+ Mode::Normal => theme.find_highlight_exact("ui.cursor.normal"),
}
.unwrap_or(base_cursor_scope);
let primary_cursor_scope = match mode {
- Mode::Insert => theme.find_scope_index_exact("ui.cursor.primary.insert"),
- Mode::Select => theme.find_scope_index_exact("ui.cursor.primary.select"),
- Mode::Normal => theme.find_scope_index_exact("ui.cursor.primary.normal"),
+ Mode::Insert => theme.find_highlight_exact("ui.cursor.primary.insert"),
+ Mode::Select => theme.find_highlight_exact("ui.cursor.primary.select"),
+ Mode::Normal => theme.find_highlight_exact("ui.cursor.primary.normal"),
}
.unwrap_or(base_primary_cursor_scope);
- let mut spans: Vec<(usize, std::ops::Range<usize>)> = Vec::new();
+ let mut spans = Vec::new();
for (i, range) in selection.iter().enumerate() {
let selection_is_primary = i == primary_idx;
let (cursor_scope, selection_scope) = if selection_is_primary {
@@ -563,7 +530,7 @@ impl EditorView {
}
}
- spans
+ OverlayHighlights::Heterogenous { highlights: spans }
}
/// Render brace match, etc (meant for the focused view only)
@@ -571,41 +538,24 @@ impl EditorView {
view: &View,
doc: &Document,
theme: &Theme,
- ) -> Vec<(usize, std::ops::Range<usize>)> {
+ ) -> Option<OverlayHighlights> {
// Highlight matching braces
- if let Some(syntax) = doc.syntax() {
- let text = doc.text().slice(..);
- use helix_core::match_brackets;
- let pos = doc.selection(view.id).primary().cursor(text);
-
- if let Some(pos) =
- match_brackets::find_matching_bracket(syntax, doc.text().slice(..), pos)
- {
- // ensure col is on screen
- if let Some(highlight) = theme.find_scope_index_exact("ui.cursor.match") {
- return vec![(highlight, pos..pos + 1)];
- }
- }
- }
- Vec::new()
+ let syntax = doc.syntax()?;
+ let highlight = theme.find_highlight_exact("ui.cursor.match")?;
+ let text = doc.text().slice(..);
+ let pos = doc.selection(view.id).primary().cursor(text);
+ let pos = helix_core::match_brackets::find_matching_bracket(syntax, text, pos)?;
+ Some(OverlayHighlights::single(highlight, pos..pos + 1))
}
- pub fn tabstop_highlights(
- doc: &Document,
- theme: &Theme,
- ) -> Option<Vec<(usize, std::ops::Range<usize>)>> {
+ pub fn tabstop_highlights(doc: &Document, theme: &Theme) -> Option<OverlayHighlights> {
let snippet = doc.active_snippet.as_ref()?;
- let highlight = theme.find_scope_index_exact("tabstop")?;
- let mut highlights = Vec::new();
+ let highlight = theme.find_highlight_exact("tabstop")?;
+ let mut ranges = Vec::new();
for tabstop in snippet.tabstops() {
- highlights.extend(
- tabstop
- .ranges
- .iter()
- .map(|range| (highlight, range.start..range.end)),
- );
+ ranges.extend(tabstop.ranges.iter().map(|range| range.start..range.end));
}
- (!highlights.is_empty()).then_some(highlights)
+ Some(OverlayHighlights::Homogeneous { highlight, ranges })
}
/// Render bufferline at the top
diff --git a/helix-term/src/ui/lsp/signature_help.rs b/helix-term/src/ui/lsp/signature_help.rs
index 2dee8124..87a3eb95 100644
--- a/helix-term/src/ui/lsp/signature_help.rs
+++ b/helix-term/src/ui/lsp/signature_help.rs
@@ -1,7 +1,7 @@
use std::sync::Arc;
use arc_swap::ArcSwap;
-use helix_core::syntax;
+use helix_core::syntax::{self, OverlayHighlights};
use helix_view::graphics::{Margin, Rect, Style};
use helix_view::input::Event;
use tui::buffer::Buffer;
@@ -102,13 +102,12 @@ impl Component for SignatureHelp {
.unwrap_or_else(|| &self.signatures[0]);
let active_param_span = signature.active_param_range.map(|(start, end)| {
- vec![(
- cx.editor
- .theme
- .find_scope_index_exact("ui.selection")
- .unwrap(),
- start..end,
- )]
+ let highlight = cx
+ .editor
+ .theme
+ .find_highlight_exact("ui.selection")
+ .unwrap();
+ OverlayHighlights::single(highlight, start..end)
});
let signature = self
@@ -120,7 +119,7 @@ impl Component for SignatureHelp {
signature.signature.as_str(),
&self.language,
Some(&cx.editor.theme),
- Arc::clone(&self.config_loader),
+ &self.config_loader.load(),
active_param_span,
);
@@ -178,7 +177,7 @@ impl Component for SignatureHelp {
signature.signature.as_str(),
&self.language,
None,
- Arc::clone(&self.config_loader),
+ &self.config_loader.load(),
None,
);
let (sig_width, sig_height) =
diff --git a/helix-term/src/ui/markdown.rs b/helix-term/src/ui/markdown.rs
index fe581b5a..ae58d75e 100644
--- a/helix-term/src/ui/markdown.rs
+++ b/helix-term/src/ui/markdown.rs
@@ -10,8 +10,8 @@ use std::sync::Arc;
use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag, TagEnd};
use helix_core::{
- syntax::{self, HighlightEvent, InjectionLanguageMarker, Syntax},
- RopeSlice,
+ syntax::{self, HighlightEvent, OverlayHighlights},
+ RopeSlice, Syntax,
};
use helix_view::{
graphics::{Margin, Rect, Style},
@@ -32,8 +32,12 @@ pub fn highlighted_code_block<'a>(
text: &str,
language: &str,
theme: Option<&Theme>,
- config_loader: Arc<ArcSwap<syntax::Loader>>,
- additional_highlight_spans: Option<Vec<(usize, std::ops::Range<usize>)>>,
+ loader: &syntax::Loader,
+ // Optional overlay highlights to mix in with the syntax highlights.
+ //
+ // Note that `OverlayHighlights` is typically used with char indexing but the only caller
+ // which passes this parameter currently passes **byte indices** instead.
+ additional_highlight_spans: Option<OverlayHighlights>,
) -> Text<'a> {
let mut spans = Vec::new();
let mut lines = Vec::new();
@@ -48,67 +52,74 @@ pub fn highlighted_code_block<'a>(
};
let ropeslice = RopeSlice::from(text);
- let syntax = config_loader
- .load()
- .language_configuration_for_injection_string(&InjectionLanguageMarker::Name(
- language.into(),
- ))
- .and_then(|config| config.highlight_config(theme.scopes()))
- .and_then(|config| Syntax::new(ropeslice, config, Arc::clone(&config_loader)));
-
- let syntax = match syntax {
- Some(s) => s,
- None => return styled_multiline_text(text, code_style),
+ let Some(syntax) = loader
+ .language_for_match(RopeSlice::from(language))
+ .and_then(|lang| Syntax::new(ropeslice, lang, loader).ok())
+ else {
+ return styled_multiline_text(text, code_style);
};
- let highlight_iter = syntax
- .highlight_iter(ropeslice, None, None)
- .map(|e| e.unwrap());
- let highlight_iter: Box<dyn Iterator<Item = HighlightEvent>> =
- if let Some(spans) = additional_highlight_spans {
- Box::new(helix_core::syntax::merge(highlight_iter, spans))
- } else {
- Box::new(highlight_iter)
- };
-
- let mut highlights = Vec::new();
- for event in highlight_iter {
- match event {
- HighlightEvent::HighlightStart(span) => {
- highlights.push(span);
+ let mut syntax_highlighter = syntax.highlighter(ropeslice, loader, ..);
+ let mut syntax_highlight_stack = Vec::new();
+ let mut overlay_highlight_stack = Vec::new();
+ let mut overlay_highlighter = syntax::OverlayHighlighter::new(additional_highlight_spans);
+ let mut pos = 0;
+
+ while pos < ropeslice.len_bytes() as u32 {
+ if pos == syntax_highlighter.next_event_offset() {
+ let (event, new_highlights) = syntax_highlighter.advance();
+ if event == HighlightEvent::Refresh {
+ syntax_highlight_stack.clear();
}
- HighlightEvent::HighlightEnd => {
- highlights.pop();
+ syntax_highlight_stack.extend(new_highlights);
+ } else if pos == overlay_highlighter.next_event_offset() as u32 {
+ let (event, new_highlights) = overlay_highlighter.advance();
+ if event == HighlightEvent::Refresh {
+ overlay_highlight_stack.clear();
}
- HighlightEvent::Source { start, end } => {
- let style = highlights
- .iter()
- .fold(text_style, |acc, span| acc.patch(theme.highlight(span.0)));
-
- let mut slice = &text[start..end];
- // TODO: do we need to handle all unicode line endings
- // here, or is just '\n' okay?
- while let Some(end) = slice.find('\n') {
- // emit span up to newline
- let text = &slice[..end];
- let text = text.replace('\t', " "); // replace tabs
- let span = Span::styled(text, style);
- spans.push(span);
-
- // truncate slice to after newline
- slice = &slice[end + 1..];
-
- // make a new line
- let spans = std::mem::take(&mut spans);
- lines.push(Spans::from(spans));
- }
+ overlay_highlight_stack.extend(new_highlights)
+ }
- // if there's anything left, emit it too
- if !slice.is_empty() {
- let span = Span::styled(slice.replace('\t', " "), style);
- spans.push(span);
- }
- }
+ let start = pos;
+ pos = syntax_highlighter
+ .next_event_offset()
+ .min(overlay_highlighter.next_event_offset() as u32);
+ if pos == u32::MAX {
+ pos = ropeslice.len_bytes() as u32;
+ }
+ if pos == start {
+ continue;
+ }
+ assert!(pos > start);
+
+ let style = syntax_highlight_stack
+ .iter()
+ .chain(overlay_highlight_stack.iter())
+ .fold(text_style, |acc, highlight| {
+ acc.patch(theme.highlight(*highlight))
+ });
+
+ let mut slice = &text[start as usize..pos as usize];
+ // TODO: do we need to handle all unicode line endings
+ // here, or is just '\n' okay?
+ while let Some(end) = slice.find('\n') {
+ // emit span up to newline
+ let text = &slice[..end];
+ let text = text.replace('\t', " "); // replace tabs
+ let span = Span::styled(text, style);
+ spans.push(span);
+
+ // truncate slice to after newline
+ slice = &slice[end + 1..];
+
+ // make a new line
+ let spans = std::mem::take(&mut spans);
+ lines.push(Spans::from(spans));
+ }
+
+ if !slice.is_empty() {
+ let span = Span::styled(slice.replace('\t', " "), style);
+ spans.push(span);
}
}
@@ -286,7 +297,7 @@ impl Markdown {
&text,
language,
theme,
- Arc::clone(&self.config_loader),
+ &self.config_loader.load(),
None,
);
lines.extend(tui_text.lines.into_iter());
diff --git a/helix-term/src/ui/picker.rs b/helix-term/src/ui/picker.rs
index 5a4b3afb..7abdfce8 100644
--- a/helix-term/src/ui/picker.rs
+++ b/helix-term/src/ui/picker.rs
@@ -940,21 +940,18 @@ impl<T: 'static + Send + Sync, D: 'static + Send + Sync> Picker<T, D> {
}
}
- let syntax_highlights = EditorView::doc_syntax_highlights(
+ let loader = cx.editor.syn_loader.load();
+
+ let syntax_highlighter =
+ EditorView::doc_syntax_highlighter(doc, offset.anchor, area.height, &loader);
+ let mut overlay_highlights = Vec::new();
+
+ EditorView::doc_diagnostics_highlights_into(
doc,
- offset.anchor,
- area.height,
&cx.editor.theme,
+ &mut overlay_highlights,
);
- let mut overlay_highlights =
- EditorView::empty_highlight_iter(doc, offset.anchor, area.height);
- for spans in EditorView::doc_diagnostics_highlights(doc, &cx.editor.theme) {
- if spans.is_empty() {
- continue;
- }
- overlay_highlights = Box::new(helix_core::syntax::merge(overlay_highlights, spans));
- }
let mut decorations = DecorationManager::default();
if let Some((start, end)) = range {
@@ -984,7 +981,7 @@ impl<T: 'static + Send + Sync, D: 'static + Send + Sync> Picker<T, D> {
offset,
// TODO: compute text annotations asynchronously here (like inlay hints)
&TextAnnotations::default(),
- syntax_highlights,
+ syntax_highlighter,
overlay_highlights,
&cx.editor.theme,
decorations,
diff --git a/helix-term/src/ui/picker/handlers.rs b/helix-term/src/ui/picker/handlers.rs
index 040fffa8..9a3af9b3 100644
--- a/helix-term/src/ui/picker/handlers.rs
+++ b/helix-term/src/ui/picker/handlers.rs
@@ -70,23 +70,21 @@ impl<T: 'static + Send + Sync, D: 'static + Send + Sync> AsyncHook
return;
}
- let Some(language_config) = doc.detect_language_config(&editor.syn_loader.load())
- else {
+ let loader = editor.syn_loader.load();
+ let Some(language_config) = doc.detect_language_config(&loader) else {
return;
};
- doc.language = Some(language_config.clone());
+ let language = language_config.language();
+ doc.language = Some(language_config);
let text = doc.text().clone();
- let loader = editor.syn_loader.clone();
tokio::task::spawn_blocking(move || {
- let Some(syntax) = language_config
- .highlight_config(&loader.load().scopes())
- .and_then(|highlight_config| {
- helix_core::Syntax::new(text.slice(..), highlight_config, loader)
- })
- else {
- log::info!("highlighting picker item failed");
- return;
+ let syntax = match helix_core::Syntax::new(text.slice(..), language, &loader) {
+ Ok(syntax) => syntax,
+ Err(err) => {
+ log::info!("highlighting picker preview failed: {err}");
+ return;
+ }
};
job::dispatch_blocking(move |editor, compositor| {
diff --git a/helix-term/src/ui/prompt.rs b/helix-term/src/ui/prompt.rs
index 3c97a93c..ee5c46e7 100644
--- a/helix-term/src/ui/prompt.rs
+++ b/helix-term/src/ui/prompt.rs
@@ -529,7 +529,7 @@ impl Prompt {
&self.line,
language,
Some(&cx.editor.theme),
- loader.clone(),
+ &loader.load(),
None,
)
.into();
diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs
index 34a3df82..fb89e2e0 100644
--- a/helix-view/src/document.rs
+++ b/helix-view/src/document.rs
@@ -10,7 +10,7 @@ use helix_core::diagnostic::DiagnosticProvider;
use helix_core::doc_formatter::TextFormat;
use helix_core::encoding::Encoding;
use helix_core::snippets::{ActiveSnippet, SnippetRenderCtx};
-use helix_core::syntax::{config::LanguageServerFeature, Highlight};
+use helix_core::syntax::config::LanguageServerFeature;
use helix_core::text_annotations::{InlineAnnotation, Overlay};
use helix_event::TaskController;
use helix_lsp::util::lsp_pos_to_pos;
@@ -219,7 +219,7 @@ pub struct Document {
#[derive(Debug, Clone, Default)]
pub struct DocumentColorSwatches {
pub color_swatches: Vec<InlineAnnotation>,
- pub colors: Vec<Highlight>,
+ pub colors: Vec<syntax::Highlight>,
pub color_swatches_padding: Vec<InlineAnnotation>,
}
@@ -1141,11 +1141,13 @@ impl Document {
/// Detect the programming language based on the file type.
pub fn detect_language_config(
&self,
- config_loader: &syntax::Loader,
+ loader: &syntax::Loader,
) -> Option<Arc<syntax::config::LanguageConfiguration>> {
- config_loader
- .language_config_for_file_name(self.path.as_ref()?)
- .or_else(|| config_loader.language_config_for_shebang(self.text().slice(..)))
+ let language = loader
+ .language_for_filename(self.path.as_ref()?)
+ .or_else(|| loader.language_for_shebang(self.text().slice(..)))?;
+
+ Some(loader.language(language).config().clone())
}
/// Detect the indentation used in the file, or otherwise defaults to the language indentation
@@ -1288,17 +1290,18 @@ impl Document {
loader: &syntax::Loader,
) {
self.language = language_config;
- self.syntax = self
- .language
- .as_ref()
- .and_then(|config| config.highlight_config(&loader.scopes()))
- .and_then(|highlight_config| {
- Syntax::new(
- self.text.slice(..),
- highlight_config,
- self.syn_loader.clone(),
- )
- });
+ self.syntax = self.language.as_ref().and_then(|config| {
+ Syntax::new(self.text.slice(..), config.language(), loader)
+ .map_err(|err| {
+ // `NoRootConfig` means that there was an issue loading the language/syntax
+ // config for the root language of the document. An error must have already
+ // been logged by `LanguageData::syntax_config`.
+ if err != syntax::HighlighterError::NoRootConfig {
+ log::warn!("Error building syntax for '{}': {err}", self.display_name());
+ }
+ })
+ .ok()
+ });
}
/// Set the programming language for the file if you know the language but don't have the
@@ -1308,10 +1311,11 @@ impl Document {
language_id: &str,
loader: &syntax::Loader,
) -> anyhow::Result<()> {
- let language_config = loader
- .language_config_for_language_id(language_id)
+ let language = loader
+ .language_for_name(language_id)
.ok_or_else(|| anyhow!("invalid language id: {}", language_id))?;
- self.set_language(Some(language_config), loader);
+ let config = loader.language(language).config().clone();
+ self.set_language(Some(config), loader);
Ok(())
}
@@ -1430,14 +1434,14 @@ impl Document {
// update tree-sitter syntax tree
if let Some(syntax) = &mut self.syntax {
- // TODO: no unwrap
- let res = syntax.update(
+ let loader = self.syn_loader.load();
+ if let Err(err) = syntax.update(
old_doc.slice(..),
self.text.slice(..),
transaction.changes(),
- );
- if res.is_err() {
- log::error!("TS parser failed, disabling TS for the current buffer: {res:?}");
+ &loader,
+ ) {
+ log::error!("TS parser failed, disabling TS for the current buffer: {err}");
self.syntax = None;
}
}
@@ -2245,8 +2249,7 @@ impl Document {
viewport_width,
wrap_indicator: wrap_indicator.into_boxed_str(),
wrap_indicator_highlight: theme
- .and_then(|theme| theme.find_scope_index("ui.virtual.wrap"))
- .map(Highlight),
+ .and_then(|theme| theme.find_highlight("ui.virtual.wrap")),
soft_wrap_at_text_width,
}
}
diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs
index ab763a96..9aa073fc 100644
--- a/helix-view/src/editor.rs
+++ b/helix-view/src/editor.rs
@@ -1362,7 +1362,7 @@ impl Editor {
fn set_theme_impl(&mut self, theme: Theme, preview: ThemeAction) {
// `ui.selection` is the only scope required to be able to render a theme.
- if theme.find_scope_index_exact("ui.selection").is_none() {
+ if theme.find_highlight_exact("ui.selection").is_none() {
self.set_error("Invalid theme: `ui.selection` required");
return;
}
@@ -1516,12 +1516,12 @@ impl Editor {
if let helix_lsp::Error::ExecutableNotFound(err) = err {
// Silence by default since some language servers might just not be installed
log::debug!(
- "Language server not found for `{}` {} {}", language.scope(), lang, err,
+ "Language server not found for `{}` {} {}", language.scope, lang, err,
);
} else {
log::error!(
"Failed to initialize the language servers for `{}` - `{}` {{ {} }}",
- language.scope(),
+ language.scope,
lang,
err
);
diff --git a/helix-view/src/theme.rs b/helix-view/src/theme.rs
index af8f03bc..61d490ff 100644
--- a/helix-view/src/theme.rs
+++ b/helix-view/src/theme.rs
@@ -294,43 +294,36 @@ fn build_theme_values(
impl Theme {
/// To allow `Highlight` to represent arbitrary RGB colors without turning it into an enum,
- /// we interpret the last 3 bytes of a `Highlight` as RGB colors.
- const RGB_START: usize = (usize::MAX << (8 + 8 + 8)) - 1;
+ /// we interpret the last 256^3 numbers as RGB.
+ const RGB_START: u32 = (u32::MAX << (8 + 8 + 8)) - 1 - (u32::MAX - Highlight::MAX);
/// Interpret a Highlight with the RGB foreground
- fn decode_rgb_highlight(rgb: usize) -> Option<(u8, u8, u8)> {
- (rgb > Self::RGB_START).then(|| {
- let [b, g, r, ..] = rgb.to_ne_bytes();
+ fn decode_rgb_highlight(highlight: Highlight) -> Option<(u8, u8, u8)> {
+ (highlight.get() > Self::RGB_START).then(|| {
+ let [b, g, r, ..] = (highlight.get() + 1).to_ne_bytes();
(r, g, b)
})
}
/// Create a Highlight that represents an RGB color
pub fn rgb_highlight(r: u8, g: u8, b: u8) -> Highlight {
- Highlight(usize::from_ne_bytes([
- b,
- g,
- r,
- u8::MAX,
- u8::MAX,
- u8::MAX,
- u8::MAX,
- u8::MAX,
- ]))
+ // -1 because highlight is "non-max": u32::MAX is reserved for the null pointer
+ // optimization.
+ Highlight::new(u32::from_ne_bytes([b, g, r, u8::MAX]) - 1)
}
#[inline]
- pub fn highlight(&self, index: usize) -> Style {
- if let Some((red, green, blue)) = Self::decode_rgb_highlight(index) {
+ pub fn highlight(&self, highlight: Highlight) -> Style {
+ if let Some((red, green, blue)) = Self::decode_rgb_highlight(highlight) {
Style::new().fg(Color::Rgb(red, green, blue))
} else {
- self.highlights[index]
+ self.highlights[highlight.idx()]
}
}
#[inline]
- pub fn scope(&self, index: usize) -> &str {
- &self.scopes[index]
+ pub fn scope(&self, highlight: Highlight) -> &str {
+ &self.scopes[highlight.idx()]
}
pub fn name(&self) -> &str {
@@ -361,13 +354,16 @@ impl Theme {
&self.scopes
}
- pub fn find_scope_index_exact(&self, scope: &str) -> Option<usize> {
- self.scopes().iter().position(|s| s == scope)
+ pub fn find_highlight_exact(&self, scope: &str) -> Option<Highlight> {
+ self.scopes()
+ .iter()
+ .position(|s| s == scope)
+ .map(|idx| Highlight::new(idx as u32))
}
- pub fn find_scope_index(&self, mut scope: &str) -> Option<usize> {
+ pub fn find_highlight(&self, mut scope: &str) -> Option<Highlight> {
loop {
- if let Some(highlight) = self.find_scope_index_exact(scope) {
+ if let Some(highlight) = self.find_highlight_exact(scope) {
return Some(highlight);
}
if let Some(new_end) = scope.rfind('.') {
@@ -626,23 +622,13 @@ mod tests {
fn convert_to_and_from() {
let (r, g, b) = (0xFF, 0xFE, 0xFA);
let highlight = Theme::rgb_highlight(r, g, b);
- assert_eq!(Theme::decode_rgb_highlight(highlight.0), Some((r, g, b)));
+ assert_eq!(Theme::decode_rgb_highlight(highlight), Some((r, g, b)));
}
/// make sure we can store all the colors at the end
- /// ```
- /// FF FF FF FF FF FF FF FF
- /// xor
- /// FF FF FF FF FF 00 00 00
- /// =
- /// 00 00 00 00 00 FF FF FF
- /// ```
- ///
- /// where the ending `(FF, FF, FF)` represents `(r, g, b)`
#[test]
fn full_numeric_range() {
- assert_eq!(usize::MAX ^ Theme::RGB_START, 256_usize.pow(3));
- assert_eq!(Theme::RGB_START + 256_usize.pow(3), usize::MAX);
+ assert_eq!(Highlight::MAX - Theme::RGB_START, 256_u32.pow(3));
}
#[test]
@@ -650,30 +636,27 @@ mod tests {
// color in the middle
let (r, g, b) = (0x14, 0xAA, 0xF7);
assert_eq!(
- Theme::default().highlight(Theme::rgb_highlight(r, g, b).0),
+ Theme::default().highlight(Theme::rgb_highlight(r, g, b)),
Style::new().fg(Color::Rgb(r, g, b))
);
// pure black
let (r, g, b) = (0x00, 0x00, 0x00);
assert_eq!(
- Theme::default().highlight(Theme::rgb_highlight(r, g, b).0),
+ Theme::default().highlight(Theme::rgb_highlight(r, g, b)),
Style::new().fg(Color::Rgb(r, g, b))
);
// pure white
let (r, g, b) = (0xff, 0xff, 0xff);
assert_eq!(
- Theme::default().highlight(Theme::rgb_highlight(r, g, b).0),
+ Theme::default().highlight(Theme::rgb_highlight(r, g, b)),
Style::new().fg(Color::Rgb(r, g, b))
);
}
#[test]
- #[should_panic(
- expected = "index out of bounds: the len is 0 but the index is 18446744073692774399"
- )]
+ #[should_panic(expected = "index out of bounds: the len is 0 but the index is 4278190078")]
fn out_of_bounds() {
- let (r, g, b) = (0x00, 0x00, 0x00);
-
- Theme::default().highlight(Theme::rgb_highlight(r, g, b).0 - 1);
+ let highlight = Highlight::new(Theme::rgb_highlight(0, 0, 0).get() - 1);
+ Theme::default().highlight(highlight);
}
}
diff --git a/helix-view/src/view.rs b/helix-view/src/view.rs
index 6d237e20..aecf09a6 100644
--- a/helix-view/src/view.rs
+++ b/helix-view/src/view.rs
@@ -11,7 +11,6 @@ use crate::{
use helix_core::{
char_idx_at_visual_offset,
doc_formatter::TextFormat,
- syntax::Highlight,
text_annotations::TextAnnotations,
visual_offset_from_anchor, visual_offset_from_block, Position, RopeSlice, Selection,
Transaction,
@@ -446,9 +445,7 @@ impl View {
let mut text_annotations = TextAnnotations::default();
if let Some(labels) = doc.jump_labels.get(&self.id) {
- let style = theme
- .and_then(|t| t.find_scope_index("ui.virtual.jump-label"))
- .map(Highlight);
+ let style = theme.and_then(|t| t.find_highlight("ui.virtual.jump-label"));
text_annotations.add_overlay(labels, style);
}
@@ -461,15 +458,10 @@ impl View {
padding_after_inlay_hints,
}) = doc.inlay_hints.get(&self.id)
{
- let type_style = theme
- .and_then(|t| t.find_scope_index("ui.virtual.inlay-hint.type"))
- .map(Highlight);
- let parameter_style = theme
- .and_then(|t| t.find_scope_index("ui.virtual.inlay-hint.parameter"))
- .map(Highlight);
- let other_style = theme
- .and_then(|t| t.find_scope_index("ui.virtual.inlay-hint"))
- .map(Highlight);
+ let type_style = theme.and_then(|t| t.find_highlight("ui.virtual.inlay-hint.type"));
+ let parameter_style =
+ theme.and_then(|t| t.find_highlight("ui.virtual.inlay-hint.parameter"));
+ let other_style = theme.and_then(|t| t.find_highlight("ui.virtual.inlay-hint"));
// Overlapping annotations are ignored apart from the first so the order here is not random:
// types -> parameters -> others should hopefully be the "correct" order for most use cases,
diff --git a/xtask/src/helpers.rs b/xtask/src/helpers.rs
index d2c955bc..9bdca561 100644
--- a/xtask/src/helpers.rs
+++ b/xtask/src/helpers.rs
@@ -1,7 +1,7 @@
use std::path::{Path, PathBuf};
use crate::path;
-use helix_core::syntax::config::Configuration as LangConfig;
+use helix_core::syntax::{self, config::Configuration as LangConfig};
use helix_term::health::TsFeature;
/// Get the list of languages that support a particular tree-sitter
@@ -42,3 +42,7 @@ pub fn lang_config() -> LangConfig {
let text = std::fs::read_to_string(path::lang_config()).unwrap();
toml::from_str(&text).unwrap()
}
+
+pub fn syn_loader() -> syntax::Loader {
+ syntax::Loader::new(lang_config()).unwrap()
+}
diff --git a/xtask/src/main.rs b/xtask/src/main.rs
index d5cb6498..c6875d95 100644
--- a/xtask/src/main.rs
+++ b/xtask/src/main.rs
@@ -19,43 +19,23 @@ pub mod tasks {
}
pub fn querycheck(languages: impl Iterator<Item = String>) -> Result<(), DynError> {
- use crate::helpers::lang_config;
- use helix_core::{syntax::read_query, tree_sitter::Query};
- use helix_loader::grammar::get_language;
-
- let query_files = [
- "highlights.scm",
- "locals.scm",
- "injections.scm",
- "textobjects.scm",
- "indents.scm",
- ];
+ use helix_core::syntax::LanguageData;
let languages_to_check: HashSet<_> = languages.collect();
-
- for language in lang_config().language {
- if !languages_to_check.is_empty() && !languages_to_check.contains(&language.language_id)
+ let loader = crate::helpers::syn_loader();
+ for (_language, lang_data) in loader.languages() {
+ if !languages_to_check.is_empty()
+ && !languages_to_check.contains(&lang_data.config().language_id)
{
continue;
}
-
- let language_name = &language.language_id;
- let grammar_name = language.grammar.as_ref().unwrap_or(language_name);
- for query_file in query_files {
- let language = get_language(grammar_name);
- let query_text = read_query(language_name, query_file);
- if let Ok(lang) = language {
- if !query_text.is_empty() {
- if let Err(reason) = Query::new(&lang, &query_text) {
- return Err(format!(
- "Failed to parse {} queries for {}: {}",
- query_file, language_name, reason
- )
- .into());
- }
- }
- }
- }
+ let config = lang_data.config();
+ let Some(syntax_config) = LanguageData::compile_syntax_config(config, &loader)? else {
+ continue;
+ };
+ let grammar = syntax_config.grammar;
+ LanguageData::compile_indent_query(grammar, config)?;
+ LanguageData::compile_textobject_query(grammar, config)?;
}
println!("Query check succeeded");