helix

Unnamed repository; edit this file 'description' to name the repository.

master 24Branches 30Tags

Clone

HTTPS

SSH

Open with VS Code

Diffstat (limited to 'helix-syntax/src/lib.rs')

-rw-r--r--

helix-syntax/src/lib.rs

342

1 files changed, 342 insertions, 0 deletions

diff --git a/helix-syntax/src/lib.rs b/helix-syntax/src/lib.rs
new file mode 100644
index 00000000..04a2d27d
--- /dev/null
+++ b/helix-syntax/src/lib.rs

@@ -0,0 +1,342 @@

+use ::ropey::RopeSlice;

+use slotmap::{DefaultKey as LayerId, HopSlotMap};

+use tree_sitter::{Node, Parser, Point, Query, QueryCursor, Range, Tree};

+use std::borrow::Cow;

+use std::cell::RefCell;

+use std::hash::{Hash, Hasher};

+use std::path::Path;

+use std::str;

+use std::sync::Arc;

+use crate::parse::LayerUpdateFlags;

+pub use crate::config::{read_query, HighlightConfiguration};

+pub use crate::ropey::RopeProvider;

+pub use merge::merge;

+pub use pretty_print::pretty_print_tree;

+pub use tree_cursor::TreeCursor;

+mod config;

+pub mod highlighter;

+mod merge;

+mod parse;

+mod pretty_print;

+mod ropey;

+mod tree_cursor;

+#[derive(Debug)]

+pub struct Syntax {

+ layers: HopSlotMap<LayerId, LanguageLayer>,

+ root: LayerId,

+impl Syntax {

+ pub fn new(

+ source: RopeSlice,

+ config: Arc<HighlightConfiguration>,

+ injection_callback: impl Fn(&InjectionLanguageMarker) -> Option<Arc<HighlightConfiguration>>,

+ ) -> Option<Self> {

+ let root_layer = LanguageLayer {

+ tree: None,

+ config,

+ depth: 0,

+ flags: LayerUpdateFlags::empty(),

+ ranges: vec![Range {

+ start_byte: 0,

+ end_byte: usize::MAX,

+ start_point: Point::new(0, 0),

+ end_point: Point::new(usize::MAX, usize::MAX),

+ }],

+ parent: None,

+ };

+ // track scope_descriptor: a Vec of scopes for item in tree

+ let mut layers = HopSlotMap::default();

+ let root = layers.insert(root_layer);

+ let mut syntax = Self { root, layers };

+ let res = syntax.update(source, Vec::new(), injection_callback);

+ if res.is_err() {

+ log::error!("TS parser failed, disabling TS for the current buffer: {res:?}");

+ return None;

+ }

+ Some(syntax)

+ }

+ pub fn tree(&self) -> &Tree {

+ self.layers[self.root].tree()

+ }

+ pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree {

+ let mut container_id = self.root;

+ for (layer_id, layer) in self.layers.iter() {

+ if layer.depth > self.layers[container_id].depth

+ && layer.contains_byte_range(start, end)

+ {

+ container_id = layer_id;

+ }

+ self.layers[container_id].tree()

+ }

+ pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Node<'_>> {

+ self.tree_for_byte_range(start, end)

+ .root_node()

+ .named_descendant_for_byte_range(start, end)

+ }

+ pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Node<'_>> {

+ self.tree_for_byte_range(start, end)

+ .root_node()

+ .descendant_for_byte_range(start, end)

+ }

+ pub fn walk(&self) -> TreeCursor<'_> {

+ TreeCursor::new(&self.layers, self.root)

+ }

+#[derive(Debug)]

+pub struct LanguageLayer {

+ // mode

+ // grammar

+ pub config: Arc<HighlightConfiguration>,

+ pub(crate) tree: Option<Tree>,

+ pub ranges: Vec<Range>,

+ pub depth: u32,

+ flags: LayerUpdateFlags,

+ parent: Option<LayerId>,

+/// This PartialEq implementation only checks if that

+/// two layers are theoretically identical (meaning they highlight the same text range with the same language).

+/// It does not check whether the layers have the same internal treesitter

+/// state.

+impl PartialEq for LanguageLayer {

+ fn eq(&self, other: &Self) -> bool {

+ self.depth == other.depth

+ && self.config.language == other.config.language

+ && self.ranges == other.ranges

+ }

+/// Hash implementation belongs to PartialEq implementation above.

+/// See its documentation for details.

+impl Hash for LanguageLayer {

+ fn hash<H: Hasher>(&self, state: &mut H) {

+ self.depth.hash(state);

+ self.config.language.hash(state);

+ self.ranges.hash(state);

+ }

+impl LanguageLayer {

+ pub fn tree(&self) -> &Tree {

+ // TODO: no unwrap

+ self.tree.as_ref().unwrap()

+ }

+ /// Whether the layer contains the given byte range.

+ ///

+ /// If the layer has multiple ranges (i.e. combined injections), the

+ /// given range is considered contained if it is within the start and

+ /// end bytes of the first and last ranges **and** if the given range

+ /// starts or ends within any of the layer's ranges.

+ fn contains_byte_range(&self, start: usize, end: usize) -> bool {

+ let layer_start = self

+ .ranges

+ .first()

+ .expect("ranges should not be empty")

+ .start_byte;

+ let layer_end = self

+ .ranges

+ .last()

+ .expect("ranges should not be empty")

+ .end_byte;

+ layer_start <= start

+ && layer_end >= end

+ && self.ranges.iter().any(|range| {

+ let byte_range = range.start_byte..range.end_byte;

+ byte_range.contains(&start) || byte_range.contains(&end)

+ })

+ }

+#[derive(Debug, Clone)]

+pub enum InjectionLanguageMarker<'a> {

+ Name(Cow<'a, str>),

+ Filename(Cow<'a, Path>),

+ Shebang(String),

+const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)"; // Regex source for `#!` lines: skips an optional path prefix (optionally followed by `env` and its `-flags`), then captures the interpreter name up to the first whitespace, `.` or digit.

+#[derive(Debug)]

+pub enum CapturedNode<'a> {

+ Single(Node<'a>),

+ /// Guaranteed to be not empty

+ Grouped(Vec<Node<'a>>),

+impl<'a> CapturedNode<'a> {

+ pub fn start_byte(&self) -> usize {

+ match self {

+ Self::Single(n) => n.start_byte(),

+ Self::Grouped(ns) => ns[0].start_byte(),

+ }

+ pub fn end_byte(&self) -> usize {

+ match self {

+ Self::Single(n) => n.end_byte(),

+ Self::Grouped(ns) => ns.last().unwrap().end_byte(),

+ }

+ pub fn byte_range(&self) -> std::ops::Range<usize> {

+ self.start_byte()..self.end_byte()

+ }

+/// The maximum number of in-progress matches a TS cursor can consider at once.

+/// This is set to a constant in order to avoid performance problems for medium to large files. It is applied to a cursor with `QueryCursor::set_match_limit`.

+/// Using such a limit means that we lose valid captures, so there is fundamentally a tradeoff here.

+///

+/// Old tree sitter versions used a limit of 32 by default until this limit was removed in version `0.19.5` (must now be set manually).

+/// However, running without a limit causes performance issues for medium to large files.

+/// In helix, this problem caused treesitter motions to take multiple seconds to complete in medium-sized rust files (3k loc).

+///

+/// Neovim also encountered this problem and reintroduced this limit after it was removed upstream

+/// (see <https://github.com/neovim/neovim/issues/14897> and <https://github.com/neovim/neovim/pull/14915>).

+/// The number used here is fundamentally a tradeoff between breaking some obscure edge cases and performance.

+///

+/// Neovim chose 64 for this value somewhat arbitrarily (<https://github.com/neovim/neovim/pull/18397>).

+/// 64 is too low for some languages though. In particular, it breaks some highlighting for record fields in Erlang record definitions.

+/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high.

+const TREE_SITTER_MATCH_LIMIT: u32 = 256;

+#[derive(Debug)]

+pub struct TextObjectQuery {

+ pub query: Query,

+impl TextObjectQuery {

+ /// Run the query on the given node and return sub nodes which match given

+ /// capture ("function.inside", "class.around", etc).

+ ///

+ /// Captures may contain multiple nodes by using quantifiers (+, *, etc),

+ /// and support for this is partial and could use improvement.

+ ///

+ /// ```query

+ /// (comment)+ @capture

+ ///

+ /// ; OR

+ /// (

+ /// (comment)*

+ /// .

+ /// (function)

+ /// ) @capture

+ /// ```

+ pub fn capture_nodes<'a>(

+ &'a self,

+ capture_name: &str,

+ node: Node<'a>,

+ slice: RopeSlice<'a>,

+ cursor: &'a mut QueryCursor,

+ ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {

+ self.capture_nodes_any(&[capture_name], node, slice, cursor)

+ }

+ /// Find the first capture that exists out of all given `capture_names`

+ /// and return sub nodes that match this capture.

+ pub fn capture_nodes_any<'a>(

+ &'a self,

+ capture_names: &[&str],

+ node: Node<'a>,

+ slice: RopeSlice<'a>,

+ cursor: &'a mut QueryCursor,

+ ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {

+ let capture_idx = capture_names

+ .iter()

+ .find_map(|cap| self.query.capture_index_for_name(cap))?;

+ cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);

+ let nodes = cursor

+ .captures(&self.query, node, RopeProvider(slice))

+ .filter_map(move |(mat, _)| {

+ let nodes: Vec<_> = mat

+ .captures

+ .iter()

+ .filter_map(|cap| (cap.index == capture_idx).then_some(cap.node))

+ .collect();

+ if nodes.len() > 1 {

+ Some(CapturedNode::Grouped(nodes))

+ } else {

+ nodes.into_iter().map(CapturedNode::Single).next()

+ }

+ });

+ Some(nodes)

+ }

+/// Represents the reason why syntax highlighting failed.

+#[derive(Debug, PartialEq, Eq)]

+pub enum Error {

+ Cancelled,

+ InvalidLanguage,

+ InvalidRanges,

+ Unknown,

+#[derive(Clone)]

+enum IncludedChildren {

+ None,

+ All,

+ Unnamed,

+impl Default for IncludedChildren {

+ fn default() -> Self {

+ Self::None

+ }

+fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<str> {

+ Cow::from(source.byte_slice(range))

+struct TsParser {

+ parser: tree_sitter::Parser,

+ pub cursors: Vec<QueryCursor>,

+// could also just use a pool, or a single instance?

+thread_local! {

+ static PARSER: RefCell<TsParser> = RefCell::new(TsParser {

+ parser: Parser::new(),

+ cursors: Vec::new(),

+ })

+pub fn with_cursor<T>(f: impl FnOnce(&mut QueryCursor) -> T) -> T {

+ PARSER.with(|parser| {

+ let mut parser = parser.borrow_mut();

+ let mut cursor = parser.cursors.pop().unwrap_or_else(QueryCursor::new);

+ let res = f(&mut cursor);

+ parser.cursors.push(cursor);

+ res

+ })