Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-syntax/src/tree_sitter/parser.rs')
-rw-r--r--helix-syntax/src/tree_sitter/parser.rs200
1 files changed, 200 insertions, 0 deletions
diff --git a/helix-syntax/src/tree_sitter/parser.rs b/helix-syntax/src/tree_sitter/parser.rs
new file mode 100644
index 00000000..bcd5fa79
--- /dev/null
+++ b/helix-syntax/src/tree_sitter/parser.rs
@@ -0,0 +1,200 @@
+use std::os::raw::c_void;
+use std::panic::{catch_unwind, AssertUnwindSafe};
+use std::ptr::NonNull;
+use std::{fmt, ptr};
+
+use regex_cursor::Cursor;
+
+use crate::tree_sitter::syntax_tree::{SyntaxTree, SyntaxTreeData};
+use crate::tree_sitter::{Grammar, IntoTsInput, Point, Range, TsInput};
+
+// Opaque FFI type: it has no variants, so it is never constructed in Rust and only appears behind `NonNull<ParserData>` pointers.
+enum ParserData {}
+
+/// A stateful object that is used to produce a [`SyntaxTree`] from some
+/// source code; it wraps a non-null pointer to the opaque parser data.
+pub struct Parser {
+ ptr: NonNull<ParserData>, // obtained from `ts_parser_new` in `Parser::new`, released by `ts_parser_delete` in `Drop`
+}
+
+impl Parser {
+    /// Create a new parser.
+    ///
+    /// This only allocates the parser through `ts_parser_new`; assign a language with
+    /// [`Parser::set_language`] before parsing.
+    #[must_use]
+    pub fn new() -> Parser {
+        Parser {
+            // SAFETY: `ts_parser_new` takes no arguments and is declared to return a non-null
+            // pointer, which this struct keeps until `Drop` passes it to `ts_parser_delete`.
+            ptr: unsafe { ts_parser_new() },
+        }
+    }
+
+    /// Set the language that the parser should use for parsing.
+    ///
+    /// NOTE(review): `ts_parser_set_language` returns `false` when the grammar was generated
+    /// with an incompatible version, but that result is discarded here, so a failed assignment
+    /// goes unreported. Consider surfacing it to callers.
+    pub fn set_language(&mut self, grammar: Grammar) {
+        // SAFETY: `self.ptr` is the parser pointer created in `new`; `grammar` is assumed to be
+        // a valid language handle (TODO confirm against `Grammar`'s invariants).
+        unsafe { ts_parser_set_language(self.ptr, grammar) };
+    }
+
+    /// Set the ranges of text that the parser should include when parsing. By default, the parser
+    /// will always include entire documents. This function allows you to parse only a *portion*
+    /// of a document but still return a syntax tree whose ranges match up with the document as a
+    /// whole. You can also pass multiple disjoint ranges.
+    ///
+    /// `ranges` must be non-overlapping and sorted.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`InvalidRangesErrror`] if the parser rejects the ranges, or if there are more
+    /// ranges than fit in the `u32` count accepted by the C API.
+    pub fn set_included_ranges(&mut self, ranges: &[Range]) -> Result<(), InvalidRangesErrror> {
+        // TODO: save some memory by only storing byte ranges and converting them to TS ranges in an
+        // internal buffer here. Points are not used by TS. Alternatively we can patch the TS C code
+        // to accept a simple pair (struct with two fields) of byte positions here instead of a full
+        // tree sitter range
+
+        // The C API takes the length as `u32`. Reject an oversized slice instead of letting an
+        // `as` cast silently truncate the count and drop the trailing ranges.
+        let count = u32::try_from(ranges.len()).map_err(|_| InvalidRangesErrror)?;
+        // SAFETY: the pointer/count pair describes the live `ranges` slice; per the documentation
+        // on `ts_parser_set_included_ranges` the parser copies the data and does not keep the
+        // pointer.
+        let success = unsafe { ts_parser_set_included_ranges(self.ptr, ranges.as_ptr(), count) };
+        if success {
+            Ok(())
+        } else {
+            Err(InvalidRangesErrror)
+        }
+    }
+
+    /// Parse `input` and return the resulting syntax tree.
+    ///
+    /// `old_tree` may carry the tree of an earlier version of the document so that unchanged
+    /// parts of it can be reused. Returns `None` when `ts_parser_parse` reports a failure.
+    #[must_use]
+    pub fn parse<I: TsInput>(
+        &mut self,
+        input: impl IntoTsInput<TsInput = I>,
+        old_tree: Option<&SyntaxTree>,
+    ) -> Option<SyntaxTree> {
+        let mut input = input.into_ts_input();
+
+        // Read callback handed to tree-sitter: `payload` is the pointer to the `C` value stored
+        // in `ParserInputRaw::payload` below.
+        unsafe extern "C" fn read<C: TsInput>(
+            payload: NonNull<c_void>,
+            byte_index: u32,
+            _position: Point,
+            bytes_read: *mut u32,
+        ) -> *const u8 {
+            // Catch panics so they do not unwind into the C caller.
+            let chunk = catch_unwind(AssertUnwindSafe(move || {
+                let input: &mut C = payload.cast().as_mut();
+                let cursor = input.cursor_at(byte_index as usize);
+                let slice = cursor.chunk();
+                // NOTE(review): a chunk longer than `u32::MAX` makes this `unwrap` panic, which
+                // is caught above and reported as end of input.
+                (slice.as_ptr(), slice.len().try_into().unwrap())
+            }));
+            match chunk {
+                Ok((ptr, len)) => {
+                    *bytes_read = len;
+                    ptr
+                }
+                // A panic is reported to the parser as "zero bytes read", i.e. end of document.
+                Err(_) => {
+                    *bytes_read = 0;
+                    ptr::null()
+                }
+            }
+        }
+
+        let raw_input = ParserInputRaw {
+            payload: NonNull::from(&mut input).cast(),
+            read: read::<I>,
+            // utf8
+            encoding: 0,
+        };
+        // SAFETY: `payload` points at the local `input`, which stays alive for the whole
+        // `ts_parser_parse` call, and `read::<I>` casts it back to the same type `I`.
+        unsafe {
+            let old_tree = old_tree.map(|tree| tree.as_raw());
+            let new_tree = ts_parser_parse(self.ptr, old_tree, raw_input);
+            new_tree.map(|raw| SyntaxTree::from_raw(raw))
+        }
+    }
+}
+
+impl Default for Parser {
+    /// Build a parser exactly as [`Parser::new`] does.
+    fn default() -> Parser {
+        Parser::new()
+    }
+}
+
+// NOTE(review): these impls assert that a `Parser` may be moved to and shared between threads.
+// Within this file the pointer is only handed to tree-sitter from `&mut self` methods and from
+// `Drop`; that the C parser itself has no thread affinity is assumed here — TODO confirm.
+unsafe impl Send for Parser {}
+unsafe impl Sync for Parser {}
+impl Drop for Parser {
+    /// Hand the parser pointer to `ts_parser_delete`.
+    fn drop(&mut self) {
+        let raw = self.ptr;
+        // SAFETY: `raw` is the pointer stored when this `Parser` was built, and `drop` runs at
+        // most once, so the pointer is not passed to `ts_parser_delete` twice.
+        unsafe {
+            ts_parser_delete(raw);
+        }
+    }
+}
+
+/// An error that occurred when trying to assign invalid included ranges to a
+/// [`Parser`] with [`Parser::set_included_ranges`].
+#[derive(Debug, PartialEq, Eq)]
+pub struct InvalidRangesErrror;
+
+impl fmt::Display for InvalidRangesErrror {
+    /// Write the human-readable reason the ranges were rejected.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // No formatting arguments are involved, so write the message directly.
+        f.write_str("included ranges overlap or are not sorted")
+    }
+}
+
+// Lets the error be used wherever a `std::error::Error` is expected; it has no source error.
+impl std::error::Error for InvalidRangesErrror {}
+
+type TreeSitterReadFn = unsafe extern "C" fn( // signature of the callback stored in `ParserInputRaw::read`
+ payload: NonNull<c_void>, // the `payload` pointer of the `ParserInputRaw`, passed to every invocation
+ byte_index: u32, // byte offset at which a chunk of text is requested
+ position: Point, // (row, column) position for that offset; the callback in `Parser::parse` ignores it
+ bytes_read: *mut u32, // out-parameter: length of the returned chunk, zero signals the end of the document
+) -> *const u8; // pointer to the chunk; the parser only borrows the buffer
+
+#[repr(C)] // C layout: the struct is passed by value to the foreign `ts_parser_parse`
+#[derive(Debug)]
+pub struct ParserInputRaw { // describes how the parser reads the text to parse
+ pub payload: NonNull<c_void>, // arbitrary pointer handed back to each invocation of `read`
+ pub read: TreeSitterReadFn, // callback that returns a chunk of text at a given byte offset
+ pub encoding: u32, // text encoding indicator; `Parser::parse` passes 0 for UTF-8
+}
+
+extern "C" {
+ /// Create a new parser.
+ fn ts_parser_new() -> NonNull<ParserData>;
+ /// Delete the parser, freeing all of the memory that it used.
+ fn ts_parser_delete(parser: NonNull<ParserData>);
+ /// Set the language that the parser should use for parsing. Returns a boolean indicating
+ /// whether or not the language was successfully assigned. True means assignment
+ /// succeeded. False means there was a version mismatch: the language was generated with
+ /// an incompatible version of the Tree-sitter CLI. Check the language's version using
+ /// [`ts_language_version`] and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`]
+ /// and [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants.
+ fn ts_parser_set_language(parser: NonNull<ParserData>, language: Grammar) -> bool;
+ /// Set the ranges of text that the parser should include when parsing. By default, the parser
+ /// will always include entire documents. This function allows you to parse only a *portion*
+ /// of a document but still return a syntax tree whose ranges match up with the document as a
+ /// whole. You can also pass multiple disjoint ranges. The second and third parameters specify
+ /// the location and length of an array of ranges. The parser does *not* take ownership of
+ /// these ranges; it copies the data, so it doesn't matter how these ranges are allocated.
+ /// If `count` is zero, then the entire document will be parsed. Otherwise, the given ranges
+ /// must be ordered from earliest to latest in the document, and they must not overlap. That
+ /// is, the following must hold for all `i < count - 1`:
+ /// `ranges[i].end_byte <= ranges[i + 1].start_byte`. If this requirement is not satisfied, the
+ /// operation will fail, the ranges will not be assigned, and this function will return `false`.
+ /// On success, this function returns `true`.
+ fn ts_parser_set_included_ranges(
+ parser: NonNull<ParserData>,
+ ranges: *const Range,
+ count: u32,
+ ) -> bool;
+
+ /// Use the parser to parse some source code and create a syntax tree. If you are parsing this
+ /// document for the first time, pass `NULL` for the `old_tree` parameter. Otherwise, if you
+ /// have already parsed an earlier version of this document and the document has since been
+ /// edited, pass the previous syntax tree so that the unchanged parts of it can be reused.
+ /// This will save time and memory. For this to work correctly, you must have already edited
+ /// the old syntax tree using the [`ts_tree_edit`] function in a way that exactly matches
+ /// the source code changes. The [`TSInput`] parameter lets you specify how to read the text.
+ /// It has the following three fields: 1. [`read`]: A function to retrieve a chunk of text
+ /// at a given byte offset and (row, column) position. The function should return a pointer
+ /// to the text and write its length to the [`bytes_read`] pointer. The parser does not
+ /// take ownership of this buffer; it just borrows it until it has finished reading it. The
+ /// function should write a zero value to the [`bytes_read`] pointer to indicate the end of the
+ /// document. 2. [`payload`]: An arbitrary pointer that will be passed to each invocation of
+ /// the [`read`] function. 3. [`encoding`]: An indication of how the text is encoded. Either
+ /// `TSInputEncodingUTF8` or `TSInputEncodingUTF16`. This function returns a syntax tree
+ /// on success, and `NULL` on failure. There are three possible reasons for failure: 1. The
+ /// parser does not have a language assigned. Check for this using the [`ts_parser_language`]
+ /// function. 2. Parsing was cancelled due to a timeout that was set by an earlier call to the
+ /// [`ts_parser_set_timeout_micros`] function. You can resume parsing from where the parser
+ /// left off by calling [`ts_parser_parse`] again with the same arguments. Or you can start
+ /// parsing from scratch by first calling [`ts_parser_reset`]. 3. Parsing was cancelled using
+ /// a cancellation flag that was set by an earlier call to [`ts_parser_set_cancellation_flag`].
+ /// You can resume parsing from where the parser left off by calling [`ts_parser_parse`] again
+ /// with the same arguments. In this binding `TSInput` is declared as [`ParserInputRaw`], and
+ /// a `NULL` tree is represented as `None`.
+ fn ts_parser_parse(
+ parser: NonNull<ParserData>,
+ old_tree: Option<NonNull<SyntaxTreeData>>,
+ input: ParserInputRaw,
+ ) -> Option<NonNull<SyntaxTreeData>>;
+}