Unnamed repository; edit this file 'description' to name the repository.
feat: find closest pair using tree-sitter
woojiq 2024-04-25
parent 50c90cb · commit 81dc8e8
-rw-r--r-- helix-core/src/match_brackets.rs 98
-rw-r--r-- helix-core/src/selection.rs 2
-rw-r--r-- helix-core/src/surround.rs 111
-rw-r--r-- helix-core/src/textobject.rs 12
-rw-r--r-- helix-term/src/commands.rs 53
5 files changed, 189 insertions, 87 deletions
diff --git a/helix-core/src/match_brackets.rs b/helix-core/src/match_brackets.rs
index b8bcc28c..95d6a3dc 100644
--- a/helix-core/src/match_brackets.rs
+++ b/helix-core/src/match_brackets.rs
@@ -9,16 +9,32 @@ use crate::Syntax;
const MAX_PLAINTEXT_SCAN: usize = 10000;
const MATCH_LIMIT: usize = 16;
-// Limit matching pairs to only ( ) { } [ ] < > ' ' " "
-const PAIRS: &[(char, char)] = &[
+pub const BRACKETS: [(char, char); 7] = [
('(', ')'),
('{', '}'),
('[', ']'),
('<', '>'),
- ('\'', '\''),
- ('\"', '\"'),
+ ('«', '»'),
+ ('「', '」'),
+ ('(', ')'),
];
+// The difference between BRACKETS and PAIRS is that we can find matching
+// BRACKETS in a plain text file, but we can't do the same for PAIRS.
+// PAIRS also contains all BRACKETS.
+pub const PAIRS: [(char, char); BRACKETS.len() + 3] = {
+ let mut pairs = [(' ', ' '); BRACKETS.len() + 3];
+ let mut idx = 0;
+ while idx < BRACKETS.len() {
+ pairs[idx] = BRACKETS[idx];
+ idx += 1;
+ }
+ pairs[idx] = ('"', '"');
+ pairs[idx + 1] = ('\'', '\'');
+ pairs[idx + 2] = ('`', '`');
+ pairs
+};
+
/// Returns the position of the matching bracket under cursor.
///
/// If the cursor is on the opening bracket, the position of
@@ -30,7 +46,7 @@ const PAIRS: &[(char, char)] = &[
/// If no matching bracket is found, `None` is returned.
#[must_use]
pub fn find_matching_bracket(syntax: &Syntax, doc: RopeSlice, pos: usize) -> Option<usize> {
- if pos >= doc.len_chars() || !is_valid_bracket(doc.char(pos)) {
+ if pos >= doc.len_chars() || !is_valid_pair(doc.char(pos)) {
return None;
}
find_pair(syntax, doc, pos, false)
@@ -67,7 +83,7 @@ fn find_pair(
let (start_byte, end_byte) = surrounding_bytes(doc, &node)?;
let (start_char, end_char) = (doc.byte_to_char(start_byte), doc.byte_to_char(end_byte));
- if is_valid_pair(doc, start_char, end_char) {
+ if is_valid_pair_on_pos(doc, start_char, end_char) {
if end_byte == pos {
return Some(start_char);
}
@@ -140,14 +156,22 @@ fn find_pair(
/// If no matching bracket is found, `None` is returned.
#[must_use]
pub fn find_matching_bracket_plaintext(doc: RopeSlice, cursor_pos: usize) -> Option<usize> {
- // Don't do anything when the cursor is not on top of a bracket.
let bracket = doc.get_char(cursor_pos)?;
+ let matching_bracket = {
+ let pair = get_pair(bracket);
+ if pair.0 == bracket {
+ pair.1
+ } else {
+ pair.0
+ }
+ };
+ // Don't do anything when the cursor is not on top of a bracket.
if !is_valid_bracket(bracket) {
return None;
}
// Determine the direction of the matching.
- let is_fwd = is_forward_bracket(bracket);
+ let is_fwd = is_open_bracket(bracket);
let chars_iter = if is_fwd {
doc.chars_at(cursor_pos + 1)
} else {
@@ -159,19 +183,7 @@ pub fn find_matching_bracket_plaintext(doc: RopeSlice, cursor_pos: usize) -> Opt
for (i, candidate) in chars_iter.take(MAX_PLAINTEXT_SCAN).enumerate() {
if candidate == bracket {
open_cnt += 1;
- } else if is_valid_pair(
- doc,
- if is_fwd {
- cursor_pos
- } else {
- cursor_pos - i - 1
- },
- if is_fwd {
- cursor_pos + i + 1
- } else {
- cursor_pos
- },
- ) {
+ } else if candidate == matching_bracket {
// Return when all pending brackets have been closed.
if open_cnt == 1 {
return Some(if is_fwd {
@@ -187,15 +199,49 @@ pub fn find_matching_bracket_plaintext(doc: RopeSlice, cursor_pos: usize) -> Opt
None
}
-fn is_valid_bracket(c: char) -> bool {
- PAIRS.iter().any(|(l, r)| *l == c || *r == c)
+/// Returns the opening and closing chars of the pair containing `ch`. If `ch`
+/// is not found in [`PAIRS`], returns `(ch, ch)`.
+///
+/// ```
+/// use helix_core::match_brackets::get_pair;
+///
+/// assert_eq!(get_pair('['), ('[', ']'));
+/// assert_eq!(get_pair('}'), ('{', '}'));
+/// assert_eq!(get_pair('"'), ('"', '"'));
+/// ```
+pub fn get_pair(ch: char) -> (char, char) {
+ PAIRS
+ .iter()
+ .find(|(open, close)| *open == ch || *close == ch)
+ .copied()
+ .unwrap_or((ch, ch))
+}
+
+pub fn is_open_bracket(ch: char) -> bool {
+ BRACKETS.iter().any(|(l, _)| *l == ch)
+}
+
+pub fn is_close_bracket(ch: char) -> bool {
+ BRACKETS.iter().any(|(_, r)| *r == ch)
+}
+
+pub fn is_valid_bracket(ch: char) -> bool {
+ BRACKETS.iter().any(|(l, r)| *l == ch || *r == ch)
+}
+
+pub fn is_open_pair(ch: char) -> bool {
+ PAIRS.iter().any(|(l, _)| *l == ch)
+}
+
+pub fn is_close_pair(ch: char) -> bool {
+ PAIRS.iter().any(|(_, r)| *r == ch)
}
-fn is_forward_bracket(c: char) -> bool {
- PAIRS.iter().any(|(l, _)| *l == c)
+pub fn is_valid_pair(ch: char) -> bool {
+ PAIRS.iter().any(|(l, r)| *l == ch || *r == ch)
}
-fn is_valid_pair(doc: RopeSlice, start_char: usize, end_char: usize) -> bool {
+fn is_valid_pair_on_pos(doc: RopeSlice, start_char: usize, end_char: usize) -> bool {
PAIRS.contains(&(doc.char(start_char), doc.char(end_char)))
}
diff --git a/helix-core/src/selection.rs b/helix-core/src/selection.rs
index 65261287..48eaf289 100644
--- a/helix-core/src/selection.rs
+++ b/helix-core/src/selection.rs
@@ -122,7 +122,7 @@ impl Range {
}
/// `Direction::Backward` when head < anchor.
- /// `Direction::Backward` otherwise.
+ /// `Direction::Forward` otherwise.
#[inline]
#[must_use]
pub fn direction(&self) -> Direction {
diff --git a/helix-core/src/surround.rs b/helix-core/src/surround.rs
index ed976488..879c2adf 100644
--- a/helix-core/src/surround.rs
+++ b/helix-core/src/surround.rs
@@ -1,18 +1,16 @@
use std::fmt::Display;
-use crate::{movement::Direction, search, Range, Selection};
+use crate::{
+ graphemes::next_grapheme_boundary,
+ match_brackets::{
+ find_matching_bracket, find_matching_bracket_fuzzy, get_pair, is_close_bracket,
+ is_open_bracket,
+ },
+ movement::Direction,
+ search, Range, Selection, Syntax,
+};
use ropey::RopeSlice;
-pub const PAIRS: &[(char, char)] = &[
- ('(', ')'),
- ('[', ']'),
- ('{', '}'),
- ('<', '>'),
- ('«', '»'),
- ('「', '」'),
- ('(', ')'),
-];
-
#[derive(Debug, PartialEq, Eq)]
pub enum Error {
PairNotFound,
@@ -34,32 +32,68 @@ impl Display for Error {
type Result<T> = std::result::Result<T, Error>;
-/// Given any char in [PAIRS], return the open and closing chars. If not found in
-/// [PAIRS] return (ch, ch).
+/// Finds the position of surround pairs of any [`crate::match_brackets::PAIRS`]
+/// using tree-sitter when possible.
///
-/// ```
-/// use helix_core::surround::get_pair;
+/// # Returns
///
-/// assert_eq!(get_pair('['), ('[', ']'));
-/// assert_eq!(get_pair('}'), ('{', '}'));
-/// assert_eq!(get_pair('"'), ('"', '"'));
-/// ```
-pub fn get_pair(ch: char) -> (char, char) {
- PAIRS
- .iter()
- .find(|(open, close)| *open == ch || *close == ch)
- .copied()
- .unwrap_or((ch, ch))
+/// Tuple `(anchor, head)`, meaning the two positions are not always in ascending order.
+pub fn find_nth_closest_pairs_pos(
+ syntax: Option<&Syntax>,
+ text: RopeSlice,
+ range: Range,
+ skip: usize,
+) -> Result<(usize, usize)> {
+ match syntax {
+ Some(syntax) => find_nth_closest_pairs_ts(syntax, text, range, skip),
+ None => find_nth_closest_pairs_plain(text, range, skip),
+ }
}
-pub fn find_nth_closest_pairs_pos(
+fn find_nth_closest_pairs_ts(
+ syntax: &Syntax,
text: RopeSlice,
range: Range,
mut skip: usize,
) -> Result<(usize, usize)> {
- let is_open_pair = |ch| PAIRS.iter().any(|(open, _)| *open == ch);
- let is_close_pair = |ch| PAIRS.iter().any(|(_, close)| *close == ch);
+ let mut opening = range.from();
+ // We want to expand the selection if we are already on the found pair,
+ // otherwise we would need to subtract 1 from "range.to()".
+ let mut closing = range.to();
+
+ while skip > 0 {
+ closing = find_matching_bracket_fuzzy(syntax, text, closing).ok_or(Error::PairNotFound)?;
+ opening = find_matching_bracket(syntax, text, closing).ok_or(Error::PairNotFound)?;
+ // If we're already on a closing bracket "find_matching_bracket_fuzzy" will return
+ // the position of the opening bracket.
+ if closing < opening {
+ (opening, closing) = (closing, opening);
+ }
+
+ // In case found brackets are partially inside current selection.
+ if range.from() < opening || closing < range.to() - 1 {
+ closing = next_grapheme_boundary(text, closing);
+ } else {
+ skip -= 1;
+ if skip != 0 {
+ closing = next_grapheme_boundary(text, closing);
+ }
+ }
+ }
+
+ // Keep the original direction.
+ if let Direction::Forward = range.direction() {
+ Ok((opening, closing))
+ } else {
+ Ok((closing, opening))
+ }
+}
+fn find_nth_closest_pairs_plain(
+ text: RopeSlice,
+ range: Range,
+ mut skip: usize,
+) -> Result<(usize, usize)> {
let mut stack = Vec::with_capacity(2);
let pos = range.from();
let mut close_pos = pos.saturating_sub(1);
@@ -67,7 +101,7 @@ pub fn find_nth_closest_pairs_pos(
for ch in text.chars_at(pos) {
close_pos += 1;
- if is_open_pair(ch) {
+ if is_open_bracket(ch) {
// Track open pairs encountered so that we can step over
// the corresponding close pairs that will come up further
// down the loop. We want to find a lone close pair whose
@@ -76,7 +110,7 @@ pub fn find_nth_closest_pairs_pos(
continue;
}
- if !is_close_pair(ch) {
+ if !is_close_bracket(ch) {
// We don't care if this character isn't a brace pair item,
// so short circuit here.
continue;
@@ -157,7 +191,11 @@ pub fn find_nth_pairs_pos(
)
};
- Option::zip(open, close).ok_or(Error::PairNotFound)
+ // preserve original direction
+ match range.direction() {
+ Direction::Forward => Option::zip(open, close).ok_or(Error::PairNotFound),
+ Direction::Backward => Option::zip(close, open).ok_or(Error::PairNotFound),
+ }
}
fn find_nth_open_pair(
@@ -249,6 +287,7 @@ fn find_nth_close_pair(
/// are automatically detected around each cursor (note that this may result
/// in them selecting different surround characters for each selection).
pub fn get_surround_pos(
+ syntax: Option<&Syntax>,
text: RopeSlice,
selection: &Selection,
ch: Option<char>,
@@ -257,9 +296,13 @@ pub fn get_surround_pos(
let mut change_pos = Vec::new();
for &range in selection {
- let (open_pos, close_pos) = match ch {
- Some(ch) => find_nth_pairs_pos(text, ch, range, skip)?,
- None => find_nth_closest_pairs_pos(text, range, skip)?,
+ let (open_pos, close_pos) = {
+ let range_raw = match ch {
+ Some(ch) => find_nth_pairs_pos(text, ch, range, skip)?,
+ None => find_nth_closest_pairs_pos(syntax, text, range, skip)?,
+ };
+ let range = Range::new(range_raw.0, range_raw.1);
+ (range.from(), range.to())
};
if change_pos.contains(&open_pos) || change_pos.contains(&close_pos) {
return Err(Error::CursorOverlap);
diff --git a/helix-core/src/textobject.rs b/helix-core/src/textobject.rs
index bf00a458..41230126 100644
--- a/helix-core/src/textobject.rs
+++ b/helix-core/src/textobject.rs
@@ -7,9 +7,9 @@ use crate::chars::{categorize_char, char_is_whitespace, CharCategory};
use crate::graphemes::{next_grapheme_boundary, prev_grapheme_boundary};
use crate::line_ending::rope_is_line_ending;
use crate::movement::Direction;
-use crate::surround;
use crate::syntax::LanguageConfiguration;
use crate::Range;
+use crate::{surround, Syntax};
fn find_word_boundary(slice: RopeSlice, mut pos: usize, direction: Direction, long: bool) -> usize {
use CharCategory::{Eol, Whitespace};
@@ -199,25 +199,28 @@ pub fn textobject_paragraph(
}
pub fn textobject_pair_surround(
+ syntax: Option<&Syntax>,
slice: RopeSlice,
range: Range,
textobject: TextObject,
ch: char,
count: usize,
) -> Range {
- textobject_pair_surround_impl(slice, range, textobject, Some(ch), count)
+ textobject_pair_surround_impl(syntax, slice, range, textobject, Some(ch), count)
}
pub fn textobject_pair_surround_closest(
+ syntax: Option<&Syntax>,
slice: RopeSlice,
range: Range,
textobject: TextObject,
count: usize,
) -> Range {
- textobject_pair_surround_impl(slice, range, textobject, None, count)
+ textobject_pair_surround_impl(syntax, slice, range, textobject, None, count)
}
fn textobject_pair_surround_impl(
+ syntax: Option<&Syntax>,
slice: RopeSlice,
range: Range,
textobject: TextObject,
@@ -226,8 +229,7 @@ fn textobject_pair_surround_impl(
) -> Range {
let pair_pos = match ch {
Some(ch) => surround::find_nth_pairs_pos(slice, ch, range, count),
- // Automatically find the closest surround pairs
- None => surround::find_nth_closest_pairs_pos(slice, range, count),
+ None => surround::find_nth_closest_pairs_pos(syntax, slice, range, count),
};
pair_pos
.map(|(anchor, head)| match textobject {
diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs
index cc7b84c4..8610a204 100644
--- a/helix-term/src/commands.rs
+++ b/helix-term/src/commands.rs
@@ -5409,13 +5409,22 @@ fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) {
'e' => textobject_treesitter("entry", range),
'p' => textobject::textobject_paragraph(text, range, objtype, count),
'm' => textobject::textobject_pair_surround_closest(
- text, range, objtype, count,
+ doc.syntax(),
+ text,
+ range,
+ objtype,
+ count,
),
'g' => textobject_change(range),
// TODO: cancel new ranges if inconsistent surround matches across lines
- ch if !ch.is_ascii_alphanumeric() => {
- textobject::textobject_pair_surround(text, range, objtype, ch, count)
- }
+ ch if !ch.is_ascii_alphanumeric() => textobject::textobject_pair_surround(
+ doc.syntax(),
+ text,
+ range,
+ objtype,
+ ch,
+ count,
+ ),
_ => range,
}
});
@@ -5440,7 +5449,7 @@ fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) {
("c", "Comment (tree-sitter)"),
("T", "Test (tree-sitter)"),
("e", "Data structure entry (tree-sitter)"),
- ("m", "Closest surrounding pair"),
+ ("m", "Closest surrounding pair (tree-sitter)"),
("g", "Change"),
(" ", "... or any character acting as a pair"),
];
@@ -5454,7 +5463,7 @@ fn surround_add(cx: &mut Context) {
// surround_len is the number of new characters being added.
let (open, close, surround_len) = match event.char() {
Some(ch) => {
- let (o, c) = surround::get_pair(ch);
+ let (o, c) = match_brackets::get_pair(ch);
let mut open = Tendril::new();
open.push(o);
let mut close = Tendril::new();
@@ -5505,13 +5514,14 @@ fn surround_replace(cx: &mut Context) {
let text = doc.text().slice(..);
let selection = doc.selection(view.id);
- let change_pos = match surround::get_surround_pos(text, selection, surround_ch, count) {
- Ok(c) => c,
- Err(err) => {
- cx.editor.set_error(err.to_string());
- return;
- }
- };
+ let change_pos =
+ match surround::get_surround_pos(doc.syntax(), text, selection, surround_ch, count) {
+ Ok(c) => c,
+ Err(err) => {
+ cx.editor.set_error(err.to_string());
+ return;
+ }
+ };
let selection = selection.clone();
let ranges: SmallVec<[Range; 1]> = change_pos.iter().map(|&p| Range::point(p)).collect();
@@ -5526,7 +5536,7 @@ fn surround_replace(cx: &mut Context) {
Some(to) => to,
None => return doc.set_selection(view.id, selection),
};
- let (open, close) = surround::get_pair(to);
+ let (open, close) = match_brackets::get_pair(to);
// the changeset has to be sorted to allow nested surrounds
let mut sorted_pos: Vec<(usize, char)> = Vec::new();
@@ -5563,13 +5573,14 @@ fn surround_delete(cx: &mut Context) {
let text = doc.text().slice(..);
let selection = doc.selection(view.id);
- let mut change_pos = match surround::get_surround_pos(text, selection, surround_ch, count) {
- Ok(c) => c,
- Err(err) => {
- cx.editor.set_error(err.to_string());
- return;
- }
- };
+ let mut change_pos =
+ match surround::get_surround_pos(doc.syntax(), text, selection, surround_ch, count) {
+ Ok(c) => c,
+ Err(err) => {
+ cx.editor.set_error(err.to_string());
+ return;
+ }
+ };
change_pos.sort_unstable(); // the changeset has to be sorted to allow nested surrounds
let transaction =
Transaction::change(doc.text(), change_pos.into_iter().map(|p| (p, p + 1, None)));