Unnamed repository; edit this file 'description' to name the repository.
Move rope grapheme iterators from core to stdx
Michael Davis 2025-01-27
parent 51832b0 · commit 20151a5
-rw-r--r-- helix-core/src/doc_formatter.rs 6
-rw-r--r-- helix-core/src/graphemes.rs 158
-rw-r--r-- helix-core/src/indent.rs 4
-rw-r--r-- helix-core/src/lib.rs 1
-rw-r--r-- helix-core/src/position.rs 12
-rw-r--r-- helix-core/src/selection.rs 4
-rw-r--r-- helix-stdx/src/rope.rs 179
-rw-r--r-- helix-term/src/commands.rs 24
8 files changed, 210 insertions, 178 deletions
diff --git a/helix-core/src/doc_formatter.rs b/helix-core/src/doc_formatter.rs
index cbc884d9..d7470942 100644
--- a/helix-core/src/doc_formatter.rs
+++ b/helix-core/src/doc_formatter.rs
@@ -19,10 +19,12 @@ mod test;
use unicode_segmentation::{Graphemes, UnicodeSegmentation};
+use helix_stdx::rope::{RopeGraphemes, RopeSliceExt};
+
use crate::graphemes::{Grapheme, GraphemeStr};
use crate::syntax::Highlight;
use crate::text_annotations::TextAnnotations;
-use crate::{Position, RopeGraphemes, RopeSlice};
+use crate::{Position, RopeSlice};
/// TODO make Highlight a u32 to reduce the size of this enum to a single word.
#[derive(Debug, Clone, Copy)]
@@ -219,7 +221,7 @@ impl<'t> DocumentFormatter<'t> {
text_fmt,
annotations,
visual_pos: Position { row: 0, col: 0 },
- graphemes: RopeGraphemes::new(text.slice(block_char_idx..)),
+ graphemes: text.slice(block_char_idx..).graphemes(),
char_pos: block_char_idx,
exhausted: false,
indent_level: None,
diff --git a/helix-core/src/graphemes.rs b/helix-core/src/graphemes.rs
index 98dfa365..e6adeee9 100644
--- a/helix-core/src/graphemes.rs
+++ b/helix-core/src/graphemes.rs
@@ -1,7 +1,7 @@
//! Utility functions to traverse the unicode graphemes of a `Rope`'s text contents.
//!
//! Based on <https://github.com/cessen/led/blob/c4fa72405f510b7fd16052f90a598c429b3104a6/src/graphemes.rs>
-use ropey::{iter::Chunks, str_utils::byte_to_char_idx, RopeSlice};
+use ropey::{str_utils::byte_to_char_idx, RopeSlice};
use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
use unicode_width::UnicodeWidthStr;
@@ -270,162 +270,6 @@ pub fn is_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> bool {
}
}
-/// An iterator over the graphemes of a `RopeSlice`.
-#[derive(Clone)]
-pub struct RopeGraphemes<'a> {
- text: RopeSlice<'a>,
- chunks: Chunks<'a>,
- cur_chunk: &'a str,
- cur_chunk_start: usize,
- cursor: GraphemeCursor,
-}
-
-impl fmt::Debug for RopeGraphemes<'_> {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- f.debug_struct("RopeGraphemes")
- .field("text", &self.text)
- .field("chunks", &self.chunks)
- .field("cur_chunk", &self.cur_chunk)
- .field("cur_chunk_start", &self.cur_chunk_start)
- // .field("cursor", &self.cursor)
- .finish()
- }
-}
-
-impl RopeGraphemes<'_> {
- #[must_use]
- pub fn new(slice: RopeSlice) -> RopeGraphemes {
- let mut chunks = slice.chunks();
- let first_chunk = chunks.next().unwrap_or("");
- RopeGraphemes {
- text: slice,
- chunks,
- cur_chunk: first_chunk,
- cur_chunk_start: 0,
- cursor: GraphemeCursor::new(0, slice.len_bytes(), true),
- }
- }
-}
-
-impl<'a> Iterator for RopeGraphemes<'a> {
- type Item = RopeSlice<'a>;
-
- fn next(&mut self) -> Option<RopeSlice<'a>> {
- let a = self.cursor.cur_cursor();
- let b;
- loop {
- match self
- .cursor
- .next_boundary(self.cur_chunk, self.cur_chunk_start)
- {
- Ok(None) => {
- return None;
- }
- Ok(Some(n)) => {
- b = n;
- break;
- }
- Err(GraphemeIncomplete::NextChunk) => {
- self.cur_chunk_start += self.cur_chunk.len();
- self.cur_chunk = self.chunks.next().unwrap_or("");
- }
- Err(GraphemeIncomplete::PreContext(idx)) => {
- let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
- self.cursor.provide_context(chunk, byte_idx);
- }
- _ => unreachable!(),
- }
- }
-
- if a < self.cur_chunk_start {
- Some(self.text.byte_slice(a..b))
- } else {
- let a2 = a - self.cur_chunk_start;
- let b2 = b - self.cur_chunk_start;
- Some((&self.cur_chunk[a2..b2]).into())
- }
- }
-}
-
-/// An iterator over the graphemes of a `RopeSlice` in reverse.
-#[derive(Clone)]
-pub struct RevRopeGraphemes<'a> {
- text: RopeSlice<'a>,
- chunks: Chunks<'a>,
- cur_chunk: &'a str,
- cur_chunk_start: usize,
- cursor: GraphemeCursor,
-}
-
-impl fmt::Debug for RevRopeGraphemes<'_> {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- f.debug_struct("RevRopeGraphemes")
- .field("text", &self.text)
- .field("chunks", &self.chunks)
- .field("cur_chunk", &self.cur_chunk)
- .field("cur_chunk_start", &self.cur_chunk_start)
- // .field("cursor", &self.cursor)
- .finish()
- }
-}
-
-impl RevRopeGraphemes<'_> {
- #[must_use]
- pub fn new(slice: RopeSlice) -> RevRopeGraphemes {
- let (mut chunks, mut cur_chunk_start, _, _) = slice.chunks_at_byte(slice.len_bytes());
- chunks.reverse();
- let first_chunk = chunks.next().unwrap_or("");
- cur_chunk_start -= first_chunk.len();
- RevRopeGraphemes {
- text: slice,
- chunks,
- cur_chunk: first_chunk,
- cur_chunk_start,
- cursor: GraphemeCursor::new(slice.len_bytes(), slice.len_bytes(), true),
- }
- }
-}
-
-impl<'a> Iterator for RevRopeGraphemes<'a> {
- type Item = RopeSlice<'a>;
-
- fn next(&mut self) -> Option<RopeSlice<'a>> {
- let a = self.cursor.cur_cursor();
- let b;
- loop {
- match self
- .cursor
- .prev_boundary(self.cur_chunk, self.cur_chunk_start)
- {
- Ok(None) => {
- return None;
- }
- Ok(Some(n)) => {
- b = n;
- break;
- }
- Err(GraphemeIncomplete::PrevChunk) => {
- self.cur_chunk = self.chunks.next().unwrap_or("");
- self.cur_chunk_start -= self.cur_chunk.len();
- }
- Err(GraphemeIncomplete::PreContext(idx)) => {
- let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
- self.cursor.provide_context(chunk, byte_idx);
- }
- _ => unreachable!(),
- }
- }
-
- if a >= self.cur_chunk_start + self.cur_chunk.len() {
- Some(self.text.byte_slice(b..a))
- } else {
- let a2 = a - self.cur_chunk_start;
- let b2 = b - self.cur_chunk_start;
- Some((&self.cur_chunk[b2..a2]).into())
- }
- }
-}
-
/// A highly compressed Cow<'a, str> that holds
/// atmost u31::MAX bytes and is readonly
pub struct GraphemeStr<'a> {
diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs
index 93eb6ce2..04ce9a28 100644
--- a/helix-core/src/indent.rs
+++ b/helix-core/src/indent.rs
@@ -8,7 +8,7 @@ use crate::{
graphemes::{grapheme_width, tab_width_at},
syntax::{IndentationHeuristic, LanguageConfiguration, RopeProvider, Syntax},
tree_sitter::Node,
- Position, Rope, RopeGraphemes, RopeSlice, Tendril,
+ Position, Rope, RopeSlice, Tendril,
};
/// Enum representing indentation style.
@@ -200,7 +200,7 @@ pub fn indent_level_for_line(line: RopeSlice, tab_width: usize, indent_width: us
/// Create a string of tabs & spaces that has the same visual width as the given RopeSlice (independent of the tab width).
fn whitespace_with_same_width(text: RopeSlice) -> String {
let mut s = String::new();
- for grapheme in RopeGraphemes::new(text) {
+ for grapheme in text.graphemes() {
if grapheme == "\t" {
s.push('\t');
} else {
diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs
index 2bf75f69..89c960ed 100644
--- a/helix-core/src/lib.rs
+++ b/helix-core/src/lib.rs
@@ -54,7 +54,6 @@ pub type Tendril = SmartString<smartstring::LazyCompact>;
#[doc(inline)]
pub use {regex, tree_sitter};
-pub use graphemes::RopeGraphemes;
pub use position::{
char_idx_at_visual_offset, coords_at_pos, pos_at_coords, softwrapped_dimensions,
visual_offset_from_anchor, visual_offset_from_block, Position, VisualOffsetError,
diff --git a/helix-core/src/position.rs b/helix-core/src/position.rs
index 1b378911..cea0b607 100644
--- a/helix-core/src/position.rs
+++ b/helix-core/src/position.rs
@@ -4,10 +4,12 @@ use std::{
ops::{Add, AddAssign, Sub, SubAssign},
};
+use helix_stdx::rope::RopeSliceExt;
+
use crate::{
chars::char_is_line_ending,
doc_formatter::{DocumentFormatter, TextFormat},
- graphemes::{ensure_grapheme_boundary_prev, grapheme_width, RopeGraphemes},
+ graphemes::{ensure_grapheme_boundary_prev, grapheme_width},
line_ending::line_end_char_index,
text_annotations::TextAnnotations,
RopeSlice,
@@ -101,7 +103,7 @@ pub fn coords_at_pos(text: RopeSlice, pos: usize) -> Position {
let line_start = text.line_to_char(line);
let pos = ensure_grapheme_boundary_prev(text, pos);
- let col = RopeGraphemes::new(text.slice(line_start..pos)).count();
+ let col = text.slice(line_start..pos).graphemes().count();
Position::new(line, col)
}
@@ -126,7 +128,7 @@ pub fn visual_coords_at_pos(text: RopeSlice, pos: usize, tab_width: usize) -> Po
let mut col = 0;
- for grapheme in RopeGraphemes::new(text.slice(line_start..pos)) {
+ for grapheme in text.slice(line_start..pos).graphemes() {
if grapheme == "\t" {
col += tab_width - (col % tab_width);
} else {
@@ -275,7 +277,7 @@ pub fn pos_at_coords(text: RopeSlice, coords: Position, limit_before_line_ending
};
let mut col_char_offset = 0;
- for (i, g) in RopeGraphemes::new(text.slice(line_start..line_end)).enumerate() {
+ for (i, g) in text.slice(line_start..line_end).graphemes().enumerate() {
if i == col {
break;
}
@@ -306,7 +308,7 @@ pub fn pos_at_visual_coords(text: RopeSlice, coords: Position, tab_width: usize)
let mut col_char_offset = 0;
let mut cols_remaining = col;
- for grapheme in RopeGraphemes::new(text.slice(line_start..line_end)) {
+ for grapheme in text.slice(line_start..line_end).graphemes() {
let grapheme_width = if grapheme == "\t" {
tab_width - ((col - cols_remaining) % tab_width)
} else {
diff --git a/helix-core/src/selection.rs b/helix-core/src/selection.rs
index a134a06e..1db2d619 100644
--- a/helix-core/src/selection.rs
+++ b/helix-core/src/selection.rs
@@ -9,7 +9,7 @@ use crate::{
},
line_ending::get_line_ending,
movement::Direction,
- Assoc, ChangeSet, RopeGraphemes, RopeSlice,
+ Assoc, ChangeSet, RopeSlice,
};
use helix_stdx::range::is_subset;
use helix_stdx::rope::{self, RopeSliceExt};
@@ -379,7 +379,7 @@ impl Range {
/// Returns true if this Range covers a single grapheme in the given text
pub fn is_single_grapheme(&self, doc: RopeSlice) -> bool {
- let mut graphemes = RopeGraphemes::new(doc.slice(self.from()..self.to()));
+ let mut graphemes = doc.slice(self.from()..self.to()).graphemes();
let first = graphemes.next();
let second = graphemes.next();
first.is_some() && second.is_none()
diff --git a/helix-stdx/src/rope.rs b/helix-stdx/src/rope.rs
index eac1450b..9fc348f5 100644
--- a/helix-stdx/src/rope.rs
+++ b/helix-stdx/src/rope.rs
@@ -1,8 +1,10 @@
+use std::fmt;
use std::ops::{Bound, RangeBounds};
pub use regex_cursor::engines::meta::{Builder as RegexBuilder, Regex};
pub use regex_cursor::regex_automata::util::syntax::Config;
use regex_cursor::{Input as RegexInput, RopeyCursor};
+use ropey::iter::Chunks;
use ropey::RopeSlice;
use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
@@ -122,6 +124,33 @@ pub trait RopeSliceExt<'a>: Sized {
/// ```
#[allow(clippy::wrong_self_convention)]
fn is_grapheme_boundary(self, byte_idx: usize) -> bool;
+ /// Returns an iterator over the grapheme clusters in the slice.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use ropey::RopeSlice;
+ /// # use helix_stdx::rope::RopeSliceExt;
+ /// let text = RopeSlice::from("😶‍🌫️🏴‍☠️🖼️");
+ /// let graphemes: Vec<_> = text.graphemes().collect();
+ /// assert_eq!(graphemes.as_slice(), &["😶‍🌫️", "🏴‍☠️", "🖼️"]);
+ /// ```
+ fn graphemes(self) -> RopeGraphemes<'a>;
+ /// Returns an iterator over the grapheme clusters in the slice, reversed.
+ ///
+ /// The returned iterator starts at the end of the slice and ends at the beginning of the
+ /// slice.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use ropey::RopeSlice;
+ /// # use helix_stdx::rope::RopeSliceExt;
+ /// let text = RopeSlice::from("😶‍🌫️🏴‍☠️🖼️");
+ /// let graphemes: Vec<_> = text.graphemes_rev().collect();
+ /// assert_eq!(graphemes.as_slice(), &["🖼️", "🏴‍☠️", "😶‍🌫️"]);
+ /// ```
+ fn graphemes_rev(self) -> RevRopeGraphemes<'a>;
}
impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
@@ -305,6 +334,32 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
}
}
}
+
+ fn graphemes(self) -> RopeGraphemes<'a> {
+ let mut chunks = self.chunks();
+ let first_chunk = chunks.next().unwrap_or("");
+ RopeGraphemes {
+ text: self,
+ chunks,
+ cur_chunk: first_chunk,
+ cur_chunk_start: 0,
+ cursor: GraphemeCursor::new(0, self.len_bytes(), true),
+ }
+ }
+
+ fn graphemes_rev(self) -> RevRopeGraphemes<'a> {
+ let (mut chunks, mut cur_chunk_start, _, _) = self.chunks_at_byte(self.len_bytes());
+ chunks.reverse();
+ let first_chunk = chunks.next().unwrap_or("");
+ cur_chunk_start -= first_chunk.len();
+ RevRopeGraphemes {
+ text: self,
+ chunks,
+ cur_chunk: first_chunk,
+ cur_chunk_start,
+ cursor: GraphemeCursor::new(self.len_bytes(), self.len_bytes(), true),
+ }
+ }
}
// copied from std
@@ -314,6 +369,130 @@ const fn is_utf8_char_boundary(b: u8) -> bool {
(b as i8) >= -0x40
}
+/// An iterator over the graphemes of a `RopeSlice`.
+#[derive(Clone)]
+pub struct RopeGraphemes<'a> {
+ text: RopeSlice<'a>,
+ chunks: Chunks<'a>,
+ cur_chunk: &'a str,
+ cur_chunk_start: usize,
+ cursor: GraphemeCursor,
+}
+
+impl fmt::Debug for RopeGraphemes<'_> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_struct("RopeGraphemes")
+ .field("text", &self.text)
+ .field("chunks", &self.chunks)
+ .field("cur_chunk", &self.cur_chunk)
+ .field("cur_chunk_start", &self.cur_chunk_start)
+ // .field("cursor", &self.cursor)
+ .finish()
+ }
+}
+
+impl<'a> Iterator for RopeGraphemes<'a> {
+ type Item = RopeSlice<'a>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let a = self.cursor.cur_cursor();
+ let b;
+ loop {
+ match self
+ .cursor
+ .next_boundary(self.cur_chunk, self.cur_chunk_start)
+ {
+ Ok(None) => {
+ return None;
+ }
+ Ok(Some(n)) => {
+ b = n;
+ break;
+ }
+ Err(GraphemeIncomplete::NextChunk) => {
+ self.cur_chunk_start += self.cur_chunk.len();
+ self.cur_chunk = self.chunks.next().unwrap_or("");
+ }
+ Err(GraphemeIncomplete::PreContext(idx)) => {
+ let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
+ self.cursor.provide_context(chunk, byte_idx);
+ }
+ _ => unreachable!(),
+ }
+ }
+
+ if a < self.cur_chunk_start {
+ Some(self.text.byte_slice(a..b))
+ } else {
+ let a2 = a - self.cur_chunk_start;
+ let b2 = b - self.cur_chunk_start;
+ Some((&self.cur_chunk[a2..b2]).into())
+ }
+ }
+}
+
+/// An iterator over the graphemes of a `RopeSlice` in reverse.
+#[derive(Clone)]
+pub struct RevRopeGraphemes<'a> {
+ text: RopeSlice<'a>,
+ chunks: Chunks<'a>,
+ cur_chunk: &'a str,
+ cur_chunk_start: usize,
+ cursor: GraphemeCursor,
+}
+
+impl fmt::Debug for RevRopeGraphemes<'_> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_struct("RevRopeGraphemes")
+ .field("text", &self.text)
+ .field("chunks", &self.chunks)
+ .field("cur_chunk", &self.cur_chunk)
+ .field("cur_chunk_start", &self.cur_chunk_start)
+ // .field("cursor", &self.cursor)
+ .finish()
+ }
+}
+
+impl<'a> Iterator for RevRopeGraphemes<'a> {
+ type Item = RopeSlice<'a>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let a = self.cursor.cur_cursor();
+ let b;
+ loop {
+ match self
+ .cursor
+ .prev_boundary(self.cur_chunk, self.cur_chunk_start)
+ {
+ Ok(None) => {
+ return None;
+ }
+ Ok(Some(n)) => {
+ b = n;
+ break;
+ }
+ Err(GraphemeIncomplete::PrevChunk) => {
+ self.cur_chunk = self.chunks.next().unwrap_or("");
+ self.cur_chunk_start -= self.cur_chunk.len();
+ }
+ Err(GraphemeIncomplete::PreContext(idx)) => {
+ let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
+ self.cursor.provide_context(chunk, byte_idx);
+ }
+ _ => unreachable!(),
+ }
+ }
+
+ if a >= self.cur_chunk_start + self.cur_chunk.len() {
+ Some(self.text.byte_slice(b..a))
+ } else {
+ let a2 = a - self.cur_chunk_start;
+ let b2 = b - self.cur_chunk_start;
+ Some((&self.cur_chunk[b2..a2]).into())
+ }
+ }
+}
+
#[cfg(test)]
mod tests {
use ropey::RopeSlice;
diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs
index 851f9035..2a511cbe 100644
--- a/helix-term/src/commands.rs
+++ b/helix-term/src/commands.rs
@@ -20,7 +20,7 @@ use helix_core::{
comment,
doc_formatter::TextFormat,
encoding, find_workspace,
- graphemes::{self, next_grapheme_boundary, RevRopeGraphemes},
+ graphemes::{self, next_grapheme_boundary},
history::UndoKind,
increment,
indent::{self, IndentStyle},
@@ -35,8 +35,8 @@ use helix_core::{
text_annotations::{Overlay, TextAnnotations},
textobject,
unicode::width::UnicodeWidthChar,
- visual_offset_from_block, Deletion, LineEnding, Position, Range, Rope, RopeGraphemes,
- RopeReader, RopeSlice, Selection, SmallVec, Syntax, Tendril, Transaction,
+ visual_offset_from_block, Deletion, LineEnding, Position, Range, Rope, RopeReader, RopeSlice,
+ Selection, SmallVec, Syntax, Tendril, Transaction,
};
use helix_view::{
document::{FormatterError, Mode, SCRATCH_BUFFER_NAME},
@@ -1681,10 +1681,12 @@ fn replace(cx: &mut Context) {
if let Some(ch) = ch {
let transaction = Transaction::change_by_selection(doc.text(), selection, |range| {
if !range.is_empty() {
- let text: Tendril =
- RopeGraphemes::new(doc.text().slice(range.from()..range.to()))
- .map(|_g| ch)
- .collect();
+ let text: Tendril = doc
+ .text()
+ .slice(range.from()..range.to())
+ .graphemes()
+ .map(|_g| ch)
+ .collect();
(range.from(), range.to(), Some(text))
} else {
// No change.
@@ -6574,7 +6576,9 @@ fn jump_to_word(cx: &mut Context, behaviour: Movement) {
// madeup of word characters. The latter condition is needed because
// move_next_word_end simply treats a sequence of characters from
// the same char class as a word so `=<` would also count as a word.
- let add_label = RevRopeGraphemes::new(text.slice(..cursor_fwd.head))
+ let add_label = text
+ .slice(..cursor_fwd.head)
+ .graphemes_rev()
.take(2)
.take_while(|g| g.chars().all(char_is_word))
.count()
@@ -6600,7 +6604,9 @@ fn jump_to_word(cx: &mut Context, behaviour: Movement) {
// madeup of word characters. The latter condition is needed because
// move_prev_word_start simply treats a sequence of characters from
// the same char class as a word so `=<` would also count as a word.
- let add_label = RopeGraphemes::new(text.slice(cursor_rev.head..))
+ let add_label = text
+ .slice(cursor_rev.head..)
+ .graphemes()
.take(2)
.take_while(|g| g.chars().all(char_is_word))
.count()