Unnamed repository; edit this file 'description' to name the repository.
stdx: Add `RopeSliceExt::(nth_){next,prev}_grapheme_boundary`
These functions mirror those in `helix_core::graphemes` but operate directly on byte indices rather than character indices. These are meant to be used as we transition to Ropey v2 and always use byte indices.
Michael Davis 8 months ago
parent 362e97e · commit f5dc824
-rw-r--r-- helix-stdx/src/rope.rs 157
1 files changed, 157 insertions, 0 deletions
diff --git a/helix-stdx/src/rope.rs b/helix-stdx/src/rope.rs
index 9fc348f5..315e98e5 100644
--- a/helix-stdx/src/rope.rs
+++ b/helix-stdx/src/rope.rs
@@ -151,6 +151,88 @@ pub trait RopeSliceExt<'a>: Sized {
/// assert_eq!(graphemes.as_slice(), &["🖼️", "🏴‍☠️", "😶‍🌫️"]);
/// ```
fn graphemes_rev(self) -> RevRopeGraphemes<'a>;
+ /// Finds the byte index of the next grapheme boundary after `byte_idx`.
+ ///
+ /// If the byte index lies on the last grapheme cluster in the slice then this function
+ /// returns `RopeSlice::len_bytes`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use ropey::Rope;
+ /// # use helix_stdx::rope::RopeSliceExt;
+ /// let text = Rope::from_str("😶‍🌫️🏴‍☠️🖼️");
+ /// let slice = text.slice(..);
+ /// let mut byte_idx = 0;
+ /// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("😶‍🌫️".into()));
+ /// byte_idx = slice.next_grapheme_boundary(byte_idx);
+ /// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("🏴‍☠️".into()));
+ ///
+ /// // If `byte_idx` does not lie on a character or grapheme boundary then this function is
+ /// // functionally the same as `ceil_grapheme_boundary`.
+ /// assert_eq!(slice.next_grapheme_boundary(byte_idx - 1), byte_idx);
+ /// assert_eq!(slice.next_grapheme_boundary(byte_idx - 2), byte_idx);
+ /// assert_eq!(slice.next_grapheme_boundary(byte_idx + 1), slice.next_grapheme_boundary(byte_idx));
+ /// assert_eq!(slice.next_grapheme_boundary(byte_idx + 2), slice.next_grapheme_boundary(byte_idx));
+ ///
+ /// byte_idx = slice.next_grapheme_boundary(byte_idx);
+ /// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("🖼️".into()));
+ /// byte_idx = slice.next_grapheme_boundary(byte_idx);
+ /// assert_eq!(slice.graphemes_at(byte_idx).next(), None);
+ /// assert_eq!(byte_idx, slice.len_bytes());
+ /// ```
+ fn next_grapheme_boundary(self, byte_idx: usize) -> usize {
+ self.nth_next_grapheme_boundary(byte_idx, 1) // a single step is the `n = 1` case
+ }
+ /// Finds the byte index of the `n`th grapheme cluster boundary after the given `byte_idx`.
+ ///
+ /// If there are fewer than `n` grapheme clusters after `byte_idx` in the slice then this
+ /// function returns `RopeSlice::len_bytes`.
+ ///
+ /// This is functionally equivalent to calling `next_grapheme_boundary` `n` times but is more
+ /// efficient.
+ fn nth_next_grapheme_boundary(self, byte_idx: usize, n: usize) -> usize;
+ /// Finds the byte index of the previous grapheme boundary before `byte_idx`.
+ ///
+ /// If the byte index lies on the first grapheme cluster in the slice then this function
+ /// returns zero.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use ropey::Rope;
+ /// # use helix_stdx::rope::RopeSliceExt;
+ /// let text = Rope::from_str("😶‍🌫️🏴‍☠️🖼️");
+ /// let slice = text.slice(..);
+ /// let mut byte_idx = text.len_bytes();
+ /// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("🖼️".into()));
+ /// byte_idx = slice.prev_grapheme_boundary(byte_idx);
+ /// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("🏴‍☠️".into()));
+ ///
+ /// // If `byte_idx` does not lie on a character or grapheme boundary then this function is
+ /// // functionally the same as `floor_grapheme_boundary`.
+ /// assert_eq!(slice.prev_grapheme_boundary(byte_idx + 1), byte_idx);
+ /// assert_eq!(slice.prev_grapheme_boundary(byte_idx + 2), byte_idx);
+ /// assert_eq!(slice.prev_grapheme_boundary(byte_idx - 1), slice.prev_grapheme_boundary(byte_idx));
+ /// assert_eq!(slice.prev_grapheme_boundary(byte_idx - 2), slice.prev_grapheme_boundary(byte_idx));
+ ///
+ /// byte_idx = slice.prev_grapheme_boundary(byte_idx);
+ /// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("😶‍🌫️".into()));
+ /// byte_idx = slice.prev_grapheme_boundary(byte_idx);
+ /// assert_eq!(slice.graphemes_at(byte_idx).prev(), None);
+ /// assert_eq!(byte_idx, 0);
+ /// ```
+ fn prev_grapheme_boundary(self, byte_idx: usize) -> usize {
+ self.nth_prev_grapheme_boundary(byte_idx, 1) // a single step is the `n = 1` case
+ }
+ /// Finds the byte index of the `n`th grapheme cluster boundary before the given `byte_idx`.
+ ///
+ /// If there are fewer than `n` grapheme clusters before `byte_idx` in the slice then this
+ /// function returns zero.
+ ///
+ /// This is functionally equivalent to calling `prev_grapheme_boundary` `n` times but is more
+ /// efficient.
+ fn nth_prev_grapheme_boundary(self, byte_idx: usize, n: usize) -> usize;
}
impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
@@ -360,6 +442,81 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
cursor: GraphemeCursor::new(self.len_bytes(), self.len_bytes(), true),
}
}
+
+ fn nth_next_grapheme_boundary(self, mut byte_idx: usize, n: usize) -> usize {
+ // Bounds check: panics if `byte_idx` is past the end of the slice.
+ assert!(byte_idx <= self.len_bytes());
+
+ byte_idx = self.floor_char_boundary(byte_idx); // start the search from a char boundary
+
+ // Get the chunk containing `byte_idx`; the second tuple element is used as that chunk's start offset.
+ let (mut chunk, mut chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx);
+
+ // Set up the grapheme cursor at `byte_idx` over the full byte length of the slice.
+ let mut gc = GraphemeCursor::new(byte_idx, self.len_bytes(), true);
+
+ // Find the nth next grapheme cluster boundary; the inner loop retries until the cursor has the data it needs.
+ for _ in 0..n {
+ loop {
+ match gc.next_boundary(chunk, chunk_byte_idx) {
+ Ok(None) => return self.len_bytes(), // no further boundary: clamp to the end of the slice
+ Ok(Some(boundary)) => {
+ byte_idx = boundary;
+ break; // one boundary found; continue with the next of the `n` steps
+ }
+ Err(GraphemeIncomplete::NextChunk) => {
+ chunk_byte_idx += chunk.len(); // the following chunk starts where the current one ends
+ let (a, _, _, _) = self.chunk_at_byte(chunk_byte_idx);
+ chunk = a;
+ }
+ Err(GraphemeIncomplete::PreContext(n)) => { // NOTE: this `n` shadows the step-count parameter
+ let ctx_chunk = self.chunk_at_byte(n - 1).0; // chunk holding the byte just before offset `n`
+ gc.provide_context(ctx_chunk, n - ctx_chunk.len()); // NOTE(review): start offset assumes the chunk ends exactly at `n` — confirm
+ }
+ _ => unreachable!(),
+ }
+ }
+ }
+
+ byte_idx
+ }
+
+ fn nth_prev_grapheme_boundary(self, mut byte_idx: usize, n: usize) -> usize {
+ // Bounds check: panics if `byte_idx` is past the end of the slice.
+ assert!(byte_idx <= self.len_bytes());
+
+ byte_idx = self.ceil_char_boundary(byte_idx); // start the search from a char boundary
+
+ // Get the chunk containing `byte_idx`; the second tuple element is used as that chunk's start offset.
+ let (mut chunk, mut chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx);
+
+ // Set up the grapheme cursor at `byte_idx` over the full byte length of the slice.
+ let mut gc = GraphemeCursor::new(byte_idx, self.len_bytes(), true);
+
+ for _ in 0..n {
+ loop {
+ match gc.prev_boundary(chunk, chunk_byte_idx) {
+ Ok(None) => return 0, // no earlier boundary: clamp to the start of the slice
+ Ok(Some(boundary)) => {
+ byte_idx = boundary;
+ break; // one boundary found; continue with the next of the `n` steps
+ }
+ Err(GraphemeIncomplete::PrevChunk) => {
+ let (a, b, _, _) = self.chunk_at_byte(chunk_byte_idx - 1); // chunk holding the byte just before the current chunk
+ chunk = a;
+ chunk_byte_idx = b;
+ }
+ Err(GraphemeIncomplete::PreContext(n)) => { // NOTE: this `n` shadows the step-count parameter
+ let ctx_chunk = self.chunk_at_byte(n - 1).0; // chunk holding the byte just before offset `n`
+ gc.provide_context(ctx_chunk, n - ctx_chunk.len()); // NOTE(review): start offset assumes the chunk ends exactly at `n` — confirm
+ }
+ _ => unreachable!(),
+ }
+ }
+ }
+
+ byte_idx
+ }
}
// copied from std