Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-stdx/src/rope.rs')
| -rw-r--r-- | helix-stdx/src/rope.rs | 774 |
1 files changed, 26 insertions, 748 deletions
diff --git a/helix-stdx/src/rope.rs b/helix-stdx/src/rope.rs index 1ba4bc53..2695555e 100644 --- a/helix-stdx/src/rope.rs +++ b/helix-stdx/src/rope.rs @@ -1,15 +1,11 @@ -//! Functions and types for working with [RopeSlice] -use std::fmt; use std::ops::{Bound, RangeBounds}; pub use regex_cursor::engines::meta::{Builder as RegexBuilder, Regex}; pub use regex_cursor::regex_automata::util::syntax::Config; use regex_cursor::{Input as RegexInput, RopeyCursor}; -use ropey::iter::Chunks; +use ropey::str_utils::byte_to_char_idx; use ropey::RopeSlice; -use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete}; -/// Additional utility functions for [RopeSlice] pub trait RopeSliceExt<'a>: Sized { fn ends_with(self, text: &str) -> bool; fn starts_with(self, text: &str) -> bool; @@ -21,260 +17,23 @@ pub trait RopeSliceExt<'a>: Sized { fn regex_input_at<R: RangeBounds<usize>>(self, char_range: R) -> RegexInput<RopeyCursor<'a>>; fn first_non_whitespace_char(self) -> Option<usize>; fn last_non_whitespace_char(self) -> Option<usize>; - /// Finds the closest byte index not exceeding `byte_idx` which lies on a character boundary. - /// - /// If `byte_idx` already lies on a character boundary then it is returned as-is. When - /// `byte_idx` lies between two character boundaries, this function returns the byte index of - /// the lesser / earlier / left-hand-side boundary. - /// - /// # Example - /// - /// ``` - /// # use ropey::RopeSlice; - /// # use helix_stdx::rope::RopeSliceExt; - /// let text = RopeSlice::from("β"); // three bytes: e2 8c 9a - /// assert_eq!(text.floor_char_boundary(0), 0); - /// assert_eq!(text.floor_char_boundary(1), 0); - /// assert_eq!(text.floor_char_boundary(2), 0); - /// assert_eq!(text.floor_char_boundary(3), 3); - /// ``` - fn floor_char_boundary(self, byte_idx: usize) -> usize; - /// Finds the closest byte index not below `byte_idx` which lies on a character boundary. - /// - /// If `byte_idx` already lies on a character boundary then it is returned as-is. When - /// `byte_idx` lies between two character boundaries, this function returns the byte index of - /// the greater / later / right-hand-side boundary. - /// - /// # Example - /// - /// ``` - /// # use ropey::RopeSlice; - /// # use helix_stdx::rope::RopeSliceExt; - /// let text = RopeSlice::from("β"); // three bytes: e2 8c 9a - /// assert_eq!(text.ceil_char_boundary(0), 0); - /// assert_eq!(text.ceil_char_boundary(1), 3); - /// assert_eq!(text.ceil_char_boundary(2), 3); - /// assert_eq!(text.ceil_char_boundary(3), 3); - /// ``` - fn ceil_char_boundary(self, byte_idx: usize) -> usize; - /// Checks whether the given `byte_idx` lies on a character boundary. - /// - /// # Example - /// - /// ``` - /// # use ropey::RopeSlice; - /// # use helix_stdx::rope::RopeSliceExt; - /// let text = RopeSlice::from("β"); // three bytes: e2 8c 9a - /// assert!(text.is_char_boundary(0)); - /// assert!(!text.is_char_boundary(1)); - /// assert!(!text.is_char_boundary(2)); - /// assert!(text.is_char_boundary(3)); - /// ``` - #[allow(clippy::wrong_self_convention)] - fn is_char_boundary(self, byte_idx: usize) -> bool; - /// Finds the closest byte index not exceeding `byte_idx` which lies on a grapheme cluster - /// boundary. - /// - /// If `byte_idx` already lies on a grapheme cluster boundary then it is returned as-is. When - /// `byte_idx` lies between two grapheme cluster boundaries, this function returns the byte - /// index of the lesser / earlier / left-hand-side boundary. - /// - /// `byte_idx` does not need to be aligned to a character boundary. - /// - /// # Example - /// - /// ``` - /// # use ropey::RopeSlice; - /// # use helix_stdx::rope::RopeSliceExt; - /// let text = RopeSlice::from("\r\n"); // U+000D U+000A, hex: 0d 0a - /// assert_eq!(text.floor_grapheme_boundary(0), 0); - /// assert_eq!(text.floor_grapheme_boundary(1), 0); - /// assert_eq!(text.floor_grapheme_boundary(2), 2); - /// ``` - fn floor_grapheme_boundary(self, byte_idx: usize) -> usize; - /// Finds the closest byte index not exceeding `byte_idx` which lies on a grapheme cluster - /// boundary. - /// - /// If `byte_idx` already lies on a grapheme cluster boundary then it is returned as-is. When - /// `byte_idx` lies between two grapheme cluster boundaries, this function returns the byte - /// index of the greater / later / right-hand-side boundary. - /// - /// `byte_idx` does not need to be aligned to a character boundary. - /// - /// # Example - /// - /// ``` - /// # use ropey::RopeSlice; - /// # use helix_stdx::rope::RopeSliceExt; - /// let text = RopeSlice::from("\r\n"); // U+000D U+000A, hex: 0d 0a - /// assert_eq!(text.ceil_grapheme_boundary(0), 0); - /// assert_eq!(text.ceil_grapheme_boundary(1), 2); - /// assert_eq!(text.ceil_grapheme_boundary(2), 2); - /// ``` - fn ceil_grapheme_boundary(self, byte_idx: usize) -> usize; - /// Checks whether the `byte_idx` lies on a grapheme cluster boundary. - /// - /// # Example - /// - /// ``` - /// # use ropey::RopeSlice; - /// # use helix_stdx::rope::RopeSliceExt; - /// let text = RopeSlice::from("\r\n"); // U+000D U+000A, hex: 0d 0a - /// assert!(text.is_grapheme_boundary(0)); - /// assert!(!text.is_grapheme_boundary(1)); - /// assert!(text.is_grapheme_boundary(2)); - /// ``` - #[allow(clippy::wrong_self_convention)] - fn is_grapheme_boundary(self, byte_idx: usize) -> bool; - /// Returns an iterator over the grapheme clusters in the slice. + /// returns the char idx of `byte_idx`, if `byte_idx` is a char boundary + /// this function behaves the same as `byte_to_char` but if `byte_idx` is + /// not a valid char boundary (so within a char) this will return the next + /// char index. /// /// # Example /// /// ``` /// # use ropey::RopeSlice; /// # use helix_stdx::rope::RopeSliceExt; - /// let text = RopeSlice::from("πΆβπ«οΈπ΄ββ οΈπΌοΈ"); - /// let graphemes: Vec<_> = text.graphemes().collect(); - /// assert_eq!(graphemes.as_slice(), &["πΆβπ«οΈ", "π΄ββ οΈ", "πΌοΈ"]); + /// let text = RopeSlice::from("π"); + /// for i in 1..text.len_bytes() { + /// assert_eq!(text.byte_to_char(i), 0); + /// assert_eq!(text.byte_to_next_char(i), 1); + /// } /// ``` - fn graphemes(self) -> RopeGraphemes<'a> { - self.graphemes_at(0) - } - /// Returns an iterator over the grapheme clusters in the slice, reversed. - /// - /// The returned iterator starts at the end of the slice and ends at the beginning of the - /// slice. - /// - /// # Example - /// - /// ``` - /// # use ropey::RopeSlice; - /// # use helix_stdx::rope::RopeSliceExt; - /// let text = RopeSlice::from("πΆβπ«οΈπ΄ββ οΈπΌοΈ"); - /// let graphemes: Vec<_> = text.graphemes_rev().collect(); - /// assert_eq!(graphemes.as_slice(), &["πΌοΈ", "π΄ββ οΈ", "πΆβπ«οΈ"]); - /// ``` - fn graphemes_rev(self) -> RopeGraphemes<'a>; - /// Returns an iterator over the grapheme clusters in the slice at the given byte index. - /// - /// # Example - /// - /// ``` - /// # use ropey::Rope; - /// # use helix_stdx::rope::RopeSliceExt; - /// let text = Rope::from_str("πΆβπ«οΈπ΄ββ οΈπΌοΈ"); - /// // 14 is the byte index of the pirate flag's starting cluster boundary. - /// let graphemes: Vec<_> = text.slice(..).graphemes_at(14).collect(); - /// assert_eq!(graphemes.as_slice(), &["π΄ββ οΈ", "πΌοΈ"]); - /// // 27 is the byte index of the pirate flag's ending cluster boundary. - /// let graphemes: Vec<_> = text.slice(..).graphemes_at(27).reversed().collect(); - /// assert_eq!(graphemes.as_slice(), &["π΄ββ οΈ", "πΆβπ«οΈ"]); - /// ``` - fn graphemes_at(self, byte_idx: usize) -> RopeGraphemes<'a>; - /// Returns an iterator over the grapheme clusters in a rope and the byte index where each - /// grapheme cluster starts. - /// - /// # Example - /// - /// ``` - /// # use ropey::Rope; - /// # use helix_stdx::rope::RopeSliceExt; - /// let text = Rope::from_str("πΆβπ«οΈπ΄ββ οΈπΌοΈ"); - /// let slice = text.slice(..); - /// let graphemes: Vec<_> = slice.grapheme_indices_at(0).collect(); - /// assert_eq!( - /// graphemes.as_slice(), - /// &[(0, "πΆβπ«οΈ".into()), (14, "π΄ββ οΈ".into()), (27, "πΌοΈ".into())] - /// ); - /// let graphemes: Vec<_> = slice.grapheme_indices_at(slice.len_bytes()).reversed().collect(); - /// assert_eq!( - /// graphemes.as_slice(), - /// &[(27, "πΌοΈ".into()), (14, "π΄ββ οΈ".into()), (0, "πΆβπ«οΈ".into())] - /// ); - /// ``` - fn grapheme_indices_at(self, byte_idx: usize) -> RopeGraphemeIndices<'a>; - /// Finds the byte index of the next grapheme boundary after `byte_idx`. - /// - /// If the byte index lies on the last grapheme cluster in the slice then this function - /// returns `RopeSlice::len_bytes`. - /// - /// # Example - /// - /// ``` - /// # use ropey::Rope; - /// # use helix_stdx::rope::RopeSliceExt; - /// let text = Rope::from_str("πΆβπ«οΈπ΄ββ οΈπΌοΈ"); - /// let slice = text.slice(..); - /// let mut byte_idx = 0; - /// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("πΆβπ«οΈ".into())); - /// byte_idx = slice.next_grapheme_boundary(byte_idx); - /// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("π΄ββ οΈ".into())); - /// - /// // If `byte_idx` does not lie on a character or grapheme boundary then this function is - /// // functionally the same as `ceil_grapheme_boundary`. - /// assert_eq!(slice.next_grapheme_boundary(byte_idx - 1), byte_idx); - /// assert_eq!(slice.next_grapheme_boundary(byte_idx - 2), byte_idx); - /// assert_eq!(slice.next_grapheme_boundary(byte_idx + 1), slice.next_grapheme_boundary(byte_idx)); - /// assert_eq!(slice.next_grapheme_boundary(byte_idx + 2), slice.next_grapheme_boundary(byte_idx)); - /// - /// byte_idx = slice.next_grapheme_boundary(byte_idx); - /// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("πΌοΈ".into())); - /// byte_idx = slice.next_grapheme_boundary(byte_idx); - /// assert_eq!(slice.graphemes_at(byte_idx).next(), None); - /// assert_eq!(byte_idx, slice.len_bytes()); - /// ``` - fn next_grapheme_boundary(self, byte_idx: usize) -> usize { - self.nth_next_grapheme_boundary(byte_idx, 1) - } - /// Finds the byte index of the `n`th grapheme cluster after the given `byte_idx`. - /// - /// If there are fewer than `n` grapheme clusters after `byte_idx` in the rope then this - /// function returns `RopeSlice::len_bytes`. - /// - /// This is functionally equivalent to calling `next_grapheme_boundary` `n` times but is more - /// efficient. - fn nth_next_grapheme_boundary(self, byte_idx: usize, n: usize) -> usize; - /// Finds the byte index of the previous grapheme boundary before `byte_idx`. - /// - /// If the byte index lies on the first grapheme cluster in the slice then this function - /// returns zero. - /// - /// # Example - /// - /// ``` - /// # use ropey::Rope; - /// # use helix_stdx::rope::RopeSliceExt; - /// let text = Rope::from_str("πΆβπ«οΈπ΄ββ οΈπΌοΈ"); - /// let slice = text.slice(..); - /// let mut byte_idx = text.len_bytes(); - /// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("πΌοΈ".into())); - /// byte_idx = slice.prev_grapheme_boundary(byte_idx); - /// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("π΄ββ οΈ".into())); - /// - /// // If `byte_idx` does not lie on a character or grapheme boundary then this function is - /// // functionally the same as `floor_grapheme_boundary`. - /// assert_eq!(slice.prev_grapheme_boundary(byte_idx + 1), byte_idx); - /// assert_eq!(slice.prev_grapheme_boundary(byte_idx + 2), byte_idx); - /// assert_eq!(slice.prev_grapheme_boundary(byte_idx - 1), slice.prev_grapheme_boundary(byte_idx)); - /// assert_eq!(slice.prev_grapheme_boundary(byte_idx - 2), slice.prev_grapheme_boundary(byte_idx)); - /// - /// byte_idx = slice.prev_grapheme_boundary(byte_idx); - /// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("πΆβπ«οΈ".into())); - /// byte_idx = slice.prev_grapheme_boundary(byte_idx); - /// assert_eq!(slice.graphemes_at(byte_idx).prev(), None); - /// assert_eq!(byte_idx, 0); - /// ``` - fn prev_grapheme_boundary(self, byte_idx: usize) -> usize { - self.nth_prev_grapheme_boundary(byte_idx, 1) - } - /// Finds the byte index of the `n`th grapheme cluster before the given `byte_idx`. - /// - /// If there are fewer than `n` grapheme clusters before `byte_idx` in the rope then this - /// function returns zero. - /// - /// This is functionally equivalent to calling `prev_grapheme_boundary` `n` times but is more - /// efficient. - fn nth_prev_grapheme_boundary(self, byte_idx: usize, n: usize) -> usize; + fn byte_to_next_char(self, byte_idx: usize) -> usize; } impl<'a> RopeSliceExt<'a> for RopeSlice<'a> { @@ -284,7 +43,7 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> { return false; } self.get_byte_slice(len - text.len()..) - .is_some_and(|end| end == text) + .map_or(false, |end| end == text) } fn starts_with(self, text: &str) -> bool { @@ -292,8 +51,8 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> { if len < text.len() { return false; } - self.get_byte_slice(..text.len()) - .is_some_and(|start| start == text) + self.get_byte_slice(..len - text.len()) + .map_or(false, |start| start == text) } fn regex_input(self) -> RegexInput<RopeyCursor<'a>> { @@ -335,234 +94,14 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> { .map(|pos| self.len_chars() - pos - 1) } - // These three are adapted from std: - - fn floor_char_boundary(self, byte_idx: usize) -> usize { - if byte_idx >= self.len_bytes() { - self.len_bytes() - } else { - let offset = self - .bytes_at(byte_idx + 1) - .reversed() - .take(4) - .position(is_utf8_char_boundary) - // A char can only be four bytes long so we are guaranteed to find a boundary. - .unwrap(); - - byte_idx - offset - } - } - - fn ceil_char_boundary(self, byte_idx: usize) -> usize { - if byte_idx > self.len_bytes() { - self.len_bytes() - } else { - let upper_bound = self.len_bytes().min(byte_idx + 4); - self.bytes_at(byte_idx) - .position(is_utf8_char_boundary) - .map_or(upper_bound, |pos| pos + byte_idx) - } - } - - fn is_char_boundary(self, byte_idx: usize) -> bool { - if byte_idx == 0 { - return true; - } - - if byte_idx >= self.len_bytes() { - byte_idx == self.len_bytes() - } else { - is_utf8_char_boundary(self.bytes_at(byte_idx).next().unwrap()) - } - } - - fn floor_grapheme_boundary(self, mut byte_idx: usize) -> usize { - if byte_idx >= self.len_bytes() { - return self.len_bytes(); - } - - byte_idx = self.ceil_char_boundary(byte_idx + 1); - - let (mut chunk, mut chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx); - - let mut cursor = GraphemeCursor::new(byte_idx, self.len_bytes(), true); - - loop { - match cursor.prev_boundary(chunk, chunk_byte_idx) { - Ok(None) => return 0, - Ok(Some(boundary)) => return boundary, - Err(GraphemeIncomplete::PrevChunk) => { - let (ch, ch_byte_idx, _, _) = self.chunk_at_byte(chunk_byte_idx - 1); - chunk = ch; - chunk_byte_idx = ch_byte_idx; - } - Err(GraphemeIncomplete::PreContext(n)) => { - let ctx_chunk = self.chunk_at_byte(n - 1).0; - cursor.provide_context(ctx_chunk, n - ctx_chunk.len()); - } - _ => unreachable!(), - } - } - } - - fn ceil_grapheme_boundary(self, mut byte_idx: usize) -> usize { - if byte_idx >= self.len_bytes() { - return self.len_bytes(); - } - - if byte_idx == 0 { - return 0; - } - - byte_idx = self.floor_char_boundary(byte_idx - 1); - - let (mut chunk, mut chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx); - - let mut cursor = GraphemeCursor::new(byte_idx, self.len_bytes(), true); - - loop { - match cursor.next_boundary(chunk, chunk_byte_idx) { - Ok(None) => return self.len_bytes(), - Ok(Some(boundary)) => return boundary, - Err(GraphemeIncomplete::NextChunk) => { - chunk_byte_idx += chunk.len(); - chunk = self.chunk_at_byte(chunk_byte_idx).0; - } - Err(GraphemeIncomplete::PreContext(n)) => { - let ctx_chunk = self.chunk_at_byte(n - 1).0; - cursor.provide_context(ctx_chunk, n - ctx_chunk.len()); - } - _ => unreachable!(), - } - } - } - - fn is_grapheme_boundary(self, byte_idx: usize) -> bool { - // The byte must lie on a character boundary to lie on a grapheme cluster boundary. - if !self.is_char_boundary(byte_idx) { - return false; - } - - let (chunk, chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx); - - let mut cursor = GraphemeCursor::new(byte_idx, self.len_bytes(), true); - - loop { - match cursor.is_boundary(chunk, chunk_byte_idx) { - Ok(n) => return n, - Err(GraphemeIncomplete::PreContext(n)) => { - let (ctx_chunk, ctx_byte_start, _, _) = self.chunk_at_byte(n - 1); - cursor.provide_context(ctx_chunk, ctx_byte_start); - } - Err(_) => unreachable!(), - } - } - } - - fn graphemes_rev(self) -> RopeGraphemes<'a> { - self.graphemes_at(self.len_bytes()).reversed() - } - - fn graphemes_at(self, byte_idx: usize) -> RopeGraphemes<'a> { - // Bounds check - assert!(byte_idx <= self.len_bytes()); - - let (mut chunks, chunk_byte_idx, _, _) = self.chunks_at_byte(byte_idx); - let current_chunk = chunks.next().unwrap_or(""); - - RopeGraphemes { - text: self, - chunks, - current_chunk, - chunk_byte_idx, - cursor: GraphemeCursor::new(byte_idx, self.len_bytes(), true), - is_reversed: false, - } - } - - fn grapheme_indices_at(self, byte_idx: usize) -> RopeGraphemeIndices<'a> { - // Bounds check - assert!(byte_idx <= self.len_bytes()); - RopeGraphemeIndices { - front_offset: byte_idx, - iter: self.graphemes_at(byte_idx), - is_reversed: false, - } - } - - fn nth_next_grapheme_boundary(self, mut byte_idx: usize, n: usize) -> usize { - // Bounds check - assert!(byte_idx <= self.len_bytes()); - - byte_idx = self.floor_char_boundary(byte_idx); - - // Get the chunk with our byte index in it. - let (mut chunk, mut chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx); - - // Set up the grapheme cursor. - let mut gc = GraphemeCursor::new(byte_idx, self.len_bytes(), true); - - // Find the nth next grapheme cluster boundary. - for _ in 0..n { - loop { - match gc.next_boundary(chunk, chunk_byte_idx) { - Ok(None) => return self.len_bytes(), - Ok(Some(boundary)) => { - byte_idx = boundary; - break; - } - Err(GraphemeIncomplete::NextChunk) => { - chunk_byte_idx += chunk.len(); - let (a, _, _, _) = self.chunk_at_byte(chunk_byte_idx); - chunk = a; - } - Err(GraphemeIncomplete::PreContext(n)) => { - let ctx_chunk = self.chunk_at_byte(n - 1).0; - gc.provide_context(ctx_chunk, n - ctx_chunk.len()); - } - _ => unreachable!(), - } - } - } - - byte_idx - } - - fn nth_prev_grapheme_boundary(self, mut byte_idx: usize, n: usize) -> usize { - // Bounds check - assert!(byte_idx <= self.len_bytes()); - - byte_idx = self.ceil_char_boundary(byte_idx); - - // Get the chunk with our byte index in it. - let (mut chunk, mut chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx); - - // Set up the grapheme cursor. - let mut gc = GraphemeCursor::new(byte_idx, self.len_bytes(), true); - - for _ in 0..n { - loop { - match gc.prev_boundary(chunk, chunk_byte_idx) { - Ok(None) => return 0, - Ok(Some(boundary)) => { - byte_idx = boundary; - break; - } - Err(GraphemeIncomplete::PrevChunk) => { - let (a, b, _, _) = self.chunk_at_byte(chunk_byte_idx - 1); - chunk = a; - chunk_byte_idx = b; - } - Err(GraphemeIncomplete::PreContext(n)) => { - let ctx_chunk = self.chunk_at_byte(n - 1).0; - gc.provide_context(ctx_chunk, n - ctx_chunk.len()); - } - _ => unreachable!(), - } - } - } - - byte_idx + /// returns the char idx of `byte_idx`, if `byte_idx` is + /// a char boundary this function behaves the same as `byte_to_char` + fn byte_to_next_char(self, mut byte_idx: usize) -> usize { + let (chunk, chunk_byte_off, chunk_char_off, _) = self.chunk_at_byte(byte_idx); + byte_idx -= chunk_byte_off; + let is_char_boundary = + is_utf8_char_boundary(chunk.as_bytes().get(byte_idx).copied().unwrap_or(0)); + chunk_char_off + byte_to_char_idx(chunk, byte_idx) + !is_char_boundary as usize } } @@ -573,202 +112,6 @@ const fn is_utf8_char_boundary(b: u8) -> bool { (b as i8) >= -0x40 } -/// An iterator over the graphemes of a `RopeSlice`. -/// -/// This iterator is cursor-like: rather than implementing DoubleEndedIterator it can be reversed -/// like a cursor. This style matches `Bytes` and `Chars` iterator types in Ropey and is more -/// natural and useful for wrapping `GraphemeCursor`. -#[derive(Clone)] -pub struct RopeGraphemes<'a> { - text: RopeSlice<'a>, - chunks: Chunks<'a>, - current_chunk: &'a str, - /// Byte index of the start of the current chunk. - chunk_byte_idx: usize, - cursor: GraphemeCursor, - is_reversed: bool, -} - -impl fmt::Debug for RopeGraphemes<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("RopeGraphemes") - .field("text", &self.text) - .field("chunks", &self.chunks) - .field("current_chunk", &self.current_chunk) - .field("chunk_byte_idx", &self.chunk_byte_idx) - // .field("cursor", &self.cursor) - .field("is_reversed", &self.is_reversed) - .finish() - } -} - -impl<'a> RopeGraphemes<'a> { - #[allow(clippy::should_implement_trait)] - pub fn next(&mut self) -> Option<RopeSlice<'a>> { - if self.is_reversed { - self.prev_impl() - } else { - self.next_impl() - } - } - - pub fn prev(&mut self) -> Option<RopeSlice<'a>> { - if self.is_reversed { - self.next_impl() - } else { - self.prev_impl() - } - } - - pub fn reverse(&mut self) { - self.is_reversed = !self.is_reversed; - } - - #[must_use] - pub fn reversed(mut self) -> Self { - self.reverse(); - self - } - - fn next_impl(&mut self) -> Option<RopeSlice<'a>> { - let a = self.cursor.cur_cursor(); - let b; - loop { - match self - .cursor - .next_boundary(self.current_chunk, self.chunk_byte_idx) - { - Ok(None) => return None, - Ok(Some(boundary)) => { - b = boundary; - break; - } - Err(GraphemeIncomplete::NextChunk) => { - self.chunk_byte_idx += self.current_chunk.len(); - self.current_chunk = self.chunks.next().unwrap_or(""); - } - Err(GraphemeIncomplete::PreContext(idx)) => { - let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1)); - self.cursor.provide_context(chunk, byte_idx); - } - _ => unreachable!(), - } - } - - if a < self.chunk_byte_idx { - Some(self.text.byte_slice(a..b)) - } else { - let a2 = a - self.chunk_byte_idx; - let b2 = b - self.chunk_byte_idx; - Some((&self.current_chunk[a2..b2]).into()) - } - } - - fn prev_impl(&mut self) -> Option<RopeSlice<'a>> { - let a = self.cursor.cur_cursor(); - let b; - loop { - match self - .cursor - .prev_boundary(self.current_chunk, self.chunk_byte_idx) - { - Ok(None) => return None, - Ok(Some(boundary)) => { - b = boundary; - break; - } - Err(GraphemeIncomplete::PrevChunk) => { - self.current_chunk = self.chunks.prev().unwrap_or(""); - self.chunk_byte_idx -= self.current_chunk.len(); - } - Err(GraphemeIncomplete::PreContext(idx)) => { - let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1)); - self.cursor.provide_context(chunk, byte_idx); - } - _ => unreachable!(), - } - } - - if a >= self.chunk_byte_idx + self.current_chunk.len() { - Some(self.text.byte_slice(b..a)) - } else { - let a2 = a - self.chunk_byte_idx; - let b2 = b - self.chunk_byte_idx; - Some((&self.current_chunk[b2..a2]).into()) - } - } -} - -impl<'a> Iterator for RopeGraphemes<'a> { - type Item = RopeSlice<'a>; - - fn next(&mut self) -> Option<Self::Item> { - RopeGraphemes::next(self) - } -} - -/// An iterator over the grapheme clusters in a rope and the byte indices where each grapheme -/// cluster starts. -/// -/// This iterator wraps `RopeGraphemes` and is also cursor-like. Use `reverse` or `reversed` to -/// toggle the direction of the iterator. See [RopeGraphemes]. -#[derive(Debug, Clone)] -pub struct RopeGraphemeIndices<'a> { - front_offset: usize, - iter: RopeGraphemes<'a>, - is_reversed: bool, -} - -impl<'a> RopeGraphemeIndices<'a> { - #[allow(clippy::should_implement_trait)] - pub fn next(&mut self) -> Option<(usize, RopeSlice<'a>)> { - if self.is_reversed { - self.prev_impl() - } else { - self.next_impl() - } - } - - pub fn prev(&mut self) -> Option<(usize, RopeSlice<'a>)> { - if self.is_reversed { - self.next_impl() - } else { - self.prev_impl() - } - } - - pub fn reverse(&mut self) { - self.is_reversed = !self.is_reversed; - } - - #[must_use] - pub fn reversed(mut self) -> Self { - self.reverse(); - self - } - - fn next_impl(&mut self) -> Option<(usize, RopeSlice<'a>)> { - let slice = self.iter.next()?; - let idx = self.front_offset; - self.front_offset += slice.len_bytes(); - Some((idx, slice)) - } - - fn prev_impl(&mut self) -> Option<(usize, RopeSlice<'a>)> { - let slice = self.iter.prev()?; - self.front_offset -= slice.len_bytes(); - Some((self.front_offset, slice)) - } -} - -impl<'a> Iterator for RopeGraphemeIndices<'a> { - type Item = (usize, RopeSlice<'a>); - - fn next(&mut self) -> Option<Self::Item> { - RopeGraphemeIndices::next(self) - } -} - #[cfg(test)] mod tests { use ropey::RopeSlice; @@ -776,87 +119,22 @@ mod tests { use crate::rope::RopeSliceExt; #[test] - fn starts_with() { - assert!(RopeSlice::from("asdf").starts_with("a")); - } - - #[test] - fn ends_with() { - assert!(RopeSlice::from("asdf").ends_with("f")); - } - - #[test] - fn char_boundaries() { - let ascii = RopeSlice::from("ascii"); - // When the given index lies on a character boundary, the index should not change. - for byte_idx in 0..=ascii.len_bytes() { - assert_eq!(ascii.floor_char_boundary(byte_idx), byte_idx); - assert_eq!(ascii.ceil_char_boundary(byte_idx), byte_idx); - assert!(ascii.is_char_boundary(byte_idx)); - } - - // This is a polyfill of a method of this trait which was replaced by ceil_char_boundary. - // It returns the _character index_ of the given byte index, rounding up if it does not - // already lie on a character boundary. - fn byte_to_next_char(slice: RopeSlice, byte_idx: usize) -> usize { - slice.byte_to_char(slice.ceil_char_boundary(byte_idx)) - } - + fn next_char_at_byte() { for i in 0..=6 { - assert_eq!(byte_to_next_char(RopeSlice::from("foobar"), i), i); + assert_eq!(RopeSlice::from("foobar").byte_to_next_char(i), i); } for char_idx in 0..10 { let len = "π".len(); assert_eq!( - byte_to_next_char(RopeSlice::from("ππππππππππ"), char_idx * len), + RopeSlice::from("ππππππππππ").byte_to_next_char(char_idx * len), char_idx ); for i in 1..=len { assert_eq!( - byte_to_next_char(RopeSlice::from("ππππππππππ"), char_idx * len + i), + RopeSlice::from("ππππππππππ").byte_to_next_char(char_idx * len + i), char_idx + 1 ); } } } - - #[test] - fn grapheme_boundaries() { - let ascii = RopeSlice::from("ascii"); - // When the given index lies on a grapheme boundary, the index should not change. - for byte_idx in 0..=ascii.len_bytes() { - assert_eq!(ascii.floor_char_boundary(byte_idx), byte_idx); - assert_eq!(ascii.ceil_char_boundary(byte_idx), byte_idx); - assert!(ascii.is_grapheme_boundary(byte_idx)); - } - - // π΄ββ οΈ: U+1F3F4 U+200D U+2620 U+FE0F - // 13 bytes, hex: f0 9f 8f b4 + e2 80 8d + e2 98 a0 + ef b8 8f - let g = RopeSlice::from("π΄ββ οΈ\r\n"); - let emoji_len = "π΄ββ οΈ".len(); - let end = g.len_bytes(); - - for byte_idx in 0..emoji_len { - assert_eq!(g.floor_grapheme_boundary(byte_idx), 0); - } - for byte_idx in emoji_len..end { - assert_eq!(g.floor_grapheme_boundary(byte_idx), emoji_len); - } - assert_eq!(g.floor_grapheme_boundary(end), end); - - assert_eq!(g.ceil_grapheme_boundary(0), 0); - for byte_idx in 1..=emoji_len { - assert_eq!(g.ceil_grapheme_boundary(byte_idx), emoji_len); - } - for byte_idx in emoji_len + 1..=end { - assert_eq!(g.ceil_grapheme_boundary(byte_idx), end); - } - - assert!(g.is_grapheme_boundary(0)); - assert!(g.is_grapheme_boundary(emoji_len)); - assert!(g.is_grapheme_boundary(end)); - for byte_idx in (1..emoji_len).chain(emoji_len + 1..end) { - assert!(!g.is_grapheme_boundary(byte_idx)); - } - } } |