Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-stdx/src/rope.rs')
| -rw-r--r-- | helix-stdx/src/rope.rs | 430 |
1 files changed, 77 insertions, 353 deletions
diff --git a/helix-stdx/src/rope.rs b/helix-stdx/src/rope.rs index 1ba4bc53..9fc348f5 100644 --- a/helix-stdx/src/rope.rs +++ b/helix-stdx/src/rope.rs @@ -1,4 +1,3 @@ -//! Functions and types for working with [RopeSlice] use std::fmt; use std::ops::{Bound, RangeBounds}; @@ -9,7 +8,6 @@ use ropey::iter::Chunks; use ropey::RopeSlice; use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete}; -/// Additional utility functions for [RopeSlice] pub trait RopeSliceExt<'a>: Sized { fn ends_with(self, text: &str) -> bool; fn starts_with(self, text: &str) -> bool; @@ -137,9 +135,7 @@ pub trait RopeSliceExt<'a>: Sized { /// let graphemes: Vec<_> = text.graphemes().collect(); /// assert_eq!(graphemes.as_slice(), &["πΆβπ«οΈ", "π΄ββ οΈ", "πΌοΈ"]); /// ``` - fn graphemes(self) -> RopeGraphemes<'a> { - self.graphemes_at(0) - } + fn graphemes(self) -> RopeGraphemes<'a>; /// Returns an iterator over the grapheme clusters in the slice, reversed. /// /// The returned iterator starts at the end of the slice and ends at the beginning of the @@ -154,127 +150,7 @@ pub trait RopeSliceExt<'a>: Sized { /// let graphemes: Vec<_> = text.graphemes_rev().collect(); /// assert_eq!(graphemes.as_slice(), &["πΌοΈ", "π΄ββ οΈ", "πΆβπ«οΈ"]); /// ``` - fn graphemes_rev(self) -> RopeGraphemes<'a>; - /// Returns an iterator over the grapheme clusters in the slice at the given byte index. - /// - /// # Example - /// - /// ``` - /// # use ropey::Rope; - /// # use helix_stdx::rope::RopeSliceExt; - /// let text = Rope::from_str("πΆβπ«οΈπ΄ββ οΈπΌοΈ"); - /// // 14 is the byte index of the pirate flag's starting cluster boundary. - /// let graphemes: Vec<_> = text.slice(..).graphemes_at(14).collect(); - /// assert_eq!(graphemes.as_slice(), &["π΄ββ οΈ", "πΌοΈ"]); - /// // 27 is the byte index of the pirate flag's ending cluster boundary. - /// let graphemes: Vec<_> = text.slice(..).graphemes_at(27).reversed().collect(); - /// assert_eq!(graphemes.as_slice(), &["π΄ββ οΈ", "πΆβπ«οΈ"]); - /// ``` - fn graphemes_at(self, byte_idx: usize) -> RopeGraphemes<'a>; - /// Returns an iterator over the grapheme clusters in a rope and the byte index where each - /// grapheme cluster starts. - /// - /// # Example - /// - /// ``` - /// # use ropey::Rope; - /// # use helix_stdx::rope::RopeSliceExt; - /// let text = Rope::from_str("πΆβπ«οΈπ΄ββ οΈπΌοΈ"); - /// let slice = text.slice(..); - /// let graphemes: Vec<_> = slice.grapheme_indices_at(0).collect(); - /// assert_eq!( - /// graphemes.as_slice(), - /// &[(0, "πΆβπ«οΈ".into()), (14, "π΄ββ οΈ".into()), (27, "πΌοΈ".into())] - /// ); - /// let graphemes: Vec<_> = slice.grapheme_indices_at(slice.len_bytes()).reversed().collect(); - /// assert_eq!( - /// graphemes.as_slice(), - /// &[(27, "πΌοΈ".into()), (14, "π΄ββ οΈ".into()), (0, "πΆβπ«οΈ".into())] - /// ); - /// ``` - fn grapheme_indices_at(self, byte_idx: usize) -> RopeGraphemeIndices<'a>; - /// Finds the byte index of the next grapheme boundary after `byte_idx`. - /// - /// If the byte index lies on the last grapheme cluster in the slice then this function - /// returns `RopeSlice::len_bytes`. - /// - /// # Example - /// - /// ``` - /// # use ropey::Rope; - /// # use helix_stdx::rope::RopeSliceExt; - /// let text = Rope::from_str("πΆβπ«οΈπ΄ββ οΈπΌοΈ"); - /// let slice = text.slice(..); - /// let mut byte_idx = 0; - /// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("πΆβπ«οΈ".into())); - /// byte_idx = slice.next_grapheme_boundary(byte_idx); - /// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("π΄ββ οΈ".into())); - /// - /// // If `byte_idx` does not lie on a character or grapheme boundary then this function is - /// // functionally the same as `ceil_grapheme_boundary`. - /// assert_eq!(slice.next_grapheme_boundary(byte_idx - 1), byte_idx); - /// assert_eq!(slice.next_grapheme_boundary(byte_idx - 2), byte_idx); - /// assert_eq!(slice.next_grapheme_boundary(byte_idx + 1), slice.next_grapheme_boundary(byte_idx)); - /// assert_eq!(slice.next_grapheme_boundary(byte_idx + 2), slice.next_grapheme_boundary(byte_idx)); - /// - /// byte_idx = slice.next_grapheme_boundary(byte_idx); - /// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("πΌοΈ".into())); - /// byte_idx = slice.next_grapheme_boundary(byte_idx); - /// assert_eq!(slice.graphemes_at(byte_idx).next(), None); - /// assert_eq!(byte_idx, slice.len_bytes()); - /// ``` - fn next_grapheme_boundary(self, byte_idx: usize) -> usize { - self.nth_next_grapheme_boundary(byte_idx, 1) - } - /// Finds the byte index of the `n`th grapheme cluster after the given `byte_idx`. - /// - /// If there are fewer than `n` grapheme clusters after `byte_idx` in the rope then this - /// function returns `RopeSlice::len_bytes`. - /// - /// This is functionally equivalent to calling `next_grapheme_boundary` `n` times but is more - /// efficient. - fn nth_next_grapheme_boundary(self, byte_idx: usize, n: usize) -> usize; - /// Finds the byte index of the previous grapheme boundary before `byte_idx`. - /// - /// If the byte index lies on the first grapheme cluster in the slice then this function - /// returns zero. - /// - /// # Example - /// - /// ``` - /// # use ropey::Rope; - /// # use helix_stdx::rope::RopeSliceExt; - /// let text = Rope::from_str("πΆβπ«οΈπ΄ββ οΈπΌοΈ"); - /// let slice = text.slice(..); - /// let mut byte_idx = text.len_bytes(); - /// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("πΌοΈ".into())); - /// byte_idx = slice.prev_grapheme_boundary(byte_idx); - /// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("π΄ββ οΈ".into())); - /// - /// // If `byte_idx` does not lie on a character or grapheme boundary then this function is - /// // functionally the same as `floor_grapheme_boundary`. - /// assert_eq!(slice.prev_grapheme_boundary(byte_idx + 1), byte_idx); - /// assert_eq!(slice.prev_grapheme_boundary(byte_idx + 2), byte_idx); - /// assert_eq!(slice.prev_grapheme_boundary(byte_idx - 1), slice.prev_grapheme_boundary(byte_idx)); - /// assert_eq!(slice.prev_grapheme_boundary(byte_idx - 2), slice.prev_grapheme_boundary(byte_idx)); - /// - /// byte_idx = slice.prev_grapheme_boundary(byte_idx); - /// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("πΆβπ«οΈ".into())); - /// byte_idx = slice.prev_grapheme_boundary(byte_idx); - /// assert_eq!(slice.graphemes_at(byte_idx).prev(), None); - /// assert_eq!(byte_idx, 0); - /// ``` - fn prev_grapheme_boundary(self, byte_idx: usize) -> usize { - self.nth_prev_grapheme_boundary(byte_idx, 1) - } - /// Finds the byte index of the `n`th grapheme cluster before the given `byte_idx`. - /// - /// If there are fewer than `n` grapheme clusters before `byte_idx` in the rope then this - /// function returns zero. - /// - /// This is functionally equivalent to calling `prev_grapheme_boundary` `n` times but is more - /// efficient. - fn nth_prev_grapheme_boundary(self, byte_idx: usize, n: usize) -> usize; + fn graphemes_rev(self) -> RevRopeGraphemes<'a>; } impl<'a> RopeSliceExt<'a> for RopeSlice<'a> { @@ -459,110 +335,30 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> { } } - fn graphemes_rev(self) -> RopeGraphemes<'a> { - self.graphemes_at(self.len_bytes()).reversed() - } - - fn graphemes_at(self, byte_idx: usize) -> RopeGraphemes<'a> { - // Bounds check - assert!(byte_idx <= self.len_bytes()); - - let (mut chunks, chunk_byte_idx, _, _) = self.chunks_at_byte(byte_idx); - let current_chunk = chunks.next().unwrap_or(""); - + fn graphemes(self) -> RopeGraphemes<'a> { + let mut chunks = self.chunks(); + let first_chunk = chunks.next().unwrap_or(""); RopeGraphemes { text: self, chunks, - current_chunk, - chunk_byte_idx, - cursor: GraphemeCursor::new(byte_idx, self.len_bytes(), true), - is_reversed: false, - } - } - - fn grapheme_indices_at(self, byte_idx: usize) -> RopeGraphemeIndices<'a> { - // Bounds check - assert!(byte_idx <= self.len_bytes()); - RopeGraphemeIndices { - front_offset: byte_idx, - iter: self.graphemes_at(byte_idx), - is_reversed: false, - } - } - - fn nth_next_grapheme_boundary(self, mut byte_idx: usize, n: usize) -> usize { - // Bounds check - assert!(byte_idx <= self.len_bytes()); - - byte_idx = self.floor_char_boundary(byte_idx); - - // Get the chunk with our byte index in it. - let (mut chunk, mut chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx); - - // Set up the grapheme cursor. - let mut gc = GraphemeCursor::new(byte_idx, self.len_bytes(), true); - - // Find the nth next grapheme cluster boundary. - for _ in 0..n { - loop { - match gc.next_boundary(chunk, chunk_byte_idx) { - Ok(None) => return self.len_bytes(), - Ok(Some(boundary)) => { - byte_idx = boundary; - break; - } - Err(GraphemeIncomplete::NextChunk) => { - chunk_byte_idx += chunk.len(); - let (a, _, _, _) = self.chunk_at_byte(chunk_byte_idx); - chunk = a; - } - Err(GraphemeIncomplete::PreContext(n)) => { - let ctx_chunk = self.chunk_at_byte(n - 1).0; - gc.provide_context(ctx_chunk, n - ctx_chunk.len()); - } - _ => unreachable!(), - } - } + cur_chunk: first_chunk, + cur_chunk_start: 0, + cursor: GraphemeCursor::new(0, self.len_bytes(), true), } - - byte_idx } - fn nth_prev_grapheme_boundary(self, mut byte_idx: usize, n: usize) -> usize { - // Bounds check - assert!(byte_idx <= self.len_bytes()); - - byte_idx = self.ceil_char_boundary(byte_idx); - - // Get the chunk with our byte index in it. - let (mut chunk, mut chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx); - - // Set up the grapheme cursor. - let mut gc = GraphemeCursor::new(byte_idx, self.len_bytes(), true); - - for _ in 0..n { - loop { - match gc.prev_boundary(chunk, chunk_byte_idx) { - Ok(None) => return 0, - Ok(Some(boundary)) => { - byte_idx = boundary; - break; - } - Err(GraphemeIncomplete::PrevChunk) => { - let (a, b, _, _) = self.chunk_at_byte(chunk_byte_idx - 1); - chunk = a; - chunk_byte_idx = b; - } - Err(GraphemeIncomplete::PreContext(n)) => { - let ctx_chunk = self.chunk_at_byte(n - 1).0; - gc.provide_context(ctx_chunk, n - ctx_chunk.len()); - } - _ => unreachable!(), - } - } + fn graphemes_rev(self) -> RevRopeGraphemes<'a> { + let (mut chunks, mut cur_chunk_start, _, _) = self.chunks_at_byte(self.len_bytes()); + chunks.reverse(); + let first_chunk = chunks.next().unwrap_or(""); + cur_chunk_start -= first_chunk.len(); + RevRopeGraphemes { + text: self, + chunks, + cur_chunk: first_chunk, + cur_chunk_start, + cursor: GraphemeCursor::new(self.len_bytes(), self.len_bytes(), true), } - - byte_idx } } @@ -574,19 +370,13 @@ const fn is_utf8_char_boundary(b: u8) -> bool { } /// An iterator over the graphemes of a `RopeSlice`. -/// -/// This iterator is cursor-like: rather than implementing DoubleEndedIterator it can be reversed -/// like a cursor. This style matches `Bytes` and `Chars` iterator types in Ropey and is more -/// natural and useful for wrapping `GraphemeCursor`. #[derive(Clone)] pub struct RopeGraphemes<'a> { text: RopeSlice<'a>, chunks: Chunks<'a>, - current_chunk: &'a str, - /// Byte index of the start of the current chunk. - chunk_byte_idx: usize, + cur_chunk: &'a str, + cur_chunk_start: usize, cursor: GraphemeCursor, - is_reversed: bool, } impl fmt::Debug for RopeGraphemes<'_> { @@ -594,58 +384,34 @@ impl fmt::Debug for RopeGraphemes<'_> { f.debug_struct("RopeGraphemes") .field("text", &self.text) .field("chunks", &self.chunks) - .field("current_chunk", &self.current_chunk) - .field("chunk_byte_idx", &self.chunk_byte_idx) + .field("cur_chunk", &self.cur_chunk) + .field("cur_chunk_start", &self.cur_chunk_start) // .field("cursor", &self.cursor) - .field("is_reversed", &self.is_reversed) .finish() } } -impl<'a> RopeGraphemes<'a> { - #[allow(clippy::should_implement_trait)] - pub fn next(&mut self) -> Option<RopeSlice<'a>> { - if self.is_reversed { - self.prev_impl() - } else { - self.next_impl() - } - } - - pub fn prev(&mut self) -> Option<RopeSlice<'a>> { - if self.is_reversed { - self.next_impl() - } else { - self.prev_impl() - } - } - - pub fn reverse(&mut self) { - self.is_reversed = !self.is_reversed; - } - - #[must_use] - pub fn reversed(mut self) -> Self { - self.reverse(); - self - } +impl<'a> Iterator for RopeGraphemes<'a> { + type Item = RopeSlice<'a>; - fn next_impl(&mut self) -> Option<RopeSlice<'a>> { + fn next(&mut self) -> Option<Self::Item> { let a = self.cursor.cur_cursor(); let b; loop { match self .cursor - .next_boundary(self.current_chunk, self.chunk_byte_idx) + .next_boundary(self.cur_chunk, self.cur_chunk_start) { - Ok(None) => return None, - Ok(Some(boundary)) => { - b = boundary; + Ok(None) => { + return None; + } + Ok(Some(n)) => { + b = n; break; } Err(GraphemeIncomplete::NextChunk) => { - self.chunk_byte_idx += self.current_chunk.len(); - self.current_chunk = self.chunks.next().unwrap_or(""); + self.cur_chunk_start += self.cur_chunk.len(); + self.cur_chunk = self.chunks.next().unwrap_or(""); } Err(GraphemeIncomplete::PreContext(idx)) => { let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1)); @@ -655,31 +421,59 @@ impl<'a> RopeGraphemes<'a> { } } - if a < self.chunk_byte_idx { + if a < self.cur_chunk_start { Some(self.text.byte_slice(a..b)) } else { - let a2 = a - self.chunk_byte_idx; - let b2 = b - self.chunk_byte_idx; - Some((&self.current_chunk[a2..b2]).into()) + let a2 = a - self.cur_chunk_start; + let b2 = b - self.cur_chunk_start; + Some((&self.cur_chunk[a2..b2]).into()) } } +} - fn prev_impl(&mut self) -> Option<RopeSlice<'a>> { +/// An iterator over the graphemes of a `RopeSlice` in reverse. +#[derive(Clone)] +pub struct RevRopeGraphemes<'a> { + text: RopeSlice<'a>, + chunks: Chunks<'a>, + cur_chunk: &'a str, + cur_chunk_start: usize, + cursor: GraphemeCursor, +} + +impl fmt::Debug for RevRopeGraphemes<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RevRopeGraphemes") + .field("text", &self.text) + .field("chunks", &self.chunks) + .field("cur_chunk", &self.cur_chunk) + .field("cur_chunk_start", &self.cur_chunk_start) + // .field("cursor", &self.cursor) + .finish() + } +} + +impl<'a> Iterator for RevRopeGraphemes<'a> { + type Item = RopeSlice<'a>; + + fn next(&mut self) -> Option<Self::Item> { let a = self.cursor.cur_cursor(); let b; loop { match self .cursor - .prev_boundary(self.current_chunk, self.chunk_byte_idx) + .prev_boundary(self.cur_chunk, self.cur_chunk_start) { - Ok(None) => return None, - Ok(Some(boundary)) => { - b = boundary; + Ok(None) => { + return None; + } + Ok(Some(n)) => { + b = n; break; } Err(GraphemeIncomplete::PrevChunk) => { - self.current_chunk = self.chunks.prev().unwrap_or(""); - self.chunk_byte_idx -= self.current_chunk.len(); + self.cur_chunk = self.chunks.next().unwrap_or(""); + self.cur_chunk_start -= self.cur_chunk.len(); } Err(GraphemeIncomplete::PreContext(idx)) => { let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1)); @@ -689,84 +483,14 @@ impl<'a> RopeGraphemes<'a> { } } - if a >= self.chunk_byte_idx + self.current_chunk.len() { + if a >= self.cur_chunk_start + self.cur_chunk.len() { Some(self.text.byte_slice(b..a)) } else { - let a2 = a - self.chunk_byte_idx; - let b2 = b - self.chunk_byte_idx; - Some((&self.current_chunk[b2..a2]).into()) - } - } -} - -impl<'a> Iterator for RopeGraphemes<'a> { - type Item = RopeSlice<'a>; - - fn next(&mut self) -> Option<Self::Item> { - RopeGraphemes::next(self) - } -} - -/// An iterator over the grapheme clusters in a rope and the byte indices where each grapheme -/// cluster starts. -/// -/// This iterator wraps `RopeGraphemes` and is also cursor-like. Use `reverse` or `reversed` to -/// toggle the direction of the iterator. See [RopeGraphemes]. -#[derive(Debug, Clone)] -pub struct RopeGraphemeIndices<'a> { - front_offset: usize, - iter: RopeGraphemes<'a>, - is_reversed: bool, -} - -impl<'a> RopeGraphemeIndices<'a> { - #[allow(clippy::should_implement_trait)] - pub fn next(&mut self) -> Option<(usize, RopeSlice<'a>)> { - if self.is_reversed { - self.prev_impl() - } else { - self.next_impl() - } - } - - pub fn prev(&mut self) -> Option<(usize, RopeSlice<'a>)> { - if self.is_reversed { - self.next_impl() - } else { - self.prev_impl() + let a2 = a - self.cur_chunk_start; + let b2 = b - self.cur_chunk_start; + Some((&self.cur_chunk[b2..a2]).into()) } } - - pub fn reverse(&mut self) { - self.is_reversed = !self.is_reversed; - } - - #[must_use] - pub fn reversed(mut self) -> Self { - self.reverse(); - self - } - - fn next_impl(&mut self) -> Option<(usize, RopeSlice<'a>)> { - let slice = self.iter.next()?; - let idx = self.front_offset; - self.front_offset += slice.len_bytes(); - Some((idx, slice)) - } - - fn prev_impl(&mut self) -> Option<(usize, RopeSlice<'a>)> { - let slice = self.iter.prev()?; - self.front_offset -= slice.len_bytes(); - Some((self.front_offset, slice)) - } -} - -impl<'a> Iterator for RopeGraphemeIndices<'a> { - type Item = (usize, RopeSlice<'a>); - - fn next(&mut self) -> Option<Self::Item> { - RopeGraphemeIndices::next(self) - } } #[cfg(test)] |