Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-stdx/src/rope.rs')
-rw-r--r--helix-stdx/src/rope.rs430
1 files changed, 77 insertions, 353 deletions
diff --git a/helix-stdx/src/rope.rs b/helix-stdx/src/rope.rs
index 1ba4bc53..9fc348f5 100644
--- a/helix-stdx/src/rope.rs
+++ b/helix-stdx/src/rope.rs
@@ -1,4 +1,3 @@
-//! Functions and types for working with [RopeSlice]
use std::fmt;
use std::ops::{Bound, RangeBounds};
@@ -9,7 +8,6 @@ use ropey::iter::Chunks;
use ropey::RopeSlice;
use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
-/// Additional utility functions for [RopeSlice]
pub trait RopeSliceExt<'a>: Sized {
fn ends_with(self, text: &str) -> bool;
fn starts_with(self, text: &str) -> bool;
@@ -137,9 +135,7 @@ pub trait RopeSliceExt<'a>: Sized {
/// let graphemes: Vec<_> = text.graphemes().collect();
/// assert_eq!(graphemes.as_slice(), &["πŸ˜Άβ€πŸŒ«οΈ", "πŸ΄β€β˜ οΈ", "πŸ–ΌοΈ"]);
/// ```
- fn graphemes(self) -> RopeGraphemes<'a> {
- self.graphemes_at(0)
- }
+ fn graphemes(self) -> RopeGraphemes<'a>;
/// Returns an iterator over the grapheme clusters in the slice, reversed.
///
/// The returned iterator starts at the end of the slice and ends at the beginning of the
@@ -154,127 +150,7 @@ pub trait RopeSliceExt<'a>: Sized {
/// let graphemes: Vec<_> = text.graphemes_rev().collect();
/// assert_eq!(graphemes.as_slice(), &["πŸ–ΌοΈ", "πŸ΄β€β˜ οΈ", "πŸ˜Άβ€πŸŒ«οΈ"]);
/// ```
- fn graphemes_rev(self) -> RopeGraphemes<'a>;
- /// Returns an iterator over the grapheme clusters in the slice at the given byte index.
- ///
- /// # Example
- ///
- /// ```
- /// # use ropey::Rope;
- /// # use helix_stdx::rope::RopeSliceExt;
- /// let text = Rope::from_str("πŸ˜Άβ€πŸŒ«οΈπŸ΄β€β˜ οΈπŸ–ΌοΈ");
- /// // 14 is the byte index of the pirate flag's starting cluster boundary.
- /// let graphemes: Vec<_> = text.slice(..).graphemes_at(14).collect();
- /// assert_eq!(graphemes.as_slice(), &["πŸ΄β€β˜ οΈ", "πŸ–ΌοΈ"]);
- /// // 27 is the byte index of the pirate flag's ending cluster boundary.
- /// let graphemes: Vec<_> = text.slice(..).graphemes_at(27).reversed().collect();
- /// assert_eq!(graphemes.as_slice(), &["πŸ΄β€β˜ οΈ", "πŸ˜Άβ€πŸŒ«οΈ"]);
- /// ```
- fn graphemes_at(self, byte_idx: usize) -> RopeGraphemes<'a>;
- /// Returns an iterator over the grapheme clusters in a rope and the byte index where each
- /// grapheme cluster starts.
- ///
- /// # Example
- ///
- /// ```
- /// # use ropey::Rope;
- /// # use helix_stdx::rope::RopeSliceExt;
- /// let text = Rope::from_str("πŸ˜Άβ€πŸŒ«οΈπŸ΄β€β˜ οΈπŸ–ΌοΈ");
- /// let slice = text.slice(..);
- /// let graphemes: Vec<_> = slice.grapheme_indices_at(0).collect();
- /// assert_eq!(
- /// graphemes.as_slice(),
- /// &[(0, "πŸ˜Άβ€πŸŒ«οΈ".into()), (14, "πŸ΄β€β˜ οΈ".into()), (27, "πŸ–ΌοΈ".into())]
- /// );
- /// let graphemes: Vec<_> = slice.grapheme_indices_at(slice.len_bytes()).reversed().collect();
- /// assert_eq!(
- /// graphemes.as_slice(),
- /// &[(27, "πŸ–ΌοΈ".into()), (14, "πŸ΄β€β˜ οΈ".into()), (0, "πŸ˜Άβ€πŸŒ«οΈ".into())]
- /// );
- /// ```
- fn grapheme_indices_at(self, byte_idx: usize) -> RopeGraphemeIndices<'a>;
- /// Finds the byte index of the next grapheme boundary after `byte_idx`.
- ///
- /// If the byte index lies on the last grapheme cluster in the slice then this function
- /// returns `RopeSlice::len_bytes`.
- ///
- /// # Example
- ///
- /// ```
- /// # use ropey::Rope;
- /// # use helix_stdx::rope::RopeSliceExt;
- /// let text = Rope::from_str("πŸ˜Άβ€πŸŒ«οΈπŸ΄β€β˜ οΈπŸ–ΌοΈ");
- /// let slice = text.slice(..);
- /// let mut byte_idx = 0;
- /// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("πŸ˜Άβ€πŸŒ«οΈ".into()));
- /// byte_idx = slice.next_grapheme_boundary(byte_idx);
- /// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("πŸ΄β€β˜ οΈ".into()));
- ///
- /// // If `byte_idx` does not lie on a character or grapheme boundary then this function is
- /// // functionally the same as `ceil_grapheme_boundary`.
- /// assert_eq!(slice.next_grapheme_boundary(byte_idx - 1), byte_idx);
- /// assert_eq!(slice.next_grapheme_boundary(byte_idx - 2), byte_idx);
- /// assert_eq!(slice.next_grapheme_boundary(byte_idx + 1), slice.next_grapheme_boundary(byte_idx));
- /// assert_eq!(slice.next_grapheme_boundary(byte_idx + 2), slice.next_grapheme_boundary(byte_idx));
- ///
- /// byte_idx = slice.next_grapheme_boundary(byte_idx);
- /// assert_eq!(slice.graphemes_at(byte_idx).next(), Some("πŸ–ΌοΈ".into()));
- /// byte_idx = slice.next_grapheme_boundary(byte_idx);
- /// assert_eq!(slice.graphemes_at(byte_idx).next(), None);
- /// assert_eq!(byte_idx, slice.len_bytes());
- /// ```
- fn next_grapheme_boundary(self, byte_idx: usize) -> usize {
- self.nth_next_grapheme_boundary(byte_idx, 1)
- }
- /// Finds the byte index of the `n`th grapheme cluster after the given `byte_idx`.
- ///
- /// If there are fewer than `n` grapheme clusters after `byte_idx` in the rope then this
- /// function returns `RopeSlice::len_bytes`.
- ///
- /// This is functionally equivalent to calling `next_grapheme_boundary` `n` times but is more
- /// efficient.
- fn nth_next_grapheme_boundary(self, byte_idx: usize, n: usize) -> usize;
- /// Finds the byte index of the previous grapheme boundary before `byte_idx`.
- ///
- /// If the byte index lies on the first grapheme cluster in the slice then this function
- /// returns zero.
- ///
- /// # Example
- ///
- /// ```
- /// # use ropey::Rope;
- /// # use helix_stdx::rope::RopeSliceExt;
- /// let text = Rope::from_str("πŸ˜Άβ€πŸŒ«οΈπŸ΄β€β˜ οΈπŸ–ΌοΈ");
- /// let slice = text.slice(..);
- /// let mut byte_idx = text.len_bytes();
- /// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("πŸ–ΌοΈ".into()));
- /// byte_idx = slice.prev_grapheme_boundary(byte_idx);
- /// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("πŸ΄β€β˜ οΈ".into()));
- ///
- /// // If `byte_idx` does not lie on a character or grapheme boundary then this function is
- /// // functionally the same as `floor_grapheme_boundary`.
- /// assert_eq!(slice.prev_grapheme_boundary(byte_idx + 1), byte_idx);
- /// assert_eq!(slice.prev_grapheme_boundary(byte_idx + 2), byte_idx);
- /// assert_eq!(slice.prev_grapheme_boundary(byte_idx - 1), slice.prev_grapheme_boundary(byte_idx));
- /// assert_eq!(slice.prev_grapheme_boundary(byte_idx - 2), slice.prev_grapheme_boundary(byte_idx));
- ///
- /// byte_idx = slice.prev_grapheme_boundary(byte_idx);
- /// assert_eq!(slice.graphemes_at(byte_idx).prev(), Some("πŸ˜Άβ€πŸŒ«οΈ".into()));
- /// byte_idx = slice.prev_grapheme_boundary(byte_idx);
- /// assert_eq!(slice.graphemes_at(byte_idx).prev(), None);
- /// assert_eq!(byte_idx, 0);
- /// ```
- fn prev_grapheme_boundary(self, byte_idx: usize) -> usize {
- self.nth_prev_grapheme_boundary(byte_idx, 1)
- }
- /// Finds the byte index of the `n`th grapheme cluster before the given `byte_idx`.
- ///
- /// If there are fewer than `n` grapheme clusters before `byte_idx` in the rope then this
- /// function returns zero.
- ///
- /// This is functionally equivalent to calling `prev_grapheme_boundary` `n` times but is more
- /// efficient.
- fn nth_prev_grapheme_boundary(self, byte_idx: usize, n: usize) -> usize;
+ fn graphemes_rev(self) -> RevRopeGraphemes<'a>;
}
impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
@@ -459,110 +335,30 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
}
}
- fn graphemes_rev(self) -> RopeGraphemes<'a> {
- self.graphemes_at(self.len_bytes()).reversed()
- }
-
- fn graphemes_at(self, byte_idx: usize) -> RopeGraphemes<'a> {
- // Bounds check
- assert!(byte_idx <= self.len_bytes());
-
- let (mut chunks, chunk_byte_idx, _, _) = self.chunks_at_byte(byte_idx);
- let current_chunk = chunks.next().unwrap_or("");
-
+ fn graphemes(self) -> RopeGraphemes<'a> {
+ let mut chunks = self.chunks();
+ let first_chunk = chunks.next().unwrap_or("");
RopeGraphemes {
text: self,
chunks,
- current_chunk,
- chunk_byte_idx,
- cursor: GraphemeCursor::new(byte_idx, self.len_bytes(), true),
- is_reversed: false,
- }
- }
-
- fn grapheme_indices_at(self, byte_idx: usize) -> RopeGraphemeIndices<'a> {
- // Bounds check
- assert!(byte_idx <= self.len_bytes());
- RopeGraphemeIndices {
- front_offset: byte_idx,
- iter: self.graphemes_at(byte_idx),
- is_reversed: false,
- }
- }
-
- fn nth_next_grapheme_boundary(self, mut byte_idx: usize, n: usize) -> usize {
- // Bounds check
- assert!(byte_idx <= self.len_bytes());
-
- byte_idx = self.floor_char_boundary(byte_idx);
-
- // Get the chunk with our byte index in it.
- let (mut chunk, mut chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx);
-
- // Set up the grapheme cursor.
- let mut gc = GraphemeCursor::new(byte_idx, self.len_bytes(), true);
-
- // Find the nth next grapheme cluster boundary.
- for _ in 0..n {
- loop {
- match gc.next_boundary(chunk, chunk_byte_idx) {
- Ok(None) => return self.len_bytes(),
- Ok(Some(boundary)) => {
- byte_idx = boundary;
- break;
- }
- Err(GraphemeIncomplete::NextChunk) => {
- chunk_byte_idx += chunk.len();
- let (a, _, _, _) = self.chunk_at_byte(chunk_byte_idx);
- chunk = a;
- }
- Err(GraphemeIncomplete::PreContext(n)) => {
- let ctx_chunk = self.chunk_at_byte(n - 1).0;
- gc.provide_context(ctx_chunk, n - ctx_chunk.len());
- }
- _ => unreachable!(),
- }
- }
+ cur_chunk: first_chunk,
+ cur_chunk_start: 0,
+ cursor: GraphemeCursor::new(0, self.len_bytes(), true),
}
-
- byte_idx
}
- fn nth_prev_grapheme_boundary(self, mut byte_idx: usize, n: usize) -> usize {
- // Bounds check
- assert!(byte_idx <= self.len_bytes());
-
- byte_idx = self.ceil_char_boundary(byte_idx);
-
- // Get the chunk with our byte index in it.
- let (mut chunk, mut chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx);
-
- // Set up the grapheme cursor.
- let mut gc = GraphemeCursor::new(byte_idx, self.len_bytes(), true);
-
- for _ in 0..n {
- loop {
- match gc.prev_boundary(chunk, chunk_byte_idx) {
- Ok(None) => return 0,
- Ok(Some(boundary)) => {
- byte_idx = boundary;
- break;
- }
- Err(GraphemeIncomplete::PrevChunk) => {
- let (a, b, _, _) = self.chunk_at_byte(chunk_byte_idx - 1);
- chunk = a;
- chunk_byte_idx = b;
- }
- Err(GraphemeIncomplete::PreContext(n)) => {
- let ctx_chunk = self.chunk_at_byte(n - 1).0;
- gc.provide_context(ctx_chunk, n - ctx_chunk.len());
- }
- _ => unreachable!(),
- }
- }
+ fn graphemes_rev(self) -> RevRopeGraphemes<'a> {
+ let (mut chunks, mut cur_chunk_start, _, _) = self.chunks_at_byte(self.len_bytes());
+ chunks.reverse();
+ let first_chunk = chunks.next().unwrap_or("");
+ cur_chunk_start -= first_chunk.len();
+ RevRopeGraphemes {
+ text: self,
+ chunks,
+ cur_chunk: first_chunk,
+ cur_chunk_start,
+ cursor: GraphemeCursor::new(self.len_bytes(), self.len_bytes(), true),
}
-
- byte_idx
}
}
@@ -574,19 +370,13 @@ const fn is_utf8_char_boundary(b: u8) -> bool {
}
/// An iterator over the graphemes of a `RopeSlice`.
-///
-/// This iterator is cursor-like: rather than implementing DoubleEndedIterator it can be reversed
-/// like a cursor. This style matches `Bytes` and `Chars` iterator types in Ropey and is more
-/// natural and useful for wrapping `GraphemeCursor`.
#[derive(Clone)]
pub struct RopeGraphemes<'a> {
text: RopeSlice<'a>,
chunks: Chunks<'a>,
- current_chunk: &'a str,
- /// Byte index of the start of the current chunk.
- chunk_byte_idx: usize,
+ cur_chunk: &'a str,
+ cur_chunk_start: usize,
cursor: GraphemeCursor,
- is_reversed: bool,
}
impl fmt::Debug for RopeGraphemes<'_> {
@@ -594,58 +384,34 @@ impl fmt::Debug for RopeGraphemes<'_> {
f.debug_struct("RopeGraphemes")
.field("text", &self.text)
.field("chunks", &self.chunks)
- .field("current_chunk", &self.current_chunk)
- .field("chunk_byte_idx", &self.chunk_byte_idx)
+ .field("cur_chunk", &self.cur_chunk)
+ .field("cur_chunk_start", &self.cur_chunk_start)
// .field("cursor", &self.cursor)
- .field("is_reversed", &self.is_reversed)
.finish()
}
}
-impl<'a> RopeGraphemes<'a> {
- #[allow(clippy::should_implement_trait)]
- pub fn next(&mut self) -> Option<RopeSlice<'a>> {
- if self.is_reversed {
- self.prev_impl()
- } else {
- self.next_impl()
- }
- }
-
- pub fn prev(&mut self) -> Option<RopeSlice<'a>> {
- if self.is_reversed {
- self.next_impl()
- } else {
- self.prev_impl()
- }
- }
-
- pub fn reverse(&mut self) {
- self.is_reversed = !self.is_reversed;
- }
-
- #[must_use]
- pub fn reversed(mut self) -> Self {
- self.reverse();
- self
- }
+impl<'a> Iterator for RopeGraphemes<'a> {
+ type Item = RopeSlice<'a>;
- fn next_impl(&mut self) -> Option<RopeSlice<'a>> {
+ fn next(&mut self) -> Option<Self::Item> {
let a = self.cursor.cur_cursor();
let b;
loop {
match self
.cursor
- .next_boundary(self.current_chunk, self.chunk_byte_idx)
+ .next_boundary(self.cur_chunk, self.cur_chunk_start)
{
- Ok(None) => return None,
- Ok(Some(boundary)) => {
- b = boundary;
+ Ok(None) => {
+ return None;
+ }
+ Ok(Some(n)) => {
+ b = n;
break;
}
Err(GraphemeIncomplete::NextChunk) => {
- self.chunk_byte_idx += self.current_chunk.len();
- self.current_chunk = self.chunks.next().unwrap_or("");
+ self.cur_chunk_start += self.cur_chunk.len();
+ self.cur_chunk = self.chunks.next().unwrap_or("");
}
Err(GraphemeIncomplete::PreContext(idx)) => {
let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
@@ -655,31 +421,59 @@ impl<'a> RopeGraphemes<'a> {
}
}
- if a < self.chunk_byte_idx {
+ if a < self.cur_chunk_start {
Some(self.text.byte_slice(a..b))
} else {
- let a2 = a - self.chunk_byte_idx;
- let b2 = b - self.chunk_byte_idx;
- Some((&self.current_chunk[a2..b2]).into())
+ let a2 = a - self.cur_chunk_start;
+ let b2 = b - self.cur_chunk_start;
+ Some((&self.cur_chunk[a2..b2]).into())
}
}
+}
- fn prev_impl(&mut self) -> Option<RopeSlice<'a>> {
+/// An iterator over the graphemes of a `RopeSlice` in reverse.
+#[derive(Clone)]
+pub struct RevRopeGraphemes<'a> {
+ text: RopeSlice<'a>,
+ chunks: Chunks<'a>,
+ cur_chunk: &'a str,
+ cur_chunk_start: usize,
+ cursor: GraphemeCursor,
+}
+
+impl fmt::Debug for RevRopeGraphemes<'_> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_struct("RevRopeGraphemes")
+ .field("text", &self.text)
+ .field("chunks", &self.chunks)
+ .field("cur_chunk", &self.cur_chunk)
+ .field("cur_chunk_start", &self.cur_chunk_start)
+ // .field("cursor", &self.cursor)
+ .finish()
+ }
+}
+
+impl<'a> Iterator for RevRopeGraphemes<'a> {
+ type Item = RopeSlice<'a>;
+
+ fn next(&mut self) -> Option<Self::Item> {
let a = self.cursor.cur_cursor();
let b;
loop {
match self
.cursor
- .prev_boundary(self.current_chunk, self.chunk_byte_idx)
+ .prev_boundary(self.cur_chunk, self.cur_chunk_start)
{
- Ok(None) => return None,
- Ok(Some(boundary)) => {
- b = boundary;
+ Ok(None) => {
+ return None;
+ }
+ Ok(Some(n)) => {
+ b = n;
break;
}
Err(GraphemeIncomplete::PrevChunk) => {
- self.current_chunk = self.chunks.prev().unwrap_or("");
- self.chunk_byte_idx -= self.current_chunk.len();
+ self.cur_chunk = self.chunks.next().unwrap_or("");
+ self.cur_chunk_start -= self.cur_chunk.len();
}
Err(GraphemeIncomplete::PreContext(idx)) => {
let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
@@ -689,84 +483,14 @@ impl<'a> RopeGraphemes<'a> {
}
}
- if a >= self.chunk_byte_idx + self.current_chunk.len() {
+ if a >= self.cur_chunk_start + self.cur_chunk.len() {
Some(self.text.byte_slice(b..a))
} else {
- let a2 = a - self.chunk_byte_idx;
- let b2 = b - self.chunk_byte_idx;
- Some((&self.current_chunk[b2..a2]).into())
- }
- }
-}
-
-impl<'a> Iterator for RopeGraphemes<'a> {
- type Item = RopeSlice<'a>;
-
- fn next(&mut self) -> Option<Self::Item> {
- RopeGraphemes::next(self)
- }
-}
-
-/// An iterator over the grapheme clusters in a rope and the byte indices where each grapheme
-/// cluster starts.
-///
-/// This iterator wraps `RopeGraphemes` and is also cursor-like. Use `reverse` or `reversed` to
-/// toggle the direction of the iterator. See [RopeGraphemes].
-#[derive(Debug, Clone)]
-pub struct RopeGraphemeIndices<'a> {
- front_offset: usize,
- iter: RopeGraphemes<'a>,
- is_reversed: bool,
-}
-
-impl<'a> RopeGraphemeIndices<'a> {
- #[allow(clippy::should_implement_trait)]
- pub fn next(&mut self) -> Option<(usize, RopeSlice<'a>)> {
- if self.is_reversed {
- self.prev_impl()
- } else {
- self.next_impl()
- }
- }
-
- pub fn prev(&mut self) -> Option<(usize, RopeSlice<'a>)> {
- if self.is_reversed {
- self.next_impl()
- } else {
- self.prev_impl()
+ let a2 = a - self.cur_chunk_start;
+ let b2 = b - self.cur_chunk_start;
+ Some((&self.cur_chunk[b2..a2]).into())
}
}
-
- pub fn reverse(&mut self) {
- self.is_reversed = !self.is_reversed;
- }
-
- #[must_use]
- pub fn reversed(mut self) -> Self {
- self.reverse();
- self
- }
-
- fn next_impl(&mut self) -> Option<(usize, RopeSlice<'a>)> {
- let slice = self.iter.next()?;
- let idx = self.front_offset;
- self.front_offset += slice.len_bytes();
- Some((idx, slice))
- }
-
- fn prev_impl(&mut self) -> Option<(usize, RopeSlice<'a>)> {
- let slice = self.iter.prev()?;
- self.front_offset -= slice.len_bytes();
- Some((self.front_offset, slice))
- }
-}
-
-impl<'a> Iterator for RopeGraphemeIndices<'a> {
- type Item = (usize, RopeSlice<'a>);
-
- fn next(&mut self) -> Option<Self::Item> {
- RopeGraphemeIndices::next(self)
- }
}
#[cfg(test)]