Unnamed repository; edit this file 'description' to name the repository.
stdx: Unify RopeSlice grapheme cluster iterators
This style for RopeGraphemes is identical to Ropey's Chars and Bytes iterators. Being able to move the iterator types like cursors over the bytes/chars/graphemes is useful in some cases. For example see `helix_core::movement::<Chars as CharHelpers>::range_to_target`. This change also adds `RopeSliceExt::graphemes_at` for flexibility. `graphemes` and `graphemes_rev` are now implemented in terms of `graphemes_at` and `RopeGraphemes::reversed`.
Michael Davis 8 months ago
parent f5dc824 · commit 9100bce
-rw-r--r--helix-stdx/src/rope.rs181
1 files changed, 102 insertions, 79 deletions
diff --git a/helix-stdx/src/rope.rs b/helix-stdx/src/rope.rs
index 315e98e5..df7a2873 100644
--- a/helix-stdx/src/rope.rs
+++ b/helix-stdx/src/rope.rs
@@ -135,7 +135,9 @@ pub trait RopeSliceExt<'a>: Sized {
/// let graphemes: Vec<_> = text.graphemes().collect();
/// assert_eq!(graphemes.as_slice(), &["πŸ˜Άβ€πŸŒ«οΈ", "πŸ΄β€β˜ οΈ", "πŸ–ΌοΈ"]);
/// ```
- fn graphemes(self) -> RopeGraphemes<'a>;
+ fn graphemes(self) -> RopeGraphemes<'a> {
+ self.graphemes_at(0)
+ }
/// Returns an iterator over the grapheme clusters in the slice, reversed.
///
/// The returned iterator starts at the end of the slice and ends at the beginning of the
@@ -150,7 +152,23 @@ pub trait RopeSliceExt<'a>: Sized {
/// let graphemes: Vec<_> = text.graphemes_rev().collect();
/// assert_eq!(graphemes.as_slice(), &["πŸ–ΌοΈ", "πŸ΄β€β˜ οΈ", "πŸ˜Άβ€πŸŒ«οΈ"]);
/// ```
- fn graphemes_rev(self) -> RevRopeGraphemes<'a>;
+ fn graphemes_rev(self) -> RopeGraphemes<'a>;
+ /// Returns an iterator over the grapheme clusters in the slice at the given byte index.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use ropey::Rope;
+ /// # use helix_stdx::rope::RopeSliceExt;
+ /// let text = Rope::from_str("πŸ˜Άβ€πŸŒ«οΈπŸ΄β€β˜ οΈπŸ–ΌοΈ");
+ /// // 14 is the byte index of the pirate flag's starting cluster boundary.
+ /// let graphemes: Vec<_> = text.slice(..).graphemes_at(14).collect();
+ /// assert_eq!(graphemes.as_slice(), &["πŸ΄β€β˜ οΈ", "πŸ–ΌοΈ"]);
+ /// // 27 is the byte index of the pirate flag's ending cluster boundary.
+ /// let graphemes: Vec<_> = text.slice(..).graphemes_at(27).reversed().collect();
+ /// assert_eq!(graphemes.as_slice(), &["πŸ΄β€β˜ οΈ", "πŸ˜Άβ€πŸŒ«οΈ"]);
+ /// ```
+ fn graphemes_at(self, byte_idx: usize) -> RopeGraphemes<'a>;
/// Finds the byte index of the next grapheme boundary after `byte_idx`.
///
/// If the byte index lies on the last grapheme cluster in the slice then this function
@@ -417,29 +435,24 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
}
}
- fn graphemes(self) -> RopeGraphemes<'a> {
- let mut chunks = self.chunks();
- let first_chunk = chunks.next().unwrap_or("");
- RopeGraphemes {
- text: self,
- chunks,
- cur_chunk: first_chunk,
- cur_chunk_start: 0,
- cursor: GraphemeCursor::new(0, self.len_bytes(), true),
- }
+ fn graphemes_rev(self) -> RopeGraphemes<'a> {
+ self.graphemes_at(self.len_bytes()).reversed()
}
- fn graphemes_rev(self) -> RevRopeGraphemes<'a> {
- let (mut chunks, mut cur_chunk_start, _, _) = self.chunks_at_byte(self.len_bytes());
- chunks.reverse();
- let first_chunk = chunks.next().unwrap_or("");
- cur_chunk_start -= first_chunk.len();
- RevRopeGraphemes {
+ fn graphemes_at(self, byte_idx: usize) -> RopeGraphemes<'a> {
+ // Bounds check
+ assert!(byte_idx <= self.len_bytes());
+
+ let (mut chunks, chunk_byte_idx, _, _) = self.chunks_at_byte(byte_idx);
+ let current_chunk = chunks.next().unwrap_or("");
+
+ RopeGraphemes {
text: self,
chunks,
- cur_chunk: first_chunk,
- cur_chunk_start,
- cursor: GraphemeCursor::new(self.len_bytes(), self.len_bytes(), true),
+ current_chunk,
+ chunk_byte_idx,
+ cursor: GraphemeCursor::new(byte_idx, self.len_bytes(), true),
+ is_reversed: false,
}
}
@@ -527,13 +540,19 @@ const fn is_utf8_char_boundary(b: u8) -> bool {
}
/// An iterator over the graphemes of a `RopeSlice`.
+///
+/// This iterator is cursor-like: rather than implementing DoubleEndedIterator it can be reversed
+/// like a cursor. This style matches `Bytes` and `Chars` iterator types in Ropey and is more
+/// natural and useful for wrapping `GraphemeCursor`.
#[derive(Clone)]
pub struct RopeGraphemes<'a> {
text: RopeSlice<'a>,
chunks: Chunks<'a>,
- cur_chunk: &'a str,
- cur_chunk_start: usize,
+ current_chunk: &'a str,
+ /// Byte index of the start of the current chunk.
+ chunk_byte_idx: usize,
cursor: GraphemeCursor,
+ is_reversed: bool,
}
impl fmt::Debug for RopeGraphemes<'_> {
@@ -541,34 +560,58 @@ impl fmt::Debug for RopeGraphemes<'_> {
f.debug_struct("RopeGraphemes")
.field("text", &self.text)
.field("chunks", &self.chunks)
- .field("cur_chunk", &self.cur_chunk)
- .field("cur_chunk_start", &self.cur_chunk_start)
+ .field("current_chunk", &self.current_chunk)
+ .field("chunk_byte_idx", &self.chunk_byte_idx)
// .field("cursor", &self.cursor)
+ .field("is_reversed", &self.is_reversed)
.finish()
}
}
-impl<'a> Iterator for RopeGraphemes<'a> {
- type Item = RopeSlice<'a>;
+impl<'a> RopeGraphemes<'a> {
+ #[allow(clippy::should_implement_trait)]
+ pub fn next(&mut self) -> Option<RopeSlice<'a>> {
+ if self.is_reversed {
+ self.prev_impl()
+ } else {
+ self.next_impl()
+ }
+ }
- fn next(&mut self) -> Option<Self::Item> {
+ pub fn prev(&mut self) -> Option<RopeSlice<'a>> {
+ if self.is_reversed {
+ self.next_impl()
+ } else {
+ self.prev_impl()
+ }
+ }
+
+ pub fn reverse(&mut self) {
+ self.is_reversed = !self.is_reversed;
+ }
+
+ #[must_use]
+ pub fn reversed(mut self) -> Self {
+ self.reverse();
+ self
+ }
+
+ fn next_impl(&mut self) -> Option<RopeSlice<'a>> {
let a = self.cursor.cur_cursor();
let b;
loop {
match self
.cursor
- .next_boundary(self.cur_chunk, self.cur_chunk_start)
+ .next_boundary(self.current_chunk, self.chunk_byte_idx)
{
- Ok(None) => {
- return None;
- }
- Ok(Some(n)) => {
- b = n;
+ Ok(None) => return None,
+ Ok(Some(boundary)) => {
+ b = boundary;
break;
}
Err(GraphemeIncomplete::NextChunk) => {
- self.cur_chunk_start += self.cur_chunk.len();
- self.cur_chunk = self.chunks.next().unwrap_or("");
+ self.chunk_byte_idx += self.current_chunk.len();
+ self.current_chunk = self.chunks.next().unwrap_or("");
}
Err(GraphemeIncomplete::PreContext(idx)) => {
let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
@@ -578,59 +621,31 @@ impl<'a> Iterator for RopeGraphemes<'a> {
}
}
- if a < self.cur_chunk_start {
+ if a < self.chunk_byte_idx {
Some(self.text.byte_slice(a..b))
} else {
- let a2 = a - self.cur_chunk_start;
- let b2 = b - self.cur_chunk_start;
- Some((&self.cur_chunk[a2..b2]).into())
+ let a2 = a - self.chunk_byte_idx;
+ let b2 = b - self.chunk_byte_idx;
+ Some((&self.current_chunk[a2..b2]).into())
}
}
-}
-
-/// An iterator over the graphemes of a `RopeSlice` in reverse.
-#[derive(Clone)]
-pub struct RevRopeGraphemes<'a> {
- text: RopeSlice<'a>,
- chunks: Chunks<'a>,
- cur_chunk: &'a str,
- cur_chunk_start: usize,
- cursor: GraphemeCursor,
-}
-
-impl fmt::Debug for RevRopeGraphemes<'_> {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- f.debug_struct("RevRopeGraphemes")
- .field("text", &self.text)
- .field("chunks", &self.chunks)
- .field("cur_chunk", &self.cur_chunk)
- .field("cur_chunk_start", &self.cur_chunk_start)
- // .field("cursor", &self.cursor)
- .finish()
- }
-}
-impl<'a> Iterator for RevRopeGraphemes<'a> {
- type Item = RopeSlice<'a>;
-
- fn next(&mut self) -> Option<Self::Item> {
+ fn prev_impl(&mut self) -> Option<RopeSlice<'a>> {
let a = self.cursor.cur_cursor();
let b;
loop {
match self
.cursor
- .prev_boundary(self.cur_chunk, self.cur_chunk_start)
+ .prev_boundary(self.current_chunk, self.chunk_byte_idx)
{
- Ok(None) => {
- return None;
- }
- Ok(Some(n)) => {
- b = n;
+ Ok(None) => return None,
+ Ok(Some(boundary)) => {
+ b = boundary;
break;
}
Err(GraphemeIncomplete::PrevChunk) => {
- self.cur_chunk = self.chunks.next().unwrap_or("");
- self.cur_chunk_start -= self.cur_chunk.len();
+ self.current_chunk = self.chunks.prev().unwrap_or("");
+ self.chunk_byte_idx -= self.current_chunk.len();
}
Err(GraphemeIncomplete::PreContext(idx)) => {
let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
@@ -640,16 +655,24 @@ impl<'a> Iterator for RevRopeGraphemes<'a> {
}
}
- if a >= self.cur_chunk_start + self.cur_chunk.len() {
+ if a >= self.chunk_byte_idx + self.current_chunk.len() {
Some(self.text.byte_slice(b..a))
} else {
- let a2 = a - self.cur_chunk_start;
- let b2 = b - self.cur_chunk_start;
- Some((&self.cur_chunk[b2..a2]).into())
+ let a2 = a - self.chunk_byte_idx;
+ let b2 = b - self.chunk_byte_idx;
+ Some((&self.current_chunk[b2..a2]).into())
}
}
}
+impl<'a> Iterator for RopeGraphemes<'a> {
+ type Item = RopeSlice<'a>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ RopeGraphemes::next(self)
+ }
+}
+
#[cfg(test)]
mod tests {
use ropey::RopeSlice;