Unnamed repository; edit this file 'description' to name the repository.
stdx: Replace RopeSliceExt::byte_to_next_char with ceil_char_boundary
The new `RopeSliceExt::ceil_char_boundary` from the parent commits can
be combined with `RopeSlice::byte_to_char` to implement
`RopeSliceExt::byte_to_next_char`. `byte_to_next_char` had only one
caller, and that caller will eventually disappear when we switch to
Ropey v2 and drop character indexing, so we can drop `byte_to_next_char`
now and replace its caller with `byte_to_char` plus `ceil_char_boundary`.
This change keeps the unit tests for `byte_to_next_char` and checks them
against a polyfill of `byte_to_char` plus `ceil_char_boundary` to ensure
that `byte_to_next_char`'s intended behavior is not changed.
| -rw-r--r-- | helix-stdx/src/rope.rs | 72 |
| -rw-r--r-- | helix-term/src/ui/document.rs | 4 |
2 files changed, 27 insertions, 49 deletions
diff --git a/helix-stdx/src/rope.rs b/helix-stdx/src/rope.rs index 0db9bf98..4a1bc59c 100644 --- a/helix-stdx/src/rope.rs +++ b/helix-stdx/src/rope.rs @@ -3,7 +3,6 @@ use std::ops::{Bound, RangeBounds}; pub use regex_cursor::engines::meta::{Builder as RegexBuilder, Regex}; pub use regex_cursor::regex_automata::util::syntax::Config; use regex_cursor::{Input as RegexInput, RopeyCursor}; -use ropey::str_utils::byte_to_char_idx; use ropey::RopeSlice; pub trait RopeSliceExt<'a>: Sized { @@ -17,23 +16,6 @@ pub trait RopeSliceExt<'a>: Sized { fn regex_input_at<R: RangeBounds<usize>>(self, char_range: R) -> RegexInput<RopeyCursor<'a>>; fn first_non_whitespace_char(self) -> Option<usize>; fn last_non_whitespace_char(self) -> Option<usize>; - /// returns the char idx of `byte_idx`, if `byte_idx` is a char boundary - /// this function behaves the same as `byte_to_char` but if `byte_idx` is - /// not a valid char boundary (so within a char) this will return the next - /// char index. - /// - /// # Example - /// - /// ``` - /// # use ropey::RopeSlice; - /// # use helix_stdx::rope::RopeSliceExt; - /// let text = RopeSlice::from("😆"); - /// for i in 1..text.len_bytes() { - /// assert_eq!(text.byte_to_char(i), 0); - /// assert_eq!(text.byte_to_next_char(i), 1); - /// } - /// ``` - fn byte_to_next_char(self, byte_idx: usize) -> usize; /// Finds the closest byte index not exceeding `byte_idx` which lies on a character boundary. /// /// If `byte_idx` already lies on a character boundary then it is returned as-is. 
When @@ -130,16 +112,6 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> { .map(|pos| self.len_chars() - pos - 1) } - /// returns the char idx of `byte_idx`, if `byte_idx` is - /// a char boundary this function behaves the same as `byte_to_char` - fn byte_to_next_char(self, mut byte_idx: usize) -> usize { - let (chunk, chunk_byte_off, chunk_char_off, _) = self.chunk_at_byte(byte_idx); - byte_idx -= chunk_byte_off; - let is_char_boundary = - is_utf8_char_boundary(chunk.as_bytes().get(byte_idx).copied().unwrap_or(0)); - chunk_char_off + byte_to_char_idx(chunk, byte_idx) + !is_char_boundary as usize - } - // These two are adapted from std's `round_char_boundary` functions: fn floor_char_boundary(self, byte_idx: usize) -> usize { @@ -184,26 +156,6 @@ mod tests { use crate::rope::RopeSliceExt; #[test] - fn next_char_at_byte() { - for i in 0..=6 { - assert_eq!(RopeSlice::from("foobar").byte_to_next_char(i), i); - } - for char_idx in 0..10 { - let len = "😆".len(); - assert_eq!( - RopeSlice::from("😆😆😆😆😆😆😆😆😆😆").byte_to_next_char(char_idx * len), - char_idx - ); - for i in 1..=len { - assert_eq!( - RopeSlice::from("😆😆😆😆😆😆😆😆😆😆").byte_to_next_char(char_idx * len + i), - char_idx + 1 - ); - } - } - } - - #[test] fn starts_with() { assert!(RopeSlice::from("asdf").starts_with("a")); } @@ -221,5 +173,29 @@ mod tests { assert_eq!(ascii.floor_char_boundary(byte_idx), byte_idx); assert_eq!(ascii.ceil_char_boundary(byte_idx), byte_idx); } + + // This is a polyfill of a method of this trait which was replaced by ceil_char_boundary. + // It returns the _character index_ of the given byte index, rounding up if it does not + // already lie on a character boundary. 
+ fn byte_to_next_char(slice: RopeSlice, byte_idx: usize) -> usize { + slice.byte_to_char(slice.ceil_char_boundary(byte_idx)) + } + + for i in 0..=6 { + assert_eq!(byte_to_next_char(RopeSlice::from("foobar"), i), i); + } + for char_idx in 0..10 { + let len = "😆".len(); + assert_eq!( + byte_to_next_char(RopeSlice::from("😆😆😆😆😆😆😆😆😆😆"), char_idx * len), + char_idx + ); + for i in 1..=len { + assert_eq!( + byte_to_next_char(RopeSlice::from("😆😆😆😆😆😆😆😆😆😆"), char_idx * len + i), + char_idx + 1 + ); + } + } } } diff --git a/helix-term/src/ui/document.rs b/helix-term/src/ui/document.rs index d1a74e7e..8423ae8e 100644 --- a/helix-term/src/ui/document.rs +++ b/helix-term/src/ui/document.rs @@ -60,7 +60,9 @@ impl<H: Iterator<Item = HighlightEvent>> Iterator for StyleIter<'_, H> { acc.patch(self.theme.highlight(span.0)) }); if self.kind == StyleIterKind::BaseHighlights { - end = self.text.byte_to_next_char(end); + // Move the end byte index to the nearest character boundary (rounding up) + // and convert it to a character index. + end = self.text.byte_to_char(self.text.ceil_char_boundary(end)); } return Some((style, end)); } |