Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-stdx/src/rope.rs')
| -rw-r--r-- | helix-stdx/src/rope.rs | 62 |
1 files changed, 62 insertions, 0 deletions
diff --git a/helix-stdx/src/rope.rs b/helix-stdx/src/rope.rs index 7e2549f5..2695555e 100644 --- a/helix-stdx/src/rope.rs +++ b/helix-stdx/src/rope.rs @@ -3,6 +3,7 @@ use std::ops::{Bound, RangeBounds}; pub use regex_cursor::engines::meta::{Builder as RegexBuilder, Regex}; pub use regex_cursor::regex_automata::util::syntax::Config; use regex_cursor::{Input as RegexInput, RopeyCursor}; +use ropey::str_utils::byte_to_char_idx; use ropey::RopeSlice; pub trait RopeSliceExt<'a>: Sized { @@ -16,6 +17,23 @@ pub trait RopeSliceExt<'a>: Sized { fn regex_input_at<R: RangeBounds<usize>>(self, char_range: R) -> RegexInput<RopeyCursor<'a>>; fn first_non_whitespace_char(self) -> Option<usize>; fn last_non_whitespace_char(self) -> Option<usize>; + /// returns the char idx of `byte_idx`, if `byte_idx` is a char boundary + /// this function behaves the same as `byte_to_char` but if `byte_idx` is + /// not a valid char boundary (so within a char) this will return the next + /// char index. + /// + /// # Example + /// + /// ``` + /// # use ropey::RopeSlice; + /// # use helix_stdx::rope::RopeSliceExt; + /// let text = RopeSlice::from("😆"); + /// for i in 1..text.len_bytes() { + /// assert_eq!(text.byte_to_char(i), 0); + /// assert_eq!(text.byte_to_next_char(i), 1); + /// } + /// ``` + fn byte_to_next_char(self, byte_idx: usize) -> usize; } impl<'a> RopeSliceExt<'a> for RopeSlice<'a> { @@ -75,4 +93,48 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> { .position(|ch| !ch.is_whitespace()) .map(|pos| self.len_chars() - pos - 1) } + + /// returns the char idx of `byte_idx`, if `byte_idx` is + /// a char boundary this function behaves the same as `byte_to_char` + fn byte_to_next_char(self, mut byte_idx: usize) -> usize { + let (chunk, chunk_byte_off, chunk_char_off, _) = self.chunk_at_byte(byte_idx); + byte_idx -= chunk_byte_off; + let is_char_boundary = + is_utf8_char_boundary(chunk.as_bytes().get(byte_idx).copied().unwrap_or(0)); + chunk_char_off + byte_to_char_idx(chunk, byte_idx) + !is_char_boundary as usize + } +} + +// copied from std +#[inline] +const fn is_utf8_char_boundary(b: u8) -> bool { + // This is bit magic equivalent to: b < 128 || b >= 192 + (b as i8) >= -0x40 +} + +#[cfg(test)] +mod tests { + use ropey::RopeSlice; + + use crate::rope::RopeSliceExt; + + #[test] + fn next_char_at_byte() { + for i in 0..=6 { + assert_eq!(RopeSlice::from("foobar").byte_to_next_char(i), i); + } + for char_idx in 0..10 { + let len = "😆".len(); + assert_eq!( + RopeSlice::from("😆😆😆😆😆😆😆😆😆😆").byte_to_next_char(char_idx * len), + char_idx + ); + for i in 1..=len { + assert_eq!( + RopeSlice::from("😆😆😆😆😆😆😆😆😆😆").byte_to_next_char(char_idx * len + i), + char_idx + 1 + ); + } + } + } } |