Unnamed repository; edit this file 'description' to name the repository.
-rw-r--r--helix-core/src/graphemes.rs17
-rw-r--r--helix-stdx/src/rope.rs62
-rw-r--r--helix-term/src/ui/document.rs23
-rw-r--r--helix-term/src/ui/editor.rs22
4 files changed, 88 insertions, 36 deletions
diff --git a/helix-core/src/graphemes.rs b/helix-core/src/graphemes.rs
index 7cb5cd06..a02d6e4d 100644
--- a/helix-core/src/graphemes.rs
+++ b/helix-core/src/graphemes.rs
@@ -278,23 +278,6 @@ pub fn ensure_grapheme_boundary_prev(slice: RopeSlice, char_idx: usize) -> usize
}
}
-/// Returns the passed byte index if it's already a grapheme boundary,
-/// or the next grapheme boundary byte index if not.
-#[must_use]
-#[inline]
-pub fn ensure_grapheme_boundary_next_byte(slice: RopeSlice, byte_idx: usize) -> usize {
- if byte_idx == 0 {
- byte_idx
- } else {
- // TODO: optimize so we're not constructing grapheme cursor twice
- if is_grapheme_boundary_byte(slice, byte_idx) {
- byte_idx
- } else {
- next_grapheme_boundary_byte(slice, byte_idx)
- }
- }
-}
-
/// Returns whether the given char position is a grapheme boundary.
#[must_use]
pub fn is_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> bool {
diff --git a/helix-stdx/src/rope.rs b/helix-stdx/src/rope.rs
index 7e2549f5..2695555e 100644
--- a/helix-stdx/src/rope.rs
+++ b/helix-stdx/src/rope.rs
@@ -3,6 +3,7 @@ use std::ops::{Bound, RangeBounds};
pub use regex_cursor::engines::meta::{Builder as RegexBuilder, Regex};
pub use regex_cursor::regex_automata::util::syntax::Config;
use regex_cursor::{Input as RegexInput, RopeyCursor};
+use ropey::str_utils::byte_to_char_idx;
use ropey::RopeSlice;
pub trait RopeSliceExt<'a>: Sized {
@@ -16,6 +17,23 @@ pub trait RopeSliceExt<'a>: Sized {
fn regex_input_at<R: RangeBounds<usize>>(self, char_range: R) -> RegexInput<RopeyCursor<'a>>;
fn first_non_whitespace_char(self) -> Option<usize>;
fn last_non_whitespace_char(self) -> Option<usize>;
+ /// returns the char idx of `byte_idx`, if `byte_idx` is a char boundary
+ /// this function behaves the same as `byte_to_char` but if `byte_idx` is
+ /// not a valid char boundary (so within a char) this will return the next
+ /// char index.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use ropey::RopeSlice;
+ /// # use helix_stdx::rope::RopeSliceExt;
+ /// let text = RopeSlice::from("😆");
+ /// for i in 1..text.len_bytes() {
+ /// assert_eq!(text.byte_to_char(i), 0);
+ /// assert_eq!(text.byte_to_next_char(i), 1);
+ /// }
+ /// ```
+ fn byte_to_next_char(self, byte_idx: usize) -> usize;
}
impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
@@ -75,4 +93,48 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
.position(|ch| !ch.is_whitespace())
.map(|pos| self.len_chars() - pos - 1)
}
+
+ /// returns the char idx of `byte_idx`, if `byte_idx` is
+ /// a char boundary this function behaves the same as `byte_to_char`
+ fn byte_to_next_char(self, mut byte_idx: usize) -> usize {
+ let (chunk, chunk_byte_off, chunk_char_off, _) = self.chunk_at_byte(byte_idx);
+ byte_idx -= chunk_byte_off;
+ let is_char_boundary =
+ is_utf8_char_boundary(chunk.as_bytes().get(byte_idx).copied().unwrap_or(0));
+ chunk_char_off + byte_to_char_idx(chunk, byte_idx) + !is_char_boundary as usize
+ }
+}
+
+// copied from std
+#[inline]
+const fn is_utf8_char_boundary(b: u8) -> bool {
+ // This is bit magic equivalent to: b < 128 || b >= 192
+ (b as i8) >= -0x40
+}
+
+#[cfg(test)]
+mod tests {
+ use ropey::RopeSlice;
+
+ use crate::rope::RopeSliceExt;
+
+ #[test]
+ fn next_char_at_byte() {
+ for i in 0..=6 {
+ assert_eq!(RopeSlice::from("foobar").byte_to_next_char(i), i);
+ }
+ for char_idx in 0..10 {
+ let len = "😆".len();
+ assert_eq!(
+ RopeSlice::from("😆😆😆😆😆😆😆😆😆😆").byte_to_next_char(char_idx * len),
+ char_idx
+ );
+ for i in 1..=len {
+ assert_eq!(
+ RopeSlice::from("😆😆😆😆😆😆😆😆😆😆").byte_to_next_char(char_idx * len + i),
+ char_idx + 1
+ );
+ }
+ }
+ }
}
diff --git a/helix-term/src/ui/document.rs b/helix-term/src/ui/document.rs
index b571b83c..bcbaa351 100644
--- a/helix-term/src/ui/document.rs
+++ b/helix-term/src/ui/document.rs
@@ -7,6 +7,7 @@ use helix_core::syntax::Highlight;
use helix_core::syntax::HighlightEvent;
use helix_core::text_annotations::TextAnnotations;
use helix_core::{visual_offset_from_block, Position, RopeSlice};
+use helix_stdx::rope::RopeSliceExt;
use helix_view::editor::{WhitespaceConfig, WhitespaceRenderValue};
use helix_view::graphics::Rect;
use helix_view::theme::Style;
@@ -32,14 +33,27 @@ impl<F: FnMut(&mut TextRenderer, LinePos)> LineDecoration for F {
}
}
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+enum StyleIterKind {
+ /// base highlights (usually emitted by TS), byte indices (potentially not codepoint aligned)
+ BaseHighlights,
+ /// overlay highlights (emitted by custom code from selections), char indices
+ Overlay,
+}
+
/// A wrapper around a HighlightIterator
/// that merges the layered highlights to create the final text style
/// and yields the active text style and the char_idx where the active
/// style will have to be recomputed.
+///
+/// TODO(ropey2): hopefully one day helix and ropey will operate entirely
+/// on byte ranges and we can remove this
struct StyleIter<'a, H: Iterator<Item = HighlightEvent>> {
text_style: Style,
active_highlights: Vec<Highlight>,
highlight_iter: H,
+ kind: StyleIterKind,
+ text: RopeSlice<'a>,
theme: &'a Theme,
}
@@ -54,7 +68,7 @@ impl<H: Iterator<Item = HighlightEvent>> Iterator for StyleIter<'_, H> {
HighlightEvent::HighlightEnd => {
self.active_highlights.pop();
}
- HighlightEvent::Source { start, end } => {
+ HighlightEvent::Source { start, mut end } => {
if start == end {
continue;
}
@@ -64,6 +78,9 @@ impl<H: Iterator<Item = HighlightEvent>> Iterator for StyleIter<'_, H> {
.fold(self.text_style, |acc, span| {
acc.patch(self.theme.highlight(span.0))
});
+ if self.kind == StyleIterKind::BaseHighlights {
+ end = self.text.byte_to_next_char(end);
+ }
return Some((style, end));
}
}
@@ -185,13 +202,17 @@ pub fn render_text<'t>(
text_style: renderer.text_style,
active_highlights: Vec::with_capacity(64),
highlight_iter: syntax_highlight_iter,
+ kind: StyleIterKind::BaseHighlights,
theme,
+ text,
};
let mut overlay_styles = StyleIter {
text_style: Style::default(),
active_highlights: Vec::with_capacity(64),
highlight_iter: overlay_highlight_iter,
+ kind: StyleIterKind::Overlay,
theme,
+ text,
};
let mut last_line_pos = LinePos {
diff --git a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs
index 2aac3d33..7b130a38 100644
--- a/helix-term/src/ui/editor.rs
+++ b/helix-term/src/ui/editor.rs
@@ -12,9 +12,7 @@ use crate::{
use helix_core::{
diagnostic::NumberOrString,
- graphemes::{
- ensure_grapheme_boundary_next_byte, next_grapheme_boundary, prev_grapheme_boundary,
- },
+ graphemes::{next_grapheme_boundary, prev_grapheme_boundary},
movement::Direction,
syntax::{self, HighlightEvent},
text_annotations::TextAnnotations,
@@ -315,26 +313,14 @@ impl EditorView {
let iter = syntax
// TODO: range doesn't actually restrict source, just highlight range
.highlight_iter(text.slice(..), Some(range), None)
- .map(|event| event.unwrap())
- .map(move |event| match event {
- // TODO: use byte slices directly
- // convert byte offsets to char offset
- HighlightEvent::Source { start, end } => {
- let start =
- text.byte_to_char(ensure_grapheme_boundary_next_byte(text, start));
- let end =
- text.byte_to_char(ensure_grapheme_boundary_next_byte(text, end));
- HighlightEvent::Source { start, end }
- }
- event => event,
- });
+ .map(|event| event.unwrap());
Box::new(iter)
}
None => Box::new(
[HighlightEvent::Source {
- start: text.byte_to_char(range.start),
- end: text.byte_to_char(range.end),
+ start: range.start,
+ end: range.end,
}]
.into_iter(),
),