Unnamed repository; edit this file 'description' to name the repository.
Fix prompt truncation for non-ASCII lines
The prompt previously assumed that each grapheme cluster in the line was single-width and single-byte. Lines like the one in the new integration test would cause panics because the anchor attempted to slice into the middle of a character. This change rewrites the anchor and truncation code in the prompt to account for Unicode segmentation and width. Now multi-width graphemes can be hidden by multiple consecutive ellipses — for example, "十" is hidden by "……" (2-width). Co-authored-by: Narazaki, Shuji <[email protected]>
Michael Davis 8 months ago
parent 684e108 · commit d3fb8fc
-rw-r--r--helix-term/src/ui/prompt.rs66
-rw-r--r--helix-term/tests/test/commands.rs22
-rw-r--r--helix-tui/src/buffer.rs47
3 files changed, 97 insertions, 38 deletions
diff --git a/helix-term/src/ui/prompt.rs b/helix-term/src/ui/prompt.rs
index ee5c46e7..ff4ca1fc 100644
--- a/helix-term/src/ui/prompt.rs
+++ b/helix-term/src/ui/prompt.rs
@@ -12,7 +12,9 @@ use tui::text::Span;
use tui::widgets::{Block, Widget};
use helix_core::{
- unicode::segmentation::GraphemeCursor, unicode::width::UnicodeWidthStr, Position,
+ unicode::segmentation::{GraphemeCursor, UnicodeSegmentation},
+ unicode::width::UnicodeWidthStr,
+ Position,
};
use helix_view::{
graphics::{CursorKind, Margin, Rect},
@@ -535,21 +537,51 @@ impl Prompt {
.into();
text.render(self.line_area, surface, cx);
} else {
- if self.line.len() < self.line_area.width as usize {
+ let line_width = self.line_area.width as usize;
+
+ if self.line.width() < line_width {
self.anchor = 0;
- } else if self.cursor < self.anchor {
- self.anchor = self.cursor;
- } else if self.cursor - self.anchor > self.line_area.width as usize {
- self.anchor = self.cursor - self.line_area.width as usize;
+ } else if self.cursor <= self.anchor {
+ // Ensure the grapheme under the cursor is in view.
+ self.anchor = self.line[..self.cursor]
+ .grapheme_indices(true)
+ .next_back()
+ .map(|(i, _)| i)
+ .unwrap_or_default();
+ } else if self.line[self.anchor..self.cursor].width() > line_width {
+ // Set the anchor to the last grapheme cluster before the width is exceeded.
+ let mut width = 0;
+ self.anchor = self.line[..self.cursor]
+ .grapheme_indices(true)
+ .rev()
+ .find_map(|(idx, g)| {
+ width += g.width();
+ if width > line_width {
+ Some(idx + g.len())
+ } else {
+ None
+ }
+ })
+ .unwrap();
}
self.truncate_start = self.anchor > 0;
- self.truncate_end = self.line.len() - self.anchor > self.line_area.width as usize;
+ self.truncate_end = self.line[self.anchor..].width() > line_width;
// if we keep inserting characters just before the end elipsis, we move the anchor
// so that those new characters are displayed
- if self.truncate_end && self.cursor - self.anchor >= self.line_area.width as usize {
- self.anchor += 1;
+ if self.truncate_end && self.line[self.anchor..self.cursor].width() >= line_width {
+ // Move the anchor forward by one non-zero-width grapheme.
+ self.anchor += self.line[self.anchor..]
+ .grapheme_indices(true)
+ .find_map(|(idx, g)| {
+ if g.width() > 0 {
+ Some(idx + g.len())
+ } else {
+ None
+ }
+ })
+ .unwrap();
}
surface.set_string_anchored(
@@ -558,7 +590,7 @@ impl Prompt {
self.truncate_start,
self.truncate_end,
&self.line.as_str()[self.anchor..],
- self.line_area.width as usize - self.truncate_end as usize,
+ line_width,
|_| prompt_color,
);
}
@@ -734,17 +766,21 @@ impl Component for Prompt {
.clip_left(self.prompt.len() as u16)
.clip_right(if self.prompt.is_empty() { 2 } else { 0 });
- let anchor = self.anchor.min(self.line.len().saturating_sub(1));
- let mut col = area.left() as usize
- + UnicodeWidthStr::width(&self.line[anchor..self.cursor.max(anchor)]);
+ let mut col = area.left() as usize + self.line[self.anchor..self.cursor].width();
// ensure the cursor does not go beyond elipses
- if self.truncate_end && self.cursor - self.anchor >= self.line_area.width as usize {
+ if self.truncate_end
+ && self.line[self.anchor..self.cursor].width() >= self.line_area.width as usize
+ {
col -= 1;
}
if self.truncate_start && self.cursor == self.anchor {
- col += 1;
+ col += self.line[self.cursor..]
+ .graphemes(true)
+ .next()
+ .unwrap()
+ .width();
}
let line = area.height as usize - 1;
diff --git a/helix-term/tests/test/commands.rs b/helix-term/tests/test/commands.rs
index 29f76cfb..20e8ac9a 100644
--- a/helix-term/tests/test/commands.rs
+++ b/helix-term/tests/test/commands.rs
@@ -820,3 +820,25 @@ async fn macro_play_within_macro_record() -> anyhow::Result<()> {
Ok(())
}
+
+#[tokio::test(flavor = "multi_thread")]
+async fn global_search_with_multibyte_chars() -> anyhow::Result<()> {
+ // Assert that `helix_term::commands::global_search` handles multibyte characters correctly.
+ test((
+ indoc! {"\
+ // Hello world!
+ // #[|
+ ]#
+ "},
+ // start global search
+ " /«十分に長い マルチバイトキャラクター列» で検索<ret><esc>",
+ indoc! {"\
+ // Hello world!
+ // #[|
+ ]#
+ "},
+ ))
+ .await?;
+
+ Ok(())
+}
diff --git a/helix-tui/src/buffer.rs b/helix-tui/src/buffer.rs
index bfcf35ac..2cd30324 100644
--- a/helix-tui/src/buffer.rs
+++ b/helix-tui/src/buffer.rs
@@ -326,43 +326,44 @@ impl Buffer {
return (x, y);
}
- let max_offset = min(
- self.area.right() as usize - 1,
- width.saturating_add(x as usize),
- );
- let mut start_index = self.index_of(x, y);
- let mut end_index = self.index_of(max_offset as u16, y);
-
- if truncate_end {
- self.content[end_index].set_symbol("…");
- end_index -= 1;
- }
+ let mut index = self.index_of(x, y);
+ let mut rendered_width = 0;
+ let mut graphemes = string.grapheme_indices(true);
if truncate_start {
- self.content[start_index].set_symbol("…");
- start_index += 1;
+ for _ in 0..graphemes.next().map(|(_, g)| g.width()).unwrap_or_default() {
+ self.content[index].set_symbol("…");
+ index += 1;
+ rendered_width += 1;
+ }
}
- let graphemes = string.grapheme_indices(true);
-
- for (byte_offset, s) in graphemes.skip(truncate_start as usize) {
- if start_index > end_index {
+ for (byte_offset, s) in graphemes {
+ let grapheme_width = s.width();
+ if truncate_end && rendered_width + grapheme_width >= width {
break;
}
- let width = s.width();
- if width == 0 {
+ if grapheme_width == 0 {
continue;
}
- self.content[start_index].set_symbol(s);
- self.content[start_index].set_style(style(byte_offset));
+ self.content[index].set_symbol(s);
+ self.content[index].set_style(style(byte_offset));
// Reset following cells if multi-width (they would be hidden by the grapheme):
- for i in start_index + 1..start_index + width {
+ for i in index + 1..index + grapheme_width {
self.content[i].reset();
}
- start_index += width;
+ index += grapheme_width;
+ rendered_width += grapheme_width;
+ }
+
+ if truncate_end {
+ for _ in 0..width.saturating_sub(rendered_width) {
+ self.content[index].set_symbol("…");
+ index += 1;
+ }
}
(x, y)