Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-core/src/doc_formatter.rs')
-rw-r--r-- helix-core/src/doc_formatter.rs 480
1 files changed, 0 insertions, 480 deletions
diff --git a/helix-core/src/doc_formatter.rs b/helix-core/src/doc_formatter.rs
deleted file mode 100644
index d7470942..00000000
--- a/helix-core/src/doc_formatter.rs
+++ /dev/null
@@ -1,480 +0,0 @@
-//! The `DocumentFormatter` forms the bridge between the raw document text
-//! and onscreen positioning. It yields the text graphemes as an iterator
-//! and traverses (part of) the document text. During that traversal it
-//! handles grapheme detection, softwrapping and annotations.
-//! It yields `FormattedGrapheme`s and their corresponding visual coordinates.
-//!
-//! As both virtual text and softwrapping can insert additional lines into the document,
-//! it is generally not possible to find the start of the previous visual line.
-//! Instead, the `DocumentFormatter` starts at the last "checkpoint" (usually a linebreak),
-//! called a "block", and the caller must advance it as needed.
-
-use std::borrow::Cow;
-use std::cmp::Ordering;
-use std::fmt::Debug;
-use std::mem::replace;
-
-#[cfg(test)]
-mod test;
-
-use unicode_segmentation::{Graphemes, UnicodeSegmentation};
-
-use helix_stdx::rope::{RopeGraphemes, RopeSliceExt};
-
-use crate::graphemes::{Grapheme, GraphemeStr};
-use crate::syntax::Highlight;
-use crate::text_annotations::TextAnnotations;
-use crate::{Position, RopeSlice};
-
-/// TODO make Highlight a u32 to reduce the size of this enum to a single word.
-#[derive(Debug, Clone, Copy)]
-pub enum GraphemeSource {
- Document {
- codepoints: u32,
- },
- /// Inline virtual text cannot be highlighted with a `Highlight` iterator
- /// because it's not part of the document. Instead the `Highlight`
- /// is emitted directly by the document formatter.
- VirtualText {
- highlight: Option<Highlight>,
- },
-}
-
-impl GraphemeSource {
- /// Returns whether this grapheme is virtual inline text
- pub fn is_virtual(self) -> bool {
- matches!(self, GraphemeSource::VirtualText { .. })
- }
-
- pub fn is_eof(self) -> bool {
- // all doc chars except the EOF char have non-zero codepoints
- matches!(self, GraphemeSource::Document { codepoints: 0 })
- }
-
- pub fn doc_chars(self) -> usize {
- match self {
- GraphemeSource::Document { codepoints } => codepoints as usize,
- GraphemeSource::VirtualText { .. } => 0,
- }
- }
-}
-
-#[derive(Debug, Clone)]
-pub struct FormattedGrapheme<'a> {
- pub raw: Grapheme<'a>,
- pub source: GraphemeSource,
- pub visual_pos: Position,
- /// Document line at the start of the grapheme
- pub line_idx: usize,
- /// Document char position at the start of the grapheme
- pub char_idx: usize,
-}
-
-impl FormattedGrapheme<'_> {
- pub fn is_virtual(&self) -> bool {
- self.source.is_virtual()
- }
-
- pub fn doc_chars(&self) -> usize {
- self.source.doc_chars()
- }
-
- pub fn is_whitespace(&self) -> bool {
- self.raw.is_whitespace()
- }
-
- pub fn width(&self) -> usize {
- self.raw.width()
- }
-
- pub fn is_word_boundary(&self) -> bool {
- self.raw.is_word_boundary()
- }
-}
-
-#[derive(Debug, Clone)]
-struct GraphemeWithSource<'a> {
- grapheme: Grapheme<'a>,
- source: GraphemeSource,
-}
-
-impl<'a> GraphemeWithSource<'a> {
- fn new(
- g: GraphemeStr<'a>,
- visual_x: usize,
- tab_width: u16,
- source: GraphemeSource,
- ) -> GraphemeWithSource<'a> {
- GraphemeWithSource {
- grapheme: Grapheme::new(g, visual_x, tab_width),
- source,
- }
- }
- fn placeholder() -> Self {
- GraphemeWithSource {
- grapheme: Grapheme::Other { g: " ".into() },
- source: GraphemeSource::Document { codepoints: 0 },
- }
- }
-
- fn doc_chars(&self) -> usize {
- self.source.doc_chars()
- }
-
- fn is_whitespace(&self) -> bool {
- self.grapheme.is_whitespace()
- }
-
- fn is_newline(&self) -> bool {
- matches!(self.grapheme, Grapheme::Newline)
- }
-
- fn is_eof(&self) -> bool {
- self.source.is_eof()
- }
-
- fn width(&self) -> usize {
- self.grapheme.width()
- }
-
- fn is_word_boundary(&self) -> bool {
- self.grapheme.is_word_boundary()
- }
-}
-
-#[derive(Debug, Clone)]
-pub struct TextFormat {
- pub soft_wrap: bool,
- pub tab_width: u16,
- pub max_wrap: u16,
- pub max_indent_retain: u16,
- pub wrap_indicator: Box<str>,
- pub wrap_indicator_highlight: Option<Highlight>,
- pub viewport_width: u16,
- pub soft_wrap_at_text_width: bool,
-}
-
-// This `Default` implementation is basically only used for testing or when softwrap is always disabled
-impl Default for TextFormat {
- fn default() -> Self {
- TextFormat {
- soft_wrap: false,
- tab_width: 4,
- max_wrap: 3,
- max_indent_retain: 4,
- wrap_indicator: Box::from(" "),
- viewport_width: 17,
- wrap_indicator_highlight: None,
- soft_wrap_at_text_width: false,
- }
- }
-}
-
-#[derive(Debug)]
-pub struct DocumentFormatter<'t> {
- text_fmt: &'t TextFormat,
- annotations: &'t TextAnnotations<'t>,
-
- /// The visual position at the end of the last yielded word boundary
- visual_pos: Position,
- graphemes: RopeGraphemes<'t>,
- /// The character pos of the `graphemes` iter used for inserting annotations
- char_pos: usize,
- /// The line pos of the `graphemes` iter used for inserting annotations
- line_pos: usize,
- exhausted: bool,
-
- inline_annotation_graphemes: Option<(Graphemes<'t>, Option<Highlight>)>,
-
- // softwrap specific
- /// The indentation of the current line
- /// Is set to `None` if the indentation level is not yet known
- /// because no non-whitespace graphemes have been encountered yet
- indent_level: Option<usize>,
- /// In case a long word needs to be split a single grapheme might need to be wrapped
- /// while the rest of the word stays on the same line
- peeked_grapheme: Option<GraphemeWithSource<'t>>,
- /// A first-in first-out (fifo) buffer for the Graphemes of any given word
- word_buf: Vec<GraphemeWithSource<'t>>,
- /// The index of the next grapheme that will be yielded from the `word_buf`
- word_i: usize,
-}
-
-impl<'t> DocumentFormatter<'t> {
- /// Creates a new formatter at the last block before `char_idx`.
- /// A block is a chunk which always ends with a linebreak.
- /// This is usually just a normal line break.
- /// Very long lines are meant to be split into blocks at constant, cheaply calculated intervals
- /// to avoid pathological behaviour, but that is not implemented yet (see the TODO in the body).
- pub fn new_at_prev_checkpoint(
- text: RopeSlice<'t>,
- text_fmt: &'t TextFormat,
- annotations: &'t TextAnnotations,
- char_idx: usize,
- ) -> Self {
- // TODO divide long lines into blocks to avoid bad performance for long lines
- let block_line_idx = text.char_to_line(char_idx.min(text.len_chars()));
- let block_char_idx = text.line_to_char(block_line_idx);
- annotations.reset_pos(block_char_idx);
-
- DocumentFormatter {
- text_fmt,
- annotations,
- visual_pos: Position { row: 0, col: 0 },
- graphemes: text.slice(block_char_idx..).graphemes(),
- char_pos: block_char_idx,
- exhausted: false,
- indent_level: None,
- peeked_grapheme: None,
- word_buf: Vec::with_capacity(64),
- word_i: 0,
- line_pos: block_line_idx,
- inline_annotation_graphemes: None,
- }
- }
-
- fn next_inline_annotation_grapheme(
- &mut self,
- char_pos: usize,
- ) -> Option<(&'t str, Option<Highlight>)> {
- loop {
- if let Some(&mut (ref mut annotation, highlight)) =
- self.inline_annotation_graphemes.as_mut()
- {
- if let Some(grapheme) = annotation.next() {
- return Some((grapheme, highlight));
- }
- }
-
- if let Some((annotation, highlight)) =
- self.annotations.next_inline_annotation_at(char_pos)
- {
- self.inline_annotation_graphemes = Some((
- UnicodeSegmentation::graphemes(&*annotation.text, true),
- highlight,
- ))
- } else {
- return None;
- }
- }
- }
-
- fn advance_grapheme(&mut self, col: usize, char_pos: usize) -> Option<GraphemeWithSource<'t>> {
- let (grapheme, source) =
- if let Some((grapheme, highlight)) = self.next_inline_annotation_grapheme(char_pos) {
- (grapheme.into(), GraphemeSource::VirtualText { highlight })
- } else if let Some(grapheme) = self.graphemes.next() {
- let codepoints = grapheme.len_chars() as u32;
-
- let overlay = self.annotations.overlay_at(char_pos);
- let grapheme = match overlay {
- Some((overlay, _)) => overlay.grapheme.as_str().into(),
- None => Cow::from(grapheme).into(),
- };
-
- (grapheme, GraphemeSource::Document { codepoints })
- } else {
- if self.exhausted {
- return None;
- }
- self.exhausted = true;
- // EOF grapheme is required for rendering
- // and correct position computations
- return Some(GraphemeWithSource {
- grapheme: Grapheme::Other { g: " ".into() },
- source: GraphemeSource::Document { codepoints: 0 },
- });
- };
-
- let grapheme = GraphemeWithSource::new(grapheme, col, self.text_fmt.tab_width, source);
-
- Some(grapheme)
- }
-
- /// Move a word to the next visual line
- fn wrap_word(&mut self) -> usize {
- // softwrap this word to the next line
- let indent_carry_over = if let Some(indent) = self.indent_level {
- if indent as u16 <= self.text_fmt.max_indent_retain {
- indent as u16
- } else {
- 0
- }
- } else {
- // ensure the indent stays 0
- self.indent_level = Some(0);
- 0
- };
-
- let virtual_lines =
- self.annotations
- .virtual_lines_at(self.char_pos, self.visual_pos, self.line_pos);
- self.visual_pos.col = indent_carry_over as usize;
- self.visual_pos.row += 1 + virtual_lines;
- let mut i = 0;
- let mut word_width = 0;
- let wrap_indicator = UnicodeSegmentation::graphemes(&*self.text_fmt.wrap_indicator, true)
- .map(|g| {
- i += 1;
- let grapheme = GraphemeWithSource::new(
- g.into(),
- self.visual_pos.col + word_width,
- self.text_fmt.tab_width,
- GraphemeSource::VirtualText {
- highlight: self.text_fmt.wrap_indicator_highlight,
- },
- );
- word_width += grapheme.width();
- grapheme
- });
- self.word_buf.splice(0..0, wrap_indicator);
-
- for grapheme in &mut self.word_buf[i..] {
- let visual_x = self.visual_pos.col + word_width;
- grapheme
- .grapheme
- .change_position(visual_x, self.text_fmt.tab_width);
- word_width += grapheme.width();
- }
- if let Some(grapheme) = &mut self.peeked_grapheme {
- let visual_x = self.visual_pos.col + word_width;
- grapheme
- .grapheme
- .change_position(visual_x, self.text_fmt.tab_width);
- }
- word_width
- }
-
- fn peek_grapheme(&mut self, col: usize, char_pos: usize) -> Option<&GraphemeWithSource<'t>> {
- if self.peeked_grapheme.is_none() {
- self.peeked_grapheme = self.advance_grapheme(col, char_pos);
- }
- self.peeked_grapheme.as_ref()
- }
-
- fn next_grapheme(&mut self, col: usize, char_pos: usize) -> Option<GraphemeWithSource<'t>> {
- self.peek_grapheme(col, char_pos);
- self.peeked_grapheme.take()
- }
-
- fn advance_to_next_word(&mut self) {
- self.word_buf.clear();
- let mut word_width = 0;
- let mut word_chars = 0;
-
- if self.exhausted {
- return;
- }
-
- loop {
- let mut col = self.visual_pos.col + word_width;
- let char_pos = self.char_pos + word_chars;
- match col.cmp(&(self.text_fmt.viewport_width as usize)) {
- // The EOF char and newline chars are always selectable in helix. That means
- // that wrapping happens "too-early" if a word fits a line perfectly. This
- // is intentional so that all selectable graphemes are always visible (and
- // therefore the cursor never disappears). However if the user manually set a
- // lower softwrap width then this is undesirable. Just increasing the viewport-
- // width by one doesn't work because if a line is wrapped multiple times then
- // some words may extend past the specified width.
- //
- // So we special case a word that ends exactly at line bounds and is followed
- // by a newline/eof character here.
- Ordering::Equal
- if self.text_fmt.soft_wrap_at_text_width
- && self
- .peek_grapheme(col, char_pos)
- .is_some_and(|grapheme| grapheme.is_newline() || grapheme.is_eof()) => {
- }
- Ordering::Equal if word_width > self.text_fmt.max_wrap as usize => return,
- Ordering::Greater if word_width > self.text_fmt.max_wrap as usize => {
- self.peeked_grapheme = self.word_buf.pop();
- return;
- }
- Ordering::Equal | Ordering::Greater => {
- word_width = self.wrap_word();
- col = self.visual_pos.col + word_width;
- }
- Ordering::Less => (),
- }
-
- let Some(grapheme) = self.next_grapheme(col, char_pos) else {
- return;
- };
- word_chars += grapheme.doc_chars();
-
- // Track indentation
- if !grapheme.is_whitespace() && self.indent_level.is_none() {
- self.indent_level = Some(self.visual_pos.col);
- } else if grapheme.grapheme == Grapheme::Newline {
- self.indent_level = None;
- }
-
- let is_word_boundary = grapheme.is_word_boundary();
- word_width += grapheme.width();
- self.word_buf.push(grapheme);
-
- if is_word_boundary {
- return;
- }
- }
- }
-
- /// returns the char index at the end of the last yielded grapheme
- pub fn next_char_pos(&self) -> usize {
- self.char_pos
- }
- /// returns the visual position at the end of the last yielded grapheme
- pub fn next_visual_pos(&self) -> Position {
- self.visual_pos
- }
-}
-
-impl<'t> Iterator for DocumentFormatter<'t> {
- type Item = FormattedGrapheme<'t>;
-
- fn next(&mut self) -> Option<Self::Item> {
- let grapheme = if self.text_fmt.soft_wrap {
- if self.word_i >= self.word_buf.len() {
- self.advance_to_next_word();
- self.word_i = 0;
- }
- let grapheme = replace(
- self.word_buf.get_mut(self.word_i)?,
- GraphemeWithSource::placeholder(),
- );
- self.word_i += 1;
- grapheme
- } else {
- self.advance_grapheme(self.visual_pos.col, self.char_pos)?
- };
-
- let grapheme = FormattedGrapheme {
- raw: grapheme.grapheme,
- source: grapheme.source,
- visual_pos: self.visual_pos,
- line_idx: self.line_pos,
- char_idx: self.char_pos,
- };
-
- self.char_pos += grapheme.doc_chars();
- if !grapheme.is_virtual() {
- self.annotations.process_virtual_text_anchors(&grapheme);
- }
- if grapheme.raw == Grapheme::Newline {
- // move to end of newline char
- self.visual_pos.col += 1;
- let virtual_lines =
- self.annotations
- .virtual_lines_at(self.char_pos, self.visual_pos, self.line_pos);
- self.visual_pos.row += 1 + virtual_lines;
- self.visual_pos.col = 0;
- if !grapheme.is_virtual() {
- self.line_pos += 1;
- }
- } else {
- self.visual_pos.col += grapheme.width();
- }
- Some(grapheme)
- }
-}