helix

diff --git a/helix-core/src/auto_pairs.rs b/helix-core/src/auto_pairs.rs
index 74e25ac9..746f201a 100644
--- a/helix-core/src/auto_pairs.rs
+++ b/helix-core/src/auto_pairs.rs

@@ -12,7 +12,7 @@ pub const PAIRS: &[(char, char)] = &[

('`', '`'),

];

-const CLOSE_BEFORE: &str = ")]}'\":;> \n"; // includes space and newline

+const CLOSE_BEFORE: &str = ")]}'\":;> \n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}"; // includes space and newlines

// insert hook:

// Fn(doc, selection, char) => Option<Transaction>

diff --git a/helix-core/src/chars.rs b/helix-core/src/chars.rs
index 243a1374..24133dd3 100644
--- a/helix-core/src/chars.rs
+++ b/helix-core/src/chars.rs

@@ -1,25 +1,44 @@

-/// Determine whether a character is a line break.

-pub fn char_is_linebreak(c: char) -> bool {

- matches!(

- c,

- '\u{000A}' | // LineFeed

- '\u{000B}' | // VerticalTab

- '\u{000C}' | // FormFeed

- '\u{000D}' | // CarriageReturn

- '\u{0085}' | // NextLine

- '\u{2028}' | // Line Separator

- '\u{2029}' // ParagraphSeparator

- )

+use crate::LineEnding;

+/// Coarse classification of a single character, as returned by

+/// `categorize_char`.

+#[derive(Debug, Eq, PartialEq)]

+pub enum CharCategory {

+ /// Whitespace that is not a line ending.

+ Whitespace,

+ /// A line-ending character.

+ Eol,

+ /// An alphanumeric character or `_`.

+ Word,

+ /// A punctuation or symbol character (see `char_is_punctuation`).

+ Punctuation,

+ /// Anything that matched none of the other categories.

+ Unknown,

+/// Classify `ch` into a `CharCategory`.

+///

+/// The checks run in a fixed order and the first match wins: line ending,

+/// whitespace, word character, punctuation. Anything else is `Unknown`.

+#[inline]

+pub fn categorize_char(ch: char) -> CharCategory {

+ // Line endings are tested first: `char::is_whitespace` is also true for

+ // them, so this ordering keeps them out of `Whitespace`.

+ if char_is_line_ending(ch) {

+ CharCategory::Eol

+ } else if ch.is_whitespace() {

+ CharCategory::Whitespace

+ } else if char_is_word(ch) {

+ CharCategory::Word

+ } else if char_is_punctuation(ch) {

+ CharCategory::Punctuation

+ } else {

+ CharCategory::Unknown

+ }

+/// Determine whether a character is a line ending.

+#[inline]

+pub fn char_is_line_ending(ch: char) -> bool {

+ LineEnding::from_char(ch).is_some()

}

/// Determine whether a character qualifies as (non-line-break)

/// whitespace.

-pub fn char_is_whitespace(c: char) -> bool {

+#[inline]

+pub fn char_is_whitespace(ch: char) -> bool {

// TODO: this is a naive binary categorization of whitespace

// characters. For display, word wrapping, etc. we'll need a better

// categorization based on e.g. breaking vs non-breaking spaces

// and whether they're zero-width or not.

- match c {

+ match ch {

//'\u{1680}' | // Ogham Space Mark (here for completeness, but usually displayed as a dash, not as whitespace)

'\u{0009}' | // Character Tabulation

'\u{0020}' | // Space

@@ -34,8 +53,81 @@ pub fn char_is_whitespace(c: char) -> bool {

// En Quad, Em Quad, En Space, Em Space, Three-per-em Space,

// Four-per-em Space, Six-per-em Space, Figure Space,

// Punctuation Space, Thin Space, Hair Space, Zero Width Space.

- c if ('\u{2000}' ..= '\u{200B}').contains(&c) => true,

+ ch if ('\u{2000}' ..= '\u{200B}').contains(&ch) => true,

_ => false,

}

+/// Determine whether a character counts as punctuation.

+///

+/// This is decided by Unicode general category. Besides the punctuation

+/// categories proper, math, currency and modifier symbols are treated as

+/// punctuation as well.

+#[inline]

+pub fn char_is_punctuation(ch: char) -> bool {

+ use unicode_general_category::{get_general_category, GeneralCategory};

+ matches!(

+ get_general_category(ch),

+ GeneralCategory::OtherPunctuation

+ | GeneralCategory::OpenPunctuation

+ | GeneralCategory::ClosePunctuation

+ | GeneralCategory::InitialPunctuation

+ | GeneralCategory::FinalPunctuation

+ | GeneralCategory::ConnectorPunctuation

+ | GeneralCategory::DashPunctuation

+ | GeneralCategory::MathSymbol

+ | GeneralCategory::CurrencySymbol

+ | GeneralCategory::ModifierSymbol

+ )

+/// Determine whether a character is a word character: alphanumeric or an

+/// underscore.

+#[inline]

+pub fn char_is_word(ch: char) -> bool {

+ ch.is_alphanumeric() || ch == '_'

+#[cfg(test)]

+mod test {

+ use super::*;

+ #[test]

+ fn test_categorize() {

+ const EOL_TEST_CASE: &'static str = "\n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}";

+ const WORD_TEST_CASE: &'static str =

+ "_hello_world_あいうえおー1234567890１２３４５６７８９０";

+ const PUNCTUATION_TEST_CASE: &'static str =

+ "!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~！”＃＄％＆’（）＊＋、。：；＜＝＞？＠「」＾｀｛｜｝～";

+ const WHITESPACE_TEST_CASE: &'static str = "  　  ";

+ for ch in EOL_TEST_CASE.chars() {

+ assert_eq!(CharCategory::Eol, categorize_char(ch));

+ }

+ for ch in WHITESPACE_TEST_CASE.chars() {

+ assert_eq!(

+ CharCategory::Whitespace,

+ categorize_char(ch),

+ "Testing '{}', but got `{:?}` instead of `Category::Whitespace`",

+ ch,

+ categorize_char(ch)

+ );

+ }

+ for ch in WORD_TEST_CASE.chars() {

+ assert_eq!(

+ CharCategory::Word,

+ categorize_char(ch),

+ "Testing '{}', but got `{:?}` instead of `Category::Word`",

+ ch,

+ categorize_char(ch)

+ );

+ }

+ for ch in PUNCTUATION_TEST_CASE.chars() {

+ assert_eq!(

+ CharCategory::Punctuation,

+ categorize_char(ch),

+ "Testing '{}', but got `{:?}` instead of `Category::Punctuation`",

+ ch,

+ categorize_char(ch)

+ );

+ }

diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs
index e00e56be..183b9f0a 100644
--- a/helix-core/src/lib.rs
+++ b/helix-core/src/lib.rs

@@ -113,6 +113,6 @@ pub use diagnostic::Diagnostic;

pub use state::State;

pub use line_ending::{

- auto_detect_line_ending, get_line_ending, line_end, LineEnding, DEFAULT_LINE_ENDING,

+ auto_detect_line_ending, get_line_ending, line_end_char_index, LineEnding, DEFAULT_LINE_ENDING,

};

pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction};

diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs
index 45e20c88..c4636c63 100644
--- a/helix-core/src/line_ending.rs
+++ b/helix-core/src/line_ending.rs

@@ -1,5 +1,10 @@

use crate::{Rope, RopeGraphemes, RopeSlice};

+#[cfg(target_os = "windows")]

+pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf;

+#[cfg(not(target_os = "windows"))]

+pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF;

/// Represents one of the valid Unicode line endings.

#[derive(PartialEq, Copy, Clone, Debug)]

pub enum LineEnding {

@@ -14,6 +19,7 @@ pub enum LineEnding {

}

impl LineEnding {

+ #[inline]

pub fn len_chars(&self) -> usize {

match self {

Self::Crlf => 2,

@@ -21,6 +27,7 @@ impl LineEnding {

}

+ #[inline]

pub fn as_str(&self) -> &'static str {

match self {

Self::Crlf => "\u{000D}\u{000A}",

@@ -34,6 +41,22 @@ impl LineEnding {

}

+ /// Returns the line ending made up of the single character `ch`, or

+ /// `None` if `ch` is not a line-ending character.

+ ///

+ /// `Crlf` is never returned here because it spans two characters.

+ #[inline]

+ pub fn from_char(ch: char) -> Option<LineEnding> {

+ match ch {

+ '\u{000A}' => Some(LineEnding::LF),

+ '\u{000B}' => Some(LineEnding::VT),

+ '\u{000C}' => Some(LineEnding::FF),

+ '\u{000D}' => Some(LineEnding::CR),

+ '\u{0085}' => Some(LineEnding::Nel),

+ '\u{2028}' => Some(LineEnding::LS),

+ '\u{2029}' => Some(LineEnding::PS),

+ // Not a line ending

+ _ => None,

+ }

+ #[inline]

pub fn from_str(g: &str) -> Option<LineEnding> {

match g {

"\u{000D}\u{000A}" => Some(LineEnding::Crlf),

@@ -49,6 +72,7 @@ impl LineEnding {

}

+ #[inline]

pub fn from_rope_slice(g: &RopeSlice) -> Option<LineEnding> {

if let Some(text) = g.as_str() {

LineEnding::from_str(text)

@@ -62,6 +86,11 @@ impl LineEnding {

}

+/// Returns `true` if `LineEnding::from_str` recognizes `s` as a line ending.

+#[inline]

+pub fn str_is_line_ending(s: &str) -> bool {

+ LineEnding::from_str(s).is_some()

/// Attempts to detect what line ending the passed document uses.

pub fn auto_detect_line_ending(doc: &Rope) -> Option<LineEnding> {

// Return first matched line ending. Not all possible line endings

@@ -96,19 +125,13 @@ pub fn get_line_ending(line: &RopeSlice) -> Option<LineEnding> {

}

/// Returns the char index of the end of the given line, not including its line ending.

-pub fn line_end(slice: &RopeSlice, line: usize) -> usize {

- slice.line_to_char(line + 1).saturating_sub(

- get_line_ending(&slice.line(line))

+pub fn line_end_char_index(slice: &RopeSlice, line: usize) -> usize {

+ slice.line_to_char(line + 1)

+ - get_line_ending(&slice.line(line))

.map(|le| le.len_chars())

- .unwrap_or(0),

- )

+ .unwrap_or(0)

}

-#[cfg(target_os = "windows")]

-pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf;

-#[cfg(not(target_os = "windows"))]

-pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF;

#[cfg(test)]

mod line_ending_tests {

use super::*;

@@ -150,11 +173,11 @@ mod line_ending_tests {

fn test_rope_slice_to_line_ending() {

let r = Rope::from_str("\r\n");

assert_eq!(

- rope_slice_to_line_ending(&r.slice(1..2)),

+ LineEnding::from_rope_slice(&r.slice(1..2)),

Some(LineEnding::LF)

);

assert_eq!(

- rope_slice_to_line_ending(&r.slice(0..2)),

+ LineEnding::from_rope_slice(&r.slice(0..2)),

Some(LineEnding::Crlf)

);

}

diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs
index 7f47e662..d0023e9f 100644
--- a/helix-core/src/movement.rs
+++ b/helix-core/src/movement.rs

@@ -3,9 +3,13 @@ use std::iter::{self, from_fn, Peekable, SkipWhile};

use ropey::iter::Chars;

use crate::{

+ chars::{

+ categorize_char, char_is_line_ending, char_is_punctuation, char_is_whitespace,

+ char_is_word, CharCategory,

+ },

coords_at_pos, get_line_ending,

graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary},

- line_end, pos_at_coords, Position, Range, RopeSlice,

+ line_end_char_index, pos_at_coords, Position, Range, RopeSlice,

};

#[derive(Debug, Copy, Clone, PartialEq, Eq)]

@@ -37,9 +41,8 @@ pub fn move_horizontally(

nth_prev_grapheme_boundary(slice, pos, count).max(start)

}

Direction::Forward => {

- // Line end is pos at the start of next line - 1

- let end = line_end(&slice, line);

- nth_next_grapheme_boundary(slice, pos, count).min(end)

+ let end_char_idx = line_end_char_index(&slice, line);

+ nth_next_grapheme_boundary(slice, pos, count).min(end_char_idx)

}

};

let anchor = match behaviour {

@@ -68,8 +71,11 @@ pub fn move_vertically(

};

- // convert to 0-indexed, subtract another 1 because len_chars() counts \n

- let new_line_len = slice.line(new_line).len_chars().saturating_sub(2);

+ // Length of the line sans line-ending.

+ let new_line_len = {

+ let line = slice.line(new_line);

+ line.len_chars() - get_line_ending(&line).map(|le| le.len_chars()).unwrap_or(0)

+ };

let new_col = std::cmp::min(horiz as usize, new_line_len);

@@ -104,64 +110,6 @@ fn word_move(slice: RopeSlice, mut range: Range, count: usize, target: WordMotio

}

// ---- util ------------

-#[inline]

-pub(crate) fn is_word(ch: char) -> bool {

- ch.is_alphanumeric() || ch == '_'

-#[inline]

-pub(crate) fn is_end_of_line(ch: char) -> bool {

- ch == '\n'

-#[inline]

-// Whitespace, but not end of line

-pub(crate) fn is_strict_whitespace(ch: char) -> bool {

- ch.is_whitespace() && !is_end_of_line(ch)

-#[inline]

-pub(crate) fn is_punctuation(ch: char) -> bool {

- use unicode_general_category::{get_general_category, GeneralCategory};

- matches!(

- get_general_category(ch),

- GeneralCategory::OtherPunctuation

- | GeneralCategory::OpenPunctuation

- | GeneralCategory::ClosePunctuation

- | GeneralCategory::InitialPunctuation

- | GeneralCategory::FinalPunctuation

- | GeneralCategory::ConnectorPunctuation

- | GeneralCategory::DashPunctuation

- | GeneralCategory::MathSymbol

- | GeneralCategory::CurrencySymbol

- | GeneralCategory::ModifierSymbol

- )

-#[derive(Debug, Eq, PartialEq)]

-pub enum Category {

- Whitespace,

- Eol,

- Word,

- Punctuation,

- Unknown,

-#[inline]

-pub(crate) fn categorize(ch: char) -> Category {

- if is_end_of_line(ch) {

- Category::Eol

- } else if ch.is_whitespace() {

- Category::Whitespace

- } else if is_word(ch) {

- Category::Word

- } else if is_punctuation(ch) {

- Category::Punctuation

- } else {

- Category::Unknown

- }

#[inline]

/// Returns first index that doesn't satisfy a given predicate when

@@ -235,7 +183,8 @@ impl CharHelpers for Chars<'_> {

let mut phase = WordMotionPhase::Start;

let mut head = origin.head;

let mut anchor: Option<usize> = None;

- let is_boundary = |a: char, b: Option<char>| categorize(a) != categorize(b.unwrap_or(a));

+ let is_boundary =

+ |a: char, b: Option<char>| categorize_char(a) != categorize_char(b.unwrap_or(a));

while let Some(peek) = characters.peek().copied() {

phase = match phase {

WordMotionPhase::Start => {

@@ -244,7 +193,8 @@ impl CharHelpers for Chars<'_> {

break; // We're at the end, so there's nothing to do.

}

// Anchor may remain here if the head wasn't at a boundary

- if !is_boundary(peek, characters.peek().copied()) && !is_end_of_line(peek) {

+ if !is_boundary(peek, characters.peek().copied()) && !char_is_line_ending(peek)

+ {

anchor = Some(head);

}

// First character is always skipped by the head

@@ -252,7 +202,7 @@ impl CharHelpers for Chars<'_> {

WordMotionPhase::SkipNewlines

}

WordMotionPhase::SkipNewlines => {

- if is_end_of_line(peek) {

+ if char_is_line_ending(peek) {

characters.next();

if characters.peek().is_some() {

advance(&mut head);

@@ -286,12 +236,12 @@ fn reached_target(target: WordMotionTarget, peek: char, next_peek: Option<&char>

match target {

WordMotionTarget::NextWordStart => {

- ((categorize(peek) != categorize(*next_peek))

- && (is_end_of_line(*next_peek) || !next_peek.is_whitespace()))

+ ((categorize_char(peek) != categorize_char(*next_peek))

+ && (char_is_line_ending(*next_peek) || !next_peek.is_whitespace()))

}

WordMotionTarget::NextWordEnd | WordMotionTarget::PrevWordStart => {

- ((categorize(peek) != categorize(*next_peek))

- && (!peek.is_whitespace() || is_end_of_line(*next_peek)))

+ ((categorize_char(peek) != categorize_char(*next_peek))

+ && (!peek.is_whitespace() || char_is_line_ending(*next_peek)))

}

@@ -330,7 +280,7 @@ mod test {

slice,

move_vertically(slice, range, Direction::Forward, 1, Movement::Move).head

- (1, 2).into()

+ (1, 3).into()

);

}

@@ -343,12 +293,12 @@ mod test {

let mut range = Range::point(position);

let moves_and_expected_coordinates = [

- ((Direction::Forward, 1usize), (0, 1)),

- ((Direction::Forward, 2usize), (0, 3)),

- ((Direction::Forward, 0usize), (0, 3)),

- ((Direction::Forward, 999usize), (0, 31)),

- ((Direction::Backward, 999usize), (0, 0)),

+ ((Direction::Forward, 1usize), (0, 1)), // T|his is a simple alphabetic line

+ ((Direction::Forward, 2usize), (0, 3)), // Thi|s is a simple alphabetic line

+ ((Direction::Forward, 0usize), (0, 3)), // Thi|s is a simple alphabetic line

+ ((Direction::Forward, 999usize), (0, 32)), // This is a simple alphabetic line|

+ ((Direction::Backward, 999usize), (0, 0)), // |This is a simple alphabetic line

];

for ((direction, amount), coordinates) in IntoIter::new(moves_and_expected_coordinates) {

@@ -366,15 +316,15 @@ mod test {

let mut range = Range::point(position);

let moves_and_expected_coordinates = IntoIter::new([

- ((Direction::Forward, 1usize), (0, 1)), // M_ltiline

- ((Direction::Forward, 2usize), (0, 3)), // Mul_iline

- ((Direction::Backward, 6usize), (0, 0)), // _ultiline

- ((Direction::Backward, 999usize), (0, 0)), // _ultiline

- ((Direction::Forward, 3usize), (0, 3)), // Mul_iline

- ((Direction::Forward, 0usize), (0, 3)), // Mul_iline

- ((Direction::Backward, 0usize), (0, 3)), // Mul_iline

- ((Direction::Forward, 999usize), (0, 9)), // Multilin_

+ ((Direction::Forward, 1usize), (0, 1)), // M|ultiline\n

+ ((Direction::Forward, 2usize), (0, 3)), // Mul|tiline\n

+ ((Direction::Backward, 6usize), (0, 0)), // |Multiline\n

+ ((Direction::Backward, 999usize), (0, 0)), // |Multiline\n

+ ((Direction::Forward, 3usize), (0, 3)), // Mul|tiline\n

+ ((Direction::Forward, 0usize), (0, 3)), // Mul|tiline\n

+ ((Direction::Backward, 0usize), (0, 3)), // Mul|tiline\n

+ ((Direction::Forward, 999usize), (0, 9)), // Multiline|\n

]);

for ((direction, amount), coordinates) in moves_and_expected_coordinates {

@@ -446,7 +396,7 @@ mod test {

// First descent preserves column as the target line is wider

((Axis::V, Direction::Forward, 1usize), (1, 8)),

// Second descent clamps column as the target line is shorter

- ((Axis::V, Direction::Forward, 1usize), (2, 4)),

+ ((Axis::V, Direction::Forward, 1usize), (2, 5)),

// Third descent restores the original column

((Axis::V, Direction::Forward, 1usize), (3, 8)),

// Behaviour is preserved even through long jumps

@@ -760,45 +710,4 @@ mod test {

}

- #[test]

- fn test_categorize() {

- const WORD_TEST_CASE: &'static str =

- "_hello_world_あいうえおー1234567890１２３４５６７８９０";

- const PUNCTUATION_TEST_CASE: &'static str =

- "!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~！”＃＄％＆’（）＊＋、。：；＜＝＞？＠「」＾｀｛｜｝～";

- const WHITESPACE_TEST_CASE: &'static str = "  　  ";

- assert_eq!(Category::Eol, categorize('\n'));

- for ch in WHITESPACE_TEST_CASE.chars() {

- assert_eq!(

- Category::Whitespace,

- categorize(ch),

- "Testing '{}', but got `{:?}` instead of `Category::Whitespace`",

- ch,

- categorize(ch)

- );

- }

- for ch in WORD_TEST_CASE.chars() {

- assert_eq!(

- Category::Word,

- categorize(ch),

- "Testing '{}', but got `{:?}` instead of `Category::Word`",

- ch,

- categorize(ch)

- );

- }

- for ch in PUNCTUATION_TEST_CASE.chars() {

- assert_eq!(

- Category::Punctuation,

- categorize(ch),

- "Testing '{}', but got `{:?}` instead of `Category::Punctuation`",

- ch,

- categorize(ch)

- );

- }

}

diff --git a/helix-core/src/position.rs b/helix-core/src/position.rs
index 3d85ff2f..392eee9c 100644
--- a/helix-core/src/position.rs
+++ b/helix-core/src/position.rs

@@ -1,4 +1,5 @@

use crate::{

+ chars::char_is_line_ending,

graphemes::{nth_next_grapheme_boundary, RopeGraphemes},

Rope, RopeSlice,

};

@@ -23,8 +24,9 @@ impl Position {

pub fn traverse(self, text: &crate::Tendril) -> Self {

let Self { mut row, mut col } = self;

// TODO: there should be a better way here

- for ch in text.chars() {

- if ch == '\n' {

+ let mut chars = text.chars().peekable();

+ while let Some(ch) = chars.next() {

+ if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {

row += 1;

col = 0;

} else {

diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index ae058eb1..92e52d73 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs

@@ -1,4 +1,4 @@

-use crate::{regex::Regex, Change, Rope, RopeSlice, Transaction};

+use crate::{chars::char_is_line_ending, regex::Regex, Change, Rope, RopeSlice, Transaction};

pub use helix_syntax::{get_language, get_language_name, Lang};

use std::{

@@ -579,9 +579,10 @@ impl LanguageLayer {

mut column,

} = point;

- // TODO: there should be a better way here

- for ch in text.bytes() {

- if ch == b'\n' {

+ // TODO: there should be a better way here.

+ // NOTE(review): this loop now walks `char`s where it previously walked

+ // bytes. If `column` is expected to be a byte offset, multi-byte

+ // characters will be counted differently than before -- confirm.

+ let mut chars = text.chars().peekable();

+ while let Some(ch) = chars.next() {

+ if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {

row += 1;

column = 0;

} else {

diff --git a/helix-lsp/src/client.rs b/helix-lsp/src/client.rs
index 101d2f9b..7f136fe8 100644
--- a/helix-lsp/src/client.rs
+++ b/helix-lsp/src/client.rs

@@ -3,7 +3,7 @@ use crate::{

Call, Error, OffsetEncoding, Result,

};

-use helix_core::{find_root, ChangeSet, Rope};

+use helix_core::{chars::char_is_line_ending, find_root, ChangeSet, Rope};

use jsonrpc_core as jsonrpc;

use lsp_types as lsp;

use serde_json::Value;

@@ -337,8 +337,9 @@ impl Client {

mut character,

} = pos;

- for ch in text.chars() {

- if ch == '\n' {

+ let mut chars = text.chars().peekable();

+ while let Some(ch) = chars.next() {

+ if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {

line += 1;

character = 0;

} else {

diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs
index 8124c17a..b006504b 100644
--- a/helix-term/src/commands.rs
+++ b/helix-term/src/commands.rs

@@ -1,6 +1,6 @@

use helix_core::{

comment, coords_at_pos, find_first_non_whitespace_char, find_root, get_line_ending, graphemes,

- indent, line_end, match_brackets,

+ indent, line_end_char_index, match_brackets,

movement::{self, Direction},

object, pos_at_coords,

regex::{self, Regex},

@@ -342,7 +342,7 @@ fn move_line_end(cx: &mut Context) {

let text = doc.text();

let line = text.char_to_line(range.head);

- let pos = line_end(&text.slice(..), line);

+ let pos = line_end_char_index(&text.slice(..), line);

Range::new(pos, pos)

});

@@ -490,6 +490,8 @@ where

let count = cx.count();

// need to wait for next key

+ // TODO: should this be done by grapheme rather than char? For example,

+ // we can't properly handle the line-ending case here in terms of char.

cx.on_next_key(move |cx, event| {

let ch = match event {

KeyEvent {

@@ -623,7 +625,7 @@ fn replace(cx: &mut Context) {

KeyEvent {

code: KeyCode::Enter,

- } => Some('\n'), // TODO: replace this with DEFAULT_LINE_ENDING

+ } => Some('\n'), // TODO: use the document's default line ending.

_ => None,

};

@@ -763,7 +765,7 @@ fn extend_line_end(cx: &mut Context) {

let text = doc.text();

let line = text.char_to_line(range.head);

- let pos = line_end(&text.slice(..), line);

+ let pos = line_end_char_index(&text.slice(..), line);

Range::new(range.anchor, pos)

});

@@ -1642,7 +1644,7 @@ fn append_to_line(cx: &mut Context) {

let selection = doc.selection(view.id).transform(|range| {

let text = doc.text();

let line = text.char_to_line(range.head);

- let pos = line_end(&text.slice(..), line);

+ let pos = line_end_char_index(&text.slice(..), line);

Range::new(pos, pos)

});

doc.set_selection(view.id, selection);

diff --git a/helix-term/src/ui/markdown.rs b/helix-term/src/ui/markdown.rs
index be113747..3ce3a5b8 100644
--- a/helix-term/src/ui/markdown.rs
+++ b/helix-term/src/ui/markdown.rs

@@ -110,6 +110,8 @@ fn parse<'a>(contents: &'a str, theme: Option<&Theme>) -> tui::text::Text<'a> {

// TODO: replace tabs with indentation

let mut slice = &text[start..end];

+ // TODO: do we need to handle all unicode line endings

+ // here, or is just '\n' okay?

while let Some(end) = slice.find('\n') {

// emit span up to newline

let text = &slice[..end];

diff --git a/helix-tui/Cargo.toml b/helix-tui/Cargo.toml
index 89fa755d..30e2374d 100644
--- a/helix-tui/Cargo.toml
+++ b/helix-tui/Cargo.toml

@@ -22,3 +22,4 @@ unicode-segmentation = "1.2"

unicode-width = "0.1"

crossterm = { version = "0.20", optional = true }

serde = { version = "1", "optional" = true, features = ["derive"]}

+helix-core = { version = "0.2", path = "../helix-core" }

diff --git a/helix-tui/src/text.rs b/helix-tui/src/text.rs
index c671e918..b23bfd81 100644
--- a/helix-tui/src/text.rs
+++ b/helix-tui/src/text.rs

@@ -47,6 +47,7 @@

//! ]);

//! ```

use crate::style::Style;

+use helix_core::line_ending::str_is_line_ending;

use std::borrow::Cow;

use unicode_segmentation::UnicodeSegmentation;

use unicode_width::UnicodeWidthStr;

@@ -177,7 +178,7 @@ impl<'a> Span<'a> {

symbol: g,

style: base_style.patch(self.style),

})

- .filter(|s| s.symbol != "\n")

+ .filter(|s| !str_is_line_ending(s.symbol))

}

diff --git a/helix-tui/src/widgets/reflow.rs b/helix-tui/src/widgets/reflow.rs
index 94ff7330..ae561a4f 100644
--- a/helix-tui/src/widgets/reflow.rs
+++ b/helix-tui/src/widgets/reflow.rs

@@ -1,4 +1,5 @@

use crate::text::StyledGrapheme;

+use helix_core::line_ending::str_is_line_ending;

use unicode_segmentation::UnicodeSegmentation;

use unicode_width::UnicodeWidthStr;

@@ -62,13 +63,13 @@ impl<'a, 'b> LineComposer<'a> for WordWrapper<'a, 'b> {

// Ignore characters wider that the total max width.

if symbol.width() as u16 > self.max_line_width

// Skip leading whitespace when trim is enabled.

- || self.trim && symbol_whitespace && symbol != "\n" && current_line_width == 0

+ || self.trim && symbol_whitespace && !str_is_line_ending(symbol) && current_line_width == 0

{

continue;

}

// Break on newline and discard it.

- if symbol == "\n" {

+ if str_is_line_ending(symbol) {

if prev_whitespace {

current_line_width = width_to_last_word_end;

self.current_line.truncate(symbols_to_last_word_end);

@@ -170,7 +171,7 @@ impl<'a, 'b> LineComposer<'a> for LineTruncator<'a, 'b> {

}

// Break on newline and discard it.

- if symbol == "\n" {

+ if str_is_line_ending(symbol) {

break;

}

@@ -199,7 +200,7 @@ impl<'a, 'b> LineComposer<'a> for LineTruncator<'a, 'b> {

if skip_rest {

for StyledGrapheme { symbol, .. } in &mut self.symbols {

- if symbol == "\n" {

+ if str_is_line_ending(symbol) {

break;

}

diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs
index 80be1ed2..3e38c24d 100644
--- a/helix-view/src/document.rs
+++ b/helix-view/src/document.rs

@@ -10,7 +10,7 @@ use std::sync::Arc;

use helix_core::{

auto_detect_line_ending,

- chars::{char_is_linebreak, char_is_whitespace},

+ chars::{char_is_line_ending, char_is_whitespace},

history::History,

syntax::{LanguageConfiguration, LOADER},

ChangeSet, Diagnostic, LineEnding, Rope, Selection, State, Syntax, Transaction,

@@ -81,6 +81,9 @@ pub struct Document {

/// Current indent style.

pub indent_style: IndentStyle,

+ /// The document's default line ending.

+ pub line_ending: LineEnding,

syntax: Option<Syntax>,

// /// Corresponding language scope name. Usually `source.<lang>`.

pub(crate) language: Option<Arc<LanguageConfiguration>>,

@@ -99,7 +102,6 @@ pub struct Document {

diagnostics: Vec<Diagnostic>,

language_server: Option<Arc<helix_lsp::Client>>,

- line_ending: LineEnding,

}

use std::fmt;

@@ -254,21 +256,21 @@ impl Document {

pub fn load(path: PathBuf) -> Result<Self, Error> {

use std::{fs::File, io::BufReader};

- let doc = if !path.exists() {

+ let mut doc = if !path.exists() {

Rope::from(DEFAULT_LINE_ENDING.as_str())

} else {

let file = File::open(&path).context(format!("unable to open {:?}", path))?;

- let mut doc = Rope::from_reader(BufReader::new(file))?;

- // add missing newline at the end of file

- if doc.len_bytes() == 0 || doc.byte(doc.len_bytes() - 1) != b'\n' {

- doc.insert_char(doc.len_chars(), '\n');

- }

- doc

+ Rope::from_reader(BufReader::new(file))?

};

// search for line endings

let line_ending = auto_detect_line_ending(&doc).unwrap_or(DEFAULT_LINE_ENDING);

+ // Add a missing line ending at the end of the file, using the line

+ // ending detected above rather than a hard-coded '\n'. The emptiness

+ // check short-circuits, so `len_chars() - 1` cannot underflow.

+ if doc.len_bytes() == 0 || !char_is_line_ending(doc.char(doc.len_chars() - 1)) {

+ doc.insert(doc.len_chars(), line_ending.as_str());

+ }

let mut doc = Self::new(doc);

// set the path and try detecting the language

doc.set_path(&path)?;

@@ -379,7 +381,7 @@ impl Document {

Some(' ') => false,

// Ignore blank lines.

- Some(c) if char_is_linebreak(c) => continue,

+ Some(c) if char_is_line_ending(c) => continue,

_ => {

prev_line_is_tabs = false;

@@ -403,7 +405,7 @@ impl Document {

c if char_is_whitespace(c) => count_is_done = true,

// Ignore blank lines.

- c if char_is_linebreak(c) => continue 'outer,

+ c if char_is_line_ending(c) => continue 'outer,

_ => break,

}

diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs
index db8ae87a..fb2eb36d 100644
--- a/helix-view/src/editor.rs
+++ b/helix-view/src/editor.rs

@@ -12,7 +12,7 @@ use anyhow::Error;

pub use helix_core::diagnostic::Severity;

pub use helix_core::register::Registers;

-use helix_core::Position;

+use helix_core::{Position, DEFAULT_LINE_ENDING};

#[derive(Debug)]

pub struct Editor {

@@ -150,7 +150,7 @@ impl Editor {

pub fn new_file(&mut self, action: Action) -> DocumentId {

use helix_core::Rope;

- let doc = Document::new(Rope::from("\n"));

+ let doc = Document::new(Rope::from(DEFAULT_LINE_ENDING.as_str()));

let id = self.documents.insert(doc);

self.documents[id].id = id;

self.switch(id, action);