Unnamed repository; edit this file 'description' to name the repository.
Work on moving code over to LineEnding instead of assuming '\n'.
Also some general cleanup and some minor fixes along the way.
Nathan Vegdahl 2021-06-21
parent 5d22e3c · commit 4efd671
-rw-r--r-- Cargo.lock 1
-rw-r--r-- helix-core/src/auto_pairs.rs 2
-rw-r--r-- helix-core/src/chars.rs 122
-rw-r--r-- helix-core/src/lib.rs 2
-rw-r--r-- helix-core/src/line_ending.rs 47
-rw-r--r-- helix-core/src/movement.rs 167
-rw-r--r-- helix-core/src/position.rs 6
-rw-r--r-- helix-core/src/syntax.rs 9
-rw-r--r-- helix-lsp/src/client.rs 7
-rw-r--r-- helix-term/src/commands.rs 12
-rw-r--r-- helix-term/src/ui/markdown.rs 2
-rw-r--r-- helix-tui/Cargo.toml 1
-rw-r--r-- helix-tui/src/text.rs 3
-rw-r--r-- helix-tui/src/widgets/reflow.rs 9
-rw-r--r-- helix-view/src/document.rs 24
-rw-r--r-- helix-view/src/editor.rs 4
16 files changed, 228 insertions, 190 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 24c277e1..a1de7138 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -331,6 +331,7 @@ dependencies = [
"bitflags",
"cassowary",
"crossterm",
+ "helix-core",
"serde",
"unicode-segmentation",
"unicode-width",
diff --git a/helix-core/src/auto_pairs.rs b/helix-core/src/auto_pairs.rs
index 74e25ac9..746f201a 100644
--- a/helix-core/src/auto_pairs.rs
+++ b/helix-core/src/auto_pairs.rs
@@ -12,7 +12,7 @@ pub const PAIRS: &[(char, char)] = &[
('`', '`'),
];
-const CLOSE_BEFORE: &str = ")]}'\":;> \n"; // includes space and newline
+const CLOSE_BEFORE: &str = ")]}'\":;> \n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}"; // includes space and newlines
// insert hook:
// Fn(doc, selection, char) => Option<Transaction>
diff --git a/helix-core/src/chars.rs b/helix-core/src/chars.rs
index 243a1374..24133dd3 100644
--- a/helix-core/src/chars.rs
+++ b/helix-core/src/chars.rs
@@ -1,25 +1,44 @@
-/// Determine whether a character is a line break.
-pub fn char_is_linebreak(c: char) -> bool {
- matches!(
- c,
- '\u{000A}' | // LineFeed
- '\u{000B}' | // VerticalTab
- '\u{000C}' | // FormFeed
- '\u{000D}' | // CarriageReturn
- '\u{0085}' | // NextLine
- '\u{2028}' | // Line Separator
- '\u{2029}' // ParagraphSeparator
- )
+use crate::LineEnding;
+
+#[derive(Debug, Eq, PartialEq)]
+pub enum CharCategory {
+ Whitespace,
+ Eol,
+ Word,
+ Punctuation,
+ Unknown,
+}
+
+#[inline]
+pub fn categorize_char(ch: char) -> CharCategory {
+ if char_is_line_ending(ch) {
+ CharCategory::Eol
+ } else if ch.is_whitespace() {
+ CharCategory::Whitespace
+ } else if char_is_word(ch) {
+ CharCategory::Word
+ } else if char_is_punctuation(ch) {
+ CharCategory::Punctuation
+ } else {
+ CharCategory::Unknown
+ }
+}
+
+/// Determine whether a character is a line ending.
+#[inline]
+pub fn char_is_line_ending(ch: char) -> bool {
+ LineEnding::from_char(ch).is_some()
}
/// Determine whether a character qualifies as (non-line-break)
/// whitespace.
-pub fn char_is_whitespace(c: char) -> bool {
+#[inline]
+pub fn char_is_whitespace(ch: char) -> bool {
// TODO: this is a naive binary categorization of whitespace
// characters. For display, word wrapping, etc. we'll need a better
// categorization based on e.g. breaking vs non-breaking spaces
// and whether they're zero-width or not.
- match c {
+ match ch {
//'\u{1680}' | // Ogham Space Mark (here for completeness, but usually displayed as a dash, not as whitespace)
'\u{0009}' | // Character Tabulation
'\u{0020}' | // Space
@@ -34,8 +53,81 @@ pub fn char_is_whitespace(c: char) -> bool {
// En Quad, Em Quad, En Space, Em Space, Three-per-em Space,
// Four-per-em Space, Six-per-em Space, Figure Space,
// Punctuation Space, Thin Space, Hair Space, Zero Width Space.
- c if ('\u{2000}' ..= '\u{200B}').contains(&c) => true,
+ ch if ('\u{2000}' ..= '\u{200B}').contains(&ch) => true,
_ => false,
}
}
+
+#[inline]
+pub fn char_is_punctuation(ch: char) -> bool {
+ use unicode_general_category::{get_general_category, GeneralCategory};
+
+ matches!(
+ get_general_category(ch),
+ GeneralCategory::OtherPunctuation
+ | GeneralCategory::OpenPunctuation
+ | GeneralCategory::ClosePunctuation
+ | GeneralCategory::InitialPunctuation
+ | GeneralCategory::FinalPunctuation
+ | GeneralCategory::ConnectorPunctuation
+ | GeneralCategory::DashPunctuation
+ | GeneralCategory::MathSymbol
+ | GeneralCategory::CurrencySymbol
+ | GeneralCategory::ModifierSymbol
+ )
+}
+
+#[inline]
+pub fn char_is_word(ch: char) -> bool {
+ ch.is_alphanumeric() || ch == '_'
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn test_categorize() {
+ const EOL_TEST_CASE: &'static str = "\n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}";
+ const WORD_TEST_CASE: &'static str =
+ "_hello_world_あいうえおー12345678901234567890";
+ const PUNCTUATION_TEST_CASE: &'static str =
+ "!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~!”#$%&’()*+、。:;<=>?@「」^`{|}~";
+ const WHITESPACE_TEST_CASE: &'static str = "      ";
+
+ for ch in EOL_TEST_CASE.chars() {
+ assert_eq!(CharCategory::Eol, categorize_char(ch));
+ }
+
+ for ch in WHITESPACE_TEST_CASE.chars() {
+ assert_eq!(
+ CharCategory::Whitespace,
+ categorize_char(ch),
+ "Testing '{}', but got `{:?}` instead of `Category::Whitespace`",
+ ch,
+ categorize_char(ch)
+ );
+ }
+
+ for ch in WORD_TEST_CASE.chars() {
+ assert_eq!(
+ CharCategory::Word,
+ categorize_char(ch),
+ "Testing '{}', but got `{:?}` instead of `Category::Word`",
+ ch,
+ categorize_char(ch)
+ );
+ }
+
+ for ch in PUNCTUATION_TEST_CASE.chars() {
+ assert_eq!(
+ CharCategory::Punctuation,
+ categorize_char(ch),
+ "Testing '{}', but got `{:?}` instead of `Category::Punctuation`",
+ ch,
+ categorize_char(ch)
+ );
+ }
+ }
+}
diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs
index e00e56be..183b9f0a 100644
--- a/helix-core/src/lib.rs
+++ b/helix-core/src/lib.rs
@@ -113,6 +113,6 @@ pub use diagnostic::Diagnostic;
pub use state::State;
pub use line_ending::{
- auto_detect_line_ending, get_line_ending, line_end, LineEnding, DEFAULT_LINE_ENDING,
+ auto_detect_line_ending, get_line_ending, line_end_char_index, LineEnding, DEFAULT_LINE_ENDING,
};
pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction};
diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs
index 45e20c88..c4636c63 100644
--- a/helix-core/src/line_ending.rs
+++ b/helix-core/src/line_ending.rs
@@ -1,5 +1,10 @@
use crate::{Rope, RopeGraphemes, RopeSlice};
+#[cfg(target_os = "windows")]
+pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf;
+#[cfg(not(target_os = "windows"))]
+pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF;
+
/// Represents one of the valid Unicode line endings.
#[derive(PartialEq, Copy, Clone, Debug)]
pub enum LineEnding {
@@ -14,6 +19,7 @@ pub enum LineEnding {
}
impl LineEnding {
+ #[inline]
pub fn len_chars(&self) -> usize {
match self {
Self::Crlf => 2,
@@ -21,6 +27,7 @@ impl LineEnding {
}
}
+ #[inline]
pub fn as_str(&self) -> &'static str {
match self {
Self::Crlf => "\u{000D}\u{000A}",
@@ -34,6 +41,22 @@ impl LineEnding {
}
}
+ #[inline]
+ pub fn from_char(ch: char) -> Option<LineEnding> {
+ match ch {
+ '\u{000A}' => Some(LineEnding::LF),
+ '\u{000B}' => Some(LineEnding::VT),
+ '\u{000C}' => Some(LineEnding::FF),
+ '\u{000D}' => Some(LineEnding::CR),
+ '\u{0085}' => Some(LineEnding::Nel),
+ '\u{2028}' => Some(LineEnding::LS),
+ '\u{2029}' => Some(LineEnding::PS),
+ // Not a line ending
+ _ => None,
+ }
+ }
+
+ #[inline]
pub fn from_str(g: &str) -> Option<LineEnding> {
match g {
"\u{000D}\u{000A}" => Some(LineEnding::Crlf),
@@ -49,6 +72,7 @@ impl LineEnding {
}
}
+ #[inline]
pub fn from_rope_slice(g: &RopeSlice) -> Option<LineEnding> {
if let Some(text) = g.as_str() {
LineEnding::from_str(text)
@@ -62,6 +86,11 @@ impl LineEnding {
}
}
+#[inline]
+pub fn str_is_line_ending(s: &str) -> bool {
+ LineEnding::from_str(s).is_some()
+}
+
/// Attempts to detect what line ending the passed document uses.
pub fn auto_detect_line_ending(doc: &Rope) -> Option<LineEnding> {
// Return first matched line ending. Not all possible line endings
@@ -96,19 +125,13 @@ pub fn get_line_ending(line: &RopeSlice) -> Option<LineEnding> {
}
/// Returns the char index of the end of the given line, not including its line ending.
-pub fn line_end(slice: &RopeSlice, line: usize) -> usize {
- slice.line_to_char(line + 1).saturating_sub(
- get_line_ending(&slice.line(line))
+pub fn line_end_char_index(slice: &RopeSlice, line: usize) -> usize {
+ slice.line_to_char(line + 1)
+ - get_line_ending(&slice.line(line))
.map(|le| le.len_chars())
- .unwrap_or(0),
- )
+ .unwrap_or(0)
}
-#[cfg(target_os = "windows")]
-pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf;
-#[cfg(not(target_os = "windows"))]
-pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF;
-
#[cfg(test)]
mod line_ending_tests {
use super::*;
@@ -150,11 +173,11 @@ mod line_ending_tests {
fn test_rope_slice_to_line_ending() {
let r = Rope::from_str("\r\n");
assert_eq!(
- rope_slice_to_line_ending(&r.slice(1..2)),
+ LineEnding::from_rope_slice(&r.slice(1..2)),
Some(LineEnding::LF)
);
assert_eq!(
- rope_slice_to_line_ending(&r.slice(0..2)),
+ LineEnding::from_rope_slice(&r.slice(0..2)),
Some(LineEnding::Crlf)
);
}
diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs
index 7f47e662..d0023e9f 100644
--- a/helix-core/src/movement.rs
+++ b/helix-core/src/movement.rs
@@ -3,9 +3,13 @@ use std::iter::{self, from_fn, Peekable, SkipWhile};
use ropey::iter::Chars;
use crate::{
+ chars::{
+ categorize_char, char_is_line_ending, char_is_punctuation, char_is_whitespace,
+ char_is_word, CharCategory,
+ },
coords_at_pos, get_line_ending,
graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary},
- line_end, pos_at_coords, Position, Range, RopeSlice,
+ line_end_char_index, pos_at_coords, Position, Range, RopeSlice,
};
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
@@ -37,9 +41,8 @@ pub fn move_horizontally(
nth_prev_grapheme_boundary(slice, pos, count).max(start)
}
Direction::Forward => {
- // Line end is pos at the start of next line - 1
- let end = line_end(&slice, line);
- nth_next_grapheme_boundary(slice, pos, count).min(end)
+ let end_char_idx = line_end_char_index(&slice, line);
+ nth_next_grapheme_boundary(slice, pos, count).min(end_char_idx)
}
};
let anchor = match behaviour {
@@ -68,8 +71,11 @@ pub fn move_vertically(
),
};
- // convert to 0-indexed, subtract another 1 because len_chars() counts \n
- let new_line_len = slice.line(new_line).len_chars().saturating_sub(2);
+ // Length of the line sans line-ending.
+ let new_line_len = {
+ let line = slice.line(new_line);
+ line.len_chars() - get_line_ending(&line).map(|le| le.len_chars()).unwrap_or(0)
+ };
let new_col = std::cmp::min(horiz as usize, new_line_len);
@@ -104,64 +110,6 @@ fn word_move(slice: RopeSlice, mut range: Range, count: usize, target: WordMotio
}
// ---- util ------------
-#[inline]
-pub(crate) fn is_word(ch: char) -> bool {
- ch.is_alphanumeric() || ch == '_'
-}
-
-#[inline]
-pub(crate) fn is_end_of_line(ch: char) -> bool {
- ch == '\n'
-}
-
-#[inline]
-// Whitespace, but not end of line
-pub(crate) fn is_strict_whitespace(ch: char) -> bool {
- ch.is_whitespace() && !is_end_of_line(ch)
-}
-
-#[inline]
-pub(crate) fn is_punctuation(ch: char) -> bool {
- use unicode_general_category::{get_general_category, GeneralCategory};
-
- matches!(
- get_general_category(ch),
- GeneralCategory::OtherPunctuation
- | GeneralCategory::OpenPunctuation
- | GeneralCategory::ClosePunctuation
- | GeneralCategory::InitialPunctuation
- | GeneralCategory::FinalPunctuation
- | GeneralCategory::ConnectorPunctuation
- | GeneralCategory::DashPunctuation
- | GeneralCategory::MathSymbol
- | GeneralCategory::CurrencySymbol
- | GeneralCategory::ModifierSymbol
- )
-}
-
-#[derive(Debug, Eq, PartialEq)]
-pub enum Category {
- Whitespace,
- Eol,
- Word,
- Punctuation,
- Unknown,
-}
-
-#[inline]
-pub(crate) fn categorize(ch: char) -> Category {
- if is_end_of_line(ch) {
- Category::Eol
- } else if ch.is_whitespace() {
- Category::Whitespace
- } else if is_word(ch) {
- Category::Word
- } else if is_punctuation(ch) {
- Category::Punctuation
- } else {
- Category::Unknown
- }
-}
#[inline]
/// Returns first index that doesn't satisfy a given predicate when
@@ -235,7 +183,8 @@ impl CharHelpers for Chars<'_> {
let mut phase = WordMotionPhase::Start;
let mut head = origin.head;
let mut anchor: Option<usize> = None;
- let is_boundary = |a: char, b: Option<char>| categorize(a) != categorize(b.unwrap_or(a));
+ let is_boundary =
+ |a: char, b: Option<char>| categorize_char(a) != categorize_char(b.unwrap_or(a));
while let Some(peek) = characters.peek().copied() {
phase = match phase {
WordMotionPhase::Start => {
@@ -244,7 +193,8 @@ impl CharHelpers for Chars<'_> {
break; // We're at the end, so there's nothing to do.
}
// Anchor may remain here if the head wasn't at a boundary
- if !is_boundary(peek, characters.peek().copied()) && !is_end_of_line(peek) {
+ if !is_boundary(peek, characters.peek().copied()) && !char_is_line_ending(peek)
+ {
anchor = Some(head);
}
// First character is always skipped by the head
@@ -252,7 +202,7 @@ impl CharHelpers for Chars<'_> {
WordMotionPhase::SkipNewlines
}
WordMotionPhase::SkipNewlines => {
- if is_end_of_line(peek) {
+ if char_is_line_ending(peek) {
characters.next();
if characters.peek().is_some() {
advance(&mut head);
@@ -286,12 +236,12 @@ fn reached_target(target: WordMotionTarget, peek: char, next_peek: Option<&char>
match target {
WordMotionTarget::NextWordStart => {
- ((categorize(peek) != categorize(*next_peek))
- && (is_end_of_line(*next_peek) || !next_peek.is_whitespace()))
+ ((categorize_char(peek) != categorize_char(*next_peek))
+ && (char_is_line_ending(*next_peek) || !next_peek.is_whitespace()))
}
WordMotionTarget::NextWordEnd | WordMotionTarget::PrevWordStart => {
- ((categorize(peek) != categorize(*next_peek))
- && (!peek.is_whitespace() || is_end_of_line(*next_peek)))
+ ((categorize_char(peek) != categorize_char(*next_peek))
+ && (!peek.is_whitespace() || char_is_line_ending(*next_peek)))
}
}
}
@@ -330,7 +280,7 @@ mod test {
slice,
move_vertically(slice, range, Direction::Forward, 1, Movement::Move).head
),
- (1, 2).into()
+ (1, 3).into()
);
}
@@ -343,12 +293,12 @@ mod test {
let mut range = Range::point(position);
let moves_and_expected_coordinates = [
- ((Direction::Forward, 1usize), (0, 1)),
- ((Direction::Forward, 2usize), (0, 3)),
- ((Direction::Forward, 0usize), (0, 3)),
- ((Direction::Forward, 999usize), (0, 31)),
- ((Direction::Forward, 999usize), (0, 31)),
- ((Direction::Backward, 999usize), (0, 0)),
+ ((Direction::Forward, 1usize), (0, 1)), // T|his is a simple alphabetic line
+ ((Direction::Forward, 2usize), (0, 3)), // Thi|s is a simple alphabetic line
+ ((Direction::Forward, 0usize), (0, 3)), // Thi|s is a simple alphabetic line
+ ((Direction::Forward, 999usize), (0, 32)), // This is a simple alphabetic line|
+ ((Direction::Forward, 999usize), (0, 32)), // This is a simple alphabetic line|
+ ((Direction::Backward, 999usize), (0, 0)), // |This is a simple alphabetic line
];
for ((direction, amount), coordinates) in IntoIter::new(moves_and_expected_coordinates) {
@@ -366,15 +316,15 @@ mod test {
let mut range = Range::point(position);
let moves_and_expected_coordinates = IntoIter::new([
- ((Direction::Forward, 1usize), (0, 1)), // M_ltiline
- ((Direction::Forward, 2usize), (0, 3)), // Mul_iline
- ((Direction::Backward, 6usize), (0, 0)), // _ultiline
- ((Direction::Backward, 999usize), (0, 0)), // _ultiline
- ((Direction::Forward, 3usize), (0, 3)), // Mul_iline
- ((Direction::Forward, 0usize), (0, 3)), // Mul_iline
- ((Direction::Backward, 0usize), (0, 3)), // Mul_iline
- ((Direction::Forward, 999usize), (0, 9)), // Multilin_
- ((Direction::Forward, 999usize), (0, 9)), // Multilin_
+ ((Direction::Forward, 1usize), (0, 1)), // M|ultiline\n
+ ((Direction::Forward, 2usize), (0, 3)), // Mul|tiline\n
+ ((Direction::Backward, 6usize), (0, 0)), // |Multiline\n
+ ((Direction::Backward, 999usize), (0, 0)), // |Multiline\n
+ ((Direction::Forward, 3usize), (0, 3)), // Mul|tiline\n
+ ((Direction::Forward, 0usize), (0, 3)), // Mul|tiline\n
+ ((Direction::Backward, 0usize), (0, 3)), // Mul|tiline\n
+ ((Direction::Forward, 999usize), (0, 9)), // Multiline|\n
+ ((Direction::Forward, 999usize), (0, 9)), // Multiline|\n
]);
for ((direction, amount), coordinates) in moves_and_expected_coordinates {
@@ -446,7 +396,7 @@ mod test {
// First descent preserves column as the target line is wider
((Axis::V, Direction::Forward, 1usize), (1, 8)),
// Second descent clamps column as the target line is shorter
- ((Axis::V, Direction::Forward, 1usize), (2, 4)),
+ ((Axis::V, Direction::Forward, 1usize), (2, 5)),
// Third descent restores the original column
((Axis::V, Direction::Forward, 1usize), (3, 8)),
// Behaviour is preserved even through long jumps
@@ -760,45 +710,4 @@ mod test {
}
}
}
-
- #[test]
- fn test_categorize() {
- const WORD_TEST_CASE: &'static str =
- "_hello_world_あいうえおー12345678901234567890";
- const PUNCTUATION_TEST_CASE: &'static str =
- "!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~!”#$%&’()*+、。:;<=>?@「」^`{|}~";
- const WHITESPACE_TEST_CASE: &'static str = "      ";
-
- assert_eq!(Category::Eol, categorize('\n'));
-
- for ch in WHITESPACE_TEST_CASE.chars() {
- assert_eq!(
- Category::Whitespace,
- categorize(ch),
- "Testing '{}', but got `{:?}` instead of `Category::Whitespace`",
- ch,
- categorize(ch)
- );
- }
-
- for ch in WORD_TEST_CASE.chars() {
- assert_eq!(
- Category::Word,
- categorize(ch),
- "Testing '{}', but got `{:?}` instead of `Category::Word`",
- ch,
- categorize(ch)
- );
- }
-
- for ch in PUNCTUATION_TEST_CASE.chars() {
- assert_eq!(
- Category::Punctuation,
- categorize(ch),
- "Testing '{}', but got `{:?}` instead of `Category::Punctuation`",
- ch,
- categorize(ch)
- );
- }
- }
}
diff --git a/helix-core/src/position.rs b/helix-core/src/position.rs
index 3d85ff2f..392eee9c 100644
--- a/helix-core/src/position.rs
+++ b/helix-core/src/position.rs
@@ -1,4 +1,5 @@
use crate::{
+ chars::char_is_line_ending,
graphemes::{nth_next_grapheme_boundary, RopeGraphemes},
Rope, RopeSlice,
};
@@ -23,8 +24,9 @@ impl Position {
pub fn traverse(self, text: &crate::Tendril) -> Self {
let Self { mut row, mut col } = self;
// TODO: there should be a better way here
- for ch in text.chars() {
- if ch == '\n' {
+ let mut chars = text.chars().peekable();
+ while let Some(ch) = chars.next() {
+ if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
row += 1;
col = 0;
} else {
diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs
index ae058eb1..92e52d73 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -1,4 +1,4 @@
-use crate::{regex::Regex, Change, Rope, RopeSlice, Transaction};
+use crate::{chars::char_is_line_ending, regex::Regex, Change, Rope, RopeSlice, Transaction};
pub use helix_syntax::{get_language, get_language_name, Lang};
use std::{
@@ -579,9 +579,10 @@ impl LanguageLayer {
mut column,
} = point;
- // TODO: there should be a better way here
- for ch in text.bytes() {
- if ch == b'\n' {
+ // TODO: there should be a better way here.
+ let mut chars = text.chars().peekable();
+ while let Some(ch) = chars.next() {
+ if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
row += 1;
column = 0;
} else {
diff --git a/helix-lsp/src/client.rs b/helix-lsp/src/client.rs
index 101d2f9b..7f136fe8 100644
--- a/helix-lsp/src/client.rs
+++ b/helix-lsp/src/client.rs
@@ -3,7 +3,7 @@ use crate::{
Call, Error, OffsetEncoding, Result,
};
-use helix_core::{find_root, ChangeSet, Rope};
+use helix_core::{chars::char_is_line_ending, find_root, ChangeSet, Rope};
use jsonrpc_core as jsonrpc;
use lsp_types as lsp;
use serde_json::Value;
@@ -337,8 +337,9 @@ impl Client {
mut character,
} = pos;
- for ch in text.chars() {
- if ch == '\n' {
+ let mut chars = text.chars().peekable();
+ while let Some(ch) = chars.next() {
+ if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) {
line += 1;
character = 0;
} else {
diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs
index 8124c17a..b006504b 100644
--- a/helix-term/src/commands.rs
+++ b/helix-term/src/commands.rs
@@ -1,6 +1,6 @@
use helix_core::{
comment, coords_at_pos, find_first_non_whitespace_char, find_root, get_line_ending, graphemes,
- indent, line_end, match_brackets,
+ indent, line_end_char_index, match_brackets,
movement::{self, Direction},
object, pos_at_coords,
regex::{self, Regex},
@@ -342,7 +342,7 @@ fn move_line_end(cx: &mut Context) {
let text = doc.text();
let line = text.char_to_line(range.head);
- let pos = line_end(&text.slice(..), line);
+ let pos = line_end_char_index(&text.slice(..), line);
Range::new(pos, pos)
});
@@ -490,6 +490,8 @@ where
let count = cx.count();
// need to wait for next key
+ // TODO: should this be done by grapheme rather than char? For example,
+ // we can't properly handle the line-ending case here in terms of char.
cx.on_next_key(move |cx, event| {
let ch = match event {
KeyEvent {
@@ -623,7 +625,7 @@ fn replace(cx: &mut Context) {
KeyEvent {
code: KeyCode::Enter,
..
- } => Some('\n'), // TODO: replace this with DEFAULT_LINE_ENDING
+ } => Some('\n'), // TODO: use the document's default line ending.
_ => None,
};
@@ -763,7 +765,7 @@ fn extend_line_end(cx: &mut Context) {
let text = doc.text();
let line = text.char_to_line(range.head);
- let pos = line_end(&text.slice(..), line);
+ let pos = line_end_char_index(&text.slice(..), line);
Range::new(range.anchor, pos)
});
@@ -1642,7 +1644,7 @@ fn append_to_line(cx: &mut Context) {
let selection = doc.selection(view.id).transform(|range| {
let text = doc.text();
let line = text.char_to_line(range.head);
- let pos = line_end(&text.slice(..), line);
+ let pos = line_end_char_index(&text.slice(..), line);
Range::new(pos, pos)
});
doc.set_selection(view.id, selection);
diff --git a/helix-term/src/ui/markdown.rs b/helix-term/src/ui/markdown.rs
index be113747..3ce3a5b8 100644
--- a/helix-term/src/ui/markdown.rs
+++ b/helix-term/src/ui/markdown.rs
@@ -110,6 +110,8 @@ fn parse<'a>(contents: &'a str, theme: Option<&Theme>) -> tui::text::Text<'a> {
// TODO: replace tabs with indentation
let mut slice = &text[start..end];
+ // TODO: do we need to handle all unicode line endings
+ // here, or is just '\n' okay?
while let Some(end) = slice.find('\n') {
// emit span up to newline
let text = &slice[..end];
diff --git a/helix-tui/Cargo.toml b/helix-tui/Cargo.toml
index 89fa755d..30e2374d 100644
--- a/helix-tui/Cargo.toml
+++ b/helix-tui/Cargo.toml
@@ -22,3 +22,4 @@ unicode-segmentation = "1.2"
unicode-width = "0.1"
crossterm = { version = "0.20", optional = true }
serde = { version = "1", "optional" = true, features = ["derive"]}
+helix-core = { version = "0.2", path = "../helix-core" }
diff --git a/helix-tui/src/text.rs b/helix-tui/src/text.rs
index c671e918..b23bfd81 100644
--- a/helix-tui/src/text.rs
+++ b/helix-tui/src/text.rs
@@ -47,6 +47,7 @@
//! ]);
//! ```
use crate::style::Style;
+use helix_core::line_ending::str_is_line_ending;
use std::borrow::Cow;
use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr;
@@ -177,7 +178,7 @@ impl<'a> Span<'a> {
symbol: g,
style: base_style.patch(self.style),
})
- .filter(|s| s.symbol != "\n")
+ .filter(|s| !str_is_line_ending(s.symbol))
}
}
diff --git a/helix-tui/src/widgets/reflow.rs b/helix-tui/src/widgets/reflow.rs
index 94ff7330..ae561a4f 100644
--- a/helix-tui/src/widgets/reflow.rs
+++ b/helix-tui/src/widgets/reflow.rs
@@ -1,4 +1,5 @@
use crate::text::StyledGrapheme;
+use helix_core::line_ending::str_is_line_ending;
use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr;
@@ -62,13 +63,13 @@ impl<'a, 'b> LineComposer<'a> for WordWrapper<'a, 'b> {
// Ignore characters wider that the total max width.
if symbol.width() as u16 > self.max_line_width
// Skip leading whitespace when trim is enabled.
- || self.trim && symbol_whitespace && symbol != "\n" && current_line_width == 0
+ || self.trim && symbol_whitespace && !str_is_line_ending(symbol) && current_line_width == 0
{
continue;
}
// Break on newline and discard it.
- if symbol == "\n" {
+ if str_is_line_ending(symbol) {
if prev_whitespace {
current_line_width = width_to_last_word_end;
self.current_line.truncate(symbols_to_last_word_end);
@@ -170,7 +171,7 @@ impl<'a, 'b> LineComposer<'a> for LineTruncator<'a, 'b> {
}
// Break on newline and discard it.
- if symbol == "\n" {
+ if str_is_line_ending(symbol) {
break;
}
@@ -199,7 +200,7 @@ impl<'a, 'b> LineComposer<'a> for LineTruncator<'a, 'b> {
if skip_rest {
for StyledGrapheme { symbol, .. } in &mut self.symbols {
- if symbol == "\n" {
+ if str_is_line_ending(symbol) {
break;
}
}
diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs
index 80be1ed2..3e38c24d 100644
--- a/helix-view/src/document.rs
+++ b/helix-view/src/document.rs
@@ -10,7 +10,7 @@ use std::sync::Arc;
use helix_core::{
auto_detect_line_ending,
- chars::{char_is_linebreak, char_is_whitespace},
+ chars::{char_is_line_ending, char_is_whitespace},
history::History,
syntax::{LanguageConfiguration, LOADER},
ChangeSet, Diagnostic, LineEnding, Rope, Selection, State, Syntax, Transaction,
@@ -81,6 +81,9 @@ pub struct Document {
/// Current indent style.
pub indent_style: IndentStyle,
+ /// The document's default line ending.
+ pub line_ending: LineEnding,
+
syntax: Option<Syntax>,
// /// Corresponding language scope name. Usually `source.<lang>`.
pub(crate) language: Option<Arc<LanguageConfiguration>>,
@@ -99,7 +102,6 @@ pub struct Document {
diagnostics: Vec<Diagnostic>,
language_server: Option<Arc<helix_lsp::Client>>,
- line_ending: LineEnding,
}
use std::fmt;
@@ -254,21 +256,21 @@ impl Document {
pub fn load(path: PathBuf) -> Result<Self, Error> {
use std::{fs::File, io::BufReader};
- let doc = if !path.exists() {
+ let mut doc = if !path.exists() {
Rope::from(DEFAULT_LINE_ENDING.as_str())
} else {
let file = File::open(&path).context(format!("unable to open {:?}", path))?;
- let mut doc = Rope::from_reader(BufReader::new(file))?;
- // add missing newline at the end of file
- if doc.len_bytes() == 0 || doc.byte(doc.len_bytes() - 1) != b'\n' {
- doc.insert_char(doc.len_chars(), '\n');
- }
- doc
+ Rope::from_reader(BufReader::new(file))?
};
// search for line endings
let line_ending = auto_detect_line_ending(&doc).unwrap_or(DEFAULT_LINE_ENDING);
+ // add the detected line ending if the file is empty or does not already end with one
+ if doc.len_bytes() == 0 || !char_is_line_ending(doc.char(doc.len_chars() - 1)) {
+ doc.insert(doc.len_chars(), line_ending.as_str());
+ }
+
let mut doc = Self::new(doc);
// set the path and try detecting the language
doc.set_path(&path)?;
@@ -379,7 +381,7 @@ impl Document {
Some(' ') => false,
// Ignore blank lines.
- Some(c) if char_is_linebreak(c) => continue,
+ Some(c) if char_is_line_ending(c) => continue,
_ => {
prev_line_is_tabs = false;
@@ -403,7 +405,7 @@ impl Document {
c if char_is_whitespace(c) => count_is_done = true,
// Ignore blank lines.
- c if char_is_linebreak(c) => continue 'outer,
+ c if char_is_line_ending(c) => continue 'outer,
_ => break,
}
diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs
index db8ae87a..fb2eb36d 100644
--- a/helix-view/src/editor.rs
+++ b/helix-view/src/editor.rs
@@ -12,7 +12,7 @@ use anyhow::Error;
pub use helix_core::diagnostic::Severity;
pub use helix_core::register::Registers;
-use helix_core::Position;
+use helix_core::{Position, DEFAULT_LINE_ENDING};
#[derive(Debug)]
pub struct Editor {
@@ -150,7 +150,7 @@ impl Editor {
pub fn new_file(&mut self, action: Action) -> DocumentId {
use helix_core::Rope;
- let doc = Document::new(Rope::from("\n"));
+ let doc = Document::new(Rope::from(DEFAULT_LINE_ENDING.as_str()));
let id = self.documents.insert(doc);
self.documents[id].id = id;
self.switch(id, action);