Unnamed repository; edit this file 'description' to name the repository.
-rw-r--r--crates/rust-analyzer/src/line_index.rs60
1 files changed, 49 insertions, 11 deletions
diff --git a/crates/rust-analyzer/src/line_index.rs b/crates/rust-analyzer/src/line_index.rs
index 0d424b9157..7636c3da7f 100644
--- a/crates/rust-analyzer/src/line_index.rs
+++ b/crates/rust-analyzer/src/line_index.rs
@@ -27,10 +27,6 @@ pub(crate) enum LineEndings {
impl LineEndings {
/// Replaces `\r\n` with `\n` in-place in `src`.
pub(crate) fn normalize(src: String) -> (String, LineEndings) {
- if !src.as_bytes().contains(&b'\r') {
- return (src, LineEndings::Unix);
- }
-
// We replace `\r\n` with `\n` in-place, which doesn't break utf-8 encoding.
// While we *can* call `as_mut_vec` and do surgery on the live string
// directly, let's rather steal the contents of `src`. This makes the code
@@ -39,10 +35,19 @@ impl LineEndings {
let mut buf = src.into_bytes();
let mut gap_len = 0;
let mut tail = buf.as_mut_slice();
+ let mut crlf_seen = false;
+
+ let find_crlf = |src: &[u8]| src.windows(2).position(|it| it == b"\r\n");
+
loop {
let idx = match find_crlf(&tail[gap_len..]) {
- None => tail.len(),
- Some(idx) => idx + gap_len,
+ None if crlf_seen => tail.len(),
+ // SAFETY: buf is unchanged and therefor still contains utf8 data
+ None => return (unsafe { String::from_utf8_unchecked(buf) }, LineEndings::Unix),
+ Some(idx) => {
+ crlf_seen = true;
+ idx + gap_len
+ }
};
tail.copy_within(gap_len..idx, 0);
tail = &mut tail[idx - gap_len..];
@@ -54,15 +59,48 @@ impl LineEndings {
// Account for removed `\r`.
// After `set_len`, `buf` is guaranteed to contain utf-8 again.
- let new_len = buf.len() - gap_len;
let src = unsafe {
+ let new_len = buf.len() - gap_len;
buf.set_len(new_len);
String::from_utf8_unchecked(buf)
};
- return (src, LineEndings::Dos);
+ (src, LineEndings::Dos)
+ }
+}
- fn find_crlf(src: &[u8]) -> Option<usize> {
- src.windows(2).position(|it| it == b"\r\n")
- }
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn unix() {
+ let src = "a\nb\nc\n\n\n\n";
+ let (res, endings) = LineEndings::normalize(src.into());
+ assert_eq!(endings, LineEndings::Unix);
+ assert_eq!(res, src);
+ }
+
+ #[test]
+ fn dos() {
+ let src = "\r\na\r\n\r\nb\r\nc\r\n\r\n\r\n\r\n";
+ let (res, endings) = LineEndings::normalize(src.into());
+ assert_eq!(endings, LineEndings::Dos);
+ assert_eq!(res, "\na\n\nb\nc\n\n\n\n");
+ }
+
+ #[test]
+ fn mixed() {
+ let src = "a\r\nb\r\nc\r\n\n\r\n\n";
+ let (res, endings) = LineEndings::normalize(src.into());
+ assert_eq!(endings, LineEndings::Dos);
+ assert_eq!(res, "a\nb\nc\n\n\n\n");
+ }
+
+ #[test]
+ fn none() {
+ let src = "abc";
+ let (res, endings) = LineEndings::normalize(src.into());
+ assert_eq!(endings, LineEndings::Unix);
+ assert_eq!(res, src);
}
}