Unnamed repository; edit this file 'description' to name the repository.
-rw-r--r--Cargo.lock16
-rw-r--r--helix-core/Cargo.toml3
-rw-r--r--helix-core/src/diff.rs86
-rw-r--r--helix-vcs/Cargo.toml2
-rw-r--r--helix-vcs/src/diff.rs47
-rw-r--r--helix-vcs/src/diff/line_cache.rs2
-rw-r--r--helix-vcs/src/diff/worker.rs30
7 files changed, 76 insertions, 110 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 3694c060..083545be 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -748,7 +748,7 @@ dependencies = [
"gix-trace",
"gix-traverse",
"gix-worktree",
- "imara-diff",
+ "imara-diff 0.1.8",
"thiserror 2.0.12",
]
@@ -1409,7 +1409,7 @@ dependencies = [
"helix-loader",
"helix-parsec",
"helix-stdx",
- "imara-diff",
+ "imara-diff 0.2.0",
"indoc",
"log",
"nucleo",
@@ -1604,7 +1604,7 @@ dependencies = [
"gix",
"helix-core",
"helix-event",
- "imara-diff",
+ "imara-diff 0.2.0",
"log",
"parking_lot",
"tempfile",
@@ -1849,6 +1849,16 @@ dependencies = [
]
[[package]]
+name = "imara-diff"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f01d462f766df78ab820dd06f5eb700233c51f0f4c2e846520eaf4ba6aa5c5c"
+dependencies = [
+ "hashbrown 0.15.4",
+ "memchr",
+]
+
+[[package]]
name = "indexmap"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml
index b0520761..4e825364 100644
--- a/helix-core/Cargo.toml
+++ b/helix-core/Cargo.toml
@@ -46,8 +46,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
toml = "0.8"
-imara-diff = "0.1.8"
-
+imara-diff = "0.2.0"
encoding_rs = "0.8"
chrono = { version = "0.4", default-features = false, features = ["alloc", "std"] }
diff --git a/helix-core/src/diff.rs b/helix-core/src/diff.rs
index a5d6d722..5937f91c 100644
--- a/helix-core/src/diff.rs
+++ b/helix-core/src/diff.rs
@@ -1,51 +1,22 @@
use std::ops::Range;
use std::time::Instant;
-use imara_diff::intern::InternedInput;
-use imara_diff::Algorithm;
+use imara_diff::{Algorithm, Diff, Hunk, IndentHeuristic, IndentLevel, InternedInput};
use ropey::RopeSlice;
use crate::{ChangeSet, Rope, Tendril, Transaction};
-/// A `imara_diff::Sink` that builds a `ChangeSet` for a character diff of a hunk
-struct CharChangeSetBuilder<'a> {
- res: &'a mut ChangeSet,
- hunk: &'a InternedInput<char>,
- pos: u32,
-}
-
-impl imara_diff::Sink for CharChangeSetBuilder<'_> {
- type Out = ();
- fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
- self.res.retain((before.start - self.pos) as usize);
- self.res.delete(before.len());
- self.pos = before.end;
-
- let res = self.hunk.after[after.start as usize..after.end as usize]
- .iter()
- .map(|&token| self.hunk.interner[token])
- .collect();
-
- self.res.insert(res);
- }
-
- fn finish(self) -> Self::Out {
- self.res.retain(self.hunk.before.len() - self.pos as usize);
- }
-}
-
-struct LineChangeSetBuilder<'a> {
+struct ChangeSetBuilder<'a> {
res: ChangeSet,
after: RopeSlice<'a>,
file: &'a InternedInput<RopeSlice<'a>>,
current_hunk: InternedInput<char>,
+ char_diff: Diff,
pos: u32,
}
-impl imara_diff::Sink for LineChangeSetBuilder<'_> {
- type Out = ChangeSet;
-
- fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
+impl ChangeSetBuilder<'_> {
+ fn process_hunk(&mut self, before: Range<u32>, after: Range<u32>) {
let len = self.file.before[self.pos as usize..before.start as usize]
.iter()
.map(|&it| self.file.interner[it].len_chars())
@@ -109,25 +80,36 @@ impl imara_diff::Sink for LineChangeSetBuilder<'_> {
.flat_map(|&it| self.file.interner[it].chars());
self.current_hunk.update_before(hunk_before);
self.current_hunk.update_after(hunk_after);
-
// the histogram heuristic does not work as well
// for characters because the same characters often reoccur
// use myer diff instead
- imara_diff::diff(
+ self.char_diff.compute_with(
Algorithm::Myers,
- &self.current_hunk,
- CharChangeSetBuilder {
- res: &mut self.res,
- hunk: &self.current_hunk,
- pos: 0,
- },
+ &self.current_hunk.before,
+ &self.current_hunk.after,
+ self.current_hunk.interner.num_tokens(),
);
-
+ let mut pos = 0;
+ for Hunk { before, after } in self.char_diff.hunks() {
+ self.res.retain((before.start - pos) as usize);
+ self.res.delete(before.len());
+ pos = before.end;
+
+ let res = self.current_hunk.after[after.start as usize..after.end as usize]
+ .iter()
+ .map(|&token| self.current_hunk.interner[token])
+ .collect();
+
+ self.res.insert(res);
+ }
+ self.res
+ .retain(self.current_hunk.before.len() - pos as usize);
+ // reuse allocations
self.current_hunk.clear();
}
}
- fn finish(mut self) -> Self::Out {
+ fn finish(mut self) -> ChangeSet {
let len = self.file.before[self.pos as usize..]
.iter()
.map(|&it| self.file.interner[it].len_chars())
@@ -140,7 +122,7 @@ impl imara_diff::Sink for LineChangeSetBuilder<'_> {
struct RopeLines<'a>(RopeSlice<'a>);
-impl<'a> imara_diff::intern::TokenSource for RopeLines<'a> {
+impl<'a> imara_diff::TokenSource for RopeLines<'a> {
type Token = RopeSlice<'a>;
type Tokenizer = ropey::iter::Lines<'a>;
@@ -161,15 +143,23 @@ pub fn compare_ropes(before: &Rope, after: &Rope) -> Transaction {
let res = ChangeSet::with_capacity(32);
let after = after.slice(..);
let file = InternedInput::new(RopeLines(before.slice(..)), RopeLines(after));
- let builder = LineChangeSetBuilder {
+ let mut builder = ChangeSetBuilder {
res,
file: &file,
after,
pos: 0,
current_hunk: InternedInput::default(),
+ char_diff: Diff::default(),
};
-
- let res = imara_diff::diff(Algorithm::Histogram, &file, builder).into();
+ let mut diff = Diff::compute(Algorithm::Histogram, &file);
+ diff.postprocess_with_heuristic(
+ &file,
+ IndentHeuristic::new(|token| IndentLevel::for_ascii_line(file.interner[token].bytes(), 4)),
+ );
+ for hunk in diff.hunks() {
+ builder.process_hunk(hunk.before, hunk.after)
+ }
+ let res = builder.finish().into();
log::debug!(
"rope diff took {}s",
diff --git a/helix-vcs/Cargo.toml b/helix-vcs/Cargo.toml
index b5c150d3..d8936686 100644
--- a/helix-vcs/Cargo.toml
+++ b/helix-vcs/Cargo.toml
@@ -20,7 +20,7 @@ parking_lot.workspace = true
arc-swap = { version = "1.7.1" }
gix = { version = "0.72.1", features = ["attributes", "status"], default-features = false, optional = true }
-imara-diff = "0.1.8"
+imara-diff = "0.2.0"
anyhow = "1"
log = "0.4"
diff --git a/helix-vcs/src/diff.rs b/helix-vcs/src/diff.rs
index e49e171d..6ec29813 100644
--- a/helix-vcs/src/diff.rs
+++ b/helix-vcs/src/diff.rs
@@ -1,5 +1,4 @@
use std::iter::Peekable;
-use std::ops::Range;
use std::sync::Arc;
use helix_core::Rope;
@@ -12,6 +11,8 @@ use tokio::time::Instant;
use crate::diff::worker::DiffWorker;
+pub use imara_diff::Hunk;
+
mod line_cache;
mod worker;
@@ -52,8 +53,8 @@ impl DiffHandle {
let worker = DiffWorker {
channel: receiver,
diff: diff.clone(),
- new_hunks: Vec::default(),
diff_finished_notify: Arc::default(),
+ diff_alloc: imara_diff::Diff::default(),
};
let handle = tokio::spawn(worker.run(diff_base, doc));
let differ = DiffHandle {
@@ -118,48 +119,6 @@ const MAX_DIFF_LINES: usize = 64 * u16::MAX as usize;
// cap average line length to 128 for files with MAX_DIFF_LINES
const MAX_DIFF_BYTES: usize = MAX_DIFF_LINES * 128;
-/// A single change in a file potentially spanning multiple lines
-/// Hunks produced by the differs are always ordered by their position
-/// in the file and non-overlapping.
-/// Specifically for any two hunks `x` and `y` the following properties hold:
-///
-/// ``` no_compile
-/// assert!(x.before.end <= y.before.start);
-/// assert!(x.after.end <= y.after.start);
-/// ```
-#[derive(PartialEq, Eq, Clone, Debug)]
-pub struct Hunk {
- pub before: Range<u32>,
- pub after: Range<u32>,
-}
-
-impl Hunk {
- /// Can be used instead of `Option::None` for better performance
- /// because lines larger then `i32::MAX` are not supported by `imara-diff` anyways.
- /// Has some nice properties where it usually is not necessary to check for `None` separately:
- /// Empty ranges fail contains checks and also fails smaller then checks.
- pub const NONE: Hunk = Hunk {
- before: u32::MAX..u32::MAX,
- after: u32::MAX..u32::MAX,
- };
-
- /// Inverts a change so that `before`
- pub fn invert(&self) -> Hunk {
- Hunk {
- before: self.after.clone(),
- after: self.before.clone(),
- }
- }
-
- pub fn is_pure_insertion(&self) -> bool {
- self.before.is_empty()
- }
-
- pub fn is_pure_removal(&self) -> bool {
- self.after.is_empty()
- }
-}
-
/// A list of changes in a file sorted in ascending
/// non-overlapping order
#[derive(Debug)]
diff --git a/helix-vcs/src/diff/line_cache.rs b/helix-vcs/src/diff/line_cache.rs
index 460a2065..4a03a8f5 100644
--- a/helix-vcs/src/diff/line_cache.rs
+++ b/helix-vcs/src/diff/line_cache.rs
@@ -13,7 +13,7 @@
use std::mem::transmute;
use helix_core::{Rope, RopeSlice};
-use imara_diff::intern::{InternedInput, Interner};
+use imara_diff::{InternedInput, Interner};
use super::{MAX_DIFF_BYTES, MAX_DIFF_LINES};
diff --git a/helix-vcs/src/diff/worker.rs b/helix-vcs/src/diff/worker.rs
index 2b6466f6..3471b4cb 100644
--- a/helix-vcs/src/diff/worker.rs
+++ b/helix-vcs/src/diff/worker.rs
@@ -1,9 +1,7 @@
-use std::mem::swap;
-use std::ops::Range;
use std::sync::Arc;
use helix_core::{Rope, RopeSlice};
-use imara_diff::intern::InternedInput;
+use imara_diff::{IndentHeuristic, IndentLevel, InternedInput};
use parking_lot::RwLock;
use tokio::sync::mpsc::UnboundedReceiver;
use tokio::sync::Notify;
@@ -14,7 +12,6 @@ use crate::diff::{
};
use super::line_cache::InternedRopeLines;
-use super::Hunk;
#[cfg(test)]
mod test;
@@ -22,8 +19,8 @@ mod test;
pub(super) struct DiffWorker {
pub channel: UnboundedReceiver<Event>,
pub diff: Arc<RwLock<DiffInner>>,
- pub new_hunks: Vec<Hunk>,
pub diff_finished_notify: Arc<Notify>,
+ pub diff_alloc: imara_diff::Diff,
}
impl DiffWorker {
@@ -76,15 +73,26 @@ impl DiffWorker {
let mut diff = self.diff.write();
diff.diff_base = diff_base;
diff.doc = doc;
- swap(&mut diff.hunks, &mut self.new_hunks);
+ diff.hunks.clear();
+ diff.hunks.extend(self.diff_alloc.hunks());
+ drop(diff);
self.diff_finished_notify.notify_waiters();
- self.new_hunks.clear();
}
fn perform_diff(&mut self, input: &InternedInput<RopeSlice>) {
- imara_diff::diff(ALGORITHM, input, |before: Range<u32>, after: Range<u32>| {
- self.new_hunks.push(Hunk { before, after })
- })
+ self.diff_alloc.compute_with(
+ ALGORITHM,
+ &input.before,
+ &input.after,
+ input.interner.num_tokens(),
+ );
+ self.diff_alloc.postprocess_with(
+ &input.before,
+ &input.after,
+ IndentHeuristic::new(|token| {
+ IndentLevel::for_ascii_line(input.interner[token].bytes(), 4)
+ }),
+ );
}
}
@@ -94,7 +102,7 @@ struct EventAccumulator {
render_lock: Option<RenderLock>,
}
-impl EventAccumulator {
+impl<'a> EventAccumulator {
fn new() -> EventAccumulator {
EventAccumulator {
diff_base: None,