Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-core/src/diff.rs')
-rw-r--r--helix-core/src/diff.rs237
1 files changed, 45 insertions, 192 deletions
diff --git a/helix-core/src/diff.rs b/helix-core/src/diff.rs
index 5937f91c..6960c679 100644
--- a/helix-core/src/diff.rs
+++ b/helix-core/src/diff.rs
@@ -1,184 +1,58 @@
-use std::ops::Range;
-use std::time::Instant;
+use crate::{Rope, Transaction};
-use imara_diff::{Algorithm, Diff, Hunk, IndentHeuristic, IndentLevel, InternedInput};
-use ropey::RopeSlice;
-
-use crate::{ChangeSet, Rope, Tendril, Transaction};
-
-struct ChangeSetBuilder<'a> {
- res: ChangeSet,
- after: RopeSlice<'a>,
- file: &'a InternedInput<RopeSlice<'a>>,
- current_hunk: InternedInput<char>,
- char_diff: Diff,
- pos: u32,
-}
-
-impl ChangeSetBuilder<'_> {
- fn process_hunk(&mut self, before: Range<u32>, after: Range<u32>) {
- let len = self.file.before[self.pos as usize..before.start as usize]
+/// Compares `old` and `new` to generate a [`Transaction`] describing
+/// the steps required to get from `old` to `new`.
+pub fn compare_ropes(old: &Rope, new: &Rope) -> Transaction {
+ // `similar` only works on contiguous data, so a `Rope` has
+ // to be temporarily converted into a `String`.
+ let old_converted = old.to_string();
+ let new_converted = new.to_string();
+
+ // A timeout is set so after 1 seconds, the algorithm will start
+ // approximating. This is especially important for big `Rope`s or
+ // `Rope`s that are extremely dissimilar to each other.
+ let mut config = similar::TextDiff::configure();
+ config.timeout(std::time::Duration::from_secs(1));
+
+ let diff = config.diff_chars(&old_converted, &new_converted);
+
+ // The current position of the change needs to be tracked to
+ // construct the `Change`s.
+ let mut pos = 0;
+ Transaction::change(
+ old,
+ diff.ops()
.iter()
- .map(|&it| self.file.interner[it].len_chars())
- .sum();
- self.res.retain(len);
- self.pos = before.end;
-
- // do not perform diffs on large hunks
- let len_before = before.end - before.start;
- let len_after = after.end - after.start;
-
- // Pure insertions/removals do not require a character diff.
- // Very large changes are ignored because their character diff is expensive to compute
- // TODO adjust heuristic to detect large changes?
- if len_before == 0
- || len_after == 0
- || len_after > 5 * len_before
- || 5 * len_after < len_before && len_before > 10
- || len_before + len_after > 200
- {
- let remove = self.file.before[before.start as usize..before.end as usize]
- .iter()
- .map(|&it| self.file.interner[it].len_chars())
- .sum();
- self.res.delete(remove);
- let mut fragment = Tendril::new();
- if len_after > 500 {
- // copying a rope line by line is slower then copying the entire
- // rope. Use to_string for very large changes instead..
- if self.file.after.len() == after.end as usize {
- if after.start == 0 {
- fragment = self.after.to_string().into();
- } else {
- let start = self.after.line_to_char(after.start as usize);
- fragment = self.after.slice(start..).to_string().into();
+ .map(|op| op.as_tag_tuple())
+ .filter_map(|(tag, old_range, new_range)| {
+ // `old_pos..pos` is equivalent to `start..end` for where
+ // the change should be applied.
+ let old_pos = pos;
+ pos += old_range.end - old_range.start;
+
+ match tag {
+ // Semantically, inserts and replacements are the same thing.
+ similar::DiffTag::Insert | similar::DiffTag::Replace => {
+ // This is the text from the `new` rope that should be
+ // inserted into `old`.
+ let text: &str = {
+ let start = new.char_to_byte(new_range.start);
+ let end = new.char_to_byte(new_range.end);
+ &new_converted[start..end]
+ };
+ Some((old_pos, pos, Some(text.into())))
}
- } else if after.start == 0 {
- let end = self.after.line_to_char(after.end as usize);
- fragment = self.after.slice(..end).to_string().into();
- } else {
- let start = self.after.line_to_char(after.start as usize);
- let end = self.after.line_to_char(after.end as usize);
- fragment = self.after.slice(start..end).to_string().into();
+ similar::DiffTag::Delete => Some((old_pos, pos, None)),
+ similar::DiffTag::Equal => None,
}
- } else {
- for &line in &self.file.after[after.start as usize..after.end as usize] {
- for chunk in self.file.interner[line].chunks() {
- fragment.push_str(chunk)
- }
- }
- };
- self.res.insert(fragment);
- } else {
- // for reasonably small hunks, generating a ChangeSet from char diff can save memory
- // TODO use a tokenizer (word diff?) for improved performance
- let hunk_before = self.file.before[before.start as usize..before.end as usize]
- .iter()
- .flat_map(|&it| self.file.interner[it].chars());
- let hunk_after = self.file.after[after.start as usize..after.end as usize]
- .iter()
- .flat_map(|&it| self.file.interner[it].chars());
- self.current_hunk.update_before(hunk_before);
- self.current_hunk.update_after(hunk_after);
- // the histogram heuristic does not work as well
- // for characters because the same characters often reoccur
- // use myer diff instead
- self.char_diff.compute_with(
- Algorithm::Myers,
- &self.current_hunk.before,
- &self.current_hunk.after,
- self.current_hunk.interner.num_tokens(),
- );
- let mut pos = 0;
- for Hunk { before, after } in self.char_diff.hunks() {
- self.res.retain((before.start - pos) as usize);
- self.res.delete(before.len());
- pos = before.end;
-
- let res = self.current_hunk.after[after.start as usize..after.end as usize]
- .iter()
- .map(|&token| self.current_hunk.interner[token])
- .collect();
-
- self.res.insert(res);
- }
- self.res
- .retain(self.current_hunk.before.len() - pos as usize);
- // reuse allocations
- self.current_hunk.clear();
- }
- }
-
- fn finish(mut self) -> ChangeSet {
- let len = self.file.before[self.pos as usize..]
- .iter()
- .map(|&it| self.file.interner[it].len_chars())
- .sum();
-
- self.res.retain(len);
- self.res
- }
-}
-
-struct RopeLines<'a>(RopeSlice<'a>);
-
-impl<'a> imara_diff::TokenSource for RopeLines<'a> {
- type Token = RopeSlice<'a>;
- type Tokenizer = ropey::iter::Lines<'a>;
-
- fn tokenize(&self) -> Self::Tokenizer {
- self.0.lines()
- }
-
- fn estimate_tokens(&self) -> u32 {
- // we can provide a perfect estimate which is very nice for performance
- self.0.len_lines() as u32
- }
-}
-
-/// Compares `old` and `new` to generate a [`Transaction`] describing
-/// the steps required to get from `old` to `new`.
-pub fn compare_ropes(before: &Rope, after: &Rope) -> Transaction {
- let start = Instant::now();
- let res = ChangeSet::with_capacity(32);
- let after = after.slice(..);
- let file = InternedInput::new(RopeLines(before.slice(..)), RopeLines(after));
- let mut builder = ChangeSetBuilder {
- res,
- file: &file,
- after,
- pos: 0,
- current_hunk: InternedInput::default(),
- char_diff: Diff::default(),
- };
- let mut diff = Diff::compute(Algorithm::Histogram, &file);
- diff.postprocess_with_heuristic(
- &file,
- IndentHeuristic::new(|token| IndentLevel::for_ascii_line(file.interner[token].bytes(), 4)),
- );
- for hunk in diff.hunks() {
- builder.process_hunk(hunk.before, hunk.after)
- }
- let res = builder.finish().into();
-
- log::debug!(
- "rope diff took {}s",
- Instant::now().duration_since(start).as_secs_f64()
- );
- res
+ }),
+ )
}
#[cfg(test)]
mod tests {
use super::*;
- fn test_identity(a: &str, b: &str) {
- let mut old = Rope::from(a);
- let new = Rope::from(b);
- compare_ropes(&old, &new).apply(&mut old);
- assert_eq!(old, new);
- }
-
quickcheck::quickcheck! {
fn test_compare_ropes(a: String, b: String) -> bool {
let mut old = Rope::from(a);
@@ -187,25 +61,4 @@ mod tests {
old == new
}
}
-
- #[test]
- fn equal_files() {
- test_identity("foo", "foo");
- }
-
- #[test]
- fn trailing_newline() {
- test_identity("foo\n", "foo");
- test_identity("foo", "foo\n");
- }
-
- #[test]
- fn new_file() {
- test_identity("", "foo");
- }
-
- #[test]
- fn deleted_file() {
- test_identity("foo", "");
- }
}