Unnamed repository; edit this file 'description' to name the repository.
Diffstat (limited to 'helix-core/src/diff.rs')
| -rw-r--r-- | helix-core/src/diff.rs | 237 |
1 files changed, 45 insertions, 192 deletions
diff --git a/helix-core/src/diff.rs b/helix-core/src/diff.rs index 5937f91c..6960c679 100644 --- a/helix-core/src/diff.rs +++ b/helix-core/src/diff.rs @@ -1,184 +1,58 @@ -use std::ops::Range; -use std::time::Instant; +use crate::{Rope, Transaction}; -use imara_diff::{Algorithm, Diff, Hunk, IndentHeuristic, IndentLevel, InternedInput}; -use ropey::RopeSlice; - -use crate::{ChangeSet, Rope, Tendril, Transaction}; - -struct ChangeSetBuilder<'a> { - res: ChangeSet, - after: RopeSlice<'a>, - file: &'a InternedInput<RopeSlice<'a>>, - current_hunk: InternedInput<char>, - char_diff: Diff, - pos: u32, -} - -impl ChangeSetBuilder<'_> { - fn process_hunk(&mut self, before: Range<u32>, after: Range<u32>) { - let len = self.file.before[self.pos as usize..before.start as usize] +/// Compares `old` and `new` to generate a [`Transaction`] describing +/// the steps required to get from `old` to `new`. +pub fn compare_ropes(old: &Rope, new: &Rope) -> Transaction { + // `similar` only works on contiguous data, so a `Rope` has + // to be temporarily converted into a `String`. + let old_converted = old.to_string(); + let new_converted = new.to_string(); + + // A timeout is set so after 1 seconds, the algorithm will start + // approximating. This is especially important for big `Rope`s or + // `Rope`s that are extremely dissimilar to each other. + let mut config = similar::TextDiff::configure(); + config.timeout(std::time::Duration::from_secs(1)); + + let diff = config.diff_chars(&old_converted, &new_converted); + + // The current position of the change needs to be tracked to + // construct the `Change`s. + let mut pos = 0; + Transaction::change( + old, + diff.ops() .iter() - .map(|&it| self.file.interner[it].len_chars()) - .sum(); - self.res.retain(len); - self.pos = before.end; - - // do not perform diffs on large hunks - let len_before = before.end - before.start; - let len_after = after.end - after.start; - - // Pure insertions/removals do not require a character diff. - // Very large changes are ignored because their character diff is expensive to compute - // TODO adjust heuristic to detect large changes? - if len_before == 0 - || len_after == 0 - || len_after > 5 * len_before - || 5 * len_after < len_before && len_before > 10 - || len_before + len_after > 200 - { - let remove = self.file.before[before.start as usize..before.end as usize] - .iter() - .map(|&it| self.file.interner[it].len_chars()) - .sum(); - self.res.delete(remove); - let mut fragment = Tendril::new(); - if len_after > 500 { - // copying a rope line by line is slower then copying the entire - // rope. Use to_string for very large changes instead.. - if self.file.after.len() == after.end as usize { - if after.start == 0 { - fragment = self.after.to_string().into(); - } else { - let start = self.after.line_to_char(after.start as usize); - fragment = self.after.slice(start..).to_string().into(); + .map(|op| op.as_tag_tuple()) + .filter_map(|(tag, old_range, new_range)| { + // `old_pos..pos` is equivalent to `start..end` for where + // the change should be applied. + let old_pos = pos; + pos += old_range.end - old_range.start; + + match tag { + // Semantically, inserts and replacements are the same thing. + similar::DiffTag::Insert | similar::DiffTag::Replace => { + // This is the text from the `new` rope that should be + // inserted into `old`. + let text: &str = { + let start = new.char_to_byte(new_range.start); + let end = new.char_to_byte(new_range.end); + &new_converted[start..end] + }; + Some((old_pos, pos, Some(text.into()))) } - } else if after.start == 0 { - let end = self.after.line_to_char(after.end as usize); - fragment = self.after.slice(..end).to_string().into(); - } else { - let start = self.after.line_to_char(after.start as usize); - let end = self.after.line_to_char(after.end as usize); - fragment = self.after.slice(start..end).to_string().into(); + similar::DiffTag::Delete => Some((old_pos, pos, None)), + similar::DiffTag::Equal => None, } - } else { - for &line in &self.file.after[after.start as usize..after.end as usize] { - for chunk in self.file.interner[line].chunks() { - fragment.push_str(chunk) - } - } - }; - self.res.insert(fragment); - } else { - // for reasonably small hunks, generating a ChangeSet from char diff can save memory - // TODO use a tokenizer (word diff?) for improved performance - let hunk_before = self.file.before[before.start as usize..before.end as usize] - .iter() - .flat_map(|&it| self.file.interner[it].chars()); - let hunk_after = self.file.after[after.start as usize..after.end as usize] - .iter() - .flat_map(|&it| self.file.interner[it].chars()); - self.current_hunk.update_before(hunk_before); - self.current_hunk.update_after(hunk_after); - // the histogram heuristic does not work as well - // for characters because the same characters often reoccur - // use myer diff instead - self.char_diff.compute_with( - Algorithm::Myers, - &self.current_hunk.before, - &self.current_hunk.after, - self.current_hunk.interner.num_tokens(), - ); - let mut pos = 0; - for Hunk { before, after } in self.char_diff.hunks() { - self.res.retain((before.start - pos) as usize); - self.res.delete(before.len()); - pos = before.end; - - let res = self.current_hunk.after[after.start as usize..after.end as usize] - .iter() - .map(|&token| self.current_hunk.interner[token]) - .collect(); - - self.res.insert(res); - } - self.res - .retain(self.current_hunk.before.len() - pos as usize); - // reuse allocations - self.current_hunk.clear(); - } - } - - fn finish(mut self) -> ChangeSet { - let len = self.file.before[self.pos as usize..] - .iter() - .map(|&it| self.file.interner[it].len_chars()) - .sum(); - - self.res.retain(len); - self.res - } -} - -struct RopeLines<'a>(RopeSlice<'a>); - -impl<'a> imara_diff::TokenSource for RopeLines<'a> { - type Token = RopeSlice<'a>; - type Tokenizer = ropey::iter::Lines<'a>; - - fn tokenize(&self) -> Self::Tokenizer { - self.0.lines() - } - - fn estimate_tokens(&self) -> u32 { - // we can provide a perfect estimate which is very nice for performance - self.0.len_lines() as u32 - } -} - -/// Compares `old` and `new` to generate a [`Transaction`] describing -/// the steps required to get from `old` to `new`. -pub fn compare_ropes(before: &Rope, after: &Rope) -> Transaction { - let start = Instant::now(); - let res = ChangeSet::with_capacity(32); - let after = after.slice(..); - let file = InternedInput::new(RopeLines(before.slice(..)), RopeLines(after)); - let mut builder = ChangeSetBuilder { - res, - file: &file, - after, - pos: 0, - current_hunk: InternedInput::default(), - char_diff: Diff::default(), - }; - let mut diff = Diff::compute(Algorithm::Histogram, &file); - diff.postprocess_with_heuristic( - &file, - IndentHeuristic::new(|token| IndentLevel::for_ascii_line(file.interner[token].bytes(), 4)), - ); - for hunk in diff.hunks() { - builder.process_hunk(hunk.before, hunk.after) - } - let res = builder.finish().into(); - - log::debug!( - "rope diff took {}s", - Instant::now().duration_since(start).as_secs_f64() - ); - res + }), + ) } #[cfg(test)] mod tests { use super::*; - fn test_identity(a: &str, b: &str) { - let mut old = Rope::from(a); - let new = Rope::from(b); - compare_ropes(&old, &new).apply(&mut old); - assert_eq!(old, new); - } - quickcheck::quickcheck! { fn test_compare_ropes(a: String, b: String) -> bool { let mut old = Rope::from(a); @@ -187,25 +61,4 @@ mod tests { old == new } } - - #[test] - fn equal_files() { - test_identity("foo", "foo"); - } - - #[test] - fn trailing_newline() { - test_identity("foo\n", "foo"); - test_identity("foo", "foo\n"); - } - - #[test] - fn new_file() { - test_identity("", "foo"); - } - - #[test] - fn deleted_file() { - test_identity("foo", ""); - } } |