my fork of dmp
Diffstat (limited to 'src/dmp.rs')
| -rw-r--r-- | src/dmp.rs | 1031 |
1 files changed, 544 insertions, 487 deletions
@@ -9,7 +9,7 @@ use percent_encoding::{percent_decode, percent_encode, CONTROLS}; use serde::{Deserialize, Serialize}; use serde_repr::{Deserialize_repr, Serialize_repr}; -use crate::errors::Error; +use crate::{errors::Error, traits::BisectSplit}; /// Enum representing the different ops of diff #[derive(Debug, PartialEq, Eq, Clone, Copy, Serialize_repr, Deserialize_repr)] @@ -25,25 +25,25 @@ pub enum Ops { /// (Ops::Insert, String::new("Goodbye")) means add `Goodbye` /// (Ops::Equal, String::new("World")) means keep world #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct Diff(Ops, Vec<u8>); +pub struct Diff<T: Copy + Ord + Eq>(Ops, Vec<T>); -impl Diff { +impl<T: Copy + Ord + Eq> Diff<T> { /// Create a new diff object - pub fn new(op: Ops, text: &[u8]) -> Self { - Self(op, text.to_vec()) + pub fn new(op: Ops, data: &[T]) -> Self { + Self(op, data.to_vec()) } /// helper functions to create ops - pub fn delete(text: &[u8]) -> Self { - Self::new(Ops::Delete, text) + pub fn delete(data: &[T]) -> Self { + Self::new(Ops::Delete, data) } - pub fn insert(text: &[u8]) -> Self { - Self::new(Ops::Insert, text) + pub fn insert(data: &[T]) -> Self { + Self::new(Ops::Insert, data) } - pub fn equal(text: &[u8]) -> Self { - Self::new(Ops::Equal, text) + pub fn equal(data: &[T]) -> Self { + Self::new(Ops::Equal, data) } // returns the operation of the current diff @@ -51,8 +51,8 @@ impl Diff { self.0 } - // returns the bytes of the text - pub fn text(&self) -> &[u8] { + // returns the bytes of the data + pub fn data(&self) -> &[T] { &self.1[..] } } @@ -99,12 +99,12 @@ impl Default for DiffMatchPatch { } #[derive(Debug, PartialEq, Eq)] -struct HalfMatch<'a> { - prefix_long: &'a [u8], - suffix_long: &'a [u8], - prefix_short: &'a [u8], - suffix_short: &'a [u8], - common: &'a [u8], +struct HalfMatch<'a, T: Copy + Ord + Eq> { + prefix_long: &'a [T], + suffix_long: &'a [T], + prefix_short: &'a [T], + suffix_short: &'a [T], + common: &'a [T], } impl DiffMatchPatch { @@ -142,46 +142,13 @@ impl DiffMatchPatch { self.match_distance } - // fn diff_lines<'a>( - // &self, - // old: &'a [u16], - // new: &'a [u16] - // ) -> Result<Vec<Diff>, crate::errors::Error> { - // if old == new { - // if old.is_empty() { - // return Ok(Vec::new()); - // } - - // return Ok(vec![Diff::equal(old)]) - // } - - // if old.is_empty() { - // return Ok(vec![Diff::insert(new)]); - // } - - // if new.is_empty() { - // return Ok(vec![Diff::delete(old)]); - // } - - // // Trim common prefix - // let common_prefix = Self::common_prefix(old, new, false); - // let common_suffix = Self::common_prefix( - // &old[common_prefix..], - // &new[common_prefix..], - // true, - // ); - - - // todo!() - // } - - fn diff_internal<'a>( + pub(crate) fn diff_internal<'a>( &self, old_bytes: &'a [u8], new_bytes: &'a [u8], linemode: bool, deadline: Instant, - ) -> Result<Vec<Diff>, crate::errors::Error> { + ) -> Result<Vec<Diff<u8>>, crate::errors::Error> { // First, check if lhs and rhs are equal if old_bytes == new_bytes { if old_bytes.is_empty() { @@ -239,7 +206,7 @@ impl DiffMatchPatch { new: &'a [u8], linemode: bool, deadline: Instant, - ) -> Result<Vec<Diff>, crate::errors::Error> { + ) -> Result<Vec<Diff<u8>>, crate::errors::Error> { // returning all of the new part if old.is_empty() { return Ok(vec![Diff::insert(new)]); @@ -287,19 +254,15 @@ impl DiffMatchPatch { let new_b = half_match.suffix_short; let mid_common = half_match.common; - + // Send both pairs off for separate processing. let mut diffs_a = match self.diff_internal(old_a, new_a, linemode, deadline) { Ok(d) => d, - Err(_) => { - return Err(crate::errors::Error::Utf8Error) - } + Err(_) => return Err(crate::errors::Error::Utf8Error), }; let mut diffs_b = match self.diff_internal(old_b, new_b, linemode, deadline) { Ok(d) => d, - Err(_) => { - return Err(crate::errors::Error::Utf8Error) - } + Err(_) => return Err(crate::errors::Error::Utf8Error), }; // Merge the results @@ -315,13 +278,15 @@ impl DiffMatchPatch { match self.bisect(old, new, deadline) { Ok(b) => Ok(b), - Err(_) => { - Err(crate::errors::Error::Utf8Error) - } + Err(_) => Err(crate::errors::Error::Utf8Error), } } - fn half_match<'a>(&self, old: &'a [u8], new: &'a [u8]) -> Option<HalfMatch<'a>> { + fn half_match<'a, T: Copy + Ord + Eq>( + &self, + old: &'a [T], + new: &'a [T], + ) -> Option<HalfMatch<'a, T>> { // Don't risk returning a suboptimal diff when we have unlimited time if self.timeout() == u64::MAX { return None; @@ -393,14 +358,13 @@ impl DiffMatchPatch { old: &'a [u8], new: &'a [u8], deadline: Instant, - ) -> Result<Vec<Diff>, crate::errors::Error> { + ) -> Result<Vec<Diff<u8>>, crate::errors::Error> { let to_chars = Self::lines_to_chars(old, new); - - let mut diffs = - self.diff_internal(&to_chars.chars_old[..], &to_chars.chars_new[..], false, deadline)?; + + let diffs = self.diff_lines(&to_chars.chars_old[..], &to_chars.chars_new[..], deadline)?; // Convert diffs back to text - Self::chars_to_lines(&mut diffs[..], &to_chars.lines[..])?; + let mut diffs = Self::chars_to_lines(&diffs[..], &to_chars.lines[..]); // Eliminate freak matches Self::cleanup_semantic(&mut diffs); @@ -425,12 +389,12 @@ impl DiffMatchPatch { match diffs[pointer].op() { Ops::Insert => { insert_n += 1; - let mut data = diffs[pointer].text().to_vec(); + let mut data = diffs[pointer].data().to_vec(); insert_data.append(&mut data); } Ops::Delete => { delete_n += 1; - let mut data = diffs[pointer].text().to_vec(); + let mut data = diffs[pointer].data().to_vec(); delete_data.append(&mut data); } Ops::Equal => { @@ -470,15 +434,139 @@ impl DiffMatchPatch { Ok(diffs) } + pub(crate) fn diff_lines<'a>( + &self, + old: &'a [usize], + new: &'a [usize], + deadline: Instant, + ) -> Result<Vec<Diff<usize>>, crate::errors::Error> { + if old == new { + if old.is_empty() { + return Ok(Vec::new()); + } + + return Ok(vec![Diff::equal(old)]); + } + + if old.is_empty() { + return Ok(vec![Diff::insert(new)]); + } + + if new.is_empty() { + return Ok(vec![Diff::delete(old)]); + } + + // Trim common prefix + let common_prefix = Self::common_prefix(old, new, false); + let common_suffix = Self::common_prefix(&old[common_prefix..], &new[common_prefix..], true); + + let mut diffs = self.compute_lines( + &old[common_prefix..old.len() - common_suffix], + &new[common_prefix..new.len() - common_suffix], + deadline, + )?; + + if common_prefix > 0 { + let mut d = vec![Diff::equal(&old[..common_prefix])]; + d.append(&mut diffs); + diffs = d; + } + + if common_suffix > 0 { + diffs.push(Diff::new(Ops::Equal, &new[new.len() - common_suffix..])); + } + + Self::cleanup_merge(&mut diffs); + + Ok(diffs) + } + + fn compute_lines<'a>( + &self, + old: &'a [usize], + new: &'a [usize], + deadline: Instant, + ) -> Result<Vec<Diff<usize>>, crate::errors::Error> { + // returning all of the new part + if old.is_empty() { + return Ok(vec![Diff::insert(new)]); + } + + // return everything deleted + if new.is_empty() { + return Ok(vec![Diff::delete(old)]); + } + + let (long, short, old_gt_new) = if old.len() > new.len() { + (old, new, true) + } else { + (new, old, false) + }; + + // found a subsequence which contains the short text + if let Some(idx) = long + .windows(short.len()) + .step_by(1) + .position(|k| k == short) + { + // Shorter text is inside the longer text (speedup). + let op = if old_gt_new { Ops::Delete } else { Ops::Insert }; + let diffs = vec![ + Diff::new(op, &long[0..idx]), + Diff::equal(short), + Diff::new(op, &long[idx + short.len()..]), + ]; + + return Ok(diffs); + } + + if short.len() == 1 { + // After previous case, this can't be an equality + return Ok(vec![Diff::delete(old), Diff::insert(new)]); + } + + // Check if the problem can be split in two + if let Some(half_match) = self.half_match(old, new) { + let old_a = half_match.prefix_long; + let old_b = half_match.suffix_long; + + let new_a = half_match.prefix_short; + let new_b = half_match.suffix_short; + + let mid_common = half_match.common; + + // Send both pairs off for separate processing. + let mut diffs_a = match self.diff_lines(old_a, new_a, deadline) { + Ok(d) => d, + Err(_) => return Err(crate::errors::Error::Utf8Error), + }; + let mut diffs_b = match self.diff_lines(old_b, new_b, deadline) { + Ok(d) => d, + Err(_) => return Err(crate::errors::Error::Utf8Error), + }; + + // Merge the results + diffs_a.push(Diff::equal(mid_common)); + diffs_a.append(&mut diffs_b); + + return Ok(diffs_a); + } + + match self.bisect(old, new, deadline) { + Ok(b) => Ok(b), + Err(_) => Err(crate::errors::Error::Utf8Error), + } + } + // Find the 'middle snake' of a diff, split the problem in two // and return the recursively constructed diff. // See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. - fn bisect<'a>( + fn bisect<'a, T: BisectSplit>( &self, - old: &'a [u8], - new: &'a [u8], + old: &'a [T], + new: &'a [T], deadline: Instant, - ) -> Result<Vec<Diff>, crate::errors::Error> { + ) -> Result<Vec<Diff<T>>, crate::errors::Error> { let old_len = old.len() as i32; let new_len = new.len() as i32; @@ -541,7 +629,14 @@ impl DiffMatchPatch { let x2 = old_len - v2[k2_offset as usize]; if x1 >= x2 { // Overlap detected - return self.bisect_split(old, new, x1 as usize, y1 as usize, deadline); + return T::bisect_split( + self, + old, + new, + x1 as usize, + y1 as usize, + deadline, + ); } } } @@ -583,7 +678,15 @@ impl DiffMatchPatch { x2 = old_len - x2; if x1 >= x2 { // Overlap - return self.bisect_split(old, new, x1 as usize, y1 as usize, deadline); + // return bisect_split(old, new, x1 as usize, y1 as usize, deadline); + return T::bisect_split( + self, + old, + new, + x1 as usize, + y1 as usize, + deadline, + ); } } } @@ -593,43 +696,35 @@ impl DiffMatchPatch { Ok(vec![Diff::delete(old), Diff::insert(new)]) } - fn bisect_split( - &self, - old: &[u8], - new: &[u8], - x: usize, - y: usize, - deadline: Instant, - ) -> Result<Vec<Diff>, crate::errors::Error> { - let old_a = &old[..x]; - let new_a = &new[..y]; - - let old_b = &old[x..]; - let new_b = &new[y..]; - - // Compute both diffs serially. - let mut diffs_a = self.diff_internal(old_a, new_a, false, deadline)?; - let mut diffs_b = self.diff_internal(old_b, new_b, false, deadline)?; - - diffs_a.append(&mut diffs_b); + // fn bisect_split( + // &self, + // old: &[u8], + // new: &[u8], + // x: usize, + // y: usize, + // deadline: Instant, + // ) -> Result<Vec<Diff<u8>>, crate::errors::Error> { - Ok(diffs_a) - } + // } // Does a substring of shorttext exist within longtext such that the substring // is at least half the length of longtext? //idx Start index of quarter length substring within longtext. - fn half_match_i<'a>(long: &'a [u8], short: &'a [u8], idx: usize) -> Option<HalfMatch<'a>> { + fn half_match_i<'a, T: Copy + Ord + Eq>( + long: &'a [T], + short: &'a [T], + idx: usize, + ) -> Option<HalfMatch<'a, T>> { // Start with a 1/4 length substring at position i as a seed. let seed = &long[idx..idx + long.len() / 4]; let mut j = 0; - let mut best_common: &[u8] = &[]; - let mut best_long_a: &[u8] = &[]; - let mut best_long_b: &[u8] = &[]; - let mut best_short_a: &[u8] = &[]; - let mut best_short_b: &[u8] = &[]; + let mut best_common: &[T] = &[]; + let mut best_long_a: &[T] = &[]; + let mut best_long_b: &[T] = &[]; + let mut best_short_a: &[T] = &[]; + let mut best_short_b: &[T] = &[]; while let Some(pos) = &short[j..] .windows(seed.len()) @@ -761,7 +856,7 @@ impl DiffMatchPatch { } // Reduce the number of edits by eliminating semantically trivial equalities - fn cleanup_semantic(diffs: &mut Vec<Diff>) { + fn cleanup_semantic(diffs: &mut Vec<Diff<u8>>) { let mut changes = false; let mut pointer = 0_usize; @@ -791,14 +886,14 @@ impl DiffMatchPatch { insert_len_post = 0; delete_len_post = 0; - last_equality = Some(diffs[pointer].text()); + last_equality = Some(diffs[pointer].data()); } else { // Ops::Insert || Ops::Delete // Increasing changes of post_equality metrics if diffs[pointer].op() == Ops::Insert { - insert_len_post += diffs[pointer].text().len(); + insert_len_post += diffs[pointer].data().len(); } else { - delete_len_post += diffs[pointer].text().len(); + delete_len_post += diffs[pointer].data().len(); } // Eliminate an equality that is smaller or equal to the edits on both @@ -862,8 +957,8 @@ impl DiffMatchPatch { pointer = 1; while difflen > 0 && pointer < difflen { if diffs[pointer - 1].op() == Ops::Delete && diffs[pointer].op() == Ops::Insert { - let delete = diffs[pointer - 1].text().to_vec(); - let insert = diffs[pointer].text().to_vec(); + let delete = diffs[pointer - 1].data().to_vec(); + let insert = diffs[pointer].data().to_vec(); let delete_thres = delete.len() / 2 + if delete.len() % 2 > 0 { 1 } else { 0 }; let insert_thres = insert.len() / 2 + if insert.len() % 2 > 0 { 1 } else { 0 }; @@ -898,7 +993,7 @@ impl DiffMatchPatch { // Look for single edits surrounded on both sides by equalities // e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came. - fn cleanup_semantic_lossless(diffs: &mut Vec<Diff>) { + fn cleanup_semantic_lossless(diffs: &mut Vec<Diff<u8>>) { let mut pointer = 1_usize; let mut difflen = diffs.len(); @@ -906,9 +1001,9 @@ impl DiffMatchPatch { while difflen > 0 && pointer < difflen - 1 { // an edit surrounded by equalities if diffs[pointer - 1].op() == Ops::Equal && diffs[pointer + 1].op() == Ops::Equal { - let mut equality_prev = diffs[pointer - 1].text().to_vec(); - let mut edit = diffs[pointer].text().to_vec(); - let mut equality_next = diffs[pointer + 1].text().to_vec(); + let mut equality_prev = diffs[pointer - 1].data().to_vec(); + let mut edit = diffs[pointer].data().to_vec(); + let mut equality_next = diffs[pointer + 1].data().to_vec(); // Shift the edit as far left as possible let commonlen = Self::common_prefix(&equality_prev[..], &edit[..], true); @@ -952,7 +1047,7 @@ impl DiffMatchPatch { } // We have an improvement, save it back to the diff. - if diffs[pointer - 1].text() != best_equality_prev { + if diffs[pointer - 1].data() != best_equality_prev { if !best_equality_prev.is_empty() { diffs[pointer - 1].1 = best_equality_prev.to_vec(); } else { @@ -1028,9 +1123,9 @@ impl DiffMatchPatch { // Reorder and merge like edit sections. Merge equalities. // Any edit section can move as long as it doesn't cross an equality. - fn cleanup_merge(diffs: &mut Vec<Diff>) { + fn cleanup_merge<T: BisectSplit>(diffs: &mut Vec<Diff<T>>) { // Push a dummy diff ... this triggers the equality as a last step - diffs.push(Diff::equal(b"")); + diffs.push(Diff::equal(&[])); let mut difflen = diffs.len(); @@ -1048,13 +1143,13 @@ impl DiffMatchPatch { match diffs[pointer].op() { Ops::Insert => { insert_n += 1; - let mut data = diffs[pointer].text().to_vec(); + let mut data = diffs[pointer].data().to_vec(); insert_data.append(&mut data); pointer += 1; } Ops::Delete => { delete_n += 1; - let mut data = diffs[pointer].text().to_vec(); + let mut data = diffs[pointer].data().to_vec(); delete_data.append(&mut data); pointer += 1; } @@ -1084,7 +1179,7 @@ impl DiffMatchPatch { if commonlen > 0 { diffs[pointer].1 = [ insert_data[insert_data.len() - commonlen..].to_vec(), - diffs[pointer].text().to_vec(), + diffs[pointer].data().to_vec(), ] .concat(); insert_data = insert_data[..insert_data.len() - commonlen].to_vec(); @@ -1118,7 +1213,7 @@ impl DiffMatchPatch { pointer += 1; } else if pointer != 0 && diffs[pointer - 1].op() == Ops::Equal { // Merge this equality with the previous one. - let mut to_merge = diffs[pointer].text().to_vec(); + let mut to_merge = diffs[pointer].data().to_vec(); diffs[pointer - 1].1.append(&mut to_merge); diffs.remove(pointer); @@ -1136,7 +1231,7 @@ impl DiffMatchPatch { } if let Some(dl) = diffs.last() { - if dl.text().is_empty() { + if dl.data().is_empty() { diffs.pop(); } } @@ -1157,19 +1252,19 @@ impl DiffMatchPatch { ) { // This is a single edit surrounded by equalities. if diff_prev.op() == Ops::Equal && diff_next.op() == Ops::Equal { - let substr_idx = if diff.text().len() >= diff_prev.text().len() { - diff.text().len() - diff_prev.text().len() + let substr_idx = if diff.data().len() >= diff_prev.data().len() { + diff.data().len() - diff_prev.data().len() } else { 0 }; - if &diff.text()[substr_idx..] == diff_prev.text() { + if &diff.data()[substr_idx..] == diff_prev.data() { // Shift the edit over the previous equality. let new_current_data = [ - diff_prev.text(), - &diff.text()[..diff.text().len() - diff_prev.text().len()], + diff_prev.data(), + &diff.data()[..diff.data().len() - diff_prev.data().len()], ] .concat(); - let new_next_data = [diff_prev.text(), diff_next.text()].concat(); + let new_next_data = [diff_prev.data(), diff_next.data()].concat(); diffs[pointer].1 = new_current_data; diffs[pointer + 1].1 = new_next_data; @@ -1177,19 +1272,19 @@ impl DiffMatchPatch { difflen = diffs.len(); changes = true; - } else if &diff.text()[..if diff_next.text().len() <= diff.text().len() { - diff_next.text().len() + } else if &diff.data()[..if diff_next.data().len() <= diff.data().len() { + diff_next.data().len() } else { - diff.text().len() - }] == diff_next.text() + diff.data().len() + }] == diff_next.data() { // Shift the edit over the next equality. - let mut next_data = diffs[pointer + 1].text().to_vec(); + let mut next_data = diffs[pointer + 1].data().to_vec(); diffs[pointer - 1].1.append(&mut next_data); diffs[pointer].1 = [ - &diffs[pointer].text()[diffs[pointer + 1].text().len()..], - diffs[pointer + 1].text(), + &diffs[pointer].data()[diffs[pointer + 1].data().len()..], + diffs[pointer + 1].data(), ] .concat(); diffs.remove(pointer + 1); @@ -1209,7 +1304,7 @@ impl DiffMatchPatch { } // Reduce the number of edits by eliminating operationally trivial equalities. - fn cleanup_efficiency(&self, diffs: &mut Vec<Diff>) { + fn cleanup_efficiency(&self, diffs: &mut Vec<Diff<u8>>) { if diffs.is_empty() { return; } @@ -1223,7 +1318,7 @@ impl DiffMatchPatch { let diff = &diffs[idx]; // if diff.op() == Ops::Equal { // // Equality found - // if diff.text().len() + // if diff.data().len() // } idx += 1; } @@ -1231,9 +1326,9 @@ impl DiffMatchPatch { // return; // } // bool changes = false; - // QStack<Diff> equalities; // Stack of equalities. + // QStack<Diff<u8>> equalities; // Stack of equalities. // QString lastequality; // Always equal to equalities.lastElement().text - // QMutableListIterator<Diff> pointer(diffs); + // QMutableListIterator<Diff<u8>> pointer(diffs); // // Is there an insertion operation before the last equality. // bool pre_ins = false; // // Is there a deletion operation before the last equality. @@ -1334,7 +1429,7 @@ impl DiffMatchPatch { todo!() } - fn x_index(diffs: &[Diff], loc: usize) -> usize { + fn x_index<T: Copy + Eq + Ord>(diffs: &[Diff<T>], loc: usize) -> usize { let mut char1 = 0; let mut char2 = 0; @@ -1346,12 +1441,12 @@ impl DiffMatchPatch { for diff in diffs.iter() { if diff.op() != Ops::Insert { // Equality or deletion - char1 += diff.text().len(); + char1 += diff.data().len(); } if diff.op() != Ops::Delete { // Equality or insertion - char2 += diff.text().len(); + char2 += diff.data().len(); } if char1 > loc { @@ -1375,12 +1470,12 @@ impl DiffMatchPatch { last_char2 + (loc - last_char1) } - fn diff_text_old(diffs: &[Diff]) -> Vec<u8> { + fn diff_text_old(diffs: &[Diff<u8>]) -> Vec<u8> { diffs .iter() .filter_map(|diff| { if diff.op() != Ops::Insert { - Some(diff.text()) + Some(diff.data()) } else { None } @@ -1389,12 +1484,12 @@ impl DiffMatchPatch { .concat() } - fn diff_text_new(diffs: &[Diff]) -> Vec<u8> { + fn diff_text_new(diffs: &[Diff<u8>]) -> Vec<u8> { diffs .iter() .filter_map(|diff| { if diff.op() != Ops::Delete { - Some(diff.text()) + Some(diff.data()) } else { None } @@ -1440,8 +1535,8 @@ impl DiffMatchPatch { while !bigpatch.diffs.is_empty() && patch.length1 < max_bit - patch_margin { if bigpatch.diffs[0].op() == Ops::Insert { // Insertions are harmless. - patch.length2 += bigpatch.diffs[0].text().len(); - start2 += bigpatch.diffs[0].text().len(); + patch.length2 += bigpatch.diffs[0].data().len(); + start2 += bigpatch.diffs[0].data().len(); let d = bigpatch.diffs.remove(0); patch.diffs.push(d); empty = false; @@ -1449,20 +1544,20 @@ impl DiffMatchPatch { } else if bigpatch.diffs[0].op() == Ops::Delete && patch.diffs.len() == 1 && patch.diffs[0].op() == Ops::Equal - && bigpatch.diffs[0].text().len() > 2 * max_bit + && bigpatch.diffs[0].data().len() > 2 * max_bit { // This is a large deletion. Let it pass in one chunk. - patch.length1 += bigpatch.diffs[0].text().len(); - start1 += bigpatch.diffs[0].text().len(); + patch.length1 += bigpatch.diffs[0].data().len(); + start1 += bigpatch.diffs[0].data().len(); empty = false; patch .diffs - .push(Diff::new(bigpatch.diffs[0].op(), bigpatch.diffs[0].text())); + .push(Diff::new(bigpatch.diffs[0].op(), bigpatch.diffs[0].data())); bigpatch.diffs.remove(0); } else { // Deletion or equality. Only take as much as we can stomach. - let diff_text = bigpatch.diffs[0].text()[..bigpatch.diffs[0] - .text() + let diff_text = bigpatch.diffs[0].data()[..bigpatch.diffs[0] + .data() .len() .min(max_bit - patch.length1 - patch_margin)] .to_vec(); @@ -1481,7 +1576,7 @@ impl DiffMatchPatch { .push(Diff::new(bigpatch.diffs[0].op(), &diff_text)); let cond = if let Some(d) = bigpatch.diffs.first() { - diff_text == d.text() + diff_text == d.data() } else { false }; @@ -1489,7 +1584,7 @@ impl DiffMatchPatch { if cond { bigpatch.diffs.remove(0); } else if let Some(bd) = bigpatch.diffs.first_mut() { - bd.1 = bd.text()[diff_text.len()..].to_vec(); + bd.1 = bd.data()[diff_text.len()..].to_vec(); } } } @@ -1559,7 +1654,7 @@ impl DiffMatchPatch { let chars_old = Self::lines_to_chars_internal(old, &mut lines, &mut linehash, maxlines); // This basically represents the U16::MAX value - maxlines = 65535; + maxlines = 65535; // maxlines = 7; let chars_new = Self::lines_to_chars_internal(new, &mut lines, &mut linehash, maxlines); @@ -1584,23 +1679,24 @@ impl DiffMatchPatch { let mut broke = false; let mut cursor = 0; - text.split_inclusive(|u| *u == b'\n').enumerate() - .take(take) - .for_each(|(idx, line)| { - cursor += line.len(); + text.split_inclusive(|u| *u == b'\n') + .enumerate() + .take(take) + .for_each(|(idx, line)| { + cursor += line.len(); - let e = hash.entry(line).or_insert(array.len()); - // Fresh insert - if *e == array.len() { - array.push(line) - } + let e = hash.entry(line).or_insert(array.len()); + // Fresh insert + if *e == array.len() { + array.push(line) + } - // upcasting, should never fail - charlist.push(*e); + // upcasting, should never fail + charlist.push(*e); - // break at max lines - broke = idx == take - 1; - }); + // break at max lines + broke = idx == take - 1; + }); // We broke at max lines, so we'll account for the remaining text if broke { @@ -1619,30 +1715,26 @@ impl DiffMatchPatch { charlist } - fn chars_to_lines(diffs: &mut [Diff], lines: &[&[u8]]) -> Result<(), crate::errors::Error> { - // println!("{lines:?} {}", lines.len()); - for d in diffs.iter_mut() { - println!("{:?}", std::str::from_utf8(d.text())); - let chars = match std::str::from_utf8(d.text()) { - Ok(c) => c, - Err(_) => { - return Err(crate::errors::Error::Utf8Error) - }, - }; - // let mut txt = &[]; - let t = chars - .chars() - .map(|c| { - let idx: u32 = c.into(); - *lines.get(idx as usize).unwrap() // Investigate - }) - .collect::<Vec<_>>() - .concat(); - - d.1 = t; - } + fn chars_to_lines(diffs: &[Diff<usize>], lines: &[&[u8]]) -> Vec<Diff<u8>> { + diffs + .iter() + .map(|d| { + let t = d + .data() + .iter() + .map(|&d| { + *lines.get(d).unwrap() // investigate + }) + .collect::<Vec<_>>() + .concat(); - Ok(()) + Diff::new(d.op(), &t) + // let d = Diff::new(d.op(), vec!) + }) + .collect::<Vec<_>>() + + // Ok(()) + // todo!() } } @@ -1829,7 +1921,7 @@ impl DiffMatchPatch { // Patch Methods #[derive(Debug, Default, Clone)] pub struct Patch { - diffs: Vec<Diff>, + diffs: Vec<Diff<u8>>, start1: usize, start2: usize, length1: usize, @@ -1880,7 +1972,7 @@ impl Display for Patch { Ops::Equal => ' ', }; - let segment = format!("{sign}{}\n", percent_encode(diff.text(), CONTROLS)); + let segment = format!("{sign}{}\n", percent_encode(diff.data(), CONTROLS)); segments.push(segment) }); @@ -1891,8 +1983,8 @@ impl Display for Patch { pub enum PatchInput<'a> { Texts(&'a str, &'a str), - Diffs(&'a [Diff]), - TextDiffs(&'a str, &'a [Diff]), + Diffs(&'a [Diff<u8>]), + TextDiffs(&'a str, &'a [Diff<u8>]), } pub type Patches = Vec<Patch>; @@ -1967,7 +2059,7 @@ impl DiffMatchPatch { fn patch_make_internal( &self, txt: &[u8], - diffs: &[Diff], + diffs: &[Diff<u8>], ) -> Result<Patches, crate::errors::Error> { // No diffs -> no patches if diffs.is_empty() { @@ -1994,32 +2086,32 @@ impl DiffMatchPatch { match diff.op() { Ops::Insert => { - patch.length2 += diff.text().len(); + patch.length2 += diff.data().len(); postpatch = - [&postpatch[..char_n2], diff.text(), &postpatch[char_n2..]].concat(); + [&postpatch[..char_n2], diff.data(), &postpatch[char_n2..]].concat(); patch.diffs.push(diff.clone()); } Ops::Delete => { - patch.length1 += diff.text().len(); + patch.length1 += diff.data().len(); postpatch = [ &postpatch[..char_n2], - &postpatch[char_n2 + diff.text().len()..], + &postpatch[char_n2 + diff.data().len()..], ] .concat(); patch.diffs.push(diff.clone()); } Ops::Equal => { - if diff.text().len() <= 2 * patch_margin + if diff.data().len() <= 2 * patch_margin && !patch.diffs.is_empty() && diffs.len() != idx + 1 { // Small equality inside a patch. - patch.length1 += diff.text().len(); - patch.length2 += diff.text().len(); + patch.length1 += diff.data().len(); + patch.length2 += diff.data().len(); patch.diffs.push(diff.clone()); - } else if diff.text().len() >= 2 * patch_margin && !patch.diffs.is_empty() { + } else if diff.data().len() >= 2 * patch_margin && !patch.diffs.is_empty() { // Time for a new patch. self.patch_add_context(&mut patch, &prepatch); patches.push(patch.clone()); @@ -2036,10 +2128,10 @@ impl DiffMatchPatch { } if diff.op() != Ops::Insert { - char_n1 += diff.text().len(); + char_n1 += diff.data().len(); } if diff.op() != Ops::Delete { - char_n2 += diff.text().len(); + char_n2 += diff.data().len(); } }); @@ -2217,21 +2309,21 @@ impl DiffMatchPatch { if diff.op() == Ops::Insert { // Insertion source = - [&source[..sl + idx2], diff.text(), &source[sl + idx2..]] + [&source[..sl + idx2], diff.data(), &source[sl + idx2..]] .concat(); } else if diff.op() == Ops::Delete { // Deletion source = [ &source[..sl + idx2], &source[sl - + Self::x_index(&diffs, idx1 + diff.text().len())..], + + Self::x_index(&diffs, idx1 + diff.data().len())..], ] .concat(); } } if diff.op() != Ops::Delete { - idx1 += diff.text().len() + idx1 += diff.data().len() } }); } @@ -2265,7 +2357,7 @@ impl DiffMatchPatch { // Add some padding on start of first diff. if let Some(first_patch) = patches.first_mut() { let (add_null_pad, pad_len_gt_txt_len) = if let Some(fd) = first_patch.diffs.first() { - (fd.op() != Ops::Equal, pad_len > fd.text().len()) + (fd.op() != Ops::Equal, pad_len > fd.data().len()) } else { (true, false) }; @@ -2279,8 +2371,8 @@ impl DiffMatchPatch { } else if pad_len_gt_txt_len { // Grow first equality. if let Some(fd) = first_patch.diffs.first_mut() { - let extra_len = pad_len - fd.text().len(); - fd.1 = [&null_pad[fd.text().len()..], fd.text()].concat(); + let extra_len = pad_len - fd.data().len(); + fd.1 = [&null_pad[fd.data().len()..], fd.data()].concat(); first_patch.start1 -= extra_len; first_patch.start2 -= extra_len; first_patch.length1 += extra_len; @@ -2292,7 +2384,7 @@ impl DiffMatchPatch { // Add some padding on end of last diff. if let Some(last_patch) = patches.last_mut() { let (add_null_pad, pad_len_gt_txt_len) = if let Some(ld) = last_patch.diffs.last() { - (ld.op() != Ops::Equal, pad_len > ld.text().len()) + (ld.op() != Ops::Equal, pad_len > ld.data().len()) } else { (true, false) }; @@ -2305,7 +2397,7 @@ impl DiffMatchPatch { } else if pad_len_gt_txt_len { // Grow last equality. if let Some(ld) = last_patch.diffs.last_mut() { - let extra_len = pad_len - ld.text().len(); + let extra_len = pad_len - ld.data().len(); let mut padd = null_pad[..extra_len].to_vec(); ld.1.append(&mut padd); @@ -2330,7 +2422,7 @@ impl DiffMatchPatch { /// /// Returns: /// Vec of changes (Diff). - pub fn diff_main(&self, old: &str, new: &str) -> Result<Vec<Diff>, crate::errors::Error> { + pub fn diff_main(&self, old: &str, new: &str) -> Result<Vec<Diff<u8>>, crate::errors::Error> { let deadline = if let Some(i) = Instant::now().checked_add(Duration::from_millis(self.timeout())) { i @@ -2345,23 +2437,23 @@ impl DiffMatchPatch { /// For example, the diff of "mouse" and "sofas" is [(-1, "m"), (1, "s"), (0, "o"), (-1, "u"), (1, "fa"), (0, "s"), (-1, "e")]. /// While this is the optimum diff, it is difficult for humans to understand. Semantic cleanup rewrites the diff, expanding it into a more intelligible format. /// The above example would become: [(-1, "mouse"), (1, "sofas")]. If a diff is to be human-readable, it should be passed to diff_cleanupSemantic. - pub fn diff_cleanup_semantic(diffs: &mut Vec<Diff>) { + pub fn diff_cleanup_semantic(diffs: &mut Vec<Diff<u8>>) { Self::cleanup_semantic(diffs) } - pub fn diff_cleanup_efficiency(&self, diffs: &mut Vec<Diff>) { + pub fn diff_cleanup_efficiency(&self, diffs: &mut Vec<Diff<u8>>) { self.cleanup_efficiency(diffs) } - pub fn diff_levenshtein(diffs: &[Diff]) -> usize { + pub fn diff_levenshtein(diffs: &[Diff<u8>]) -> usize { let mut levenshtein = 0; let mut insert = 0; let mut delete = 0; diffs.iter().for_each(|diff| { match diff.op() { - Ops::Insert => insert += diff.text().len(), - Ops::Delete => delete += diff.text().len(), + Ops::Insert => insert += diff.data().len(), + Ops::Delete => delete += diff.data().len(), Ops::Equal => { // A deletion and an insertion is one substitution. levenshtein += insert.max(delete); @@ -2377,7 +2469,7 @@ impl DiffMatchPatch { } /// Takes a diff array and returns a pretty HTML sequence. This function is mainly intended as an example from which to write ones own display functions. - pub fn diff_pretty_html(diffs: &[Diff]) -> String { + pub fn diff_pretty_html(diffs: &[Diff<u8>]) -> String { todo!() } @@ -2525,7 +2617,7 @@ mod tests { time::{Duration, Instant}, }; - use crate::dmp::{self, Diff, HalfMatch, LineToChars}; + use crate::dmp::{Diff, HalfMatch, LineToChars}; use super::{DiffMatchPatch, Ops, Patch, PatchInput}; @@ -2700,49 +2792,24 @@ mod tests { // Convert lines down to characters. assert_eq!( LineToChars { - chars_old: [0_usize, 1, 0] - .iter() - .map(|i| char::from_u32(*i as u32).unwrap()) - .collect::<String>() - .as_bytes() - .to_vec(), - chars_new: [1_usize, 0, 1] - .iter() - .map(|i| char::from_u32(*i as u32).unwrap()) - .collect::<String>() - .as_bytes() - .to_vec(), + chars_old: vec![0_usize, 1, 0], + chars_new: vec![1_usize, 0, 1], lines: vec![b"alpha\n", b"beta\n"] }, DiffMatchPatch::lines_to_chars(b"alpha\nbeta\nalpha\n", b"beta\nalpha\nbeta\n") ); assert_eq!( LineToChars { - chars_old: "".as_bytes().to_vec(), - chars_new: [0_usize, 1, 2, 2] - .iter() - .map(|i| char::from_u32(*i as u32).unwrap()) - .collect::<String>() - .as_bytes() - .to_vec(), + chars_old: vec![], + chars_new: vec![0_usize, 1, 2, 2], lines: vec![b"alpha\r\n", b"beta\r\n", b"\r\n"] }, DiffMatchPatch::lines_to_chars(b"", b"alpha\r\nbeta\r\n\r\n\r\n") ); assert_eq!( LineToChars { - chars_old: [0_usize] - .iter() - .map(|i| char::from_u32(*i as u32).unwrap()) - .collect::<String>() - .as_bytes() - .to_vec(), - chars_new: [1_usize] - .iter() - .map(|i| char::from_u32(*i as u32).unwrap()) - .collect::<String>() - .as_bytes() - .to_vec(), + chars_old: vec![0_usize], + chars_new: vec![1_usize], lines: vec![b"a", b"b"] }, DiffMatchPatch::lines_to_chars(b"a", b"b") @@ -2752,38 +2819,30 @@ mod tests { const TLIMIT: usize = 300; let linestr = (0..TLIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>(); let linelist: Vec<&[u8]> = (0..TLIMIT).map(|i| linestr[i].as_bytes()).collect(); - let charlist = (0..TLIMIT) - .map(|i| char::from_u32(i as u32).unwrap()) - .collect::<String>(); + let charlist = (0..TLIMIT).collect::<Vec<_>>(); let linestr = linestr.join(""); let res = DiffMatchPatch::lines_to_chars(linestr.as_bytes(), b""); let src = LineToChars { - chars_old: charlist.as_bytes().to_vec(), - chars_new: String::new().as_bytes().to_vec(), + chars_old: charlist, + chars_new: vec![], lines: linelist, }; assert_eq!(res.chars_new, src.chars_new); assert_eq!(res.lines, src.lines); - // assert_eq!(res.chars_old, src.chars_old); + assert_eq!(res.chars_old, src.chars_old); } #[test] - fn test_diff_chars_to_lines() -> Result<(), crate::errors::Error> { + fn test_diff_chars_to_lines() { // Convert chars up to lines. - let d1 = [0_usize, 1, 0] - .iter() - .map(|i| char::from_u32(*i as u32).unwrap()) - .collect::<String>(); - let d2 = [1_usize, 0, 1] - .iter() - .map(|i| char::from_u32(*i as u32).unwrap()) - .collect::<String>(); - let mut diffs = [Diff::equal(d1.as_bytes()), Diff::insert(d2.as_bytes())]; + let d1 = [0_usize, 1, 0].to_vec(); + let d2 = [1_usize, 0, 1].to_vec(); + let diffs = [Diff::equal(&d1), Diff::insert(&d2)]; - DiffMatchPatch::chars_to_lines(&mut diffs, &[b"alpha\n", b"beta\n"])?; + let diffs = DiffMatchPatch::chars_to_lines(&diffs, &[b"alpha\n", b"beta\n"]); assert_eq!( - [ + vec![ Diff::equal(b"alpha\nbeta\nalpha\n"), Diff::insert(b"beta\nalpha\nbeta\n") ], @@ -2793,16 +2852,14 @@ mod tests { // More than 256 to reveal any 8-bit limitations. const TLIMIT: usize = 300; - let charlist = (0..TLIMIT) - .map(|i| char::from_u32(i as u32).unwrap()) - .collect::<String>(); + let charlist = (0..TLIMIT).collect::<Vec<_>>(); let linestr = (0..TLIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>(); let linelist: Vec<&[u8]> = (0..TLIMIT).map(|i| linestr[i].as_bytes()).collect(); - let mut diffs = [Diff::delete(charlist.as_bytes())]; - DiffMatchPatch::chars_to_lines(&mut diffs, &linelist[..])?; + let diffs = [Diff::delete(&charlist)]; + let diffs = DiffMatchPatch::chars_to_lines(&diffs, &linelist[..]); - assert_eq!([Diff::delete(linestr.join("").as_bytes())], diffs); + assert_eq!(vec![Diff::delete(linestr.join("").as_bytes())], diffs); // More than 65536 to verify any 16-bit limitation. const ULIMIT: usize = 10; @@ -2810,12 +2867,10 @@ mod tests { let lines = linestr.join(""); let l2c = DiffMatchPatch::lines_to_chars(lines.as_bytes(), b""); - let mut diffs = [Diff::insert(&l2c.chars_old)]; - DiffMatchPatch::chars_to_lines(&mut diffs, &l2c.lines)?; - - assert_eq!(lines.as_bytes(), diffs[0].text()); + let diffs = [Diff::insert(&l2c.chars_old)]; + let diffs = DiffMatchPatch::chars_to_lines(&diffs, &l2c.lines); - Ok(()) + assert_eq!(lines.as_bytes(), diffs[0].data()); } #[test] @@ -2823,7 +2878,7 @@ mod tests { // Cleanup a messy diff. // Null case. let mut diffs = vec![]; - let test: Vec<Diff> = vec![]; + let test: Vec<Diff<u8>> = vec![]; DiffMatchPatch::cleanup_merge(&mut diffs); assert_eq!(test, diffs); @@ -2950,7 +3005,7 @@ mod tests { // Cleanup semantically trivial equalities. // Null case. let mut diffs = vec![]; - let test: Vec<Diff> = vec![]; + let test: Vec<Diff<u8>> = vec![]; DiffMatchPatch::cleanup_semantic(&mut diffs); assert_eq!(test, diffs); @@ -2961,7 +3016,7 @@ mod tests { Diff::equal(b"12"), Diff::delete(b"e"), ]; - let test: Vec<Diff> = vec![ + let test: Vec<Diff<u8>> = vec![ Diff::delete(b"ab"), Diff::insert(b"cd"), Diff::equal(b"12"), @@ -2977,7 +3032,7 @@ mod tests { Diff::equal(b"1234"), Diff::delete(b"wxyz"), ]; - let test: Vec<Diff> = vec![ + let test: Vec<Diff<u8>> = vec![ Diff::delete(b"abc"), Diff::insert(b"ABC"), Diff::equal(b"1234"), @@ -2988,7 +3043,7 @@ mod tests { // Simple elimination. let mut diffs = vec![Diff::delete(b"a"), Diff::equal(b"b"), Diff::delete(b"c")]; - let test: Vec<Diff> = vec![Diff::delete(b"abc"), Diff::insert(b"b")]; + let test: Vec<Diff<u8>> = vec![Diff::delete(b"abc"), Diff::insert(b"b")]; DiffMatchPatch::cleanup_semantic(&mut diffs); assert_eq!(test, diffs); @@ -3000,7 +3055,7 @@ mod tests { Diff::equal(b"f"), Diff::insert(b"g"), ]; - let test: Vec<Diff> = vec![Diff::delete(b"abcdef"), Diff::insert(b"cdfg")]; + let test: Vec<Diff<u8>> = vec![Diff::delete(b"abcdef"), Diff::insert(b"cdfg")]; DiffMatchPatch::cleanup_semantic(&mut diffs); assert_eq!(test, diffs); @@ -3016,7 +3071,7 @@ mod tests { Diff::delete(b"B"), Diff::insert(b"2"), ]; - let test: Vec<Diff> = vec![Diff::delete(b"AB_AB"), Diff::insert(b"1A2_1A2")]; + let test: Vec<Diff<u8>> = vec![Diff::delete(b"AB_AB"), Diff::insert(b"1A2_1A2")]; DiffMatchPatch::cleanup_semantic(&mut diffs); assert_eq!(test, diffs); @@ -3026,7 +3081,7 @@ mod tests { Diff::delete(b"ow and the c"), Diff::equal(b"at."), ]; - let test: Vec<Diff> = vec![ + let test: Vec<Diff<u8>> = vec![ Diff::equal(b"The "), Diff::delete(b"cow and the "), Diff::equal(b"cat."), @@ -3036,13 +3091,13 @@ mod tests { // No overlap elimination. let mut diffs = vec![Diff::delete(b"abcxx"), Diff::insert(b"xxdef")]; - let test: Vec<Diff> = vec![Diff::delete(b"abcxx"), Diff::insert(b"xxdef")]; + let test: Vec<Diff<u8>> = vec![Diff::delete(b"abcxx"), Diff::insert(b"xxdef")]; DiffMatchPatch::cleanup_semantic(&mut diffs); assert_eq!(test, diffs); // Overlap elimination. let mut diffs = vec![Diff::delete(b"abcxxx"), Diff::insert(b"xxxdef")]; - let test: Vec<Diff> = vec![ + let test: Vec<Diff<u8>> = vec![ Diff::delete(b"abc"), Diff::equal(b"xxx"), Diff::insert(b"def"), @@ -3052,7 +3107,7 @@ mod tests { // Reverse overlap elimination. let mut diffs = vec![Diff::delete(b"xxxabc"), Diff::insert(b"defxxx")]; - let test: Vec<Diff> = vec![ + let test: Vec<Diff<u8>> = vec![ Diff::insert(b"def"), Diff::equal(b"xxx"), Diff::delete(b"abc"), @@ -3068,7 +3123,7 @@ mod tests { Diff::delete(b"A3"), Diff::insert(b"3BC"), ]; - let test: Vec<Diff> = vec![ + let test: Vec<Diff<u8>> = vec![ Diff::delete(b"abcd"), Diff::equal(b"1212"), Diff::insert(b"efghi"), @@ -3085,18 +3140,18 @@ mod tests { fn test_diff_cleanup_semantic_lossless() { // Slide diffs to match logical boundaries. // Null case. - let mut diffs: Vec<Diff> = vec![]; - let test: Vec<Diff> = vec![]; + let mut diffs: Vec<Diff<u8>> = vec![]; + let test: Vec<Diff<u8>> = vec![]; DiffMatchPatch::cleanup_semantic_lossless(&mut diffs); assert_eq!(test, diffs); // Blank lines. - let mut diffs: Vec<Diff> = vec![ + let mut diffs: Vec<Diff<u8>> = vec![ Diff::equal(b"AAA\r\n\r\nBBB"), Diff::insert(b"\r\nDDD\r\n\r\nBBB"), Diff::equal(b"\r\nEEE"), ]; - let test: Vec<Diff> = vec![ + let test: Vec<Diff<u8>> = vec![ Diff::equal(b"AAA\r\n\r\n"), Diff::insert(b"BBB\r\nDDD\r\n\r\n"), Diff::equal(b"BBB\r\nEEE"), @@ -3105,12 +3160,12 @@ mod tests { assert_eq!(test, diffs); // Line boundaries. - let mut diffs: Vec<Diff> = vec![ + let mut diffs: Vec<Diff<u8>> = vec![ Diff::equal(b"AAA\r\nBBB"), Diff::insert(b" DDD\r\nBBB"), Diff::equal(b" EEE"), ]; - let test: Vec<Diff> = vec![ + let test: Vec<Diff<u8>> = vec![ Diff::equal(b"AAA\r\n"), Diff::insert(b"BBB DDD\r\n"), Diff::equal(b"BBB EEE"), @@ -3119,12 +3174,12 @@ mod tests { assert_eq!(test, diffs); // Word boundaries. - let mut diffs: Vec<Diff> = vec![ + let mut diffs: Vec<Diff<u8>> = vec![ Diff::equal(b"The c"), Diff::insert(b"ow and the c"), Diff::equal(b"at."), ]; - let test: Vec<Diff> = vec![ + let test: Vec<Diff<u8>> = vec![ Diff::equal(b"The "), Diff::insert(b"cow and the "), Diff::equal(b"cat."), @@ -3133,12 +3188,12 @@ mod tests { assert_eq!(test, diffs); // Alphanumeric boundaries. - let mut diffs: Vec<Diff> = vec![ + let mut diffs: Vec<Diff<u8>> = vec![ Diff::equal(b"The-c"), Diff::insert(b"ow-and-the-c"), Diff::equal(b"at."), ]; - let test: Vec<Diff> = vec![ + let test: Vec<Diff<u8>> = vec![ Diff::equal(b"The-"), Diff::insert(b"cow-and-the-"), Diff::equal(b"cat."), @@ -3147,24 +3202,26 @@ mod tests { assert_eq!(test, diffs); // Hitting the start. - let mut diffs: Vec<Diff> = vec![Diff::equal(b"a"), Diff::delete(b"a"), Diff::equal(b"ax")]; - let test: Vec<Diff> = vec![Diff::delete(b"a"), Diff::equal(b"aax")]; + let mut diffs: Vec<Diff<u8>> = + vec![Diff::equal(b"a"), Diff::delete(b"a"), Diff::equal(b"ax")]; + let test: Vec<Diff<u8>> = vec![Diff::delete(b"a"), Diff::equal(b"aax")]; DiffMatchPatch::cleanup_semantic_lossless(&mut diffs); assert_eq!(test, diffs); // Hitting the end. - let mut diffs: Vec<Diff> = vec![Diff::equal(b"xa"), Diff::delete(b"a"), Diff::equal(b"a")]; - let test: Vec<Diff> = vec![Diff::equal(b"xaa"), Diff::delete(b"a")]; + let mut diffs: Vec<Diff<u8>> = + vec![Diff::equal(b"xa"), Diff::delete(b"a"), Diff::equal(b"a")]; + let test: Vec<Diff<u8>> = vec![Diff::equal(b"xaa"), Diff::delete(b"a")]; DiffMatchPatch::cleanup_semantic_lossless(&mut diffs); assert_eq!(test, diffs); // Sentence boundaries. - let mut diffs: Vec<Diff> = vec![ + let mut diffs: Vec<Diff<u8>> = vec![ Diff::equal(b"The xxx. The "), Diff::insert(b"zzz. The "), Diff::equal(b"yyy."), ]; - let test: Vec<Diff> = vec![ + let test: Vec<Diff<u8>> = vec![ Diff::equal(b"The xxx."), Diff::insert(b" The zzz."), Diff::equal(b" The yyy."), @@ -3299,182 +3356,182 @@ mod tests { // Perform a trivial diff. // Null case. - // assert!(dmp.diff_main("", "")?.is_empty()); - - // // Equality - // assert_eq!(vec![Diff::equal(b"abc")], dmp.diff_main("abc", "abc")?); - - // // Simple insert - // assert_eq!( - // vec![Diff::equal(b"ab"), Diff::insert(b"123"), Diff::equal(b"c")], - // dmp.diff_main("abc", "ab123c")? - // ); - - // // Simple delete - // assert_eq!( - // vec![Diff::equal(b"a"), Diff::delete(b"123"), Diff::equal(b"bc")], - // dmp.diff_main("a123bc", "abc")? - // ); - - // // Two insertions - // assert_eq!( - // vec![ - // Diff::equal(b"a"), - // Diff::insert(b"123"), - // Diff::equal(b"b"), - // Diff::insert(b"456"), - // Diff::equal(b"c"), - // ], - // dmp.diff_main("abc", "a123b456c")? - // ); - - // // Two deletions. - // assert_eq!( - // vec![ - // Diff::equal(b"a"), - // Diff::delete(b"123"), - // Diff::equal(b"b"), - // Diff::delete(b"456"), - // Diff::equal(b"c"), - // ], - // dmp.diff_main("a123b456c", "abc")? - // ); - - // // Perform a real diff. - // // Switch off the timeout. - // dmp.timeout = None; - // // Simple cases. - // assert_eq!( - // vec![Diff::delete(b"a"), Diff::insert(b"b"),], - // dmp.diff_main("a", "b")? - // ); - - // assert_eq!( - // vec![ - // Diff::delete(b"Apple"), - // Diff::insert(b"Banana"), - // Diff::equal(b"s are a"), - // Diff::insert(b"lso"), - // Diff::equal(b" fruit.") - // ], - // dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.")? - // ); - - // assert_eq!( - // vec![ - // Diff::delete(b"a"), - // Diff::insert("\u{0680}".as_bytes()), - // Diff::equal(b"x"), - // Diff::delete(b"\t"), - // Diff::insert(b"\0") - // ], - // dmp.diff_main("ax\t", "\u{0680}x\0")? - // ); - - // // Overlaps. - // assert_eq!( - // vec![ - // Diff::delete(b"1"), - // Diff::equal(b"a"), - // Diff::delete(b"y"), - // Diff::equal(b"b"), - // Diff::delete(b"2"), - // Diff::insert(b"xab"), - // ], - // dmp.diff_main("1ayb2", "abxab")? - // ); - - // assert_eq!( - // vec![ - // Diff::insert(b"xaxcx"), - // Diff::equal(b"abc"), - // Diff::delete(b"y"), - // ], - // dmp.diff_main("abcy", "xaxcxabc")? - // ); - - // assert_eq!( - // vec![ - // Diff::delete(b"ABCD"), - // Diff::equal(b"a"), - // Diff::delete(b"="), - // Diff::insert(b"-"), - // Diff::equal(b"bcd"), - // Diff::delete(b"="), - // Diff::insert(b"-"), - // Diff::equal(b"efghijklmnopqrs"), - // Diff::delete(b"EFGHIJKLMNOefg"), - // ], - // dmp.diff_main( - // "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", - // "a-bcd-efghijklmnopqrs" - // )? - // ); - - // // Large equality. - // assert_eq!( - // vec![ - // Diff::insert(b" "), - // Diff::equal(b"a"), - // Diff::insert(b"nd"), - // Diff::equal(b" [[Hepatopancreatic]]"), - // Diff::delete(b" and [[New"), - // ], - // dmp.diff_main( - // "a [[Hepatopancreatic]] and [[New", - // " and [[Hepatopancreatic]]" - // )? - // ); + assert!(dmp.diff_main("", "")?.is_empty()); + + // Equality + assert_eq!(vec![Diff::equal(b"abc")], dmp.diff_main("abc", "abc")?); + + // Simple insert + assert_eq!( + vec![Diff::equal(b"ab"), Diff::insert(b"123"), Diff::equal(b"c")], + dmp.diff_main("abc", "ab123c")? + ); + + // Simple delete + assert_eq!( + vec![Diff::equal(b"a"), Diff::delete(b"123"), Diff::equal(b"bc")], + dmp.diff_main("a123bc", "abc")? + ); + + // Two insertions + assert_eq!( + vec![ + Diff::equal(b"a"), + Diff::insert(b"123"), + Diff::equal(b"b"), + Diff::insert(b"456"), + Diff::equal(b"c"), + ], + dmp.diff_main("abc", "a123b456c")? + ); + + // Two deletions. + assert_eq!( + vec![ + Diff::equal(b"a"), + Diff::delete(b"123"), + Diff::equal(b"b"), + Diff::delete(b"456"), + Diff::equal(b"c"), + ], + dmp.diff_main("a123b456c", "abc")? + ); + + // Perform a real diff. + // Switch off the timeout. + dmp.timeout = None; + // Simple cases. + assert_eq!( + vec![Diff::delete(b"a"), Diff::insert(b"b"),], + dmp.diff_main("a", "b")? + ); + + assert_eq!( + vec![ + Diff::delete(b"Apple"), + Diff::insert(b"Banana"), + Diff::equal(b"s are a"), + Diff::insert(b"lso"), + Diff::equal(b" fruit.") + ], + dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.")? + ); + + assert_eq!( + vec![ + Diff::delete(b"a"), + Diff::insert("\u{0680}".as_bytes()), + Diff::equal(b"x"), + Diff::delete(b"\t"), + Diff::insert(b"\0") + ], + dmp.diff_main("ax\t", "\u{0680}x\0")? + ); + + // Overlaps. + assert_eq!( + vec![ + Diff::delete(b"1"), + Diff::equal(b"a"), + Diff::delete(b"y"), + Diff::equal(b"b"), + Diff::delete(b"2"), + Diff::insert(b"xab"), + ], + dmp.diff_main("1ayb2", "abxab")? + ); + + assert_eq!( + vec![ + Diff::insert(b"xaxcx"), + Diff::equal(b"abc"), + Diff::delete(b"y"), + ], + dmp.diff_main("abcy", "xaxcxabc")? + ); + + assert_eq!( + vec![ + Diff::delete(b"ABCD"), + Diff::equal(b"a"), + Diff::delete(b"="), + Diff::insert(b"-"), + Diff::equal(b"bcd"), + Diff::delete(b"="), + Diff::insert(b"-"), + Diff::equal(b"efghijklmnopqrs"), + Diff::delete(b"EFGHIJKLMNOefg"), + ], + dmp.diff_main( + "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", + "a-bcd-efghijklmnopqrs" + )? + ); + + // Large equality. + assert_eq!( + vec![ + Diff::insert(b" "), + Diff::equal(b"a"), + Diff::insert(b"nd"), + Diff::equal(b" [[Hepatopancreatic]]"), + Diff::delete(b" and [[New"), + ], + dmp.diff_main( + "a [[Hepatopancreatic]] and [[New", + " and [[Hepatopancreatic]]" + )? + ); // Timeout. - // const LOW_TIMEOUT: u64 = 100; - // dmp.timeout = Some(LOW_TIMEOUT); - // let a = vec!["`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; 2048].join(""); - // let b = vec!["I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; 2048].join(""); + const LOW_TIMEOUT: u64 = 100; + dmp.timeout = Some(LOW_TIMEOUT); + let a = vec!["`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; 2048].join(""); + let b = vec!["I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; 2048].join(""); - // let start = Instant::now(); - // dmp.diff_main(&a, &b)?; - // let end = Instant::now(); - // // Test that we took at least the timeout period (+ 5ms being generous). - // assert!((end - start).as_millis() <= LOW_TIMEOUT as u128 + 5); + let start = Instant::now(); + dmp.diff_main(&a, &b)?; + let end = Instant::now(); + // Test that we took at least the timeout period (+ 5ms being generous). + assert!((end - start).as_millis() <= LOW_TIMEOUT as u128 + 5); // Test the linemode speedup. // Must be long to pass the 100 char cutoff. // Simple line-mode. - // let a = "12345678901234567890123456789 0123456 78901234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; - // let b = "abcdefghij abcdefghij abcdefghij abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; - // dmp.checklines = Some(false); - // // let start = Instant::now(); - // let res_no_lm = dmp.diff_main(a, b)?; - // // let no_lm = Instant::now() - start; - // dmp.checklines = Some(true); - // // let start = Instant::now(); - // let res_yes_lm = dmp.diff_main(a, b)?; - // // let yes_lm = Instant::now() - start; - - // // Now, we'll run 2 checks - one for result equality, two for speedup - // assert_eq!(res_no_lm, res_yes_lm); - // // Fails, no-linemode takes less time than with linemode optimizations - // // Off by a few 30μs - // // assert!(no_lm > yes_lm); - - // // Single line-mode. - // let a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; - // let b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; - // dmp.checklines = Some(true); - // let yes_lm = dmp.diff_main(a, b)?; - // dmp.checklines = Some(false); - // let no_lm = dmp.diff_main(a, b)?; - // assert_eq!(no_lm, yes_lm); - - // // Overlap line-mode. - // let a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; - // let b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; - // dmp.checklines = Some(true); - // let yes_lm = dmp.diff_main(a, b)?; - // dmp.checklines = Some(false); - // let no_lm = dmp.diff_main(a, b)?; - // assert_eq!(rebuild_text(&yes_lm[..])?, rebuild_text(&no_lm[..])?); + let a = "12345678901234567890123456789 0123456 78901234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + let b = "abcdefghij abcdefghij abcdefghij abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; + dmp.checklines = Some(false); + let start = Instant::now(); + let res_no_lm = dmp.diff_main(a, b)?; + let no_lm = Instant::now() - start; + dmp.checklines = Some(true); + let start = Instant::now(); + let res_yes_lm = dmp.diff_main(a, b)?; + let yes_lm = Instant::now() - start; + + // Now, we'll run 2 checks - one for result equality, two for speedup + assert_eq!(res_no_lm, res_yes_lm); + // Fails, no-linemode takes less time than with linemode optimizations + // Off by a few 30μs + assert!(no_lm > yes_lm); + + // Single line-mode. + let a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; + let b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; + dmp.checklines = Some(true); + let yes_lm = dmp.diff_main(a, b)?; + dmp.checklines = Some(false); + let no_lm = dmp.diff_main(a, b)?; + assert_eq!(no_lm, yes_lm); + + // Overlap line-mode. + let a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + let b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; + dmp.checklines = Some(true); + let yes_lm = dmp.diff_main(a, b)?; + dmp.checklines = Some(false); + let no_lm = dmp.diff_main(a, b)?; + assert_eq!(rebuild_text(&yes_lm[..])?, rebuild_text(&no_lm[..])?); let dmp = DiffMatchPatch::default(); let old = std::fs::read_to_string("testdata/txt_old.txt").unwrap(); @@ -3696,17 +3753,17 @@ mod tests { // } // Helper to construct the two texts which made up the diff originally. - fn rebuild_text(diffs: &[Diff]) -> Result<(String, String), crate::errors::Error> { + fn rebuild_text(diffs: &[Diff<u8>]) -> Result<(String, String), crate::errors::Error> { let mut txt1 = vec![]; let mut txt2 = vec![]; diffs.iter().for_each(|d| { if d.op() != Ops::Insert { - txt1.push(d.text()); + txt1.push(d.data()); } if d.op() != Ops::Delete { - txt2.push(d.text()); + txt2.push(d.data()); } }); |