my fork of dmp
WIP: testing cleanup algorithm
Anubhab Bandyopadhyay 2024-08-06
parent c0e9c19 · commit ca9afcd
-rw-r--r--benches/prefix.rs33
-rw-r--r--src/dmp.rs940
2 files changed, 672 insertions, 301 deletions
diff --git a/benches/prefix.rs b/benches/prefix.rs
index 4ea6650..d82ce9d 100644
--- a/benches/prefix.rs
+++ b/benches/prefix.rs
@@ -4,10 +4,7 @@ use std::path::Path;
use criterion::{criterion_group, criterion_main, Criterion};
fn prefix_linear(a: &str, b: &str, res: usize) {
- let found = a.bytes()
- .zip(b.bytes())
- .take_while(|(a, b)| a == b )
- .count();
+ let found = a.bytes().zip(b.bytes()).take_while(|(a, b)| a == b).count();
if found != res {
panic!()
@@ -25,10 +22,9 @@ fn prefix_binary(a: &str, b: &str, res: usize) {
let mut pointstart = 0;
while pointmin < pointmid {
- if a[pointstart .. pointmid] ==
- b[pointstart .. pointmid] {
- pointmin = pointmid;
- pointstart = pointmin;
+ if a[pointstart..pointmid] == b[pointstart..pointmid] {
+ pointmin = pointmid;
+ pointstart = pointmin;
} else {
pointmax = pointmid;
}
@@ -49,24 +45,31 @@ fn create_data() -> Vec<(String, String, usize, usize)> {
let old = std::fs::read_to_string(basedir.join(format!("old_{n}.txt"))).unwrap();
let new = std::fs::read_to_string(basedir.join(format!("new_{n}.txt"))).unwrap();
- let res = std::fs::read_to_string(basedir.join(format!("ans_{n}.txt"))).unwrap().parse::<usize>().unwrap();
+ let res = std::fs::read_to_string(basedir.join(format!("ans_{n}.txt")))
+ .unwrap()
+ .parse::<usize>()
+ .unwrap();
(old, new, res, n)
- }).collect();
+ })
+ .collect();
data
}
pub fn prefix_bench(c: &mut Criterion) {
let d = create_data();
-
+
for (old, new, res, n) in d.iter() {
println!("For N={n}");
- c.bench_function("prefix linear", |bencher| bencher.iter(|| prefix_linear(old, new, *res)));
- c.bench_function("prefix binary", |bencher| bencher.iter(|| prefix_binary(old, new, *res)));
+ c.bench_function("prefix linear", |bencher| {
+ bencher.iter(|| prefix_linear(old, new, *res))
+ });
+ c.bench_function("prefix binary", |bencher| {
+ bencher.iter(|| prefix_binary(old, new, *res))
+ });
}
}
-
criterion_group!(prefix, prefix_bench);
-criterion_main!(prefix); \ No newline at end of file
+criterion_main!(prefix);
diff --git a/src/dmp.rs b/src/dmp.rs
index cf81328..5b88e5b 100644
--- a/src/dmp.rs
+++ b/src/dmp.rs
@@ -1,5 +1,9 @@
-use std::{collections::HashMap, io::BufRead, time::{Duration, Instant}};
-
+use std::{
+ borrow::BorrowMut,
+ collections::HashMap,
+ io::BufRead,
+ time::{Duration, Instant},
+};
/**
* The data structure representing a diff is an array of tuples:
@@ -13,7 +17,7 @@ use std::{collections::HashMap, io::BufRead, time::{Duration, Instant}};
pub enum Ops {
Delete = -1,
Insert,
- Equal
+ Equal,
}
/// A structure representing a diff
@@ -40,14 +44,14 @@ pub struct DiffMatchPatch {
/// Defaults to true, which does a faster, slightly less optimal diff.
checklines: Option<bool>,
/// A default timeout in num seconds, defaults to 1
- timeout: Option<u64>
+ timeout: Option<u64>,
}
impl Default for DiffMatchPatch {
fn default() -> Self {
Self {
checklines: Some(true),
- timeout: Some(1)
+ timeout: Some(1),
}
}
}
@@ -58,7 +62,7 @@ struct HalfMatch<'a> {
suffix_long: &'a [u8],
prefix_short: &'a [u8],
suffix_short: &'a [u8],
- common: &'a [u8]
+ common: &'a [u8],
}
impl DiffMatchPatch {
@@ -68,7 +72,8 @@ impl DiffMatchPatch {
// returns the configured timeout, defaults to `1`, None or `0` would mean infinite timeout
fn timeout(&self) -> u64 {
- self.timeout.map_or(u64::MAX, |tout| if tout > 0 { tout } else { u64::MAX })
+ self.timeout
+ .map_or(u64::MAX, |tout| if tout > 0 { tout } else { u64::MAX })
}
fn main_internal<'a>(&self, old_bytes: &'a [u8], new_bytes: &'a [u8]) -> Vec<Diff> {
@@ -82,58 +87,76 @@ impl DiffMatchPatch {
}
if old_bytes.is_empty() {
- return vec![Diff::new(Ops::Insert, new_bytes)]
+ return vec![Diff::new(Ops::Insert, new_bytes)];
}
if new_bytes.is_empty() {
- return vec![Diff::new(Ops::Delete, old_bytes)]
+ return vec![Diff::new(Ops::Delete, old_bytes)];
}
- let deadline = Instant::now().checked_add(Duration::from_secs(self.timeout())).unwrap();
+ let deadline = Instant::now()
+ .checked_add(Duration::from_secs(self.timeout()))
+ .unwrap();
// Trim common prefix
let common_prefix = Self::common_prefix(old_bytes, new_bytes, false);
- let common_suffix = Self::common_prefix(&old_bytes[common_prefix..], &new_bytes[common_prefix..], true);
+ let common_suffix = Self::common_prefix(
+ &old_bytes[common_prefix..],
+ &new_bytes[common_prefix..],
+ true,
+ );
- let mut diffs = self.compute(&old_bytes[common_prefix .. old_bytes.len() - common_suffix], &new_bytes[common_prefix .. new_bytes.len() - common_suffix]);
+ let mut diffs = self.compute(
+ &old_bytes[common_prefix..old_bytes.len() - common_suffix],
+ &new_bytes[common_prefix..new_bytes.len() - common_suffix],
+ );
// Restore the prefix and suffix.
if common_prefix > 0 {
- let mut d = vec![Diff::new(Ops::Equal, &old_bytes[.. common_prefix])];
+ let mut d = vec![Diff::new(Ops::Equal, &old_bytes[..common_prefix])];
d.append(&mut diffs);
diffs = d;
}
if common_suffix > 0 {
- diffs.push(Diff::new(Ops::Equal, &new_bytes[new_bytes.len() - common_suffix ..]));
+ diffs.push(Diff::new(
+ Ops::Equal,
+ &new_bytes[new_bytes.len() - common_suffix..],
+ ));
}
-
diffs
}
-
fn compute<'a>(&self, old: &'a [u8], new: &'a [u8]) -> Vec<Diff> {
// returning all of the new part
if old.is_empty() {
- return vec![Diff::new(Ops::Insert, new)]
+ return vec![Diff::new(Ops::Insert, new)];
}
// return everything deleted
if new.is_empty() {
- return vec![Diff::new(Ops::Delete, old)]
+ return vec![Diff::new(Ops::Delete, old)];
}
- let (long, short, old_gt_new) = if old.len() > new.len() { (old, new, true) } else { (new, old, false) };
+ let (long, short, old_gt_new) = if old.len() > new.len() {
+ (old, new, true)
+ } else {
+ (new, old, false)
+ };
// found a subsequence which contains the short text
- if let Some(idx) = long.windows(short.len()).step_by(1).position(|k| k == short) {
+ if let Some(idx) = long
+ .windows(short.len())
+ .step_by(1)
+ .position(|k| k == short)
+ {
// Shorter text is inside the longer text (speedup).
let op = if old_gt_new { Ops::Delete } else { Ops::Insert };
let diffs = vec![
- Diff::new(op, &long[0 .. idx]),
+ Diff::new(op, &long[0..idx]),
Diff::new(Ops::Equal, short),
- Diff::new(op, &long[idx + short.len() ..])
+ Diff::new(op, &long[idx + short.len()..]),
];
return diffs;
@@ -143,12 +166,12 @@ impl DiffMatchPatch {
// After previous case, this can't be an equality
return vec![Diff::new(Ops::Delete, old), Diff::new(Ops::Insert, new)];
}
-
+
// Check if the problem can be split in two
if let Some(half_match) = self.half_match(old, new) {
let old_a = half_match.prefix_long;
let old_b = half_match.suffix_long;
-
+
let new_a = half_match.prefix_short;
let new_b = half_match.suffix_short;
@@ -161,10 +184,10 @@ impl DiffMatchPatch {
// Merge the results
diffs_a.push(Diff::new(Ops::Equal, mid_common));
diffs_a.append(&mut diffs_b);
-
+
return diffs_a;
}
-
+
if self.checklines() && old.len() > 100 && new.len() > 100 {
return self.line_mode(old, new);
}
@@ -178,7 +201,11 @@ impl DiffMatchPatch {
return None;
}
- let (long, short) = if old.len() > new.len() { (old, new) } else { (new, old) };
+ let (long, short) = if old.len() > new.len() {
+ (old, new)
+ } else {
+ (new, old)
+ };
// pointless - two small for this algo
if long.len() < 4 || short.len() * 2 < long.len() {
@@ -187,10 +214,10 @@ impl DiffMatchPatch {
// First check if the second quarter is the seed for a half-match.
// let hm1 = Self::diff_half_match_i(long, short, (long.len() as f32 / 4.).ceil() as usize);
- let hm1 = Self::half_match_i(long, short, long.len() / 4 );
+ let hm1 = Self::half_match_i(long, short, long.len() / 4);
// Check again based on the third quarter.
// let hm2 = Self::diff_half_match_i(long, short, (long.len() as f32 / 2.).ceil() as usize);
- let hm2 = Self::half_match_i(long, short, long.len() / 2 );
+ let hm2 = Self::half_match_i(long, short, long.len() / 2);
if hm1.is_none() && hm2.is_none() {
return None;
@@ -208,10 +235,9 @@ impl DiffMatchPatch {
hm2
}
} else {
- return None
+ return None;
};
-
// A half-match was found, sort out the return data.
let half_match = if old.len() > new.len() {
HalfMatch {
@@ -219,7 +245,7 @@ impl DiffMatchPatch {
suffix_long: hm.suffix_long,
prefix_short: hm.prefix_short,
suffix_short: hm.suffix_short,
- common: hm.common
+ common: hm.common,
}
} else {
HalfMatch {
@@ -227,22 +253,18 @@ impl DiffMatchPatch {
suffix_long: hm.suffix_short,
prefix_short: hm.prefix_long,
suffix_short: hm.suffix_long,
- common: hm.common
+ common: hm.common,
}
};
-
+
Some(half_match)
}
-
// Quick line-level diff on both strings, then rediff the parts for greater accuracy
// This speedup can produce non-minimal diffs
fn line_mode<'a>(&self, old: &'a [u8], new: &'a [u8]) -> Vec<Diff> {
let to_chars = Self::lines_to_chars(old, new);
- let mut diffs = self.main_internal(
- &to_chars.chars_old,
- &to_chars.chars_new
- );
+ let mut diffs = self.main_internal(&to_chars.chars_old, &to_chars.chars_new);
Self::chars_to_lines(&mut diffs[..], &to_chars.lines[..]);
// anubhab
@@ -252,14 +274,14 @@ impl DiffMatchPatch {
fn diff_bisect<'a>(&self, old: &'a [u8], new: &'a [u8]) -> Vec<Diff> {
todo!()
}
-
+
// Does a substring of shorttext exist within longtext such that the substring
// is at least half the length of longtext?
//idx Start index of quarter length substring within longtext.
- fn half_match_i<'a>(long: &'a[u8], short: &'a[u8], idx: usize) -> Option<HalfMatch<'a>> {
+ fn half_match_i<'a>(long: &'a [u8], short: &'a [u8], idx: usize) -> Option<HalfMatch<'a>> {
// Start with a 1/4 length substring at position i as a seed.
- let seed = &long[idx .. idx + long.len() / 4];
+ let seed = &long[idx..idx + long.len() / 4];
let mut j = 0;
let mut best_common: &[u8] = &[];
@@ -268,39 +290,37 @@ impl DiffMatchPatch {
let mut best_short_a: &[u8] = &[];
let mut best_short_b: &[u8] = &[];
-
- while let Some(pos) = &short[ j .. ]
+ while let Some(pos) = &short[j..]
.windows(seed.len())
.step_by(1)
- .position(|p| p == seed) {
- j += *pos;
+ .position(|p| p == seed)
+ {
+ j += *pos;
- let prefix_len = Self::common_prefix(&long[idx ..], &short[j ..], false);
- let suffix_len = Self::common_prefix(&long[.. idx], &short[.. j], true);
+ let prefix_len = Self::common_prefix(&long[idx..], &short[j..], false);
+ let suffix_len = Self::common_prefix(&long[..idx], &short[..j], true);
- if best_common.len() < suffix_len + prefix_len {
- best_common = &short[j - suffix_len .. j + prefix_len];
-
- best_long_a = &long[.. idx - suffix_len];
- best_long_b = &long[idx + prefix_len ..];
+ if best_common.len() < suffix_len + prefix_len {
+ best_common = &short[j - suffix_len..j + prefix_len];
- best_short_a = &short[.. j - suffix_len];
- best_short_b = &short[j + prefix_len ..];
- }
+ best_long_a = &long[..idx - suffix_len];
+ best_long_b = &long[idx + prefix_len..];
+
+ best_short_a = &short[..j - suffix_len];
+ best_short_b = &short[j + prefix_len..];
+ }
- j += 1;
+ j += 1;
}
if best_common.len() * 2 >= long.len() {
- Some(
- HalfMatch {
- prefix_long: best_long_a,
- suffix_long: best_long_b,
- prefix_short: best_short_a,
- suffix_short: best_short_b,
- common: best_common
- }
- )
+ Some(HalfMatch {
+ prefix_long: best_long_a,
+ suffix_long: best_long_b,
+ prefix_short: best_short_a,
+ suffix_short: best_short_b,
+ common: best_common,
+ })
} else {
None
}
@@ -312,12 +332,13 @@ impl DiffMatchPatch {
// Reverse prefix is suffix
// TODO: investigate this further
fn common_prefix(lhs: &[u8], rhs: &[u8], reverse: bool) -> usize {
- if lhs.is_empty() || rhs.is_empty() ||
- (!reverse && (lhs.first() != rhs.first())) ||
- (reverse && (lhs.last() != rhs.last())) {
- return 0;
- }
-
+ if lhs.is_empty()
+ || rhs.is_empty()
+ || (!reverse && (lhs.first() != rhs.first()))
+ || (reverse && (lhs.last() != rhs.last()))
+ {
+ return 0;
+ }
let mut pointmin = 0;
let mut pointmax = lhs.len().min(rhs.len());
@@ -327,14 +348,17 @@ impl DiffMatchPatch {
while pointmin < pointmid {
let (lhsrange, rhsrange) = if !reverse {
- (pointstart .. pointmid, pointstart .. pointmid)
+ (pointstart..pointmid, pointstart..pointmid)
} else {
- (lhs.len() - pointmid .. lhs.len() - pointstart, rhs.len() - pointmid .. rhs.len() - pointstart)
+ (
+ lhs.len() - pointmid..lhs.len() - pointstart,
+ rhs.len() - pointmid..rhs.len() - pointstart,
+ )
};
if lhs[lhsrange] == rhs[rhsrange] {
- pointmin = pointmid;
- pointstart = pointmin;
+ pointmin = pointmid;
+ pointstart = pointmin;
} else {
pointmax = pointmid;
}
@@ -377,7 +401,8 @@ impl DiffMatchPatch {
delete_len_post = 0;
last_equality = Some(diffs[pointer].1.clone());
- } else { // Ops::Insert || Ops::Delete
+ } else {
+ // Ops::Insert || Ops::Delete
// Increasing changes of post_equality metrics
if diffs[pointer].0 == Ops::Insert {
insert_len_post += diffs[pointer].1.len();
@@ -389,38 +414,38 @@ impl DiffMatchPatch {
// sides of it.
if let Some(last_eq) = &last_equality {
if last_eq.len() <= insert_len_pre.max(delete_len_pre)
- && last_eq.len() <= insert_len_post.max(delete_len_post) {
-
- if let Some(&last) = equalities.last() {
- // Duplicate record
- diffs.insert(last, Diff::new(Ops::Delete, last_eq));
- // Change the other copy to insert
- diffs[last + 1].0 = Ops::Insert;
- // change diff length
- difflen = diffs.len();
-
- // Throw away the equality we just deleted.
- equalities.pop();
- // Throw away the previous equality (it needs to be reevaluated).
- equalities.pop();
-
- diff_mod = true;
- changes = true;
-
- if let Some(&e) = equalities.last() {
- pointer = e;
- } else {
- pointer = 0;
- }
+ && last_eq.len() <= insert_len_post.max(delete_len_post)
+ {
+ if let Some(&last) = equalities.last() {
+ // Duplicate record
+ diffs.insert(last, Diff::new(Ops::Delete, last_eq));
+ // Change the other copy to insert
+ diffs[last + 1].0 = Ops::Insert;
+ // change diff length
+ difflen = diffs.len();
+
+ // Throw away the equality we just deleted.
+ equalities.pop();
+ // Throw away the previous equality (it needs to be reevaluated).
+ equalities.pop();
+
+ diff_mod = true;
+ changes = true;
+
+ if let Some(&e) = equalities.last() {
+ pointer = e;
+ } else {
+ pointer = 0;
+ }
- // reset all counters
- insert_len_pre = 0;
- delete_len_pre = 0;
- insert_len_post = 0;
- delete_len_post = 0;
+ // reset all counters
+ insert_len_pre = 0;
+ delete_len_pre = 0;
+ insert_len_post = 0;
+ delete_len_post = 0;
- last_equality = None;
- }
+ last_equality = None;
+ }
}
}
}
@@ -445,7 +470,6 @@ impl DiffMatchPatch {
// // Number of characters that changed after the equality.
// var length_insertions2 = 0;
// var length_deletions2 = 0;
-
// // Normalize the diff.
// if (changes) {
@@ -503,6 +527,9 @@ impl DiffMatchPatch {
// Reorder and merge like edit sections. Merge equalities.
// Any edit section can move as long as it doesn't cross an equality.
fn cleanup_merge(diffs: &mut Vec<Diff>) {
+ // Push a dummy diff ... this triggers the equality as a last step
+ diffs.push(Diff::new(Ops::Equal, b""));
+
let mut difflen = diffs.len();
let mut pointer = 0_usize;
@@ -519,12 +546,14 @@ impl DiffMatchPatch {
match diffs[pointer].0 {
Ops::Insert => {
insert_n += 1;
- insert_data.append(&mut diffs[pointer].1);
+ let mut data = diffs[pointer].1.to_vec();
+ insert_data.append(&mut data);
pointer += 1;
}
Ops::Delete => {
delete_n += 1;
- delete_data.append(&mut diffs[pointer].1);
+ let mut data = diffs[pointer].1.to_vec();
+ delete_data.append(&mut data);
pointer += 1;
}
Ops::Equal => {
@@ -532,18 +561,140 @@ impl DiffMatchPatch {
if delete_n + insert_n > 1 {
if delete_n != 0 && insert_n != 0 {
// Factor out any common prefixies.
- let commonlen = Self::common_prefix(&insert_data[..], &delete_data[..], false);
+ let commonlen =
+ Self::common_prefix(&insert_data[..], &delete_data[..], false);
if commonlen != 0 {
+ let tmpidx = pointer - delete_n - insert_n;
+ if tmpidx > 0 && diffs[tmpidx - 1].0 == Ops::Equal {
+ let mut appenddata = insert_data[..commonlen].to_vec();
+ diffs[tmpidx - 1].1.append(&mut appenddata);
+ } else {
+ diffs.insert(
+ 0,
+ Diff::new(Ops::Equal, &insert_data[..commonlen]),
+ );
+ pointer += 1;
+ difflen = diffs.len();
+ }
+ insert_data = insert_data[commonlen..].to_vec();
+ delete_data = delete_data[commonlen..].to_vec();
+ }
- } else {
-
+ // Factor out any common suffixies.
+ let commonlen =
+ Self::common_prefix(&insert_data[..], &delete_data[..], true);
+ if commonlen > 0 {
+ diffs[pointer].1 = [
+ insert_data[insert_data.len() - commonlen..].to_vec(),
+ diffs[pointer].1.to_vec(),
+ ]
+ .concat();
+ insert_data = insert_data[..insert_data.len() - commonlen].to_vec();
+ delete_data = delete_data[..delete_data.len() - commonlen].to_vec();
}
}
+
+ // Delete the offending records and add the merged ones.
+ pointer -= delete_n + insert_n;
+
+ // Reversing because index will not change
+ (pointer..pointer + delete_n + insert_n)
+ .rev()
+ .for_each(|i| {
+ diffs.remove(i);
+ });
+ difflen = diffs.len();
+
+ if !delete_data.is_empty() {
+ diffs.insert(pointer, Diff::new(Ops::Delete, &delete_data));
+ pointer += 1;
+ difflen = diffs.len();
+ }
+
+ if !insert_data.is_empty() {
+ diffs.insert(pointer, Diff::new(Ops::Insert, &insert_data));
+ pointer += 1;
+ difflen = diffs.len();
+ }
+
+ pointer += 1;
+ } else if pointer != 0 && diffs[pointer - 1].0 == Ops::Equal {
+ // Merge this equality with the previous one.
+ let mut to_merge = diffs[pointer].1.to_vec();
+ diffs[pointer - 1].1.append(&mut to_merge);
+ diffs.remove(pointer);
+
+ difflen = diffs.len();
+ } else {
+ pointer += 1;
}
+
+ insert_n = 0;
+ delete_n = 0;
+ insert_data = Vec::new();
+ delete_data = Vec::new();
}
}
}
-
+
+ if let Some(dl) = diffs.last() {
+ if dl.1.is_empty() {
+ diffs.pop();
+ }
+ }
+
+ difflen = diffs.len();
+
+ // Second pass: look for single edits surrounded on both sides by equalities
+ // which can be shifted sideways to eliminate an equality.
+ // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
+ pointer = 1;
+ let mut changes = false;
+
+ while difflen > 0 && pointer < difflen - 1 {
+ if let (Some(diff_prev), Some(diff), Some(diff_next)) = (
+ diffs.get(pointer - 1),
+ diffs.get(pointer),
+ diffs.get(pointer + 1),
+ ) {
+ // This is a single edit surrounded by equalities.
+ if diff_prev.0 == Ops::Equal && diff_next.0 == Ops::Equal {
+ if diff.1[diff.1.len() - diff_prev.1.len()..] == diff_prev.1 {
+ // Shift the edit over the previous equality.
+ let new_current_data =
+ [&diff_prev.1, &diff.1[..diff.1.len() - diff_prev.1.len()]].concat();
+ let new_next_data = [&diff_prev.1[..], &diff_next.1[..]].concat();
+
+ diffs[pointer].1 = new_current_data;
+ diffs[pointer + 1].1 = new_next_data;
+ diffs.remove(pointer - 1);
+ difflen = diffs.len();
+
+ changes = true;
+ } else if diff.1[..diff_next.1.len()] == diff_next.1 {
+ // Shift the edit over the next equality.
+ let mut next_data = diffs[pointer + 1].1.to_vec();
+
+ diffs[pointer - 1].1.append(&mut next_data);
+ diffs[pointer].1 = [
+ &diffs[pointer].1[diffs[pointer + 1].1.len()..],
+ &diffs[pointer + 1].1[..],
+ ]
+ .concat();
+ diffs.remove(pointer + 1);
+ difflen = diffs.len();
+
+ changes = true;
+ }
+ }
+ }
+
+ pointer += 1;
+ }
+
+ if changes {
+ Self::cleanup_merge(diffs);
+ }
}
}
@@ -551,7 +702,7 @@ impl DiffMatchPatch {
struct LineToChars<'a> {
chars_old: Vec<u8>,
chars_new: Vec<u8>,
- lines: Vec<&'a [u8]>
+ lines: Vec<&'a [u8]>,
}
impl DiffMatchPatch {
@@ -563,20 +714,25 @@ impl DiffMatchPatch {
// let mut maxlines = 5;
let mut maxlines = 40000;
let chars_old = Self::lines_to_chars_internal(old, &mut lines, &mut linehash, maxlines);
-
+
// This basically represents the U16::MAX value
maxlines = 65535;
// maxlines = 7;
let chars_new = Self::lines_to_chars_internal(new, &mut lines, &mut linehash, maxlines);
-
+
LineToChars {
chars_old,
chars_new,
- lines
- }
+ lines,
+ }
}
- fn lines_to_chars_internal<'a>(text: &'a [u8], array: &mut Vec<&'a [u8]>, hash: &mut HashMap<&'a [u8], usize>, maxlines: usize) -> Vec<u8> {
+ fn lines_to_chars_internal<'a>(
+ text: &'a [u8],
+ array: &mut Vec<&'a [u8]>,
+ hash: &mut HashMap<&'a [u8], usize>,
+ maxlines: usize,
+ ) -> Vec<u8> {
let take = maxlines - array.len();
println!("Take: {take}");
@@ -604,30 +760,32 @@ impl DiffMatchPatch {
}
}
- //
+ //
if broke.is_some() {
- let line = &text[cursor ..];
+ let line = &text[cursor..];
let e = hash.entry(line).or_insert(array.len());
array.push(line);
charlist.push(char::from_u32(*e as u32).unwrap());
}
-
+
let chars: String = charlist.iter().collect::<String>();
-
+
chars.as_bytes().to_vec()
}
fn chars_to_lines(diffs: &mut [Diff], lines: &[&[u8]]) {
- diffs.iter_mut()
- .for_each(|d| {
+ diffs.iter_mut().for_each(|d| {
let chars = String::from_utf8(d.1.to_vec()).unwrap();
// let mut txt = &[];
- let t = chars.chars().map(|c| {
- let idx: u32 = c.into();
- *lines.get(idx as usize).unwrap()
- }).collect::<Vec<_>>()
- .concat();
+ let t = chars
+ .chars()
+ .map(|c| {
+ let idx: u32 = c.into();
+ *lines.get(idx as usize).unwrap()
+ })
+ .collect::<Vec<_>>()
+ .concat();
d.1 = t;
});
@@ -641,7 +799,7 @@ impl DiffMatchPatch {
/// new: New string to be diffed.
/// deadline: Optional time when the diff should be complete by. Used
/// internally for recursive calls. Users should set DiffTimeout instead.
- ///
+ ///
/// Returns:
/// Vec of changes (Diff).
pub fn diff_main<'a>(&self, old: &'a str, new: &'a str) -> Vec<Diff> {
@@ -650,7 +808,6 @@ impl DiffMatchPatch {
// Reduce the number of edits by eliminating semantically trivial equalities
pub fn diff_cleanup_semantic(diffs: &mut [Diff]) {
-
todo!()
}
@@ -695,7 +852,6 @@ impl DiffMatchPatch {
}
}
-
#[cfg(test)]
mod tests {
use crate::dmp::{Diff, HalfMatch, LineToChars, Ops};
@@ -727,36 +883,51 @@ mod tests {
// 'testPatchApply'
// ];
-
#[test]
- fn test_diff_is_destructurable() {
-
- }
+ fn test_diff_is_destructurable() {}
#[test]
fn test_prefix() {
// Detect any common prefix.
// Null case.
- assert_eq!(0, DiffMatchPatch::common_prefix("abc".as_bytes(), "xyz".as_bytes(), false));
+ assert_eq!(
+ 0,
+ DiffMatchPatch::common_prefix("abc".as_bytes(), "xyz".as_bytes(), false)
+ );
// Non-null case.
- assert_eq!(4, DiffMatchPatch::common_prefix("1234abcdef".as_bytes(), "1234xyz".as_bytes(), false));
+ assert_eq!(
+ 4,
+ DiffMatchPatch::common_prefix("1234abcdef".as_bytes(), "1234xyz".as_bytes(), false)
+ );
// Whole case.
- assert_eq!(4, DiffMatchPatch::common_prefix("1234".as_bytes(), "1234xyz".as_bytes(), false));
+ assert_eq!(
+ 4,
+ DiffMatchPatch::common_prefix("1234".as_bytes(), "1234xyz".as_bytes(), false)
+ );
}
#[test]
fn test_suffix() {
// Detect any common suffix.
// Null case.
- assert_eq!(0, DiffMatchPatch::common_prefix("abc".as_bytes(), "xyz".as_bytes(), true));
+ assert_eq!(
+ 0,
+ DiffMatchPatch::common_prefix("abc".as_bytes(), "xyz".as_bytes(), true)
+ );
// Non-null case.
- assert_eq!(4, DiffMatchPatch::common_prefix("abcdef1234".as_bytes(), "xyz1234".as_bytes(), true));
+ assert_eq!(
+ 4,
+ DiffMatchPatch::common_prefix("abcdef1234".as_bytes(), "xyz1234".as_bytes(), true)
+ );
// Whole case.
- assert_eq!(4, DiffMatchPatch::common_prefix("1234".as_bytes(), "xyz1234".as_bytes(), true));
+ assert_eq!(
+ 4,
+ DiffMatchPatch::common_prefix("1234".as_bytes(), "xyz1234".as_bytes(), true)
+ );
}
#[test]
@@ -764,51 +935,114 @@ mod tests {
let mut dmp = DiffMatchPatch::default();
// No match
- assert!(dmp.half_match("1234567890".as_bytes(), "abcdef".as_bytes()).is_none());
- assert!(dmp.half_match("12345".as_bytes(), "23".as_bytes()).is_none());
+ assert!(dmp
+ .half_match("1234567890".as_bytes(), "abcdef".as_bytes())
+ .is_none());
+ assert!(dmp
+ .half_match("12345".as_bytes(), "23".as_bytes())
+ .is_none());
// Single Match.
assert_eq!(
- Some(HalfMatch { prefix_long: "12".as_bytes(), suffix_long: "90".as_bytes(), prefix_short: "a".as_bytes(), suffix_short: "z".as_bytes(), common: "345678".as_bytes() }),
+ Some(HalfMatch {
+ prefix_long: "12".as_bytes(),
+ suffix_long: "90".as_bytes(),
+ prefix_short: "a".as_bytes(),
+ suffix_short: "z".as_bytes(),
+ common: "345678".as_bytes()
+ }),
dmp.half_match("1234567890".as_bytes(), "a345678z".as_bytes())
);
assert_eq!(
- Some(HalfMatch { prefix_long: "a".as_bytes(), suffix_long: "z".as_bytes(), prefix_short: "12".as_bytes(), suffix_short: "90".as_bytes(), common: "345678".as_bytes() }),
+ Some(HalfMatch {
+ prefix_long: "a".as_bytes(),
+ suffix_long: "z".as_bytes(),
+ prefix_short: "12".as_bytes(),
+ suffix_short: "90".as_bytes(),
+ common: "345678".as_bytes()
+ }),
dmp.half_match("a345678z".as_bytes(), "1234567890".as_bytes())
);
assert_eq!(
- Some(HalfMatch { prefix_long: "abc".as_bytes(), suffix_long: "z".as_bytes(), prefix_short: "1234".as_bytes(), suffix_short: "0".as_bytes(), common: "56789".as_bytes() }),
+ Some(HalfMatch {
+ prefix_long: "abc".as_bytes(),
+ suffix_long: "z".as_bytes(),
+ prefix_short: "1234".as_bytes(),
+ suffix_short: "0".as_bytes(),
+ common: "56789".as_bytes()
+ }),
dmp.half_match("abc56789z".as_bytes(), "1234567890".as_bytes())
);
assert_eq!(
- Some(HalfMatch { prefix_long: "a".as_bytes(), suffix_long: "xyz".as_bytes(), prefix_short: "1".as_bytes(), suffix_short: "7890".as_bytes(), common: "23456".as_bytes() }),
+ Some(HalfMatch {
+ prefix_long: "a".as_bytes(),
+ suffix_long: "xyz".as_bytes(),
+ prefix_short: "1".as_bytes(),
+ suffix_short: "7890".as_bytes(),
+ common: "23456".as_bytes()
+ }),
dmp.half_match("a23456xyz".as_bytes(), "1234567890".as_bytes())
);
// Multiple Matches.
assert_eq!(
- Some(HalfMatch { prefix_long: "12123".as_bytes(), suffix_long: "123121".as_bytes(), prefix_short: "a".as_bytes(), suffix_short: "z".as_bytes(), common: "1234123451234".as_bytes() }),
- dmp.half_match("121231234123451234123121".as_bytes(), "a1234123451234z".as_bytes())
+ Some(HalfMatch {
+ prefix_long: "12123".as_bytes(),
+ suffix_long: "123121".as_bytes(),
+ prefix_short: "a".as_bytes(),
+ suffix_short: "z".as_bytes(),
+ common: "1234123451234".as_bytes()
+ }),
+ dmp.half_match(
+ "121231234123451234123121".as_bytes(),
+ "a1234123451234z".as_bytes()
+ )
);
assert_eq!(
- Some(HalfMatch { prefix_long: "".as_bytes(), suffix_long: "-=-=-=-=-=".as_bytes(), prefix_short: "x".as_bytes(), suffix_short: "".as_bytes(), common: "x-=-=-=-=-=-=-=".as_bytes() }),
- dmp.half_match("x-=-=-=-=-=-=-=-=-=-=-=-=".as_bytes(), "xx-=-=-=-=-=-=-=".as_bytes())
+ Some(HalfMatch {
+ prefix_long: "".as_bytes(),
+ suffix_long: "-=-=-=-=-=".as_bytes(),
+ prefix_short: "x".as_bytes(),
+ suffix_short: "".as_bytes(),
+ common: "x-=-=-=-=-=-=-=".as_bytes()
+ }),
+ dmp.half_match(
+ "x-=-=-=-=-=-=-=-=-=-=-=-=".as_bytes(),
+ "xx-=-=-=-=-=-=-=".as_bytes()
+ )
);
assert_eq!(
- Some(HalfMatch { prefix_long: "-=-=-=-=-=".as_bytes(), suffix_long: "".as_bytes(), prefix_short: "".as_bytes(), suffix_short: "y".as_bytes(), common: "-=-=-=-=-=-=-=y".as_bytes() }),
- dmp.half_match("-=-=-=-=-=-=-=-=-=-=-=-=y".as_bytes(), "-=-=-=-=-=-=-=yy".as_bytes())
+ Some(HalfMatch {
+ prefix_long: "-=-=-=-=-=".as_bytes(),
+ suffix_long: "".as_bytes(),
+ prefix_short: "".as_bytes(),
+ suffix_short: "y".as_bytes(),
+ common: "-=-=-=-=-=-=-=y".as_bytes()
+ }),
+ dmp.half_match(
+ "-=-=-=-=-=-=-=-=-=-=-=-=y".as_bytes(),
+ "-=-=-=-=-=-=-=yy".as_bytes()
+ )
);
// Non-optimal halfmatch.
// Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy
assert_eq!(
- Some(HalfMatch { prefix_long: "qHillo".as_bytes(), suffix_long: "w".as_bytes(), prefix_short: "x".as_bytes(), suffix_short: "Hulloy".as_bytes(), common: "HelloHe".as_bytes() }),
+ Some(HalfMatch {
+ prefix_long: "qHillo".as_bytes(),
+ suffix_long: "w".as_bytes(),
+ prefix_short: "x".as_bytes(),
+ suffix_short: "Hulloy".as_bytes(),
+ common: "HelloHe".as_bytes()
+ }),
dmp.half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes())
);
// Optimal no halfmatch.
dmp.timeout = Some(u64::MAX);
- assert!(dmp.half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes()).is_none());
+ assert!(dmp
+ .half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes())
+ .is_none());
}
#[test]
@@ -818,19 +1052,30 @@ mod tests {
// Perform a trivial diff.
// Null case.
assert!(dmp.diff_main("", "").is_empty());
-
+
// Equality
- assert_eq!(vec![Diff::new(Ops::Equal, "abc".as_bytes())], dmp.diff_main("abc", "abc"));
+ assert_eq!(
+ vec![Diff::new(Ops::Equal, "abc".as_bytes())],
+ dmp.diff_main("abc", "abc")
+ );
// Simple insert
assert_eq!(
- vec![Diff::new(Ops::Equal, "ab".as_bytes()), Diff::new(Ops::Insert, "123".as_bytes()), Diff::new(Ops::Equal, "c".as_bytes())],
+ vec![
+ Diff::new(Ops::Equal, "ab".as_bytes()),
+ Diff::new(Ops::Insert, "123".as_bytes()),
+ Diff::new(Ops::Equal, "c".as_bytes())
+ ],
dmp.diff_main("abc", "ab123c")
);
// Simple delete
assert_eq!(
- vec![Diff::new(Ops::Equal, "a".as_bytes()), Diff::new(Ops::Delete, "123".as_bytes()), Diff::new(Ops::Equal, "bc".as_bytes())],
+ vec![
+ Diff::new(Ops::Equal, "a".as_bytes()),
+ Diff::new(Ops::Delete, "123".as_bytes()),
+ Diff::new(Ops::Equal, "bc".as_bytes())
+ ],
dmp.diff_main("a123bc", "abc")
);
@@ -846,104 +1091,147 @@ mod tests {
// dmp.diff_main("abc", "a123b456c")
// );
-// // Two insertions.
-// assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_INSERT, '123'], [DIFF_EQUAL, 'b'], [DIFF_INSERT, '456'], [DIFF_EQUAL, 'c']], dmp.diff_main('abc', 'a123b456c', false));
-
-// // Two deletions.
-// assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_DELETE, '123'], [DIFF_EQUAL, 'b'], [DIFF_DELETE, '456'], [DIFF_EQUAL, 'c']], dmp.diff_main('a123b456c', 'abc', false));
-
-// // Perform a real diff.
-// // Switch off the timeout.
-// dmp.Diff_Timeout = 0;
-// // Simple cases.
-// assertEquivalent([[DIFF_DELETE, 'a'], [DIFF_INSERT, 'b']], dmp.diff_main('a', 'b', false));
-
-// assertEquivalent([[DIFF_DELETE, 'Apple'], [DIFF_INSERT, 'Banana'], [DIFF_EQUAL, 's are a'], [DIFF_INSERT, 'lso'], [DIFF_EQUAL, ' fruit.']], dmp.diff_main('Apples are a fruit.', 'Bananas are also fruit.', false));
-
-// assertEquivalent([[DIFF_DELETE, 'a'], [DIFF_INSERT, '\u0680'], [DIFF_EQUAL, 'x'], [DIFF_DELETE, '\t'], [DIFF_INSERT, '\0']], dmp.diff_main('ax\t', '\u0680x\0', false));
-
-// // Overlaps.
-// assertEquivalent([[DIFF_DELETE, '1'], [DIFF_EQUAL, 'a'], [DIFF_DELETE, 'y'], [DIFF_EQUAL, 'b'], [DIFF_DELETE, '2'], [DIFF_INSERT, 'xab']], dmp.diff_main('1ayb2', 'abxab', false));
-
-// assertEquivalent([[DIFF_INSERT, 'xaxcx'], [DIFF_EQUAL, 'abc'], [DIFF_DELETE, 'y']], dmp.diff_main('abcy', 'xaxcxabc', false));
-
-// assertEquivalent([[DIFF_DELETE, 'ABCD'], [DIFF_EQUAL, 'a'], [DIFF_DELETE, '='], [DIFF_INSERT, '-'], [DIFF_EQUAL, 'bcd'], [DIFF_DELETE, '='], [DIFF_INSERT, '-'], [DIFF_EQUAL, 'efghijklmnopqrs'], [DIFF_DELETE, 'EFGHIJKLMNOefg']], dmp.diff_main('ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg', 'a-bcd-efghijklmnopqrs', false));
-
-// // Large equality.
-// assertEquivalent([[DIFF_INSERT, ' '], [DIFF_EQUAL, 'a'], [DIFF_INSERT, 'nd'], [DIFF_EQUAL, ' [[Pennsylvania]]'], [DIFF_DELETE, ' and [[New']], dmp.diff_main('a [[Pennsylvania]] and [[New', ' and [[Pennsylvania]]', false));
-
-// // Timeout.
-// dmp.Diff_Timeout = 0.1; // 100ms
-// var a = '`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n';
-// var b = 'I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n';
-// // Increase the text lengths by 1024 times to ensure a timeout.
-// for (var i = 0; i < 10; i++) {
-// a += a;
-// b += b;
-// }
-// var startTime = (new Date()).getTime();
-// dmp.diff_main(a, b);
-// var endTime = (new Date()).getTime();
-// // Test that we took at least the timeout period.
-// assertTrue(dmp.Diff_Timeout * 1000 <= endTime - startTime);
-// // Test that we didn't take forever (be forgiving).
-// // Theoretically this test could fail very occasionally if the
-// // OS task swaps or locks up for a second at the wrong moment.
-// assertTrue(dmp.Diff_Timeout * 1000 * 2 > endTime - startTime);
-// dmp.Diff_Timeout = 0;
-
-// // Test the linemode speedup.
-// // Must be long to pass the 100 char cutoff.
-// // Simple line-mode.
-// a = '1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n';
-// b = 'abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n';
-// assertEquivalent(dmp.diff_main(a, b, false), dmp.diff_main(a, b, true));
-
-// // Single line-mode.
-// a = '1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890';
-// b = 'abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij';
-// assertEquivalent(dmp.diff_main(a, b, false), dmp.diff_main(a, b, true));
-
-// // Overlap line-mode.
-// a = '1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n';
-// b = 'abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n';
-// var texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true));
-// var texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false));
-// assertEquivalent(texts_textmode, texts_linemode);
-
-// // Test null inputs.
-// try {
-// dmp.diff_main(null, null);
-// assertEquals(Error, null);
-// } catch (e) {
-// // Exception expected.
-// }
+ // // Two insertions.
+ // assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_INSERT, '123'], [DIFF_EQUAL, 'b'], [DIFF_INSERT, '456'], [DIFF_EQUAL, 'c']], dmp.diff_main('abc', 'a123b456c', false));
+
+ // // Two deletions.
+ // assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_DELETE, '123'], [DIFF_EQUAL, 'b'], [DIFF_DELETE, '456'], [DIFF_EQUAL, 'c']], dmp.diff_main('a123b456c', 'abc', false));
+
+ // // Perform a real diff.
+ // // Switch off the timeout.
+ // dmp.Diff_Timeout = 0;
+ // // Simple cases.
+ // assertEquivalent([[DIFF_DELETE, 'a'], [DIFF_INSERT, 'b']], dmp.diff_main('a', 'b', false));
+
+ // assertEquivalent([[DIFF_DELETE, 'Apple'], [DIFF_INSERT, 'Banana'], [DIFF_EQUAL, 's are a'], [DIFF_INSERT, 'lso'], [DIFF_EQUAL, ' fruit.']], dmp.diff_main('Apples are a fruit.', 'Bananas are also fruit.', false));
+
+ // assertEquivalent([[DIFF_DELETE, 'a'], [DIFF_INSERT, '\u0680'], [DIFF_EQUAL, 'x'], [DIFF_DELETE, '\t'], [DIFF_INSERT, '\0']], dmp.diff_main('ax\t', '\u0680x\0', false));
+
+ // // Overlaps.
+ // assertEquivalent([[DIFF_DELETE, '1'], [DIFF_EQUAL, 'a'], [DIFF_DELETE, 'y'], [DIFF_EQUAL, 'b'], [DIFF_DELETE, '2'], [DIFF_INSERT, 'xab']], dmp.diff_main('1ayb2', 'abxab', false));
+
+ // assertEquivalent([[DIFF_INSERT, 'xaxcx'], [DIFF_EQUAL, 'abc'], [DIFF_DELETE, 'y']], dmp.diff_main('abcy', 'xaxcxabc', false));
+
+ // assertEquivalent([[DIFF_DELETE, 'ABCD'], [DIFF_EQUAL, 'a'], [DIFF_DELETE, '='], [DIFF_INSERT, '-'], [DIFF_EQUAL, 'bcd'], [DIFF_DELETE, '='], [DIFF_INSERT, '-'], [DIFF_EQUAL, 'efghijklmnopqrs'], [DIFF_DELETE, 'EFGHIJKLMNOefg']], dmp.diff_main('ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg', 'a-bcd-efghijklmnopqrs', false));
+
+ // // Large equality.
+ // assertEquivalent([[DIFF_INSERT, ' '], [DIFF_EQUAL, 'a'], [DIFF_INSERT, 'nd'], [DIFF_EQUAL, ' [[Pennsylvania]]'], [DIFF_DELETE, ' and [[New']], dmp.diff_main('a [[Pennsylvania]] and [[New', ' and [[Pennsylvania]]', false));
+
+ // // Timeout.
+ // dmp.Diff_Timeout = 0.1; // 100ms
+ // var a = '`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n';
+ // var b = 'I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n';
+ // // Increase the text lengths by 1024 times to ensure a timeout.
+ // for (var i = 0; i < 10; i++) {
+ // a += a;
+ // b += b;
+ // }
+ // var startTime = (new Date()).getTime();
+ // dmp.diff_main(a, b);
+ // var endTime = (new Date()).getTime();
+ // // Test that we took at least the timeout period.
+ // assertTrue(dmp.Diff_Timeout * 1000 <= endTime - startTime);
+ // // Test that we didn't take forever (be forgiving).
+ // // Theoretically this test could fail very occasionally if the
+ // // OS task swaps or locks up for a second at the wrong moment.
+ // assertTrue(dmp.Diff_Timeout * 1000 * 2 > endTime - startTime);
+ // dmp.Diff_Timeout = 0;
+
+ // // Test the linemode speedup.
+ // // Must be long to pass the 100 char cutoff.
+ // // Simple line-mode.
+ // a = '1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n';
+ // b = 'abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n';
+ // assertEquivalent(dmp.diff_main(a, b, false), dmp.diff_main(a, b, true));
+
+ // // Single line-mode.
+ // a = '1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890';
+ // b = 'abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij';
+ // assertEquivalent(dmp.diff_main(a, b, false), dmp.diff_main(a, b, true));
+
+ // // Overlap line-mode.
+ // a = '1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n';
+ // b = 'abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n';
+ // var texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true));
+ // var texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false));
+ // assertEquivalent(texts_textmode, texts_linemode);
+
+ // // Test null inputs.
+ // try {
+ // dmp.diff_main(null, null);
+ // assertEquals(Error, null);
+ // } catch (e) {
+ // // Exception expected.
+ // }
}
-
+
#[test]
fn test_diff_lines_to_chars() {
// Convert lines down to characters.
assert_eq!(
- LineToChars { chars_old: [0_usize, 1, 0].iter().map(|i| char::from_u32(*i as u32).unwrap()).collect::<String>().as_bytes().to_vec(), chars_new: [1_usize, 0, 1].iter().map(|i| char::from_u32(*i as u32).unwrap()).collect::<String>().as_bytes().to_vec(), lines: vec![b"alpha\n", b"beta\n"] },
+ LineToChars {
+ chars_old: [0_usize, 1, 0]
+ .iter()
+ .map(|i| char::from_u32(*i as u32).unwrap())
+ .collect::<String>()
+ .as_bytes()
+ .to_vec(),
+ chars_new: [1_usize, 0, 1]
+ .iter()
+ .map(|i| char::from_u32(*i as u32).unwrap())
+ .collect::<String>()
+ .as_bytes()
+ .to_vec(),
+ lines: vec![b"alpha\n", b"beta\n"]
+ },
DiffMatchPatch::lines_to_chars(b"alpha\nbeta\nalpha\n", b"beta\nalpha\nbeta\n")
);
assert_eq!(
- LineToChars { chars_old: "".as_bytes().to_vec(), chars_new: [0_usize, 1, 2, 2].iter().map(|i| char::from_u32(*i as u32).unwrap()).collect::<String>().as_bytes().to_vec(), lines: vec![b"alpha\r\n", b"beta\r\n", b"\r\n"] },
+ LineToChars {
+ chars_old: "".as_bytes().to_vec(),
+ chars_new: [0_usize, 1, 2, 2]
+ .iter()
+ .map(|i| char::from_u32(*i as u32).unwrap())
+ .collect::<String>()
+ .as_bytes()
+ .to_vec(),
+ lines: vec![b"alpha\r\n", b"beta\r\n", b"\r\n"]
+ },
DiffMatchPatch::lines_to_chars(b"", b"alpha\r\nbeta\r\n\r\n\r\n")
);
assert_eq!(
- LineToChars { chars_old: [0_usize].iter().map(|i| char::from_u32(*i as u32).unwrap()).collect::<String>().as_bytes().to_vec(), chars_new: [1_usize].iter().map(|i| char::from_u32(*i as u32).unwrap()).collect::<String>().as_bytes().to_vec(), lines: vec![b"a", b"b"] },
+ LineToChars {
+ chars_old: [0_usize]
+ .iter()
+ .map(|i| char::from_u32(*i as u32).unwrap())
+ .collect::<String>()
+ .as_bytes()
+ .to_vec(),
+ chars_new: [1_usize]
+ .iter()
+ .map(|i| char::from_u32(*i as u32).unwrap())
+ .collect::<String>()
+ .as_bytes()
+ .to_vec(),
+ lines: vec![b"a", b"b"]
+ },
DiffMatchPatch::lines_to_chars(b"a", b"b")
);
-
+
// More than 256 to reveal any 8-bit limitations.
const TLIMIT: usize = 300;
- let linestr = (0 .. TLIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();
- let linelist: Vec<&[u8]> = (0 .. TLIMIT).map(|i| linestr[i].as_bytes()).collect();
- let charlist = (0 .. TLIMIT).map(|i| char::from_u32(i as u32).unwrap()).collect::<String>();
+ let linestr = (0..TLIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();
+ let linelist: Vec<&[u8]> = (0..TLIMIT).map(|i| linestr[i].as_bytes()).collect();
+ let charlist = (0..TLIMIT)
+ .map(|i| char::from_u32(i as u32).unwrap())
+ .collect::<String>();
assert_eq!(
- LineToChars { chars_old: charlist.as_bytes().to_vec(), chars_new: String::new().as_bytes().to_vec(), lines: linelist },
+ LineToChars {
+ chars_old: charlist.as_bytes().to_vec(),
+ chars_new: String::new().as_bytes().to_vec(),
+ lines: linelist
+ },
DiffMatchPatch::lines_to_chars(linestr.join("").as_bytes(), b"")
);
}
@@ -951,27 +1239,37 @@ mod tests {
#[test]
fn test_diff_chars_to_lines() {
// Convert chars up to lines.
- let d1 = [0_usize, 1, 0].iter().map(|i| char::from_u32(*i as u32).unwrap()).collect::<String>();
- let d2 = [1_usize, 0, 1].iter().map(|i| char::from_u32(*i as u32).unwrap()).collect::<String>();
+ let d1 = [0_usize, 1, 0]
+ .iter()
+ .map(|i| char::from_u32(*i as u32).unwrap())
+ .collect::<String>();
+ let d2 = [1_usize, 0, 1]
+ .iter()
+ .map(|i| char::from_u32(*i as u32).unwrap())
+ .collect::<String>();
let mut diffs = [
Diff::new(Ops::Equal, d1.as_bytes()),
- Diff::new(Ops::Insert, d2.as_bytes())
+ Diff::new(Ops::Insert, d2.as_bytes()),
];
DiffMatchPatch::chars_to_lines(&mut diffs, &[b"alpha\n", b"beta\n"]);
- assert_eq!([
- Diff::new(Ops::Equal,b"alpha\nbeta\nalpha\n"),
- Diff::new(Ops::Insert, b"beta\nalpha\nbeta\n")
- ], diffs);
+ assert_eq!(
+ [
+ Diff::new(Ops::Equal, b"alpha\nbeta\nalpha\n"),
+ Diff::new(Ops::Insert, b"beta\nalpha\nbeta\n")
+ ],
+ diffs
+ );
-
// More than 256 to reveal any 8-bit limitations.
const TLIMIT: usize = 300;
- let charlist = (0 .. TLIMIT).map(|i| char::from_u32(i as u32).unwrap()).collect::<String>();
- let linestr = (0 .. TLIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();
- let linelist: Vec<&[u8]> = (0 .. TLIMIT).map(|i| linestr[i].as_bytes()).collect();
+ let charlist = (0..TLIMIT)
+ .map(|i| char::from_u32(i as u32).unwrap())
+ .collect::<String>();
+ let linestr = (0..TLIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();
+ let linelist: Vec<&[u8]> = (0..TLIMIT).map(|i| linestr[i].as_bytes()).collect();
let mut diffs = [Diff::new(Ops::Delete, charlist.as_bytes())];
DiffMatchPatch::chars_to_lines(&mut diffs, &linelist[..]);
@@ -980,10 +1278,10 @@ mod tests {
// More than 65536 to verify any 16-bit limitation.
const ULIMIT: usize = 10;
- let linestr = (0 .. ULIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();
+ let linestr = (0..ULIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();
let lines = linestr.join("");
let l2c = DiffMatchPatch::lines_to_chars(lines.as_bytes(), b"");
-
+
let mut diffs = [Diff::new(Ops::Insert, &l2c.chars_old)];
DiffMatchPatch::chars_to_lines(&mut diffs, &l2c.lines);
@@ -995,58 +1293,128 @@ mod tests {
// Cleanup a messy diff.
// Null case.
let mut diffs = vec![];
- let mut test: Vec<Diff> = vec![];
+ let test: Vec<Diff> = vec![];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
// No change case
- diffs = vec![Diff::new(Ops::Equal, b"a"), Diff::new(Ops::Delete, b"b"), Diff::new(Ops::Insert, b"c")];
- test = vec![Diff::new(Ops::Equal, b"a"), Diff::new(Ops::Delete, b"b"), Diff::new(Ops::Insert, b"c")];
+ let mut diffs = vec![
+ Diff::new(Ops::Equal, b"a"),
+ Diff::new(Ops::Delete, b"b"),
+ Diff::new(Ops::Insert, b"c"),
+ ];
+ let test = vec![
+ Diff::new(Ops::Equal, b"a"),
+ Diff::new(Ops::Delete, b"b"),
+ Diff::new(Ops::Insert, b"c"),
+ ];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
// Merge equalities.
- diffs = vec![Diff::new(Ops::Equal, b"a"), Diff::new(Ops::Equal, b"b"), Diff::new(Ops::Equal, b"c")];
- test = vec![Diff::new(Ops::Equal, b"abc")];
+ let mut diffs = vec![
+ Diff::new(Ops::Equal, b"a"),
+ Diff::new(Ops::Equal, b"b"),
+ Diff::new(Ops::Equal, b"c"),
+ ];
+ let test = vec![Diff::new(Ops::Equal, b"abc")];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
// Merge deletions.
- diffs = vec![Diff::new(Ops::Delete, b"a"), Diff::new(Ops::Delete, b"b"), Diff::new(Ops::Delete, b"c")];
- test = vec![Diff::new(Ops::Delete, b"abc")];
+ let mut diffs = vec![
+ Diff::new(Ops::Delete, b"a"),
+ Diff::new(Ops::Delete, b"b"),
+ Diff::new(Ops::Delete, b"c"),
+ ];
+ let test = vec![Diff::new(Ops::Delete, b"abc")];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
// Merge insertions.
- diffs = vec![Diff::new(Ops::Insert, b"a"), Diff::new(Ops::Insert, b"b"), Diff::new(Ops::Insert, b"c")];
- test = vec![Diff::new(Ops::Insert, b"abc")];
+ let mut diffs = vec![
+ Diff::new(Ops::Insert, b"a"),
+ Diff::new(Ops::Insert, b"b"),
+ Diff::new(Ops::Insert, b"c"),
+ ];
+ let test = vec![Diff::new(Ops::Insert, b"abc")];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
- // // Merge interweave.
- // diffs = [[DIFF_DELETE, 'a'], [DIFF_INSERT, 'b'], [DIFF_DELETE, 'c'], [DIFF_INSERT, 'd'], [DIFF_EQUAL, 'e'], [DIFF_EQUAL, 'f']];
- // dmp.diff_cleanupMerge(diffs);
- // assertEquivalent([[DIFF_DELETE, 'ac'], [DIFF_INSERT, 'bd'], [DIFF_EQUAL, 'ef']], diffs);
+ // Merge interweave.
+ let mut diffs = vec![
+ Diff::new(Ops::Delete, b"a"),
+ Diff::new(Ops::Insert, b"b"),
+ Diff::new(Ops::Delete, b"c"),
+ Diff::new(Ops::Insert, b"d"),
+ Diff::new(Ops::Equal, b"e"),
+ Diff::new(Ops::Equal, b"f"),
+ ];
+ let test = vec![
+ Diff::new(Ops::Delete, b"ac"),
+ Diff::new(Ops::Insert, b"bd"),
+ Diff::new(Ops::Equal, b"ef"),
+ ];
+ DiffMatchPatch::cleanup_merge(&mut diffs);
+ assert_eq!(test, diffs);
- // // Prefix and suffix detection.
- // diffs = [[DIFF_DELETE, 'a'], [DIFF_INSERT, 'abc'], [DIFF_DELETE, 'dc']];
- // dmp.diff_cleanupMerge(diffs);
- // assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_DELETE, 'd'], [DIFF_INSERT, 'b'], [DIFF_EQUAL, 'c']], diffs);
+ // Prefix and suffix detection.
+ let mut diffs = vec![
+ Diff::new(Ops::Delete, b"a"),
+ Diff::new(Ops::Insert, b"abc"),
+ Diff::new(Ops::Delete, b"dc")
+ ];
+ let test = vec![
+ Diff::new(Ops::Equal, b"a"),
+ Diff::new(Ops::Delete, b"d"),
+ Diff::new(Ops::Insert, b"b"),
+ Diff::new(Ops::Equal, b"c"),
+ ];
+ DiffMatchPatch::cleanup_merge(&mut diffs);
+ assert_eq!(test, diffs);
- // // Prefix and suffix detection with equalities.
- // diffs = [[DIFF_EQUAL, 'x'], [DIFF_DELETE, 'a'], [DIFF_INSERT, 'abc'], [DIFF_DELETE, 'dc'], [DIFF_EQUAL, 'y']];
- // dmp.diff_cleanupMerge(diffs);
- // assertEquivalent([[DIFF_EQUAL, 'xa'], [DIFF_DELETE, 'd'], [DIFF_INSERT, 'b'], [DIFF_EQUAL, 'cy']], diffs);
+ // Prefix and suffix detection with equalities.
+ let mut diffs = vec![
+ Diff::new(Ops::Equal, b"x"),
+ Diff::new(Ops::Delete, b"a"),
+ Diff::new(Ops::Insert, b"abc"),
+ Diff::new(Ops::Delete, b"dc"),
+ Diff::new(Ops::Equal, b"y"),
+ ];
+ let test = vec![
+ Diff::new(Ops::Equal, b"xa"),
+ Diff::new(Ops::Delete, b"d"),
+ Diff::new(Ops::Insert, b"b"),
+ Diff::new(Ops::Equal, b"cy"),
+ ];
+ DiffMatchPatch::cleanup_merge(&mut diffs);
+ assert_eq!(test, diffs);
- // // Slide edit left.
- // diffs = [[DIFF_EQUAL, 'a'], [DIFF_INSERT, 'ba'], [DIFF_EQUAL, 'c']];
- // dmp.diff_cleanupMerge(diffs);
- // assertEquivalent([[DIFF_INSERT, 'ab'], [DIFF_EQUAL, 'ac']], diffs);
+ // Slide edit left.
+ let mut diffs = vec![
+ Diff::new(Ops::Equal, b"a"),
+ Diff::new(Ops::Insert, b"ba"),
+ Diff::new(Ops::Equal, b"c"),
+ ];
+ let test = vec![
+ Diff::new(Ops::Insert, b"ab"),
+ Diff::new(Ops::Equal, b"ac"),
+ ];
+ DiffMatchPatch::cleanup_merge(&mut diffs);
+ assert_eq!(test, diffs);
- // // Slide edit right.
- // diffs = [[DIFF_EQUAL, 'c'], [DIFF_INSERT, 'ab'], [DIFF_EQUAL, 'a']];
- // dmp.diff_cleanupMerge(diffs);
- // assertEquivalent([[DIFF_EQUAL, 'ca'], [DIFF_INSERT, 'ba']], diffs);
+ // Slide edit right
+ let mut diffs = vec![
+ Diff::new(Ops::Equal, b"c"),
+ Diff::new(Ops::Insert, b"ab"),
+ Diff::new(Ops::Equal, b"a"),
+ ];
+ let test = vec![
+ Diff::new(Ops::Equal, b"ca"),
+ Diff::new(Ops::Insert, b"ba"),
+ ];
+ DiffMatchPatch::cleanup_merge(&mut diffs);
+ assert_eq!(test, diffs);
// // Slide edit left recursive.
// diffs = [[DIFF_EQUAL, 'a'], [DIFF_DELETE, 'b'], [DIFF_EQUAL, 'c'], [DIFF_DELETE, 'ac'], [DIFF_EQUAL, 'x']];
@@ -1068,4 +1436,4 @@ mod tests {
// dmp.diff_cleanupMerge(diffs);
// assertEquivalent([[DIFF_INSERT, 'a'], [DIFF_EQUAL, 'b']], diffs);
}
-} \ No newline at end of file
+}