WIP: testing cleanup algorithm - dmp

my fork of dmp

master 1Branch 0Tags

Clone

HTTPS

SSH

Open with VS Code

WIP: testing cleanup algorithm

Browse Source

Anubhab Bandyopadhyay 2024-08-06

parent c0e9c19 · commit ca9afcd

Diffstat

-rw-r--r--

benches/prefix.rs

-rw-r--r--

src/dmp.rs

940

2 files changed, 672 insertions, 301 deletions

diff --git a/benches/prefix.rs b/benches/prefix.rs
index 4ea6650..d82ce9d 100644
--- a/benches/prefix.rs
+++ b/benches/prefix.rs

@@ -4,10 +4,7 @@ use std::path::Path;

use criterion::{criterion_group, criterion_main, Criterion};

fn prefix_linear(a: &str, b: &str, res: usize) {

- let found = a.bytes()

- .zip(b.bytes())

- .take_while(|(a, b)| a == b )

- .count();

+ let found = a.bytes().zip(b.bytes()).take_while(|(a, b)| a == b).count();

if found != res {

panic!()

@@ -25,10 +22,9 @@ fn prefix_binary(a: &str, b: &str, res: usize) {

let mut pointstart = 0;

while pointmin < pointmid {

- if a[pointstart .. pointmid] ==

- b[pointstart .. pointmid] {

- pointmin = pointmid;

- pointstart = pointmin;

+ if a[pointstart..pointmid] == b[pointstart..pointmid] {

+ pointmin = pointmid;

+ pointstart = pointmin;

} else {

pointmax = pointmid;

}

@@ -49,24 +45,31 @@ fn create_data() -> Vec<(String, String, usize, usize)> {

let old = std::fs::read_to_string(basedir.join(format!("old_{n}.txt"))).unwrap();

let new = std::fs::read_to_string(basedir.join(format!("new_{n}.txt"))).unwrap();

- let res = std::fs::read_to_string(basedir.join(format!("ans_{n}.txt"))).unwrap().parse::<usize>().unwrap();

+ let res = std::fs::read_to_string(basedir.join(format!("ans_{n}.txt")))

+ .unwrap()

+ .parse::<usize>()

+ .unwrap();

(old, new, res, n)

- }).collect();

+ })

+ .collect();

data

}

pub fn prefix_bench(c: &mut Criterion) {

let d = create_data();

for (old, new, res, n) in d.iter() {

println!("For N={n}");

- c.bench_function("prefix linear", |bencher| bencher.iter(|| prefix_linear(old, new, *res)));

- c.bench_function("prefix binary", |bencher| bencher.iter(|| prefix_binary(old, new, *res)));

+ c.bench_function("prefix linear", |bencher| {

+ bencher.iter(|| prefix_linear(old, new, *res))

+ });

+ c.bench_function("prefix binary", |bencher| {

+ bencher.iter(|| prefix_binary(old, new, *res))

+ });

}

criterion_group!(prefix, prefix_bench);

-criterion_main!(prefix); \ No newline at end of file

+criterion_main!(prefix);

diff --git a/src/dmp.rs b/src/dmp.rs
index cf81328..5b88e5b 100644
--- a/src/dmp.rs
+++ b/src/dmp.rs

@@ -1,5 +1,9 @@

-use std::{collections::HashMap, io::BufRead, time::{Duration, Instant}};

+use std::{

+ borrow::BorrowMut,

+ collections::HashMap,

+ io::BufRead,

+ time::{Duration, Instant},

+};

/**

* The data structure representing a diff is an array of tuples:

@@ -13,7 +17,7 @@ use std::{collections::HashMap, io::BufRead, time::{Duration, Instant}};

pub enum Ops {

Delete = -1,

Insert,

- Equal

+ Equal,

}

/// A structure representing a diff

@@ -40,14 +44,14 @@ pub struct DiffMatchPatch {

/// Defaults to true, which does a faster, slightly less optimal diff.

checklines: Option<bool>,

/// A default timeout in num seconds, defaults to 1

- timeout: Option<u64>

+ timeout: Option<u64>,

}

impl Default for DiffMatchPatch {

fn default() -> Self {

Self {

checklines: Some(true),

- timeout: Some(1)

+ timeout: Some(1),

}

@@ -58,7 +62,7 @@ struct HalfMatch<'a> {

suffix_long: &'a [u8],

prefix_short: &'a [u8],

suffix_short: &'a [u8],

- common: &'a [u8]

+ common: &'a [u8],

}

impl DiffMatchPatch {

@@ -68,7 +72,8 @@ impl DiffMatchPatch {

// returns the configured timeout, defaults to `1`, None or `0` would mean infinite timeout

fn timeout(&self) -> u64 {

- self.timeout.map_or(u64::MAX, |tout| if tout > 0 { tout } else { u64::MAX })

+ self.timeout

+ .map_or(u64::MAX, |tout| if tout > 0 { tout } else { u64::MAX })

}

fn main_internal<'a>(&self, old_bytes: &'a [u8], new_bytes: &'a [u8]) -> Vec<Diff> {

@@ -82,58 +87,76 @@ impl DiffMatchPatch {

}

if old_bytes.is_empty() {

- return vec![Diff::new(Ops::Insert, new_bytes)]

+ return vec![Diff::new(Ops::Insert, new_bytes)];

}

if new_bytes.is_empty() {

- return vec![Diff::new(Ops::Delete, old_bytes)]

+ return vec![Diff::new(Ops::Delete, old_bytes)];

}

- let deadline = Instant::now().checked_add(Duration::from_secs(self.timeout())).unwrap();

+ let deadline = Instant::now()

+ .checked_add(Duration::from_secs(self.timeout()))

+ .unwrap();

// Trim common prefix

let common_prefix = Self::common_prefix(old_bytes, new_bytes, false);

- let common_suffix = Self::common_prefix(&old_bytes[common_prefix..], &new_bytes[common_prefix..], true);

+ let common_suffix = Self::common_prefix(

+ &old_bytes[common_prefix..],

+ &new_bytes[common_prefix..],

+ true,

+ );

- let mut diffs = self.compute(&old_bytes[common_prefix .. old_bytes.len() - common_suffix], &new_bytes[common_prefix .. new_bytes.len() - common_suffix]);

+ let mut diffs = self.compute(

+ &old_bytes[common_prefix..old_bytes.len() - common_suffix],

+ &new_bytes[common_prefix..new_bytes.len() - common_suffix],

+ );

// Restore the prefix and suffix.

if common_prefix > 0 {

- let mut d = vec![Diff::new(Ops::Equal, &old_bytes[.. common_prefix])];

+ let mut d = vec![Diff::new(Ops::Equal, &old_bytes[..common_prefix])];

d.append(&mut diffs);

diffs = d;

}

if common_suffix > 0 {

- diffs.push(Diff::new(Ops::Equal, &new_bytes[new_bytes.len() - common_suffix ..]));

+ diffs.push(Diff::new(

+ Ops::Equal,

+ &new_bytes[new_bytes.len() - common_suffix..],

+ ));

}

diffs

}

fn compute<'a>(&self, old: &'a [u8], new: &'a [u8]) -> Vec<Diff> {

// returning all of the new part

if old.is_empty() {

- return vec![Diff::new(Ops::Insert, new)]

+ return vec![Diff::new(Ops::Insert, new)];

}

// return everything deleted

if new.is_empty() {

- return vec![Diff::new(Ops::Delete, old)]

+ return vec![Diff::new(Ops::Delete, old)];

}

- let (long, short, old_gt_new) = if old.len() > new.len() { (old, new, true) } else { (new, old, false) };

+ let (long, short, old_gt_new) = if old.len() > new.len() {

+ (old, new, true)

+ } else {

+ (new, old, false)

+ };

// found a subsequence which contains the short text

- if let Some(idx) = long.windows(short.len()).step_by(1).position(|k| k == short) {

+ if let Some(idx) = long

+ .windows(short.len())

+ .step_by(1)

+ .position(|k| k == short)

+ {

// Shorter text is inside the longer text (speedup).

let op = if old_gt_new { Ops::Delete } else { Ops::Insert };

let diffs = vec![

- Diff::new(op, &long[0 .. idx]),

+ Diff::new(op, &long[0..idx]),

Diff::new(Ops::Equal, short),

- Diff::new(op, &long[idx + short.len() ..])

+ Diff::new(op, &long[idx + short.len()..]),

];

return diffs;

@@ -143,12 +166,12 @@ impl DiffMatchPatch {

// After previous case, this can't be an equality

return vec![Diff::new(Ops::Delete, old), Diff::new(Ops::Insert, new)];

}

// Check if the problem can be split in two

if let Some(half_match) = self.half_match(old, new) {

let old_a = half_match.prefix_long;

let old_b = half_match.suffix_long;

let new_a = half_match.prefix_short;

let new_b = half_match.suffix_short;

@@ -161,10 +184,10 @@ impl DiffMatchPatch {

// Merge the results

diffs_a.push(Diff::new(Ops::Equal, mid_common));

diffs_a.append(&mut diffs_b);

return diffs_a;

}

if self.checklines() && old.len() > 100 && new.len() > 100 {

return self.line_mode(old, new);

}

@@ -178,7 +201,11 @@ impl DiffMatchPatch {

return None;

}

- let (long, short) = if old.len() > new.len() { (old, new) } else { (new, old) };

+ let (long, short) = if old.len() > new.len() {

+ (old, new)

+ } else {

+ (new, old)

+ };

// pointless - two small for this algo

if long.len() < 4 || short.len() * 2 < long.len() {

@@ -187,10 +214,10 @@ impl DiffMatchPatch {

// First check if the second quarter is the seed for a half-match.

// let hm1 = Self::diff_half_match_i(long, short, (long.len() as f32 / 4.).ceil() as usize);

- let hm1 = Self::half_match_i(long, short, long.len() / 4 );

+ let hm1 = Self::half_match_i(long, short, long.len() / 4);

// Check again based on the third quarter.

// let hm2 = Self::diff_half_match_i(long, short, (long.len() as f32 / 2.).ceil() as usize);

- let hm2 = Self::half_match_i(long, short, long.len() / 2 );

+ let hm2 = Self::half_match_i(long, short, long.len() / 2);

if hm1.is_none() && hm2.is_none() {

return None;

@@ -208,10 +235,9 @@ impl DiffMatchPatch {

hm2

}

} else {

- return None

+ return None;

};

// A half-match was found, sort out the return data.

let half_match = if old.len() > new.len() {

HalfMatch {

@@ -219,7 +245,7 @@ impl DiffMatchPatch {

suffix_long: hm.suffix_long,

prefix_short: hm.prefix_short,

suffix_short: hm.suffix_short,

- common: hm.common

+ common: hm.common,

}

} else {

HalfMatch {

@@ -227,22 +253,18 @@ impl DiffMatchPatch {

suffix_long: hm.suffix_short,

prefix_short: hm.prefix_long,

suffix_short: hm.suffix_long,

- common: hm.common

+ common: hm.common,

}

};

Some(half_match)

}

// Quick line-level diff on both strings, then rediff the parts for greater accuracy

// This speedup can produce non-minimal diffs

fn line_mode<'a>(&self, old: &'a [u8], new: &'a [u8]) -> Vec<Diff> {

let to_chars = Self::lines_to_chars(old, new);

- let mut diffs = self.main_internal(

- &to_chars.chars_old,

- &to_chars.chars_new

- );

+ let mut diffs = self.main_internal(&to_chars.chars_old, &to_chars.chars_new);

Self::chars_to_lines(&mut diffs[..], &to_chars.lines[..]);

// anubhab

@@ -252,14 +274,14 @@ impl DiffMatchPatch {

fn diff_bisect<'a>(&self, old: &'a [u8], new: &'a [u8]) -> Vec<Diff> {

todo!()

}

// Does a substring of shorttext exist within longtext such that the substring

// is at least half the length of longtext?

//idx Start index of quarter length substring within longtext.

- fn half_match_i<'a>(long: &'a[u8], short: &'a[u8], idx: usize) -> Option<HalfMatch<'a>> {

+ fn half_match_i<'a>(long: &'a [u8], short: &'a [u8], idx: usize) -> Option<HalfMatch<'a>> {

// Start with a 1/4 length substring at position i as a seed.

- let seed = &long[idx .. idx + long.len() / 4];

+ let seed = &long[idx..idx + long.len() / 4];

let mut j = 0;

let mut best_common: &[u8] = &[];

@@ -268,39 +290,37 @@ impl DiffMatchPatch {

let mut best_short_a: &[u8] = &[];

let mut best_short_b: &[u8] = &[];

- while let Some(pos) = &short[ j .. ]

+ while let Some(pos) = &short[j..]

.windows(seed.len())

.step_by(1)

- .position(|p| p == seed) {

- j += *pos;

+ .position(|p| p == seed)

+ {

+ j += *pos;

- let prefix_len = Self::common_prefix(&long[idx ..], &short[j ..], false);

- let suffix_len = Self::common_prefix(&long[.. idx], &short[.. j], true);

+ let prefix_len = Self::common_prefix(&long[idx..], &short[j..], false);

+ let suffix_len = Self::common_prefix(&long[..idx], &short[..j], true);

- if best_common.len() < suffix_len + prefix_len {

- best_common = &short[j - suffix_len .. j + prefix_len];

- best_long_a = &long[.. idx - suffix_len];

- best_long_b = &long[idx + prefix_len ..];

+ if best_common.len() < suffix_len + prefix_len {

+ best_common = &short[j - suffix_len..j + prefix_len];

- best_short_a = &short[.. j - suffix_len];

- best_short_b = &short[j + prefix_len ..];

- }

+ best_long_a = &long[..idx - suffix_len];

+ best_long_b = &long[idx + prefix_len..];

+ best_short_a = &short[..j - suffix_len];

+ best_short_b = &short[j + prefix_len..];

+ }

- j += 1;

+ j += 1;

}

if best_common.len() * 2 >= long.len() {

- Some(

- HalfMatch {

- prefix_long: best_long_a,

- suffix_long: best_long_b,

- prefix_short: best_short_a,

- suffix_short: best_short_b,

- common: best_common

- }

- )

+ Some(HalfMatch {

+ prefix_long: best_long_a,

+ suffix_long: best_long_b,

+ prefix_short: best_short_a,

+ suffix_short: best_short_b,

+ common: best_common,

+ })

} else {

None

}

@@ -312,12 +332,13 @@ impl DiffMatchPatch {

// Reverse prefix is suffix

// TODO: investigate this further

fn common_prefix(lhs: &[u8], rhs: &[u8], reverse: bool) -> usize {

- if lhs.is_empty() || rhs.is_empty() ||

- (!reverse && (lhs.first() != rhs.first())) ||

- (reverse && (lhs.last() != rhs.last())) {

- return 0;

- }

+ if lhs.is_empty()

+ || rhs.is_empty()

+ || (!reverse && (lhs.first() != rhs.first()))

+ || (reverse && (lhs.last() != rhs.last()))

+ {

+ return 0;

+ }

let mut pointmin = 0;

let mut pointmax = lhs.len().min(rhs.len());

@@ -327,14 +348,17 @@ impl DiffMatchPatch {

while pointmin < pointmid {

let (lhsrange, rhsrange) = if !reverse {

- (pointstart .. pointmid, pointstart .. pointmid)

+ (pointstart..pointmid, pointstart..pointmid)

} else {

- (lhs.len() - pointmid .. lhs.len() - pointstart, rhs.len() - pointmid .. rhs.len() - pointstart)

+ (

+ lhs.len() - pointmid..lhs.len() - pointstart,

+ rhs.len() - pointmid..rhs.len() - pointstart,

+ )

};

if lhs[lhsrange] == rhs[rhsrange] {

- pointmin = pointmid;

- pointstart = pointmin;

+ pointmin = pointmid;

+ pointstart = pointmin;

} else {

pointmax = pointmid;

}

@@ -377,7 +401,8 @@ impl DiffMatchPatch {

delete_len_post = 0;

last_equality = Some(diffs[pointer].1.clone());

- } else { // Ops::Insert || Ops::Delete

+ } else {

+ // Ops::Insert || Ops::Delete

// Increasing changes of post_equality metrics

if diffs[pointer].0 == Ops::Insert {

insert_len_post += diffs[pointer].1.len();

@@ -389,38 +414,38 @@ impl DiffMatchPatch {

// sides of it.

if let Some(last_eq) = &last_equality {

if last_eq.len() <= insert_len_pre.max(delete_len_pre)

- && last_eq.len() <= insert_len_post.max(delete_len_post) {

- if let Some(&last) = equalities.last() {

- // Duplicate record

- diffs.insert(last, Diff::new(Ops::Delete, last_eq));

- // Change the other copy to insert

- diffs[last + 1].0 = Ops::Insert;

- // change diff length

- difflen = diffs.len();

- // Throw away the equality we just deleted.

- equalities.pop();

- // Throw away the previous equality (it needs to be reevaluated).

- equalities.pop();

- diff_mod = true;

- changes = true;

- if let Some(&e) = equalities.last() {

- pointer = e;

- } else {

- pointer = 0;

- }

+ && last_eq.len() <= insert_len_post.max(delete_len_post)

+ {

+ if let Some(&last) = equalities.last() {

+ // Duplicate record

+ diffs.insert(last, Diff::new(Ops::Delete, last_eq));

+ // Change the other copy to insert

+ diffs[last + 1].0 = Ops::Insert;

+ // change diff length

+ difflen = diffs.len();

+ // Throw away the equality we just deleted.

+ equalities.pop();

+ // Throw away the previous equality (it needs to be reevaluated).

+ equalities.pop();

+ diff_mod = true;

+ changes = true;

+ if let Some(&e) = equalities.last() {

+ pointer = e;

+ } else {

+ pointer = 0;

+ }

- // reset all counters

- insert_len_pre = 0;

- delete_len_pre = 0;

- insert_len_post = 0;

- delete_len_post = 0;

+ // reset all counters

+ insert_len_pre = 0;

+ delete_len_pre = 0;

+ insert_len_post = 0;

+ delete_len_post = 0;

- last_equality = None;

- }

+ last_equality = None;

+ }

}

@@ -445,7 +470,6 @@ impl DiffMatchPatch {

// // Number of characters that changed after the equality.

// var length_insertions2 = 0;

// var length_deletions2 = 0;

// // Normalize the diff.

// if (changes) {

@@ -503,6 +527,9 @@ impl DiffMatchPatch {

// Reorder and merge like edit sections. Merge equalities.

// Any edit section can move as long as it doesn't cross an equality.

fn cleanup_merge(diffs: &mut Vec<Diff>) {

+ // Push a dummy diff ... this triggers the equality as a last step

+ diffs.push(Diff::new(Ops::Equal, b""));

let mut difflen = diffs.len();

let mut pointer = 0_usize;

@@ -519,12 +546,14 @@ impl DiffMatchPatch {

match diffs[pointer].0 {

Ops::Insert => {

insert_n += 1;

- insert_data.append(&mut diffs[pointer].1);

+ let mut data = diffs[pointer].1.to_vec();

+ insert_data.append(&mut data);

pointer += 1;

}

Ops::Delete => {

delete_n += 1;

- delete_data.append(&mut diffs[pointer].1);

+ let mut data = diffs[pointer].1.to_vec();

+ delete_data.append(&mut data);

pointer += 1;

}

Ops::Equal => {

@@ -532,18 +561,140 @@ impl DiffMatchPatch {

if delete_n + insert_n > 1 {

if delete_n != 0 && insert_n != 0 {

// Factor out any common prefixies.

- let commonlen = Self::common_prefix(&insert_data[..], &delete_data[..], false);

+ let commonlen =

+ Self::common_prefix(&insert_data[..], &delete_data[..], false);

if commonlen != 0 {

+ let tmpidx = pointer - delete_n - insert_n;

+ if tmpidx > 0 && diffs[tmpidx - 1].0 == Ops::Equal {

+ let mut appenddata = insert_data[..commonlen].to_vec();

+ diffs[tmpidx - 1].1.append(&mut appenddata);

+ } else {

+ diffs.insert(

+ 0,

+ Diff::new(Ops::Equal, &insert_data[..commonlen]),

+ );

+ pointer += 1;

+ difflen = diffs.len();

+ }

+ insert_data = insert_data[commonlen..].to_vec();

+ delete_data = delete_data[commonlen..].to_vec();

+ }

- } else {

+ // Factor out any common suffixies.

+ let commonlen =

+ Self::common_prefix(&insert_data[..], &delete_data[..], true);

+ if commonlen > 0 {

+ diffs[pointer].1 = [

+ insert_data[insert_data.len() - commonlen..].to_vec(),

+ diffs[pointer].1.to_vec(),

+ ]

+ .concat();

+ insert_data = insert_data[..insert_data.len() - commonlen].to_vec();

+ delete_data = delete_data[..delete_data.len() - commonlen].to_vec();

}

+ // Delete the offending records and add the merged ones.

+ pointer -= delete_n + insert_n;

+ // Reversing because index will not change

+ (pointer..pointer + delete_n + insert_n)

+ .rev()

+ .for_each(|i| {

+ diffs.remove(i);

+ });

+ difflen = diffs.len();

+ if !delete_data.is_empty() {

+ diffs.insert(pointer, Diff::new(Ops::Delete, &delete_data));

+ pointer += 1;

+ difflen = diffs.len();

+ }

+ if !insert_data.is_empty() {

+ diffs.insert(pointer, Diff::new(Ops::Insert, &insert_data));

+ pointer += 1;

+ difflen = diffs.len();

+ }

+ pointer += 1;

+ } else if pointer != 0 && diffs[pointer - 1].0 == Ops::Equal {

+ // Merge this equality with the previous one.

+ let mut to_merge = diffs[pointer].1.to_vec();

+ diffs[pointer - 1].1.append(&mut to_merge);

+ diffs.remove(pointer);

+ difflen = diffs.len();

+ } else {

+ pointer += 1;

}

+ insert_n = 0;

+ delete_n = 0;

+ insert_data = Vec::new();

+ delete_data = Vec::new();

}

+ if let Some(dl) = diffs.last() {

+ if dl.1.is_empty() {

+ diffs.pop();

+ }

+ difflen = diffs.len();

+ // Second pass: look for single edits surrounded on both sides by equalities

+ // which can be shifted sideways to eliminate an equality.

+ // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC

+ pointer = 1;

+ let mut changes = false;

+ while difflen > 0 && pointer < difflen - 1 {

+ if let (Some(diff_prev), Some(diff), Some(diff_next)) = (

+ diffs.get(pointer - 1),

+ diffs.get(pointer),

+ diffs.get(pointer + 1),

+ ) {

+ // This is a single edit surrounded by equalities.

+ if diff_prev.0 == Ops::Equal && diff_next.0 == Ops::Equal {

+ if diff.1[diff.1.len() - diff_prev.1.len()..] == diff_prev.1 {

+ // Shift the edit over the previous equality.

+ let new_current_data =

+ [&diff_prev.1, &diff.1[..diff.1.len() - diff_prev.1.len()]].concat();

+ let new_next_data = [&diff_prev.1[..], &diff_next.1[..]].concat();

+ diffs[pointer].1 = new_current_data;

+ diffs[pointer + 1].1 = new_next_data;

+ diffs.remove(pointer - 1);

+ difflen = diffs.len();

+ changes = true;

+ } else if diff.1[..diff_next.1.len()] == diff_next.1 {

+ // Shift the edit over the next equality.

+ let mut next_data = diffs[pointer + 1].1.to_vec();

+ diffs[pointer - 1].1.append(&mut next_data);

+ diffs[pointer].1 = [

+ &diffs[pointer].1[diffs[pointer + 1].1.len()..],

+ &diffs[pointer + 1].1[..],

+ ]

+ .concat();

+ diffs.remove(pointer + 1);

+ difflen = diffs.len();

+ changes = true;

+ }

+ pointer += 1;

+ }

+ if changes {

+ Self::cleanup_merge(diffs);

+ }

}

@@ -551,7 +702,7 @@ impl DiffMatchPatch {

struct LineToChars<'a> {

chars_old: Vec<u8>,

chars_new: Vec<u8>,

- lines: Vec<&'a [u8]>

+ lines: Vec<&'a [u8]>,

}

impl DiffMatchPatch {

@@ -563,20 +714,25 @@ impl DiffMatchPatch {

// let mut maxlines = 5;

let mut maxlines = 40000;

let chars_old = Self::lines_to_chars_internal(old, &mut lines, &mut linehash, maxlines);

// This basically represents the U16::MAX value

maxlines = 65535;

// maxlines = 7;

let chars_new = Self::lines_to_chars_internal(new, &mut lines, &mut linehash, maxlines);

LineToChars {

chars_old,

chars_new,

- lines

- }

+ lines,

+ }

}

- fn lines_to_chars_internal<'a>(text: &'a [u8], array: &mut Vec<&'a [u8]>, hash: &mut HashMap<&'a [u8], usize>, maxlines: usize) -> Vec<u8> {

+ fn lines_to_chars_internal<'a>(

+ text: &'a [u8],

+ array: &mut Vec<&'a [u8]>,

+ hash: &mut HashMap<&'a [u8], usize>,

+ maxlines: usize,

+ ) -> Vec<u8> {

let take = maxlines - array.len();

println!("Take: {take}");

@@ -604,30 +760,32 @@ impl DiffMatchPatch {

}

- //

+ //

if broke.is_some() {

- let line = &text[cursor ..];

+ let line = &text[cursor..];

let e = hash.entry(line).or_insert(array.len());

array.push(line);

charlist.push(char::from_u32(*e as u32).unwrap());

}

let chars: String = charlist.iter().collect::<String>();

chars.as_bytes().to_vec()

}

fn chars_to_lines(diffs: &mut [Diff], lines: &[&[u8]]) {

- diffs.iter_mut()

- .for_each(|d| {

+ diffs.iter_mut().for_each(|d| {

let chars = String::from_utf8(d.1.to_vec()).unwrap();

// let mut txt = &[];

- let t = chars.chars().map(|c| {

- let idx: u32 = c.into();

- *lines.get(idx as usize).unwrap()

- }).collect::<Vec<_>>()

- .concat();

+ let t = chars

+ .chars()

+ .map(|c| {

+ let idx: u32 = c.into();

+ *lines.get(idx as usize).unwrap()

+ })

+ .collect::<Vec<_>>()

+ .concat();

d.1 = t;

});

@@ -641,7 +799,7 @@ impl DiffMatchPatch {

/// new: New string to be diffed.

/// deadline: Optional time when the diff should be complete by. Used

/// internally for recursive calls. Users should set DiffTimeout instead.

- ///

+ ///

/// Returns:

/// Vec of changes (Diff).

pub fn diff_main<'a>(&self, old: &'a str, new: &'a str) -> Vec<Diff> {

@@ -650,7 +808,6 @@ impl DiffMatchPatch {

// Reduce the number of edits by eliminating semantically trivial equalities

pub fn diff_cleanup_semantic(diffs: &mut [Diff]) {

todo!()

}

@@ -695,7 +852,6 @@ impl DiffMatchPatch {

}

#[cfg(test)]

mod tests {

use crate::dmp::{Diff, HalfMatch, LineToChars, Ops};

@@ -727,36 +883,51 @@ mod tests {

// 'testPatchApply'

// ];

#[test]

- fn test_diff_is_destructurable() {

- }

+ fn test_diff_is_destructurable() {}

#[test]

fn test_prefix() {

// Detect any common prefix.

// Null case.

- assert_eq!(0, DiffMatchPatch::common_prefix("abc".as_bytes(), "xyz".as_bytes(), false));

+ assert_eq!(

+ 0,

+ DiffMatchPatch::common_prefix("abc".as_bytes(), "xyz".as_bytes(), false)

+ );

// Non-null case.

- assert_eq!(4, DiffMatchPatch::common_prefix("1234abcdef".as_bytes(), "1234xyz".as_bytes(), false));

+ assert_eq!(

+ 4,

+ DiffMatchPatch::common_prefix("1234abcdef".as_bytes(), "1234xyz".as_bytes(), false)

+ );

// Whole case.

- assert_eq!(4, DiffMatchPatch::common_prefix("1234".as_bytes(), "1234xyz".as_bytes(), false));

+ assert_eq!(

+ 4,

+ DiffMatchPatch::common_prefix("1234".as_bytes(), "1234xyz".as_bytes(), false)

+ );

}

#[test]

fn test_suffix() {

// Detect any common suffix.

// Null case.

- assert_eq!(0, DiffMatchPatch::common_prefix("abc".as_bytes(), "xyz".as_bytes(), true));

+ assert_eq!(

+ 0,

+ DiffMatchPatch::common_prefix("abc".as_bytes(), "xyz".as_bytes(), true)

+ );

// Non-null case.

- assert_eq!(4, DiffMatchPatch::common_prefix("abcdef1234".as_bytes(), "xyz1234".as_bytes(), true));

+ assert_eq!(

+ 4,

+ DiffMatchPatch::common_prefix("abcdef1234".as_bytes(), "xyz1234".as_bytes(), true)

+ );

// Whole case.

- assert_eq!(4, DiffMatchPatch::common_prefix("1234".as_bytes(), "xyz1234".as_bytes(), true));

+ assert_eq!(

+ 4,

+ DiffMatchPatch::common_prefix("1234".as_bytes(), "xyz1234".as_bytes(), true)

+ );

}

#[test]

@@ -764,51 +935,114 @@ mod tests {

let mut dmp = DiffMatchPatch::default();

// No match

- assert!(dmp.half_match("1234567890".as_bytes(), "abcdef".as_bytes()).is_none());

- assert!(dmp.half_match("12345".as_bytes(), "23".as_bytes()).is_none());

+ assert!(dmp

+ .half_match("1234567890".as_bytes(), "abcdef".as_bytes())

+ .is_none());

+ assert!(dmp

+ .half_match("12345".as_bytes(), "23".as_bytes())

+ .is_none());

// Single Match.

assert_eq!(

- Some(HalfMatch { prefix_long: "12".as_bytes(), suffix_long: "90".as_bytes(), prefix_short: "a".as_bytes(), suffix_short: "z".as_bytes(), common: "345678".as_bytes() }),

+ Some(HalfMatch {

+ prefix_long: "12".as_bytes(),

+ suffix_long: "90".as_bytes(),

+ prefix_short: "a".as_bytes(),

+ suffix_short: "z".as_bytes(),

+ common: "345678".as_bytes()

+ }),

dmp.half_match("1234567890".as_bytes(), "a345678z".as_bytes())

);

assert_eq!(

- Some(HalfMatch { prefix_long: "a".as_bytes(), suffix_long: "z".as_bytes(), prefix_short: "12".as_bytes(), suffix_short: "90".as_bytes(), common: "345678".as_bytes() }),

+ Some(HalfMatch {

+ prefix_long: "a".as_bytes(),

+ suffix_long: "z".as_bytes(),

+ prefix_short: "12".as_bytes(),

+ suffix_short: "90".as_bytes(),

+ common: "345678".as_bytes()

+ }),

dmp.half_match("a345678z".as_bytes(), "1234567890".as_bytes())

);

assert_eq!(

- Some(HalfMatch { prefix_long: "abc".as_bytes(), suffix_long: "z".as_bytes(), prefix_short: "1234".as_bytes(), suffix_short: "0".as_bytes(), common: "56789".as_bytes() }),

+ Some(HalfMatch {

+ prefix_long: "abc".as_bytes(),

+ suffix_long: "z".as_bytes(),

+ prefix_short: "1234".as_bytes(),

+ suffix_short: "0".as_bytes(),

+ common: "56789".as_bytes()

+ }),

dmp.half_match("abc56789z".as_bytes(), "1234567890".as_bytes())

);

assert_eq!(

- Some(HalfMatch { prefix_long: "a".as_bytes(), suffix_long: "xyz".as_bytes(), prefix_short: "1".as_bytes(), suffix_short: "7890".as_bytes(), common: "23456".as_bytes() }),

+ Some(HalfMatch {

+ prefix_long: "a".as_bytes(),

+ suffix_long: "xyz".as_bytes(),

+ prefix_short: "1".as_bytes(),

+ suffix_short: "7890".as_bytes(),

+ common: "23456".as_bytes()

+ }),

dmp.half_match("a23456xyz".as_bytes(), "1234567890".as_bytes())

);

// Multiple Matches.

assert_eq!(

- Some(HalfMatch { prefix_long: "12123".as_bytes(), suffix_long: "123121".as_bytes(), prefix_short: "a".as_bytes(), suffix_short: "z".as_bytes(), common: "1234123451234".as_bytes() }),

- dmp.half_match("121231234123451234123121".as_bytes(), "a1234123451234z".as_bytes())

+ Some(HalfMatch {

+ prefix_long: "12123".as_bytes(),

+ suffix_long: "123121".as_bytes(),

+ prefix_short: "a".as_bytes(),

+ suffix_short: "z".as_bytes(),

+ common: "1234123451234".as_bytes()

+ }),

+ dmp.half_match(

+ "121231234123451234123121".as_bytes(),

+ "a1234123451234z".as_bytes()

+ )

);

assert_eq!(

- Some(HalfMatch { prefix_long: "".as_bytes(), suffix_long: "-=-=-=-=-=".as_bytes(), prefix_short: "x".as_bytes(), suffix_short: "".as_bytes(), common: "x-=-=-=-=-=-=-=".as_bytes() }),

- dmp.half_match("x-=-=-=-=-=-=-=-=-=-=-=-=".as_bytes(), "xx-=-=-=-=-=-=-=".as_bytes())

+ Some(HalfMatch {

+ prefix_long: "".as_bytes(),

+ suffix_long: "-=-=-=-=-=".as_bytes(),

+ prefix_short: "x".as_bytes(),

+ suffix_short: "".as_bytes(),

+ common: "x-=-=-=-=-=-=-=".as_bytes()

+ }),

+ dmp.half_match(

+ "x-=-=-=-=-=-=-=-=-=-=-=-=".as_bytes(),

+ "xx-=-=-=-=-=-=-=".as_bytes()

+ )

);

assert_eq!(

- Some(HalfMatch { prefix_long: "-=-=-=-=-=".as_bytes(), suffix_long: "".as_bytes(), prefix_short: "".as_bytes(), suffix_short: "y".as_bytes(), common: "-=-=-=-=-=-=-=y".as_bytes() }),

- dmp.half_match("-=-=-=-=-=-=-=-=-=-=-=-=y".as_bytes(), "-=-=-=-=-=-=-=yy".as_bytes())

+ Some(HalfMatch {

+ prefix_long: "-=-=-=-=-=".as_bytes(),

+ suffix_long: "".as_bytes(),

+ prefix_short: "".as_bytes(),

+ suffix_short: "y".as_bytes(),

+ common: "-=-=-=-=-=-=-=y".as_bytes()

+ }),

+ dmp.half_match(

+ "-=-=-=-=-=-=-=-=-=-=-=-=y".as_bytes(),

+ "-=-=-=-=-=-=-=yy".as_bytes()

+ )

);

// Non-optimal halfmatch.

// Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy

assert_eq!(

- Some(HalfMatch { prefix_long: "qHillo".as_bytes(), suffix_long: "w".as_bytes(), prefix_short: "x".as_bytes(), suffix_short: "Hulloy".as_bytes(), common: "HelloHe".as_bytes() }),

+ Some(HalfMatch {

+ prefix_long: "qHillo".as_bytes(),

+ suffix_long: "w".as_bytes(),

+ prefix_short: "x".as_bytes(),

+ suffix_short: "Hulloy".as_bytes(),

+ common: "HelloHe".as_bytes()

+ }),

dmp.half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes())

);

// Optimal no halfmatch.

dmp.timeout = Some(u64::MAX);

- assert!(dmp.half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes()).is_none());

+ assert!(dmp

+ .half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes())

+ .is_none());

}

#[test]

@@ -818,19 +1052,30 @@ mod tests {

// Perform a trivial diff.

// Null case.

assert!(dmp.diff_main("", "").is_empty());

// Equality

- assert_eq!(vec![Diff::new(Ops::Equal, "abc".as_bytes())], dmp.diff_main("abc", "abc"));

+ assert_eq!(

+ vec![Diff::new(Ops::Equal, "abc".as_bytes())],

+ dmp.diff_main("abc", "abc")

+ );

// Simple insert

assert_eq!(

- vec![Diff::new(Ops::Equal, "ab".as_bytes()), Diff::new(Ops::Insert, "123".as_bytes()), Diff::new(Ops::Equal, "c".as_bytes())],

+ vec![

+ Diff::new(Ops::Equal, "ab".as_bytes()),

+ Diff::new(Ops::Insert, "123".as_bytes()),

+ Diff::new(Ops::Equal, "c".as_bytes())

+ ],

dmp.diff_main("abc", "ab123c")

);

// Simple delete

assert_eq!(

- vec![Diff::new(Ops::Equal, "a".as_bytes()), Diff::new(Ops::Delete, "123".as_bytes()), Diff::new(Ops::Equal, "bc".as_bytes())],

+ vec![

+ Diff::new(Ops::Equal, "a".as_bytes()),

+ Diff::new(Ops::Delete, "123".as_bytes()),

+ Diff::new(Ops::Equal, "bc".as_bytes())

+ ],

dmp.diff_main("a123bc", "abc")

);

@@ -846,104 +1091,147 @@ mod tests {

// dmp.diff_main("abc", "a123b456c")

// );

-// // Two insertions.

-// assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_INSERT, '123'], [DIFF_EQUAL, 'b'], [DIFF_INSERT, '456'], [DIFF_EQUAL, 'c']], dmp.diff_main('abc', 'a123b456c', false));

-// // Two deletions.

-// assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_DELETE, '123'], [DIFF_EQUAL, 'b'], [DIFF_DELETE, '456'], [DIFF_EQUAL, 'c']], dmp.diff_main('a123b456c', 'abc', false));

-// // Perform a real diff.

-// // Switch off the timeout.

-// dmp.Diff_Timeout = 0;

-// // Simple cases.

-// assertEquivalent([[DIFF_DELETE, 'a'], [DIFF_INSERT, 'b']], dmp.diff_main('a', 'b', false));

-// assertEquivalent([[DIFF_DELETE, 'Apple'], [DIFF_INSERT, 'Banana'], [DIFF_EQUAL, 's are a'], [DIFF_INSERT, 'lso'], [DIFF_EQUAL, ' fruit.']], dmp.diff_main('Apples are a fruit.', 'Bananas are also fruit.', false));

-// assertEquivalent([[DIFF_DELETE, 'a'], [DIFF_INSERT, '\u0680'], [DIFF_EQUAL, 'x'], [DIFF_DELETE, '\t'], [DIFF_INSERT, '\0']], dmp.diff_main('ax\t', '\u0680x\0', false));

-// // Overlaps.

-// assertEquivalent([[DIFF_DELETE, '1'], [DIFF_EQUAL, 'a'], [DIFF_DELETE, 'y'], [DIFF_EQUAL, 'b'], [DIFF_DELETE, '2'], [DIFF_INSERT, 'xab']], dmp.diff_main('1ayb2', 'abxab', false));

-// assertEquivalent([[DIFF_INSERT, 'xaxcx'], [DIFF_EQUAL, 'abc'], [DIFF_DELETE, 'y']], dmp.diff_main('abcy', 'xaxcxabc', false));

-// assertEquivalent([[DIFF_DELETE, 'ABCD'], [DIFF_EQUAL, 'a'], [DIFF_DELETE, '='], [DIFF_INSERT, '-'], [DIFF_EQUAL, 'bcd'], [DIFF_DELETE, '='], [DIFF_INSERT, '-'], [DIFF_EQUAL, 'efghijklmnopqrs'], [DIFF_DELETE, 'EFGHIJKLMNOefg']], dmp.diff_main('ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg', 'a-bcd-efghijklmnopqrs', false));

-// // Large equality.

-// assertEquivalent([[DIFF_INSERT, ' '], [DIFF_EQUAL, 'a'], [DIFF_INSERT, 'nd'], [DIFF_EQUAL, ' [[Pennsylvania]]'], [DIFF_DELETE, ' and [[New']], dmp.diff_main('a [[Pennsylvania]] and [[New', ' and [[Pennsylvania]]', false));

-// // Timeout.

-// dmp.Diff_Timeout = 0.1; // 100ms

-// var a = '`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n';

-// var b = 'I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n';

-// // Increase the text lengths by 1024 times to ensure a timeout.

-// for (var i = 0; i < 10; i++) {

-// a += a;

-// b += b;

-// }

-// var startTime = (new Date()).getTime();

-// dmp.diff_main(a, b);

-// var endTime = (new Date()).getTime();

-// // Test that we took at least the timeout period.

-// assertTrue(dmp.Diff_Timeout * 1000 <= endTime - startTime);

-// // Test that we didn't take forever (be forgiving).

-// // Theoretically this test could fail very occasionally if the

-// // OS task swaps or locks up for a second at the wrong moment.

-// assertTrue(dmp.Diff_Timeout * 1000 * 2 > endTime - startTime);

-// dmp.Diff_Timeout = 0;

-// // Test the linemode speedup.

-// // Must be long to pass the 100 char cutoff.

-// // Simple line-mode.

-// a = '1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n';

-// b = 'abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n';

-// assertEquivalent(dmp.diff_main(a, b, false), dmp.diff_main(a, b, true));

-// // Single line-mode.

-// a = '1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890';

-// b = 'abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij';

-// assertEquivalent(dmp.diff_main(a, b, false), dmp.diff_main(a, b, true));

-// // Overlap line-mode.

-// a = '1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n';

-// b = 'abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n';

-// var texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true));

-// var texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false));

-// assertEquivalent(texts_textmode, texts_linemode);

-// // Test null inputs.

-// try {

-// dmp.diff_main(null, null);

-// assertEquals(Error, null);

-// } catch (e) {

-// // Exception expected.

-// }

+ // // Two insertions.

+ // assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_INSERT, '123'], [DIFF_EQUAL, 'b'], [DIFF_INSERT, '456'], [DIFF_EQUAL, 'c']], dmp.diff_main('abc', 'a123b456c', false));

+ // // Two deletions.

+ // assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_DELETE, '123'], [DIFF_EQUAL, 'b'], [DIFF_DELETE, '456'], [DIFF_EQUAL, 'c']], dmp.diff_main('a123b456c', 'abc', false));

+ // // Perform a real diff.

+ // // Switch off the timeout.

+ // dmp.Diff_Timeout = 0;

+ // // Simple cases.

+ // assertEquivalent([[DIFF_DELETE, 'a'], [DIFF_INSERT, 'b']], dmp.diff_main('a', 'b', false));

+ // assertEquivalent([[DIFF_DELETE, 'Apple'], [DIFF_INSERT, 'Banana'], [DIFF_EQUAL, 's are a'], [DIFF_INSERT, 'lso'], [DIFF_EQUAL, ' fruit.']], dmp.diff_main('Apples are a fruit.', 'Bananas are also fruit.', false));

+ // assertEquivalent([[DIFF_DELETE, 'a'], [DIFF_INSERT, '\u0680'], [DIFF_EQUAL, 'x'], [DIFF_DELETE, '\t'], [DIFF_INSERT, '\0']], dmp.diff_main('ax\t', '\u0680x\0', false));

+ // // Overlaps.

+ // assertEquivalent([[DIFF_DELETE, '1'], [DIFF_EQUAL, 'a'], [DIFF_DELETE, 'y'], [DIFF_EQUAL, 'b'], [DIFF_DELETE, '2'], [DIFF_INSERT, 'xab']], dmp.diff_main('1ayb2', 'abxab', false));

+ // assertEquivalent([[DIFF_INSERT, 'xaxcx'], [DIFF_EQUAL, 'abc'], [DIFF_DELETE, 'y']], dmp.diff_main('abcy', 'xaxcxabc', false));

+ // assertEquivalent([[DIFF_DELETE, 'ABCD'], [DIFF_EQUAL, 'a'], [DIFF_DELETE, '='], [DIFF_INSERT, '-'], [DIFF_EQUAL, 'bcd'], [DIFF_DELETE, '='], [DIFF_INSERT, '-'], [DIFF_EQUAL, 'efghijklmnopqrs'], [DIFF_DELETE, 'EFGHIJKLMNOefg']], dmp.diff_main('ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg', 'a-bcd-efghijklmnopqrs', false));

+ // // Large equality.

+ // assertEquivalent([[DIFF_INSERT, ' '], [DIFF_EQUAL, 'a'], [DIFF_INSERT, 'nd'], [DIFF_EQUAL, ' [[Pennsylvania]]'], [DIFF_DELETE, ' and [[New']], dmp.diff_main('a [[Pennsylvania]] and [[New', ' and [[Pennsylvania]]', false));

+ // // Timeout.

+ // dmp.Diff_Timeout = 0.1; // 100ms

+ // var a = '`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n';

+ // var b = 'I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n';

+ // // Increase the text lengths by 1024 times to ensure a timeout.

+ // for (var i = 0; i < 10; i++) {

+ // a += a;

+ // b += b;

+ // }

+ // var startTime = (new Date()).getTime();

+ // dmp.diff_main(a, b);

+ // var endTime = (new Date()).getTime();

+ // // Test that we took at least the timeout period.

+ // assertTrue(dmp.Diff_Timeout * 1000 <= endTime - startTime);

+ // // Test that we didn't take forever (be forgiving).

+ // // Theoretically this test could fail very occasionally if the

+ // // OS task swaps or locks up for a second at the wrong moment.

+ // assertTrue(dmp.Diff_Timeout * 1000 * 2 > endTime - startTime);

+ // dmp.Diff_Timeout = 0;

+ // // Test the linemode speedup.

+ // // Must be long to pass the 100 char cutoff.

+ // // Simple line-mode.

+ // a = '1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n';

+ // b = 'abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n';

+ // assertEquivalent(dmp.diff_main(a, b, false), dmp.diff_main(a, b, true));

+ // // Single line-mode.

+ // a = '1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890';

+ // b = 'abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij';

+ // assertEquivalent(dmp.diff_main(a, b, false), dmp.diff_main(a, b, true));

+ // // Overlap line-mode.

+ // a = '1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n';

+ // b = 'abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n';

+ // var texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true));

+ // var texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false));

+ // assertEquivalent(texts_textmode, texts_linemode);

+ // // Test null inputs.

+ // try {

+ // dmp.diff_main(null, null);

+ // assertEquals(Error, null);

+ // } catch (e) {

+ // // Exception expected.

+ // }

}

#[test]

fn test_diff_lines_to_chars() {

// Convert lines down to characters.

assert_eq!(

- LineToChars { chars_old: [0_usize, 1, 0].iter().map(|i| char::from_u32(*i as u32).unwrap()).collect::<String>().as_bytes().to_vec(), chars_new: [1_usize, 0, 1].iter().map(|i| char::from_u32(*i as u32).unwrap()).collect::<String>().as_bytes().to_vec(), lines: vec![b"alpha\n", b"beta\n"] },

+ LineToChars {

+ chars_old: [0_usize, 1, 0]

+ .iter()

+ .map(|i| char::from_u32(*i as u32).unwrap())

+ .collect::<String>()

+ .as_bytes()

+ .to_vec(),

+ chars_new: [1_usize, 0, 1]

+ .iter()

+ .map(|i| char::from_u32(*i as u32).unwrap())

+ .collect::<String>()

+ .as_bytes()

+ .to_vec(),

+ lines: vec![b"alpha\n", b"beta\n"]

+ },

DiffMatchPatch::lines_to_chars(b"alpha\nbeta\nalpha\n", b"beta\nalpha\nbeta\n")

);

assert_eq!(

- LineToChars { chars_old: "".as_bytes().to_vec(), chars_new: [0_usize, 1, 2, 2].iter().map(|i| char::from_u32(*i as u32).unwrap()).collect::<String>().as_bytes().to_vec(), lines: vec![b"alpha\r\n", b"beta\r\n", b"\r\n"] },

+ LineToChars {

+ chars_old: "".as_bytes().to_vec(),

+ chars_new: [0_usize, 1, 2, 2]

+ .iter()

+ .map(|i| char::from_u32(*i as u32).unwrap())

+ .collect::<String>()

+ .as_bytes()

+ .to_vec(),

+ lines: vec![b"alpha\r\n", b"beta\r\n", b"\r\n"]

+ },

DiffMatchPatch::lines_to_chars(b"", b"alpha\r\nbeta\r\n\r\n\r\n")

);

assert_eq!(

- LineToChars { chars_old: [0_usize].iter().map(|i| char::from_u32(*i as u32).unwrap()).collect::<String>().as_bytes().to_vec(), chars_new: [1_usize].iter().map(|i| char::from_u32(*i as u32).unwrap()).collect::<String>().as_bytes().to_vec(), lines: vec![b"a", b"b"] },

+ LineToChars {

+ chars_old: [0_usize]

+ .iter()

+ .map(|i| char::from_u32(*i as u32).unwrap())

+ .collect::<String>()

+ .as_bytes()

+ .to_vec(),

+ chars_new: [1_usize]

+ .iter()

+ .map(|i| char::from_u32(*i as u32).unwrap())

+ .collect::<String>()

+ .as_bytes()

+ .to_vec(),

+ lines: vec![b"a", b"b"]

+ },

DiffMatchPatch::lines_to_chars(b"a", b"b")

);

// More than 256 to reveal any 8-bit limitations.

const TLIMIT: usize = 300;

- let linestr = (0 .. TLIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();

- let linelist: Vec<&[u8]> = (0 .. TLIMIT).map(|i| linestr[i].as_bytes()).collect();

- let charlist = (0 .. TLIMIT).map(|i| char::from_u32(i as u32).unwrap()).collect::<String>();

+ let linestr = (0..TLIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();

+ let linelist: Vec<&[u8]> = (0..TLIMIT).map(|i| linestr[i].as_bytes()).collect();

+ let charlist = (0..TLIMIT)

+ .map(|i| char::from_u32(i as u32).unwrap())

+ .collect::<String>();

assert_eq!(

- LineToChars { chars_old: charlist.as_bytes().to_vec(), chars_new: String::new().as_bytes().to_vec(), lines: linelist },

+ LineToChars {

+ chars_old: charlist.as_bytes().to_vec(),

+ chars_new: String::new().as_bytes().to_vec(),

+ lines: linelist

+ },

DiffMatchPatch::lines_to_chars(linestr.join("").as_bytes(), b"")

);

}

@@ -951,27 +1239,37 @@ mod tests {

#[test]

fn test_diff_chars_to_lines() {

// Convert chars up to lines.

- let d1 = [0_usize, 1, 0].iter().map(|i| char::from_u32(*i as u32).unwrap()).collect::<String>();

- let d2 = [1_usize, 0, 1].iter().map(|i| char::from_u32(*i as u32).unwrap()).collect::<String>();

+ let d1 = [0_usize, 1, 0]

+ .iter()

+ .map(|i| char::from_u32(*i as u32).unwrap())

+ .collect::<String>();

+ let d2 = [1_usize, 0, 1]

+ .iter()

+ .map(|i| char::from_u32(*i as u32).unwrap())

+ .collect::<String>();

let mut diffs = [

Diff::new(Ops::Equal, d1.as_bytes()),

- Diff::new(Ops::Insert, d2.as_bytes())

+ Diff::new(Ops::Insert, d2.as_bytes()),

];

DiffMatchPatch::chars_to_lines(&mut diffs, &[b"alpha\n", b"beta\n"]);

- assert_eq!([

- Diff::new(Ops::Equal,b"alpha\nbeta\nalpha\n"),

- Diff::new(Ops::Insert, b"beta\nalpha\nbeta\n")

- ], diffs);

+ assert_eq!(

+ [

+ Diff::new(Ops::Equal, b"alpha\nbeta\nalpha\n"),

+ Diff::new(Ops::Insert, b"beta\nalpha\nbeta\n")

+ ],

+ diffs

+ );

// More than 256 to reveal any 8-bit limitations.

const TLIMIT: usize = 300;

- let charlist = (0 .. TLIMIT).map(|i| char::from_u32(i as u32).unwrap()).collect::<String>();

- let linestr = (0 .. TLIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();

- let linelist: Vec<&[u8]> = (0 .. TLIMIT).map(|i| linestr[i].as_bytes()).collect();

+ let charlist = (0..TLIMIT)

+ .map(|i| char::from_u32(i as u32).unwrap())

+ .collect::<String>();

+ let linestr = (0..TLIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();

+ let linelist: Vec<&[u8]> = (0..TLIMIT).map(|i| linestr[i].as_bytes()).collect();

let mut diffs = [Diff::new(Ops::Delete, charlist.as_bytes())];

DiffMatchPatch::chars_to_lines(&mut diffs, &linelist[..]);

@@ -980,10 +1278,10 @@ mod tests {

// More than 65536 to verify any 16-bit limitation.

const ULIMIT: usize = 10;

- let linestr = (0 .. ULIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();

+ let linestr = (0..ULIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();

let lines = linestr.join("");

let l2c = DiffMatchPatch::lines_to_chars(lines.as_bytes(), b"");

let mut diffs = [Diff::new(Ops::Insert, &l2c.chars_old)];

DiffMatchPatch::chars_to_lines(&mut diffs, &l2c.lines);

@@ -995,58 +1293,128 @@ mod tests {

// Cleanup a messy diff.

// Null case.

let mut diffs = vec![];

- let mut test: Vec<Diff> = vec![];

+ let test: Vec<Diff> = vec![];

DiffMatchPatch::cleanup_merge(&mut diffs);

assert_eq!(test, diffs);

// No change case

- diffs = vec![Diff::new(Ops::Equal, b"a"), Diff::new(Ops::Delete, b"b"), Diff::new(Ops::Insert, b"c")];

- test = vec![Diff::new(Ops::Equal, b"a"), Diff::new(Ops::Delete, b"b"), Diff::new(Ops::Insert, b"c")];

+ let mut diffs = vec![

+ Diff::new(Ops::Equal, b"a"),

+ Diff::new(Ops::Delete, b"b"),

+ Diff::new(Ops::Insert, b"c"),

+ ];

+ let test = vec![

+ Diff::new(Ops::Equal, b"a"),

+ Diff::new(Ops::Delete, b"b"),

+ Diff::new(Ops::Insert, b"c"),

+ ];

DiffMatchPatch::cleanup_merge(&mut diffs);

assert_eq!(test, diffs);

// Merge equalities.

- diffs = vec![Diff::new(Ops::Equal, b"a"), Diff::new(Ops::Equal, b"b"), Diff::new(Ops::Equal, b"c")];

- test = vec![Diff::new(Ops::Equal, b"abc")];

+ let mut diffs = vec![

+ Diff::new(Ops::Equal, b"a"),

+ Diff::new(Ops::Equal, b"b"),

+ Diff::new(Ops::Equal, b"c"),

+ ];

+ let test = vec![Diff::new(Ops::Equal, b"abc")];

DiffMatchPatch::cleanup_merge(&mut diffs);

assert_eq!(test, diffs);

// Merge deletions.

- diffs = vec![Diff::new(Ops::Delete, b"a"), Diff::new(Ops::Delete, b"b"), Diff::new(Ops::Delete, b"c")];

- test = vec![Diff::new(Ops::Delete, b"abc")];

+ let mut diffs = vec![

+ Diff::new(Ops::Delete, b"a"),

+ Diff::new(Ops::Delete, b"b"),

+ Diff::new(Ops::Delete, b"c"),

+ ];

+ let test = vec![Diff::new(Ops::Delete, b"abc")];

DiffMatchPatch::cleanup_merge(&mut diffs);

assert_eq!(test, diffs);

// Merge insertions.

- diffs = vec![Diff::new(Ops::Insert, b"a"), Diff::new(Ops::Insert, b"b"), Diff::new(Ops::Insert, b"c")];

- test = vec![Diff::new(Ops::Insert, b"abc")];

+ let mut diffs = vec![

+ Diff::new(Ops::Insert, b"a"),

+ Diff::new(Ops::Insert, b"b"),

+ Diff::new(Ops::Insert, b"c"),

+ ];

+ let test = vec![Diff::new(Ops::Insert, b"abc")];

DiffMatchPatch::cleanup_merge(&mut diffs);

assert_eq!(test, diffs);

- // // Merge interweave.

- // diffs = [[DIFF_DELETE, 'a'], [DIFF_INSERT, 'b'], [DIFF_DELETE, 'c'], [DIFF_INSERT, 'd'], [DIFF_EQUAL, 'e'], [DIFF_EQUAL, 'f']];

- // dmp.diff_cleanupMerge(diffs);

- // assertEquivalent([[DIFF_DELETE, 'ac'], [DIFF_INSERT, 'bd'], [DIFF_EQUAL, 'ef']], diffs);

+ // Merge interweave.

+ let mut diffs = vec![

+ Diff::new(Ops::Delete, b"a"),

+ Diff::new(Ops::Insert, b"b"),

+ Diff::new(Ops::Delete, b"c"),

+ Diff::new(Ops::Insert, b"d"),

+ Diff::new(Ops::Equal, b"e"),

+ Diff::new(Ops::Equal, b"f"),

+ ];

+ let test = vec![

+ Diff::new(Ops::Delete, b"ac"),

+ Diff::new(Ops::Insert, b"bd"),

+ Diff::new(Ops::Equal, b"ef"),

+ ];

+ DiffMatchPatch::cleanup_merge(&mut diffs);

+ assert_eq!(test, diffs);

- // // Prefix and suffix detection.

- // diffs = [[DIFF_DELETE, 'a'], [DIFF_INSERT, 'abc'], [DIFF_DELETE, 'dc']];

- // dmp.diff_cleanupMerge(diffs);

- // assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_DELETE, 'd'], [DIFF_INSERT, 'b'], [DIFF_EQUAL, 'c']], diffs);

+ // Prefix and suffix detection.

+ let mut diffs = vec![

+ Diff::new(Ops::Delete, b"a"),

+ Diff::new(Ops::Insert, b"abc"),

+ Diff::new(Ops::Delete, b"dc")

+ ];

+ let test = vec![

+ Diff::new(Ops::Equal, b"a"),

+ Diff::new(Ops::Delete, b"d"),

+ Diff::new(Ops::Insert, b"b"),

+ Diff::new(Ops::Equal, b"c"),

+ ];

+ DiffMatchPatch::cleanup_merge(&mut diffs);

+ assert_eq!(test, diffs);

- // // Prefix and suffix detection with equalities.

- // diffs = [[DIFF_EQUAL, 'x'], [DIFF_DELETE, 'a'], [DIFF_INSERT, 'abc'], [DIFF_DELETE, 'dc'], [DIFF_EQUAL, 'y']];

- // dmp.diff_cleanupMerge(diffs);

- // assertEquivalent([[DIFF_EQUAL, 'xa'], [DIFF_DELETE, 'd'], [DIFF_INSERT, 'b'], [DIFF_EQUAL, 'cy']], diffs);

+ // Prefix and suffix detection with equalities.

+ let mut diffs = vec![

+ Diff::new(Ops::Equal, b"x"),

+ Diff::new(Ops::Delete, b"a"),

+ Diff::new(Ops::Insert, b"abc"),

+ Diff::new(Ops::Delete, b"dc"),

+ Diff::new(Ops::Equal, b"y"),

+ ];

+ let test = vec![

+ Diff::new(Ops::Equal, b"xa"),

+ Diff::new(Ops::Delete, b"d"),

+ Diff::new(Ops::Insert, b"b"),

+ Diff::new(Ops::Equal, b"cy"),

+ ];

+ DiffMatchPatch::cleanup_merge(&mut diffs);

+ assert_eq!(test, diffs);

- // // Slide edit left.

- // diffs = [[DIFF_EQUAL, 'a'], [DIFF_INSERT, 'ba'], [DIFF_EQUAL, 'c']];

- // dmp.diff_cleanupMerge(diffs);

- // assertEquivalent([[DIFF_INSERT, 'ab'], [DIFF_EQUAL, 'ac']], diffs);

+ // Slide edit left.

+ let mut diffs = vec![

+ Diff::new(Ops::Equal, b"a"),

+ Diff::new(Ops::Insert, b"ba"),

+ Diff::new(Ops::Equal, b"c"),

+ ];

+ let test = vec![

+ Diff::new(Ops::Insert, b"ab"),

+ Diff::new(Ops::Equal, b"ac"),

+ ];

+ DiffMatchPatch::cleanup_merge(&mut diffs);

+ assert_eq!(test, diffs);

- // // Slide edit right.

- // diffs = [[DIFF_EQUAL, 'c'], [DIFF_INSERT, 'ab'], [DIFF_EQUAL, 'a']];

- // dmp.diff_cleanupMerge(diffs);

- // assertEquivalent([[DIFF_EQUAL, 'ca'], [DIFF_INSERT, 'ba']], diffs);

+ // Slide edit right

+ let mut diffs = vec![

+ Diff::new(Ops::Equal, b"c"),

+ Diff::new(Ops::Insert, b"ab"),

+ Diff::new(Ops::Equal, b"a"),

+ ];

+ let test = vec![

+ Diff::new(Ops::Equal, b"ca"),

+ Diff::new(Ops::Insert, b"ba"),

+ ];

+ DiffMatchPatch::cleanup_merge(&mut diffs);

+ assert_eq!(test, diffs);

// // Slide edit left recursive.

// diffs = [[DIFF_EQUAL, 'a'], [DIFF_DELETE, 'b'], [DIFF_EQUAL, 'c'], [DIFF_DELETE, 'ac'], [DIFF_EQUAL, 'x']];

@@ -1068,4 +1436,4 @@ mod tests {

// dmp.diff_cleanupMerge(diffs);

// assertEquivalent([[DIFF_INSERT, 'a'], [DIFF_EQUAL, 'b']], diffs);

}

-} \ No newline at end of file