WIP: Bisect - dmp - my fork of dmp

my fork of dmp

master 1Branch 0Tags

Clone

HTTPS

SSH

Open with VS Code

WIP: Bisect

Browse Source

Anubhab Bandyopadhyay 2024-08-07

parent 6c1dc0e · commit a7368c2

Diffstat

-rw-r--r--

src/dmp.rs

631

1 files changed, 423 insertions, 208 deletions

diff --git a/src/dmp.rs b/src/dmp.rs
index 63f9bf8..16131a9 100644
--- a/src/dmp.rs
+++ b/src/dmp.rs

@@ -357,6 +357,7 @@ impl DiffMatchPatch {

}

fn diff_bisect<'a>(&self, old: &'a [u8], new: &'a [u8]) -> Vec<Diff> {

+ // anubhab

todo!()

}

@@ -454,6 +455,44 @@ impl DiffMatchPatch {

pointmid

}

+ fn common_overlap(lhs: &[u8], rhs: &[u8]) -> usize {

+ if lhs.is_empty() || rhs.is_empty() {

+ return 0;

+ }

+ let minlen = lhs.len().min(rhs.len());

+ // A working set with longer string truncated

+ let l = if lhs.len() > rhs.len() { &lhs[lhs.len() - rhs.len() ..] } else { lhs };

+ let r = if lhs.len() < rhs.len() { &rhs[..lhs.len()] } else { rhs };

+ // Quick check for the worst case.

+ if l == r {

+ return minlen;

+ }

+ // Start by looking for a single character match

+ // and increase length until no match is found.

+ // Performance analysis: https://neil.fraser.name/news/2010/11/04/

+ let mut len = 1;

+ let mut best = 0;

+ loop {

+ let pattern = &l[minlen - len ..];

+ let found = if let Some(found) = r.windows(pattern.len()).step_by(1).position(|p| p == pattern) {

+ found

+ } else {

+ return best;

+ };

+ len += found;

+ if found == 0 || l[minlen - len ..] == r[..len] {

+ best = len;

+ len += 1;

+ }

// Reduce the number of edits by eliminating semantically trivial equalities

fn cleanup_semantic(diffs: &mut Vec<Diff>) {

let mut changes = false;

@@ -544,192 +583,180 @@ impl DiffMatchPatch {

}

Self::cleanup_semantic_lossless(diffs);

- // var changes = false;

- // var equalities = []; // Stack of indices where equalities are found.

- // var equalitiesLength = 0; // Keeping our own length var is faster in JS.

- // /** @type {?string} */

- // var lastEquality = null;

- // // Always equal to diffs[equalities[equalitiesLength - 1]][1]

- // var pointer = 0; // Index of current position.

- // // Number of characters that changed prior to the equality.

- // var length_insertions1 = 0;

- // var length_deletions1 = 0;

- // // Number of characters that changed after the equality.

- // var length_insertions2 = 0;

- // var length_deletions2 = 0;

- // // Normalize the diff.

- // if (changes) {

- // this.diff_cleanupMerge(diffs);

- // }

- // this.diff_cleanupSemanticLossless(diffs);

- // // Find any overlaps between deletions and insertions.

- // // e.g: <del>abcxxx</del><ins>xxxdef</ins>

- // // -> <del>abc</del>xxx<ins>def</ins>

- // // e.g: <del>xxxabc</del><ins>defxxx</ins>

- // // -> <ins>def</ins>xxx<del>abc</del>

- // // Only extract an overlap if it is as big as the edit ahead or behind it.

- // pointer = 1;

- // while (pointer < diffs.length) {

- // if (diffs[pointer - 1][0] == DIFF_DELETE &&

- // diffs[pointer][0] == DIFF_INSERT) {

- // var deletion = diffs[pointer - 1][1];

- // var insertion = diffs[pointer][1];

- // var overlap_length1 = this.diff_commonOverlap_(deletion, insertion);

- // var overlap_length2 = this.diff_commonOverlap_(insertion, deletion);

- // if (overlap_length1 >= overlap_length2) {

- // if (overlap_length1 >= deletion.length / 2 ||

- // overlap_length1 >= insertion.length / 2) {

- // // Overlap found. Insert an equality and trim the surrounding edits.

- // diffs.splice(pointer, 0, new diff_match_patch.Diff(DIFF_EQUAL,

- // insertion.substring(0, overlap_length1)));

- // diffs[pointer - 1][1] =

- // deletion.substring(0, deletion.length - overlap_length1);

- // diffs[pointer + 1][1] = insertion.substring(overlap_length1);

- // pointer++;

- // }

- // } else {

- // if (overlap_length2 >= deletion.length / 2 ||

- // overlap_length2 >= insertion.length / 2) {

- // // Reverse overlap found.

- // // Insert an equality and swap and trim the surrounding edits.

- // diffs.splice(pointer, 0, new diff_match_patch.Diff(DIFF_EQUAL,

- // deletion.substring(0, overlap_length2)));

- // diffs[pointer - 1][0] = DIFF_INSERT;

- // diffs[pointer - 1][1] =

- // insertion.substring(0, insertion.length - overlap_length2);

- // diffs[pointer + 1][0] = DIFF_DELETE;

- // diffs[pointer + 1][1] =

- // deletion.substring(overlap_length2);

- // pointer++;

- // }

- // pointer++;

- // }

- // pointer++;

- // }

+ difflen = diffs.len();

+ // Find any overlaps between deletions and insertions.

+ // e.g: <del>abcxxx</del><ins>xxxdef</ins>

+ // -> <del>abc</del>xxx<ins>def</ins>

+ // e.g: <del>xxxabc</del><ins>defxxx</ins>

+ // -> <ins>def</ins>xxx<del>abc</del>

+ // Only extract an overlap if it is as big as the edit ahead or behind it.

+ pointer = 1;

+ while difflen > 0 && pointer < difflen {

+ if diffs[pointer - 1].0 == Ops::Delete && diffs[pointer].0 == Ops::Insert {

+ let delete = diffs[pointer - 1].1.to_vec();

+ let insert = diffs[pointer].1.to_vec();

+ let delete_thres = delete.len() / 2 + if delete.len() % 2 > 0 { 1 } else { 0 };

+ let insert_thres = insert.len() / 2 + if insert.len() % 2 > 0 { 1 } else { 0 };

+ let overlap_1 = Self::common_overlap(&delete[..], &insert[..]);

+ let overlap_2 = Self::common_overlap(&insert[..], &delete[..]);

+ if overlap_1 >= overlap_2 &&

+ (overlap_1 >= delete_thres ||

+ overlap_1 >= insert_thres) {

+ // Overlap found. Insert an equality and trim the surrounding edits.

+ diffs.insert(pointer, Diff::equal(&insert[..overlap_1]));

+ diffs[pointer - 1].1 = delete[.. delete.len() - overlap_1].to_vec();

+ diffs[pointer + 1].1 = insert[overlap_1 ..].to_vec();

+ difflen = diffs.len();

+ pointer += 1;

+ } else if overlap_2 >= delete_thres || overlap_2 >= insert_thres {

+ // Reverse overlap

+ // Insert equality and swap and trim surrounding edits

+ diffs.insert(pointer, Diff::equal(&delete[.. overlap_2]));

+ diffs[pointer - 1] = Diff::insert(&insert[.. insert.len() - overlap_2]);

+ diffs[pointer + 1] = Diff::delete(&delete[overlap_2 ..]);

+ difflen = diffs.len();

+ pointer += 1;

+ }

+ pointer += 1;

+ }

+ pointer += 1;

+ }

}

// Look for single edits surrounded on both sides by equalities

// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.

fn cleanup_semantic_lossless(diffs: &mut Vec<Diff>) {

- // /**

- // * Given two strings, compute a score representing whether the internal

- // * boundary falls on logical boundaries.

- // * Scores range from 6 (best) to 0 (worst).

- // * Closure, but does not reference any external variables.

- // * @param {string} one First string.

- // * @param {string} two Second string.

- // * @return {number} The score.

- // * @private

- // */

- // function diff_cleanupSemanticScore_(one, two) {

- // if (!one || !two) {

- // // Edges are the best.

- // return 6;

- // }

- // // Each port of this function behaves slightly differently due to

- // // subtle differences in each language's definition of things like

- // // 'whitespace'. Since this function's purpose is largely cosmetic,

- // // the choice has been made to use each language's native features

- // // rather than force total conformity.

- // var char1 = one.charAt(one.length - 1);

- // var char2 = two.charAt(0);

- // var nonAlphaNumeric1 = char1.match(diff_match_patch.nonAlphaNumericRegex_);

- // var nonAlphaNumeric2 = char2.match(diff_match_patch.nonAlphaNumericRegex_);

- // var whitespace1 = nonAlphaNumeric1 &&

- // char1.match(diff_match_patch.whitespaceRegex_);

- // var whitespace2 = nonAlphaNumeric2 &&

- // char2.match(diff_match_patch.whitespaceRegex_);

- // var lineBreak1 = whitespace1 &&

- // char1.match(diff_match_patch.linebreakRegex_);

- // var lineBreak2 = whitespace2 &&

- // char2.match(diff_match_patch.linebreakRegex_);

- // var blankLine1 = lineBreak1 &&

- // one.match(diff_match_patch.blanklineEndRegex_);

- // var blankLine2 = lineBreak2 &&

- // two.match(diff_match_patch.blanklineStartRegex_);

- // if (blankLine1 || blankLine2) {

- // // Five points for blank lines.

- // return 5;

- // } else if (lineBreak1 || lineBreak2) {

- // // Four points for line breaks.

- // return 4;

- // } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) {

- // // Three points for end of sentences.

- // return 3;

- // } else if (whitespace1 || whitespace2) {

- // // Two points for whitespace.

- // return 2;

- // } else if (nonAlphaNumeric1 || nonAlphaNumeric2) {

- // // One point for non-alphanumeric.

- // return 1;

- // }

- // return 0;

- // }

- // var pointer = 1;

- // // Intentionally ignore the first and last element (don't need checking).

- // while (pointer < diffs.length - 1) {

- // if (diffs[pointer - 1][0] == DIFF_EQUAL &&

- // diffs[pointer + 1][0] == DIFF_EQUAL) {

- // // This is a single edit surrounded by equalities.

- // var equality1 = diffs[pointer - 1][1];

- // var edit = diffs[pointer][1];

- // var equality2 = diffs[pointer + 1][1];

- // // First, shift the edit as far left as possible.

- // var commonOffset = this.diff_commonSuffix(equality1, edit);

- // if (commonOffset) {

- // var commonString = edit.substring(edit.length - commonOffset);

- // equality1 = equality1.substring(0, equality1.length - commonOffset);

- // edit = commonString + edit.substring(0, edit.length - commonOffset);

- // equality2 = commonString + equality2;

- // }

- // // Second, step character by character right, looking for the best fit.

- // var bestEquality1 = equality1;

- // var bestEdit = edit;

- // var bestEquality2 = equality2;

- // var bestScore = diff_cleanupSemanticScore_(equality1, edit) +

- // diff_cleanupSemanticScore_(edit, equality2);

- // while (edit.charAt(0) === equality2.charAt(0)) {

- // equality1 += edit.charAt(0);

- // edit = edit.substring(1) + equality2.charAt(0);

- // equality2 = equality2.substring(1);

- // var score = diff_cleanupSemanticScore_(equality1, edit) +

- // diff_cleanupSemanticScore_(edit, equality2);

- // // The >= encourages trailing rather than leading whitespace on edits.

- // if (score >= bestScore) {

- // bestScore = score;

- // bestEquality1 = equality1;

- // bestEdit = edit;

- // bestEquality2 = equality2;

- // }

- // if (diffs[pointer - 1][1] != bestEquality1) {

- // // We have an improvement, save it back to the diff.

- // if (bestEquality1) {

- // diffs[pointer - 1][1] = bestEquality1;

- // } else {

- // diffs.splice(pointer - 1, 1);

- // pointer--;

- // }

- // diffs[pointer][1] = bestEdit;

- // if (bestEquality2) {

- // diffs[pointer + 1][1] = bestEquality2;

- // } else {

- // diffs.splice(pointer + 1, 1);

- // pointer--;

- // }

- // pointer++;

- // }

+ let mut pointer = 1_usize;

+ let mut difflen = diffs.len();

+ // Intentionally ignore the first and last element (don't need checking).

+ while difflen > 0 && pointer < difflen - 1 {

+ // an edit surrounded by equalities

+ if diffs[pointer - 1].0 == Ops::Equal && diffs[pointer + 1].0 == Ops::Equal {

+ let mut equality_prev = diffs[pointer - 1].1.clone();

+ let mut edit = diffs[pointer].1.clone();

+ let mut equality_next = diffs[pointer + 1].1.clone();

+ // Shift the edit as far left as possible

+ let commonlen = Self::common_prefix(&equality_prev[..], &edit[..], true);

+ if commonlen > 0 {

+ let mut common_prev = edit[edit.len() - commonlen ..].to_vec();

+ let mut common_next = common_prev.clone();

+ equality_prev = equality_prev[.. equality_prev.len() - commonlen].to_vec();

+ common_prev.append(&mut edit[.. edit.len() - commonlen].to_vec());

+ edit = common_prev;

+ common_next.append(&mut equality_next.to_vec());

+ equality_next = common_next;

+ }

+ // Step byte by byte right looking for the best fit

+ let mut best_equality_prev = equality_prev.clone();

+ let mut best_edit = edit.clone();

+ let mut best_equality_next = equality_next.clone();

+ let mut best_score = Self::cleanup_semantic_score(&equality_prev[..], &edit[..])

+ + Self::cleanup_semantic_score(&edit[..], &equality_next[..]);

+ while !edit.is_empty() && !equality_next.is_empty() && edit[0] == equality_next[0] {

+ equality_prev.push(edit[0]);

+ edit.remove(0);

+ edit.push(equality_next[0]);

+ equality_next.remove(0);

+ let score = Self::cleanup_semantic_score(&equality_prev[..], &edit[..]) +

+ Self::cleanup_semantic_score(&edit[..], &equality_next[..]);

+ // The >= encourages trailing rather than leading whitespace on edits.

+ if score >= best_score {

+ best_score = score;

+ best_equality_prev.clone_from(&equality_prev);

+ best_edit.clone_from(&edit);

+ best_equality_next.clone_from(&equality_next);

+ }

+ // We have an improvement, save it back to the diff.

+ if diffs[pointer - 1].1 != best_equality_prev {

+ if !best_equality_prev.is_empty() {

+ diffs[pointer - 1].1 = best_equality_prev.to_vec();

+ } else {

+ diffs.remove(pointer - 1);

+ pointer -= 1;

+ difflen = diffs.len();

+ }

+ diffs[pointer].1 = best_edit.to_vec();

+ if !best_equality_next.is_empty() {

+ diffs[pointer + 1].1 = best_equality_next.to_vec();

+ } else {

+ diffs.remove(pointer + 1);

+ pointer -= 1;

+ difflen = diffs.len();

+ }

+ pointer += 1;

+ }

+ // Given two strings, compute a score representing whether the internal

+ // boundary falls on logical boundaries

+ // Scores range from 6 (best) to 0 (worst)

+ fn cleanup_semantic_score(one: &[u8], two: &[u8]) -> u8 {

+ let (char1, char2) = if let (Some(&char1), Some(&char2)) = (one.last(), two.first()) {

+ (char1 as char, char2 as char)

+ } else {

+ return 6;

+ };

+ // Each port of this function behaves slightly differently due to

+ // subtle differences in each language's definition of things like

+ // 'whitespace'. Since this function's purpose is largely cosmetic,

+ // the choice has been made to use each language's native features

+ // rather than force total conformity.

+ let whitespace_1 = char1.is_whitespace();

+ let whitespace_2 = char2.is_whitespace();

+ let linebreak_1 = whitespace_1 && (char1 == '\n' || char1 == '\r');

+ let linebreak_2 = whitespace_2 && (char2 == '\n' || char2 == '\r');

+ let blankline_1 = linebreak_1 && (one.ends_with(b"\n\n") || (one.ends_with(b"\n\r\n")));

+ let blankline_2 = linebreak_2 && (two.starts_with(b"\r\n\n") || two.starts_with(b"\r\n\r\n") || two.starts_with(b"\n\r\n") || two.starts_with(b"\n\n"));

+ // println!("Non-alphanum: [{} {}] Whitespace: [{whitespace_1} {whitespace_2}] Linebreak: [{linebreak_1} {linebreak_2}] Blankline: [{blankline_1} {blankline_2}] Blanklinematch: [{} {}]", !char1.is_alphanumeric(), !char2.is_alphanumeric(), one.ends_with(b"\n"), (two.starts_with(b"\n") || two.starts_with(b"\r")));

+ if blankline_1 || blankline_2 {

+ // 5 for blank lines

+ 5

+ } else if linebreak_1 || linebreak_2 {

+ // Four points for line breaks.

+ 4

+ } else if !char1.is_alphanumeric() && !whitespace_1 && whitespace_2 {

+ // Three points for end of sentences.

+ 3

+ } else if whitespace_1 || whitespace_2 {

+ // 2 for whitespace

+ 2

+ } else if !char1.is_alphanumeric() || !char2.is_alphanumeric() {

+ // 1 for not alphanumeric

+ 1

+ } else {

+ 0

+ }

}

// Reorder and merge like edit sections. Merge equalities.

@@ -1068,15 +1095,12 @@ mod tests {

// const tests = [

// 'testDiffIsDestructurable', // TODO

- // 'testDiffCommonOverlap',

- // 'testDiffCleanupSemanticLossless',

// 'testDiffCleanupEfficiency',

// 'testDiffPrettyHtml',

// 'testDiffText',

// 'testDiffDelta',

// 'testDiffXIndex',

// 'testDiffLevenshtein',

- // 'testDiffBisect',

// 'testMatchAlphabet',

// 'testMatchBitap',

// 'testMatchMain',

@@ -1561,7 +1585,7 @@ mod tests {

}

#[test]

- fn test_diff_cleanup_semantic() {

+ fn test_diff_cleanup_semantic_overall() {

// Cleanup semantically trivial equalities.

// Null case.

let mut diffs = vec![];

@@ -1661,29 +1685,220 @@ mod tests {

];

DiffMatchPatch::cleanup_semantic(&mut diffs);

assert_eq!(test, diffs);

- // diffs = [[DIFF_EQUAL, 'The c'], [DIFF_DELETE, 'ow and the c'], [DIFF_EQUAL, 'at.']];

- // dmp.diff_cleanupSemantic(diffs);

- // assertEquivalent([[DIFF_EQUAL, 'The '], [DIFF_DELETE, 'cow and the '], [DIFF_EQUAL, 'cat.']], diffs);

- // // No overlap elimination.

- // diffs = [[DIFF_DELETE, 'abcxx'], [DIFF_INSERT, 'xxdef']];

- // dmp.diff_cleanupSemantic(diffs);

- // assertEquivalent([[DIFF_DELETE, 'abcxx'], [DIFF_INSERT, 'xxdef']], diffs);

- // // Overlap elimination.

- // diffs = [[DIFF_DELETE, 'abcxxx'], [DIFF_INSERT, 'xxxdef']];

- // dmp.diff_cleanupSemantic(diffs);

- // assertEquivalent([[DIFF_DELETE, 'abc'], [DIFF_EQUAL, 'xxx'], [DIFF_INSERT, 'def']], diffs);

- // // Reverse overlap elimination.

- // diffs = [[DIFF_DELETE, 'xxxabc'], [DIFF_INSERT, 'defxxx']];

- // dmp.diff_cleanupSemantic(diffs);

- // assertEquivalent([[DIFF_INSERT, 'def'], [DIFF_EQUAL, 'xxx'], [DIFF_DELETE, 'abc']], diffs);

- // // Two overlap eliminations.

- // diffs = [[DIFF_DELETE, 'abcd1212'], [DIFF_INSERT, '1212efghi'], [DIFF_EQUAL, '----'], [DIFF_DELETE, 'A3'], [DIFF_INSERT, '3BC']];

- // dmp.diff_cleanupSemantic(diffs);

- // assertEquivalent([[DIFF_DELETE, 'abcd'], [DIFF_EQUAL, '1212'], [DIFF_INSERT, 'efghi'], [DIFF_EQUAL, '----'], [DIFF_DELETE, 'A'], [DIFF_EQUAL, '3'], [DIFF_INSERT, 'BC']], diffs);

+ // No overlap elimination.

+ let mut diffs = vec![

+ Diff::delete(b"abcxx"),

+ Diff::insert(b"xxdef"),

+ ];

+ let test: Vec<Diff> = vec![

+ Diff::delete(b"abcxx"),

+ Diff::insert(b"xxdef"),

+ ];

+ DiffMatchPatch::cleanup_semantic(&mut diffs);

+ assert_eq!(test, diffs);

+ // Overlap elimination.

+ let mut diffs = vec![

+ Diff::delete(b"abcxxx"),

+ Diff::insert(b"xxxdef"),

+ ];

+ let test: Vec<Diff> = vec![

+ Diff::delete(b"abc"),

+ Diff::equal(b"xxx"),

+ Diff::insert(b"def")

+ ];

+ DiffMatchPatch::cleanup_semantic(&mut diffs);

+ assert_eq!(test, diffs);

+ // Reverse overlap elimination.

+ let mut diffs = vec![

+ Diff::delete(b"xxxabc"),

+ Diff::insert(b"defxxx"),

+ ];

+ let test: Vec<Diff> = vec![

+ Diff::insert(b"def"),

+ Diff::equal(b"xxx"),

+ Diff::delete(b"abc")

+ ];

+ DiffMatchPatch::cleanup_semantic(&mut diffs);

+ assert_eq!(test, diffs);

+ // Two overlap eliminations.

+ let mut diffs = vec![

+ Diff::delete(b"abcd1212"),

+ Diff::insert(b"1212efghi"),

+ Diff::equal(b"----"),

+ Diff::delete(b"A3"),

+ Diff::insert(b"3BC"),

+ ];

+ let test: Vec<Diff> = vec![

+ Diff::delete(b"abcd"),

+ Diff::equal(b"1212"),

+ Diff::insert(b"efghi"),

+ Diff::equal(b"----"),

+ Diff::delete(b"A"),

+ Diff::equal(b"3"),

+ Diff::insert(b"BC"),

+ ];

+ DiffMatchPatch::cleanup_semantic(&mut diffs);

+ assert_eq!(test, diffs);

+ }

+ #[test]

+ fn test_diff_cleanup_semantic_lossless() {

+ // Slide diffs to match logical boundaries.

+ // Null case.

+ let mut diffs: Vec<Diff> = vec![];

+ let test: Vec<Diff> = vec![];

+ DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);

+ assert_eq!(test, diffs);

+ // Blank lines.

+ let mut diffs: Vec<Diff> = vec![

+ Diff::equal(b"AAA\r\n\r\nBBB"),

+ Diff::insert(b"\r\nDDD\r\n\r\nBBB"),

+ Diff::equal(b"\r\nEEE")

+ ];

+ let test: Vec<Diff> = vec![

+ Diff::equal(b"AAA\r\n\r\n"),

+ Diff::insert(b"BBB\r\nDDD\r\n\r\n"),

+ Diff::equal(b"BBB\r\nEEE")

+ ];

+ DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);

+ assert_eq!(test, diffs);

+ // Line boundaries.

+ let mut diffs: Vec<Diff> = vec![

+ Diff::equal(b"AAA\r\nBBB"),

+ Diff::insert(b" DDD\r\nBBB"),

+ Diff::equal(b" EEE")

+ ];

+ let test: Vec<Diff> = vec![

+ Diff::equal(b"AAA\r\n"),

+ Diff::insert(b"BBB DDD\r\n"),

+ Diff::equal(b"BBB EEE")

+ ];

+ DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);

+ assert_eq!(test, diffs);

+ // Word boundaries.

+ let mut diffs: Vec<Diff> = vec![

+ Diff::equal(b"The c"),

+ Diff::insert(b"ow and the c"),

+ Diff::equal(b"at.")

+ ];

+ let test: Vec<Diff> = vec![

+ Diff::equal(b"The "),

+ Diff::insert(b"cow and the "),

+ Diff::equal(b"cat.")

+ ];

+ DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);

+ assert_eq!(test, diffs);

+ // Alphanumeric boundaries.

+ let mut diffs: Vec<Diff> = vec![

+ Diff::equal(b"The-c"),

+ Diff::insert(b"ow-and-the-c"),

+ Diff::equal(b"at.")

+ ];

+ let test: Vec<Diff> = vec![

+ Diff::equal(b"The-"),

+ Diff::insert(b"cow-and-the-"),

+ Diff::equal(b"cat.")

+ ];

+ DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);

+ assert_eq!(test, diffs);

+ // Hitting the start.

+ let mut diffs: Vec<Diff> = vec![

+ Diff::equal(b"a"),

+ Diff::delete(b"a"),

+ Diff::equal(b"ax")

+ ];

+ let test: Vec<Diff> = vec![

+ Diff::delete(b"a"),

+ Diff::equal(b"aax")

+ ];

+ DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);

+ assert_eq!(test, diffs);

+ // Hitting the end.

+ let mut diffs: Vec<Diff> = vec![

+ Diff::equal(b"xa"),

+ Diff::delete(b"a"),

+ Diff::equal(b"a")

+ ];

+ let test: Vec<Diff> = vec![

+ Diff::equal(b"xaa"),

+ Diff::delete(b"a"),

+ ];

+ DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);

+ assert_eq!(test, diffs);

+ // Sentence boundaries.

+ let mut diffs: Vec<Diff> = vec![

+ Diff::equal(b"The xxx. The "),

+ Diff::insert(b"zzz. The "),

+ Diff::equal(b"yyy.")

+ ];

+ let test: Vec<Diff> = vec![

+ Diff::equal(b"The xxx."),

+ Diff::insert(b" The zzz."),

+ Diff::equal(b" The yyy."),

+ ];

+ DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);

+ assert_eq!(test, diffs);

+ }

+ #[test]

+ fn test_diff_common_overlap() {

+ // Detect any suffix/prefix overlap.

+ // Null case

+ assert_eq!(0, DiffMatchPatch::common_overlap(b"", b"abcd"));

+ // Whole case.

+ assert_eq!(3, DiffMatchPatch::common_overlap(b"abc", b"abcd"));

+ // No overlap.

+ assert_eq!(0, DiffMatchPatch::common_overlap(b"123456", b"abcd"));

+ // Overlap.

+ assert_eq!(3, DiffMatchPatch::common_overlap(b"123456xxx", b"xxxabcd"));

+ // Unicode.

+ // Some overly clever languages (C#) may treat ligatures as equal to their

+ // component letters. E.g. U+FB01 == 'fi'

+ assert_eq!(0, DiffMatchPatch::common_overlap(b"fi", "\u{7FFF}".as_bytes()));

+ // Complete overlap

+ assert_eq!(6, DiffMatchPatch::common_overlap("❤️".as_bytes(), "❤️".as_bytes()));

+ // Partial unicode overlap

+ assert_eq!(0, DiffMatchPatch::common_overlap("ஆ".as_bytes(), "இ".as_bytes()));

+ assert_eq!(3, DiffMatchPatch::common_overlap("ஆஇ".as_bytes(), "இ".as_bytes()));

+ }

+ #[test]

+ fn test_diff_bisect() {

+ // Normal.

+ // var a = 'cat';

+ // var b = 'map';

+ // Since the resulting diff hasn't been normalized, it would be ok if

+ // the insertion and deletion pairs are swapped.

+ // If the order changes, tweak this test as required.

+ // assert_eq!(vec![

+ // Diff::delete(b"c"),

+ // Diff::insert(b"m"),

+ // Diff::equal(b"a"),

+ // Diff::delete(b"t"),

+ // Diff::insert(b"p")

+ // ], Self::diff_bisect());

+ // assertEquivalent([[DIFF_DELETE, 'c'], [DIFF_INSERT, 'm'], [DIFF_EQUAL, 'a'], [DIFF_DELETE, 't'], [DIFF_INSERT, 'p']], dmp.diff_bisect_(a, b, Number.MAX_VALUE));

+ // // Timeout.

+ // assertEquivalent([[DIFF_DELETE, 'cat'], [DIFF_INSERT, 'map']], dmp.diff_bisect_(a, b, 0));

}

#[test]