my fork of dmp
Diffstat (limited to 'src/dmp.rs')
| -rw-r--r-- | src/dmp.rs | 988 |
1 files changed, 102 insertions, 886 deletions
@@ -159,8 +159,8 @@ impl DiffMatchPatch { self.timeout = tout; } - // creates a deadline from the given timeout - fn deadline(&self) -> Option<NaiveTime> { + /// creates a deadline from the given timeout + pub fn deadline(&self) -> Option<NaiveTime> { self.timeout() .and_then(|t| Utc::now().checked_add_signed(TimeDelta::milliseconds(t))) .map(|t| t.time()) @@ -1649,7 +1649,7 @@ impl DiffMatchPatch { last_char2 + (loc - last_char1) } - fn diff_text_old(diffs: &[Diff<u8>]) -> Vec<u8> { + pub fn diff_text_old(diffs: &[Diff<u8>]) -> Vec<u8> { diffs .iter() .filter_map(|diff| { @@ -1663,7 +1663,7 @@ impl DiffMatchPatch { .concat() } - fn diff_text_new(diffs: &[Diff<u8>]) -> Vec<u8> { + pub fn diff_text_new(diffs: &[Diff<u8>]) -> Vec<u8> { diffs .iter() .filter_map(|diff| { @@ -2159,6 +2159,11 @@ impl Display for Patch { } } +impl Patch { + pub fn diffs(&self) -> &[Diff<u8>] { + &self.diffs[..] + } +} pub enum PatchInput<'a> { Texts(&'a str, &'a str), Diffs(&'a [Diff<u8>]), @@ -2983,18 +2988,7 @@ impl DiffMatchPatch { #[cfg(test)] mod tests { - use std::{collections::HashMap, time::Instant}; - - use chrono::Utc; - - use crate::dmp::{Diff, HalfMatch, LineToChars}; - - use super::{DiffMatchPatch, Ops, Patch, PatchInput}; - - // const tests = [ - // 'testDiffIsDestructurable', - // 'testDiffCleanupEfficiency', - // ]; + use crate::{dmp::{Diff, HalfMatch, LineToChars}, DiffMatchPatch, Error, Patch, PatchInput}; #[test] fn test_prefix() { @@ -3041,121 +3035,6 @@ mod tests { } #[test] - fn test_diff_half_match() { - let mut dmp = DiffMatchPatch::default(); - - // No match - assert!(dmp - .half_match("1234567890".as_bytes(), "abcdef".as_bytes()) - .is_none()); - assert!(dmp - .half_match("12345".as_bytes(), "23".as_bytes()) - .is_none()); - - // Single Match. - assert_eq!( - Some(HalfMatch { - prefix_long: "12".as_bytes(), - suffix_long: "90".as_bytes(), - prefix_short: "a".as_bytes(), - suffix_short: "z".as_bytes(), - common: "345678".as_bytes() - }), - dmp.half_match("1234567890".as_bytes(), "a345678z".as_bytes()) - ); - assert_eq!( - Some(HalfMatch { - prefix_long: "a".as_bytes(), - suffix_long: "z".as_bytes(), - prefix_short: "12".as_bytes(), - suffix_short: "90".as_bytes(), - common: "345678".as_bytes() - }), - dmp.half_match("a345678z".as_bytes(), "1234567890".as_bytes()) - ); - assert_eq!( - Some(HalfMatch { - prefix_long: "abc".as_bytes(), - suffix_long: "z".as_bytes(), - prefix_short: "1234".as_bytes(), - suffix_short: "0".as_bytes(), - common: "56789".as_bytes() - }), - dmp.half_match("abc56789z".as_bytes(), "1234567890".as_bytes()) - ); - assert_eq!( - Some(HalfMatch { - prefix_long: "a".as_bytes(), - suffix_long: "xyz".as_bytes(), - prefix_short: "1".as_bytes(), - suffix_short: "7890".as_bytes(), - common: "23456".as_bytes() - }), - dmp.half_match("a23456xyz".as_bytes(), "1234567890".as_bytes()) - ); - - // Multiple Matches. - assert_eq!( - Some(HalfMatch { - prefix_long: "12123".as_bytes(), - suffix_long: "123121".as_bytes(), - prefix_short: "a".as_bytes(), - suffix_short: "z".as_bytes(), - common: "1234123451234".as_bytes() - }), - dmp.half_match( - "121231234123451234123121".as_bytes(), - "a1234123451234z".as_bytes() - ) - ); - assert_eq!( - Some(HalfMatch { - prefix_long: "".as_bytes(), - suffix_long: "-=-=-=-=-=".as_bytes(), - prefix_short: "x".as_bytes(), - suffix_short: "".as_bytes(), - common: "x-=-=-=-=-=-=-=".as_bytes() - }), - dmp.half_match( - "x-=-=-=-=-=-=-=-=-=-=-=-=".as_bytes(), - "xx-=-=-=-=-=-=-=".as_bytes() - ) - ); - assert_eq!( - Some(HalfMatch { - prefix_long: "-=-=-=-=-=".as_bytes(), - suffix_long: "".as_bytes(), - prefix_short: "".as_bytes(), - suffix_short: "y".as_bytes(), - common: "-=-=-=-=-=-=-=y".as_bytes() - }), - dmp.half_match( - "-=-=-=-=-=-=-=-=-=-=-=-=y".as_bytes(), - "-=-=-=-=-=-=-=yy".as_bytes() - ) - ); - - // Non-optimal halfmatch. - // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy - assert_eq!( - Some(HalfMatch { - prefix_long: "qHillo".as_bytes(), - suffix_long: "w".as_bytes(), - prefix_short: "x".as_bytes(), - suffix_short: "Hulloy".as_bytes(), - common: "HelloHe".as_bytes() - }), - dmp.half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes()) - ); - - // Optimal no halfmatch. - dmp.timeout = None; - assert!(dmp - .half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes()) - .is_none()); - } - - #[test] fn test_diff_lines_to_chars() { // Convert lines down to characters. assert_eq!( @@ -3599,31 +3478,6 @@ mod tests { } #[test] - fn test_diff_levenshtein() { - let dmp = DiffMatchPatch::new(); - let diffs = vec![ - Diff::delete(b"abc"), - Diff::insert(b"1234"), - Diff::equal(b"xyz"), - ]; - assert_eq!(4, dmp.diff_levenshtein(&diffs)); - - let diffs = vec![ - Diff::equal(b"xyz"), - Diff::delete(b"abc"), - Diff::insert(b"1234"), - ]; - assert_eq!(4, dmp.diff_levenshtein(&diffs)); - - let diffs = vec![ - Diff::delete(b"abc"), - Diff::equal(b"xyz"), - Diff::insert(b"1234"), - ]; - assert_eq!(7, dmp.diff_levenshtein(&diffs)); - } - - #[test] fn test_diff_x_index() { // Translate a location in text1 to text2. let diffs = vec![ @@ -3682,401 +3536,118 @@ mod tests { } #[test] - fn test_diff_bisect() -> Result<(), crate::errors::Error> { + fn test_diff_half_match() { let mut dmp = DiffMatchPatch::default(); - // Normal. - // Since the resulting diff hasn't been normalized, it would be ok if - // the insertion and deletion pairs are swapped. - // If the order changes, tweak this test as required. - assert_eq!( - vec![ - Diff::delete(b"c"), - Diff::insert(b"m"), - Diff::equal(b"a"), - Diff::delete(b"t"), - Diff::insert(b"p") - ], - dmp.bisect(b"cat", b"map", None)? - ); - - // Timeout. - dmp.timeout = Some(0); - let deadline = dmp.deadline(); - assert_eq!( - vec![Diff::delete(b"cat"), Diff::insert(b"map"),], - dmp.bisect(b"cat", b"map", deadline)? - ); - - Ok(()) - } - - #[test] - fn test_diff_pretty_html() -> Result<(), crate::errors::Error> { - let dmp = DiffMatchPatch::new(); - // Basic - let diffs = [ - Diff::equal(b"a\n"), - Diff::delete(b"<B>b</B>"), - Diff::insert(b"c&d"), - ]; - assert_eq!("<span>a¶<br></span><del style=\"background:#ffe6e6;\"><B>b</B></del><ins style=\"background:#e6ffe6;\">c&d</ins>", dmp.diff_pretty_html(&diffs)?); - - // Monkey busiess around Emoticons and extended utf-8 π€ͺπ€©π€ - // This gave me a lot of heart-burn - - // Case 1. Two similar emoticons - // In bytes representation, these would have the last u8 different - // Which means the the diff should an equality block of 3 bytes folloed by insert and delete - let old = "π€ͺ"; // [240, 159, 164, 170] - let new = "π€"; // [240, 159, 164, 148] - let diffs = dmp.diff_main(old, new)?; - assert_eq!( - "<span></span><del style=\"background:#ffe6e6;\">π€ͺ</del><ins style=\"background:#e6ffe6;\">π€</ins>", - dmp.diff_pretty_html(&diffs)? - ); - - // Now Case 1. but with some text before and after - let old = "I'm puzzledπ€ͺ or am I?"; - let new = "I'm puzzledπ€ or thinking I guess!"; - let diffs = dmp.diff_main(old, new)?; - assert_eq!( - "<span>I'm puzzled</span><del style=\"background:#ffe6e6;\">π€ͺ</del><ins style=\"background:#e6ffe6;\">π€</ins><span> or </span><del style=\"background:#ffe6e6;\">am I?</del><ins style=\"background:#e6ffe6;\">thinking I guess!</ins>", - dmp.diff_pretty_html(&diffs)? - ); - - // Case 2. Emoticons with the third position different - let old = "π"; // [240, 159, 141, 138] - let new = "π"; // [240, 159, 140, 138] - let diffs = dmp.diff_main(old, new)?; - assert_eq!( - "<span></span><del style=\"background:#ffe6e6;\">π</del><ins style=\"background:#e6ffe6;\">π</ins>", - dmp.diff_pretty_html(&diffs)? - ); - - // Now Case 2. but with some text, lets complicate this - let old = "π, aah orange is the new black!"; // [240, 159, 141, 138] - let new = "Aah orange!πis the new π"; // [240, 159, 140, 138] - let diffs = dmp.diff_main(old, new)?; - assert_eq!( - "<del style=\"background:#ffe6e6;\">π, a</del><ins style=\"background:#e6ffe6;\">A</ins><span>ah orange</span><del style=\"background:#ffe6e6;\"> </del><ins style=\"background:#e6ffe6;\">!π</ins><span>is the new </span><del style=\"background:#ffe6e6;\">black!</del><ins style=\"background:#e6ffe6;\">π</ins>", - dmp.diff_pretty_html(&diffs)? - ); + // No match + assert!(dmp + .half_match("1234567890".as_bytes(), "abcdef".as_bytes()) + .is_none()); + assert!(dmp + .half_match("12345".as_bytes(), "23".as_bytes()) + .is_none()); - // Case 3. with second and third different, but lets complicate this with an equality - let old = "π "; // [240, 160, 140, 138] - let new = "π "; // [240, 150, 160, 138] - let diffs = dmp.diff_main(old, new)?; + // Single Match. assert_eq!( - "<span></span><ins style=\"background:#e6ffe6;\">π </ins><del style=\"background:#ffe6e6;\">π </del>", - dmp.diff_pretty_html(&diffs)? + Some(HalfMatch { + prefix_long: "12".as_bytes(), + suffix_long: "90".as_bytes(), + prefix_short: "a".as_bytes(), + suffix_short: "z".as_bytes(), + common: "345678".as_bytes() + }), + dmp.half_match("1234567890".as_bytes(), "a345678z".as_bytes()) ); - - // Case 3. but let there be a swap - let old = "π "; // [240, 158, 160, 132] - let new = std::str::from_utf8(&[240, 160, 158, 132]).unwrap(); // basically an undefined element `π `. Should still work - let diffs = dmp.diff_main(old, new)?; assert_eq!( - "<span></span><del style=\"background:#ffe6e6;\">π </del><ins style=\"background:#e6ffe6;\">π </ins>", - dmp.diff_pretty_html(&diffs)? + Some(HalfMatch { + prefix_long: "a".as_bytes(), + suffix_long: "z".as_bytes(), + prefix_short: "12".as_bytes(), + suffix_short: "90".as_bytes(), + common: "345678".as_bytes() + }), + dmp.half_match("a345678z".as_bytes(), "1234567890".as_bytes()) ); - - // Case 4. swap at the last 2 positions - let old = "π"; // [240, 159, 141, 140] -- FINALLY A BANANA - let new = "π"; // [240, 159, 140, 141] -- interesting revelation - last 2 bytes swapped and π becomes π. Guess the world is going `Bananas!!` - let diffs = dmp.diff_main(old, new)?; assert_eq!( - "<span></span><del style=\"background:#ffe6e6;\">π</del><ins style=\"background:#e6ffe6;\">π</ins>", - dmp.diff_pretty_html(&diffs)? + Some(HalfMatch { + prefix_long: "abc".as_bytes(), + suffix_long: "z".as_bytes(), + prefix_short: "1234".as_bytes(), + suffix_short: "0".as_bytes(), + common: "56789".as_bytes() + }), + dmp.half_match("abc56789z".as_bytes(), "1234567890".as_bytes()) ); - - // Let's do this with a slightly longish string - let old = "Now, let's explore some emotional extremes π.\nWe've got your ecstatic face π€©, your devastated face π, and your utterly confused face π€―. But that's not all! π€ We've also got some subtle emotions like π, π, and π."; - let new = "Let's start with some basics π.\nWe've got your standard smiley face π, your sad face βΉοΈ, and your angry face π . But wait, there's more! π€© We've also got some more complex emotions like π, π€€, and π. And let's not forget about the classics: π, π, and π."; - let diffs = dmp.diff_main(old, new)?; - assert_eq!( - "<del style=\"background:#ffe6e6;\">Now, let's explore some emotional extreme</del><ins style=\"background:#e6ffe6;\">Let's start with some basic</ins><span>s </span><del style=\"background:#ffe6e6;\">π</del><ins style=\"background:#e6ffe6;\">π</ins><span>.¶<br>We've got your </span><del style=\"background:#ffe6e6;\">ec</del><span>sta</span><del style=\"background:#ffe6e6;\">tic</del><ins style=\"background:#e6ffe6;\">ndard smiley</ins><span> face </span><del style=\"background:#ffe6e6;\">π€©</del><ins style=\"background:#e6ffe6;\">π</ins><span>, your </span><del style=\"background:#ffe6e6;\">devastate</del><ins style=\"background:#e6ffe6;\">sa</ins><span>d face </span><del style=\"background:#ffe6e6;\">π</del><ins style=\"background:#e6ffe6;\">βΉοΈ</ins><span>, and your </span><del style=\"background:#ffe6e6;\">utterly confused</del><ins style=\"background:#e6ffe6;\">angry</ins><span> face </span><del style=\"background:#ffe6e6;\">π€―</del><ins style=\"background:#e6ffe6;\">π </ins><span>. But </span><del style=\"background:#ffe6e6;\">that's not all</del><ins style=\"background:#e6ffe6;\">wait, there's more</ins><span>! </span><del style=\"background:#ffe6e6;\">π€</del><ins style=\"background:#e6ffe6;\">π€©</ins><span> We've also got some </span><del style=\"background:#ffe6e6;\">subt</del><ins style=\"background:#e6ffe6;\">more comp</ins><span>le</span><ins style=\"background:#e6ffe6;\">x</ins><span> emotions like </span><del style=\"background:#ffe6e6;\">π</del><ins style=\"background:#e6ffe6;\">π, π€€, and π. And let's not forget about the classics: π</ins><span>, </span><del style=\"background:#ffe6e6;\">π</del><ins style=\"background:#e6ffe6;\">π</ins><span>, and </span><del style=\"background:#ffe6e6;\">π</del><ins style=\"background:#e6ffe6;\">π</ins><span>.</span>", - dmp.diff_pretty_html(&diffs)? + Some(HalfMatch { + prefix_long: "a".as_bytes(), + suffix_long: "xyz".as_bytes(), + prefix_short: "1".as_bytes(), + suffix_short: "7890".as_bytes(), + common: "23456".as_bytes() + }), + dmp.half_match("a23456xyz".as_bytes(), "1234567890".as_bytes()) ); - Ok(()) - } - - #[test] - fn test_diff_main() -> Result<(), crate::errors::Error> { - let mut dmp = DiffMatchPatch::default(); - - // Perform a trivial diff. - // Null case. - assert!(dmp.diff_main("", "")?.is_empty()); - - // Equality - assert_eq!(vec![Diff::equal(b"abc")], dmp.diff_main("abc", "abc")?); - - // Simple insert + // Multiple Matches. assert_eq!( - vec![Diff::equal(b"ab"), Diff::insert(b"123"), Diff::equal(b"c")], - dmp.diff_main("abc", "ab123c")? + Some(HalfMatch { + prefix_long: "12123".as_bytes(), + suffix_long: "123121".as_bytes(), + prefix_short: "a".as_bytes(), + suffix_short: "z".as_bytes(), + common: "1234123451234".as_bytes() + }), + dmp.half_match( + "121231234123451234123121".as_bytes(), + "a1234123451234z".as_bytes() + ) ); - - // Simple delete assert_eq!( - vec![Diff::equal(b"a"), Diff::delete(b"123"), Diff::equal(b"bc")], - dmp.diff_main("a123bc", "abc")? + Some(HalfMatch { + prefix_long: "".as_bytes(), + suffix_long: "-=-=-=-=-=".as_bytes(), + prefix_short: "x".as_bytes(), + suffix_short: "".as_bytes(), + common: "x-=-=-=-=-=-=-=".as_bytes() + }), + dmp.half_match( + "x-=-=-=-=-=-=-=-=-=-=-=-=".as_bytes(), + "xx-=-=-=-=-=-=-=".as_bytes() + ) ); - - // Two insertions assert_eq!( - vec![ - Diff::equal(b"a"), - Diff::insert(b"123"), - Diff::equal(b"b"), - Diff::insert(b"456"), - Diff::equal(b"c"), - ], - dmp.diff_main("abc", "a123b456c")? + Some(HalfMatch { + prefix_long: "-=-=-=-=-=".as_bytes(), + suffix_long: "".as_bytes(), + prefix_short: "".as_bytes(), + suffix_short: "y".as_bytes(), + common: "-=-=-=-=-=-=-=y".as_bytes() + }), + dmp.half_match( + "-=-=-=-=-=-=-=-=-=-=-=-=y".as_bytes(), + "-=-=-=-=-=-=-=yy".as_bytes() + ) ); - // Two deletions. + // Non-optimal halfmatch. + // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy assert_eq!( - vec![ - Diff::equal(b"a"), - Diff::delete(b"123"), - Diff::equal(b"b"), - Diff::delete(b"456"), - Diff::equal(b"c"), - ], - dmp.diff_main("a123b456c", "abc")? + Some(HalfMatch { + prefix_long: "qHillo".as_bytes(), + suffix_long: "w".as_bytes(), + prefix_short: "x".as_bytes(), + suffix_short: "Hulloy".as_bytes(), + common: "HelloHe".as_bytes() + }), + dmp.half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes()) ); - // Perform a real diff. - // Switch off the timeout. + // Optimal no halfmatch. dmp.timeout = None; - // Simple cases. - assert_eq!( - vec![Diff::delete(b"a"), Diff::insert(b"b"),], - dmp.diff_main("a", "b")? - ); - - assert_eq!( - vec![ - Diff::delete(b"Apple"), - Diff::insert(b"Banana"), - Diff::equal(b"s are a"), - Diff::insert(b"lso"), - Diff::equal(b" fruit.") - ], - dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.")? - ); - - assert_eq!( - vec![ - Diff::delete(b"a"), - Diff::insert("\u{0680}".as_bytes()), - Diff::equal(b"x"), - Diff::delete(b"\t"), - Diff::insert(b"\0") - ], - dmp.diff_main("ax\t", "\u{0680}x\0")? - ); - - // Overlaps. - assert_eq!( - vec![ - Diff::delete(b"1"), - Diff::equal(b"a"), - Diff::delete(b"y"), - Diff::equal(b"b"), - Diff::delete(b"2"), - Diff::insert(b"xab"), - ], - dmp.diff_main("1ayb2", "abxab")? - ); - - assert_eq!( - vec![ - Diff::insert(b"xaxcx"), - Diff::equal(b"abc"), - Diff::delete(b"y"), - ], - dmp.diff_main("abcy", "xaxcxabc")? - ); - - assert_eq!( - vec![ - Diff::delete(b"ABCD"), - Diff::equal(b"a"), - Diff::delete(b"="), - Diff::insert(b"-"), - Diff::equal(b"bcd"), - Diff::delete(b"="), - Diff::insert(b"-"), - Diff::equal(b"efghijklmnopqrs"), - Diff::delete(b"EFGHIJKLMNOefg"), - ], - dmp.diff_main( - "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", - "a-bcd-efghijklmnopqrs" - )? - ); - - // Large equality. - assert_eq!( - vec![ - Diff::insert(b" "), - Diff::equal(b"a"), - Diff::insert(b"nd"), - Diff::equal(b" [[Hepatopancreatic]]"), - Diff::delete(b" and [[New"), - ], - dmp.diff_main( - "a [[Hepatopancreatic]] and [[New", - " and [[Hepatopancreatic]]" - )? - ); - - // Timeout. - const LOW_TIMEOUT: u32 = 100; - dmp.set_timeout(Some(LOW_TIMEOUT)); - let a = vec!["`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; 2048].join(""); - let b = vec!["I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; 2048].join(""); - - let start = Utc::now().time(); - dmp.diff_main(&a, &b)?; - let end = Utc::now().time(); - // Test that we took at least the timeout period (+ 5ms being generous). - assert!((end - start).num_milliseconds() <= LOW_TIMEOUT as i64 + 5); - - // Test the linemode speedup. - // Must be long to pass the 100 char cutoff. - // Simple line-mode. - dmp.timeout = Some(1000); - let a = "12345678901234567890123456789 0123456 78901234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; - let b = "abcdefghij abcdefghij abcdefghij abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; - dmp.set_checklines(false); - let res_no_lm = dmp.diff_main(a, b)?; - dmp.set_checklines(true); - let res_yes_lm = dmp.diff_main(a, b)?; - - // Now, we'll run 2 checks - one for result equality - assert_eq!(res_no_lm, res_yes_lm); - - // Single line-mode. - let a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; - let b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; - dmp.set_checklines(false); - let yes_lm = dmp.diff_main(a, b)?; - dmp.set_checklines(true); - let no_lm = dmp.diff_main(a, b)?; - assert_eq!(no_lm, yes_lm); - - // Overlap line-mode. - let a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; - let b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; - dmp.set_checklines(false); - let no_lm = dmp.diff_main(a, b)?; - dmp.set_checklines(true); - let yes_lm = dmp.diff_main(a, b)?; - assert_eq!(rebuild_text(&yes_lm[..])?, rebuild_text(&no_lm[..])?); - - // Benefits of checklines can only be realized in text with many lines - let mut dmp = DiffMatchPatch::default(); - let old = std::fs::read_to_string("testdata/txt_old.txt").unwrap(); - let new = std::fs::read_to_string("testdata/txt_new.txt").unwrap(); - - let start = Instant::now(); - let diff_yes_lm = dmp.diff_main(&old, &new); - let yes_lm_dur = Instant::now() - start; - assert!(diff_yes_lm.is_ok()); - - dmp.set_checklines(false); - let start = Instant::now(); - let diff_no_lm = dmp.diff_main(&old, &new); - let no_lm_dur = Instant::now() - start; - assert!(diff_no_lm.is_ok()); - - assert!(no_lm_dur > yes_lm_dur); - Ok(()) - } - - #[test] - fn test_diff_delta() -> Result<(), crate::errors::Error> { - let diffs = vec![ - Diff::equal(b"jump"), - Diff::delete(b"s"), - Diff::insert(b"ed"), - Diff::equal(b" over "), - Diff::delete(b"the"), - Diff::insert(b"a"), - Diff::equal(b" lazy"), - Diff::insert(b"old dog"), - ]; - let txt_old = "jumps over the lazy".as_bytes(); - assert_eq!( - txt_old, - DiffMatchPatch::diff_text_old(&diffs) - ); - - let delta = DiffMatchPatch::to_delta(&diffs); - assert_eq!("=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog".as_bytes(), &delta); - // Convert delta string into a diff. - assert_eq!(diffs, DiffMatchPatch::from_delta(txt_old, &delta)?); - - // Generates error (19 != 20). - assert!(DiffMatchPatch::from_delta(&[txt_old, "+".as_bytes()].concat()[..], &delta).is_err()); - - // Generates error (19 != 18). - assert!(DiffMatchPatch::from_delta(&txt_old[1..], &delta).is_err()); - - // Test deltas with special characters. - let diffs = vec![ - Diff::equal("\u{0680} \x00 \t %".as_bytes()), - Diff::delete("\u{0681} \x01 \n ^".as_bytes()), - Diff::insert("\u{0682} \x02 \\ |".as_bytes()) - ]; - let txt_old = DiffMatchPatch::diff_text_old(&diffs); - assert_eq!("\u{0680} \x00 \t %\u{0681} \x01 \n ^".as_bytes(), txt_old); - let delta = DiffMatchPatch::to_delta(&diffs); - - assert_eq!(b"=8\t-8\t+%DA%82 %02 %5C %7C", &delta[..]); - // Convert delta string into a diff. - assert_eq!(&diffs, &DiffMatchPatch::from_delta(&txt_old, &delta)?); - - // Verify pool of unchanged characters. - let diffs = vec![ - Diff::insert("A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ".as_bytes()) - ]; - let txt_new = DiffMatchPatch::diff_text_new(&diffs); - assert_eq!("A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", std::str::from_utf8(&txt_new).unwrap()); - - let delta = DiffMatchPatch::to_delta(&diffs); - assert_eq!("+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", std::str::from_utf8(&delta).unwrap()); - - // Convert delta string into a diff. - assert_eq!(diffs, DiffMatchPatch::from_delta("".as_bytes(), &delta)?); - Ok(()) - } - - // Helper to construct the two texts which made up the diff originally. - fn rebuild_text(diffs: &[Diff<u8>]) -> Result<(String, String), crate::errors::Error> { - let mut txt1 = vec![]; - let mut txt2 = vec![]; - - diffs.iter().for_each(|d| { - if d.op() != Ops::Insert { - txt1.push(d.data()); - } - - if d.op() != Ops::Delete { - txt2.push(d.data()); - } - }); - - Ok(( - String::from_utf8(txt1.concat()).map_err(|_| crate::errors::Error::Utf8Error)?, - String::from_utf8(txt2.concat()).map_err(|_| crate::errors::Error::Utf8Error)?, - )) + assert!(dmp + .half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes()) + .is_none()); } #[test] @@ -4103,7 +3674,7 @@ mod tests { } #[test] - fn test_patch_add_context() -> Result<(), crate::errors::Error> { + fn test_patch_add_context() -> Result<(), Error> { let dmp = DiffMatchPatch::default(); let mut ps = dmp.patch_from_text("@@ -21,4 +21,10 @@\n-jump\n+somersault\n")?; @@ -4145,115 +3716,6 @@ mod tests { } #[test] - fn test_patch_from_text() -> Result<(), crate::errors::Error> { - let dmp = DiffMatchPatch::new(); - - assert!(dmp.patch_from_text("")?.is_empty()); - - let strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; - assert_eq!( - strp, - dmp.patch_from_text( - strp - )?[0].to_string() - ); - - assert_eq!( - "@@ -1 +1 @@\n-a\n+b\n", - dmp.patch_from_text("@@ -1 +1 @@\n-a\n+b\n")?[0].to_string() - ); - - assert_eq!( - "@@ -1,3 +0,0 @@\n-abc\n", - dmp.patch_from_text("@@ -1,3 +0,0 @@\n-abc\n")?[0].to_string() - ); - - assert_eq!( - "@@ -0,0 +1,3 @@\n+abc\n", - dmp.patch_from_text("@@ -0,0 +1,3 @@\n+abc\n")?[0].to_string() - ); - - // Generates error. - assert!(dmp.patch_from_text("Bad\nPatch\n").is_err()); - - Ok(()) - } - - #[test] - fn test_patch_to_text() -> Result<(), crate::errors::Error> { - let dmp = DiffMatchPatch::new(); - - let strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; - let patches = dmp.patch_from_text(strp)?; - assert_eq!(strp, dmp.patch_to_text(&patches)); - - let strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n"; - let patches = dmp.patch_from_text(strp)?; - assert_eq!(strp, dmp.patch_to_text(&patches)); - - Ok(()) - } - - #[test] - fn test_patch_make() -> Result<(), crate::errors::Error> { - let dmp = DiffMatchPatch::default(); - let patches = dmp.patch_make(super::PatchInput::Texts("", ""))?; - assert!(patches.is_empty()); - - let txt1 = "The quick brown fox jumps over the lazy dog."; - let txt2 = "That quick brown fox jumped over a lazy dog."; - - // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context. - let patches = dmp.patch_make(crate::dmp::PatchInput::Texts(txt2, txt1))?; - assert_eq!("@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n", dmp.patch_to_text(&patches)); - - // Text1+Text2 inputs. - let patches = dmp.patch_make(crate::dmp::PatchInput::Texts(txt1, txt2))?; - assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches)); - - // Diff input. - let diffs = dmp.diff_main(txt1, txt2)?; - let patches = dmp.patch_make(crate::dmp::PatchInput::Diffs(&diffs[..]))?; - assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches)); - - // Text1+Diff inputs. - let patches = dmp.patch_make(crate::dmp::PatchInput::TextDiffs(txt1, &diffs[..]))?; - assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches)); - - // Character encoding. - let patches = dmp.patch_make(crate::dmp::PatchInput::Texts( - "`1234567890-=[]\\;',./", - "~!@#$%^&*()_+{}|:\"<>?", - ))?; - - assert_eq!( - "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", - dmp.patch_to_text(&patches) - ); - - // Character decoding. - let diffs = vec![ - Diff::delete(b"`1234567890-=[]\\;',./"), - Diff::insert(b"~!@#$%^&*()_+{}|:\"<>?"), - ]; - assert_eq!( - diffs, - dmp.patch_from_text("@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n")?[0].diffs - ); - - // Long string with repeats. - let txt1 = vec!["abcdef"; 100].join(""); - let txt2 = [&txt1, "123"].join(""); - let patches = dmp.patch_make(crate::dmp::PatchInput::Texts(&txt1, &txt2))?; - assert_eq!( - "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n", - dmp.patch_to_text(&patches) - ); - - Ok(()) - } - - #[test] fn test_parse_patch_header() { assert_eq!( Some((21, Some(4), 21, Some(10))), @@ -4270,29 +3732,7 @@ mod tests { } #[test] - fn test_diff_text() { - let diffs = vec![ - Diff::equal(b"jump"), - Diff::delete(b"s"), - Diff::insert(b"ed"), - Diff::equal(b" over "), - Diff::delete(b"the"), - Diff::insert(b"a"), - Diff::equal(b" lazy"), - ]; - - assert_eq!( - b"jumps over the lazy", - &DiffMatchPatch::diff_text_old(&diffs[..])[..] - ); - assert_eq!( - b"jumped over a lazy", - &DiffMatchPatch::diff_text_new(&diffs[..])[..] - ); - } - - #[test] - fn test_patch_add_padding() -> Result<(), crate::errors::Error> { + fn test_patch_add_padding() -> Result<(), Error> { let dmp = DiffMatchPatch::default(); // Both edges full. let mut patches = dmp.patch_make(PatchInput::Texts("", "test"))?; @@ -4336,7 +3776,7 @@ mod tests { } #[test] - fn test_patch_split_max() -> Result<(), crate::errors::Error> { + fn test_patch_split_max() -> Result<(), Error> { let dmp = DiffMatchPatch::default(); // Assumes that dmp.Match_MaxBits is 32. @@ -4380,228 +3820,4 @@ mod tests { Ok(()) } - - #[test] - fn test_patch_apply() -> Result<(), crate::errors::Error> { - let mut dmp = DiffMatchPatch::default(); - - let patches = dmp.patch_make(PatchInput::Texts("", ""))?; - let (txt, results) = dmp.patch_apply_internal(&patches, b"Hello world.")?; - assert_eq!( - format!("{}\t{}", std::str::from_utf8(&txt).unwrap(), results.len()), - "Hello world.\t0" - ); - - let patches = dmp.patch_make(PatchInput::Texts( - "The quick brown fox jumps over the lazy dog.", - "That quick brown fox jumped over a lazy dog.", - ))?; - - // Exact match - assert_eq!( - ( - b"That quick brown fox jumped over a lazy dog.".to_vec(), - vec![true, true] - ), - dmp.patch_apply_internal(&patches, b"The quick brown fox jumps over the lazy dog.")? - ); - - // Partial match - assert_eq!( - ( - b"That quick red rabbit jumped over a tired tiger.".to_vec(), - vec![true, true] - ), - dmp.patch_apply_internal( - &patches, - b"The quick red rabbit jumps over the tired tiger." - )? - ); - - // Failed match - assert_eq!( - ( - b"I am the very model of a modern major general.".to_vec(), - vec![false, false] - ), - dmp.patch_apply_internal(&patches, b"I am the very model of a modern major general.")? - ); - - // Big delete, small change - let patches = dmp.patch_make(PatchInput::Texts( - "x1234567890123456789012345678901234567890123456789012345678901234567890y", - "xabcy", - ))?; - assert_eq!((b"xabcy".to_vec(), vec![true, true]), dmp.patch_apply_internal(&patches, b"x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y")?); - - // Big delete, large change - let patches = dmp.patch_make(PatchInput::Texts( - "x1234567890123456789012345678901234567890123456789012345678901234567890y", - "xabcy", - ))?; - assert_eq!((b"xabc12345678901234567890---------------++++++++++---------------12345678901234567890y".to_vec(), vec![false, true]), dmp.patch_apply_internal(&patches, b"x12345678901234567890---------------++++++++++---------------12345678901234567890y")?); - - dmp.delete_threshold = 0.6; - let patches = dmp.patch_make(PatchInput::Texts( - "x1234567890123456789012345678901234567890123456789012345678901234567890y", - "xabcy", - ))?; - assert_eq!((b"xabcy".to_vec(), vec![true, true]), dmp.patch_apply_internal(&patches, b"x12345678901234567890---------------++++++++++---------------12345678901234567890y")?); - dmp.delete_threshold = 0.5; - - // Compesate for failed patch - dmp.match_threshold = 0.; - dmp.match_distance = 0; - let patches = dmp.patch_make(PatchInput::Texts( - "abcdefghijklmnopqrstuvwxyz--------------------1234567890", - "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890", - ))?; - assert_eq!( - ( - b"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890".to_vec(), - vec![false, true] - ), - dmp.patch_apply_internal( - &patches, - b"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890" - )? - ); - dmp.match_threshold = 0.5; - dmp.match_distance = 1000; - - // No side-effects - kinds useless cos patches is not mutable in rust - let patches = dmp.patch_make(PatchInput::Texts("", "test"))?; - let srcstr = dmp.patch_to_text(&patches); - dmp.patch_apply_internal(&patches, b"")?; - assert_eq!(srcstr, dmp.patch_to_text(&patches)); - - let patches = dmp.patch_make(PatchInput::Texts( - "The quick brown fox jumps over the lazy dog.", - "Woof", - ))?; - let srcstr = dmp.patch_to_text(&patches); - dmp.patch_apply_internal(&patches, b"The quick brown fox jumps over the lazy dog.")?; - assert_eq!(srcstr, dmp.patch_to_text(&patches)); - - // Edge exact match - let patches = dmp.patch_make(PatchInput::Texts("", "test"))?; - assert_eq!( - (b"test".to_vec(), vec![true]), - dmp.patch_apply_internal(&patches, b"")? - ); - - // Near edge exact match - let patches = dmp.patch_make(PatchInput::Texts("XY", "XtestY"))?; - assert_eq!( - (b"XtestY".to_vec(), vec![true]), - dmp.patch_apply_internal(&patches, b"XY")? - ); - - // Edge partial match - let patches = dmp.patch_make(PatchInput::Texts("y", "y123"))?; - assert_eq!( - (b"x123".to_vec(), vec![true]), - dmp.patch_apply_internal(&patches, b"x")? - ); - - Ok(()) - } - - #[test] - fn test_match_alphabet() { - // Initialise the bitmasks for Bitap. - // Unique. - assert_eq!( - HashMap::from([(b'a', 4), (b'b', 2), (b'c', 1)]), - DiffMatchPatch::match_alphabet(b"abc") - ); - - // Duplicates. - assert_eq!( - HashMap::from([(b'a', 37), (b'b', 18), (b'c', 8)]), - DiffMatchPatch::match_alphabet(b"abcaba") - ) - } - - #[test] - fn test_match_bitap() { - // Bitap algorithm. - let mut dmp = DiffMatchPatch { - match_distance: 100, - ..Default::default() - }; - - // Exact matches. - assert_eq!(Some(5), dmp.match_bitap(b"abcdefghijk", b"fgh", 5)); - assert_eq!(Some(5), dmp.match_bitap(b"abcdefghijk", b"fgh", 0)); - - // Fuzzy matches. - assert_eq!(Some(4), dmp.match_bitap(b"abcdefghijk", b"efxhi", 0)); - assert_eq!(Some(2), dmp.match_bitap(b"abcdefghijk", b"cdefxyhijk", 5)); - assert_eq!(None, dmp.match_bitap(b"abcdefghijk", b"bxy", 1)); - - // Overflow. - assert_eq!(Some(2), dmp.match_bitap(b"123456789xx0", b"3456789x0", 2)); - - // Threshold test. - dmp.match_threshold = 0.4; - assert_eq!(Some(4), dmp.match_bitap(b"abcdefghijk", b"efxyhi", 1)); - - // dmp.`match_threshold` = 0.3; - dmp.match_threshold = 0.3; - assert_eq!(None, dmp.match_bitap(b"abcdefghijk", b"efxyhi", 1)); - - dmp.match_threshold = 0.; - assert_eq!(Some(1), dmp.match_bitap(b"abcdefghijk", b"bcdef", 1)); - - dmp.match_threshold = 0.5; - - // Multiple select. - assert_eq!(Some(0), dmp.match_bitap(b"abcdexyzabcde", b"abccde", 3)); - assert_eq!(Some(8), dmp.match_bitap(b"abcdexyzabcde", b"abccde", 5)); - - // Distance test. - dmp.match_distance = 10; - assert_eq!( - None, - dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdefg", 24) - ); - assert_eq!( - Some(0), - dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdxxefg", 1) - ); - - dmp.match_distance = 1000; - assert_eq!( - Some(0), - dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdefg", 24) - ); - } - - #[test] - fn test_match_main() { - let dmp = DiffMatchPatch::default(); - // Full match. - // Shortcut matches. - assert_eq!(Some(0), dmp.match_internal(b"abcdef", b"abcdef", 1000)); - assert_eq!(None, dmp.match_internal(b"", b"abcdef", 1)); - assert_eq!(Some(3), dmp.match_internal(b"abcdef", b"", 3)); - assert_eq!(Some(3), dmp.match_internal(b"abcdef", b"de", 3)); - - // Beyond end match. - assert_eq!(Some(3), dmp.match_internal(b"abcdef", b"defy", 4)); - - // Oversized pattern. - assert_eq!(Some(0), dmp.match_internal(b"abcdef", b"abcdefy", 0)); - - // Complex match. - assert_eq!( - Some(4), - dmp.match_internal( - b"I am the very model of a modern major general.", - b" that berry ", - 5 - ) - ); - } } |