my fork of dmp
WIP: Separating pub-api tests
Anubhab Bandyopadhyay 2024-08-23
parent 37b33dd · commit a20624d
-rw-r--r-- src/dmp.rs 988
-rw-r--r-- src/lib.rs 2
-rw-r--r-- tests/test.rs 791
3 files changed, 894 insertions, 887 deletions
diff --git a/src/dmp.rs b/src/dmp.rs
index 41b46ef..28821e4 100644
--- a/src/dmp.rs
+++ b/src/dmp.rs
@@ -159,8 +159,8 @@ impl DiffMatchPatch {
self.timeout = tout;
}
- // creates a deadline from the given timeout
- fn deadline(&self) -> Option<NaiveTime> {
+ /// creates a deadline from the given timeout
+ pub fn deadline(&self) -> Option<NaiveTime> {
self.timeout()
.and_then(|t| Utc::now().checked_add_signed(TimeDelta::milliseconds(t)))
.map(|t| t.time())
@@ -1649,7 +1649,7 @@ impl DiffMatchPatch {
last_char2 + (loc - last_char1)
}
- fn diff_text_old(diffs: &[Diff<u8>]) -> Vec<u8> {
+ pub fn diff_text_old(diffs: &[Diff<u8>]) -> Vec<u8> {
diffs
.iter()
.filter_map(|diff| {
@@ -1663,7 +1663,7 @@ impl DiffMatchPatch {
.concat()
}
- fn diff_text_new(diffs: &[Diff<u8>]) -> Vec<u8> {
+ pub fn diff_text_new(diffs: &[Diff<u8>]) -> Vec<u8> {
diffs
.iter()
.filter_map(|diff| {
@@ -2159,6 +2159,11 @@ impl Display for Patch {
}
}
+impl Patch {
+ pub fn diffs(&self) -> &[Diff<u8>] {
+ &self.diffs[..]
+ }
+}
pub enum PatchInput<'a> {
Texts(&'a str, &'a str),
Diffs(&'a [Diff<u8>]),
@@ -2983,18 +2988,7 @@ impl DiffMatchPatch {
#[cfg(test)]
mod tests {
- use std::{collections::HashMap, time::Instant};
-
- use chrono::Utc;
-
- use crate::dmp::{Diff, HalfMatch, LineToChars};
-
- use super::{DiffMatchPatch, Ops, Patch, PatchInput};
-
- // const tests = [
- // 'testDiffIsDestructurable',
- // 'testDiffCleanupEfficiency',
- // ];
+ use crate::{dmp::{Diff, HalfMatch, LineToChars}, DiffMatchPatch, Error, Patch, PatchInput};
#[test]
fn test_prefix() {
@@ -3041,121 +3035,6 @@ mod tests {
}
#[test]
- fn test_diff_half_match() {
- let mut dmp = DiffMatchPatch::default();
-
- // No match
- assert!(dmp
- .half_match("1234567890".as_bytes(), "abcdef".as_bytes())
- .is_none());
- assert!(dmp
- .half_match("12345".as_bytes(), "23".as_bytes())
- .is_none());
-
- // Single Match.
- assert_eq!(
- Some(HalfMatch {
- prefix_long: "12".as_bytes(),
- suffix_long: "90".as_bytes(),
- prefix_short: "a".as_bytes(),
- suffix_short: "z".as_bytes(),
- common: "345678".as_bytes()
- }),
- dmp.half_match("1234567890".as_bytes(), "a345678z".as_bytes())
- );
- assert_eq!(
- Some(HalfMatch {
- prefix_long: "a".as_bytes(),
- suffix_long: "z".as_bytes(),
- prefix_short: "12".as_bytes(),
- suffix_short: "90".as_bytes(),
- common: "345678".as_bytes()
- }),
- dmp.half_match("a345678z".as_bytes(), "1234567890".as_bytes())
- );
- assert_eq!(
- Some(HalfMatch {
- prefix_long: "abc".as_bytes(),
- suffix_long: "z".as_bytes(),
- prefix_short: "1234".as_bytes(),
- suffix_short: "0".as_bytes(),
- common: "56789".as_bytes()
- }),
- dmp.half_match("abc56789z".as_bytes(), "1234567890".as_bytes())
- );
- assert_eq!(
- Some(HalfMatch {
- prefix_long: "a".as_bytes(),
- suffix_long: "xyz".as_bytes(),
- prefix_short: "1".as_bytes(),
- suffix_short: "7890".as_bytes(),
- common: "23456".as_bytes()
- }),
- dmp.half_match("a23456xyz".as_bytes(), "1234567890".as_bytes())
- );
-
- // Multiple Matches.
- assert_eq!(
- Some(HalfMatch {
- prefix_long: "12123".as_bytes(),
- suffix_long: "123121".as_bytes(),
- prefix_short: "a".as_bytes(),
- suffix_short: "z".as_bytes(),
- common: "1234123451234".as_bytes()
- }),
- dmp.half_match(
- "121231234123451234123121".as_bytes(),
- "a1234123451234z".as_bytes()
- )
- );
- assert_eq!(
- Some(HalfMatch {
- prefix_long: "".as_bytes(),
- suffix_long: "-=-=-=-=-=".as_bytes(),
- prefix_short: "x".as_bytes(),
- suffix_short: "".as_bytes(),
- common: "x-=-=-=-=-=-=-=".as_bytes()
- }),
- dmp.half_match(
- "x-=-=-=-=-=-=-=-=-=-=-=-=".as_bytes(),
- "xx-=-=-=-=-=-=-=".as_bytes()
- )
- );
- assert_eq!(
- Some(HalfMatch {
- prefix_long: "-=-=-=-=-=".as_bytes(),
- suffix_long: "".as_bytes(),
- prefix_short: "".as_bytes(),
- suffix_short: "y".as_bytes(),
- common: "-=-=-=-=-=-=-=y".as_bytes()
- }),
- dmp.half_match(
- "-=-=-=-=-=-=-=-=-=-=-=-=y".as_bytes(),
- "-=-=-=-=-=-=-=yy".as_bytes()
- )
- );
-
- // Non-optimal halfmatch.
- // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy
- assert_eq!(
- Some(HalfMatch {
- prefix_long: "qHillo".as_bytes(),
- suffix_long: "w".as_bytes(),
- prefix_short: "x".as_bytes(),
- suffix_short: "Hulloy".as_bytes(),
- common: "HelloHe".as_bytes()
- }),
- dmp.half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes())
- );
-
- // Optimal no halfmatch.
- dmp.timeout = None;
- assert!(dmp
- .half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes())
- .is_none());
- }
-
- #[test]
fn test_diff_lines_to_chars() {
// Convert lines down to characters.
assert_eq!(
@@ -3599,31 +3478,6 @@ mod tests {
}
#[test]
- fn test_diff_levenshtein() {
- let dmp = DiffMatchPatch::new();
- let diffs = vec![
- Diff::delete(b"abc"),
- Diff::insert(b"1234"),
- Diff::equal(b"xyz"),
- ];
- assert_eq!(4, dmp.diff_levenshtein(&diffs));
-
- let diffs = vec![
- Diff::equal(b"xyz"),
- Diff::delete(b"abc"),
- Diff::insert(b"1234"),
- ];
- assert_eq!(4, dmp.diff_levenshtein(&diffs));
-
- let diffs = vec![
- Diff::delete(b"abc"),
- Diff::equal(b"xyz"),
- Diff::insert(b"1234"),
- ];
- assert_eq!(7, dmp.diff_levenshtein(&diffs));
- }
-
- #[test]
fn test_diff_x_index() {
// Translate a location in text1 to text2.
let diffs = vec![
@@ -3682,401 +3536,118 @@ mod tests {
}
#[test]
- fn test_diff_bisect() -> Result<(), crate::errors::Error> {
+ fn test_diff_half_match() {
let mut dmp = DiffMatchPatch::default();
- // Normal.
- // Since the resulting diff hasn't been normalized, it would be ok if
- // the insertion and deletion pairs are swapped.
- // If the order changes, tweak this test as required.
- assert_eq!(
- vec![
- Diff::delete(b"c"),
- Diff::insert(b"m"),
- Diff::equal(b"a"),
- Diff::delete(b"t"),
- Diff::insert(b"p")
- ],
- dmp.bisect(b"cat", b"map", None)?
- );
-
- // Timeout.
- dmp.timeout = Some(0);
- let deadline = dmp.deadline();
- assert_eq!(
- vec![Diff::delete(b"cat"), Diff::insert(b"map"),],
- dmp.bisect(b"cat", b"map", deadline)?
- );
-
- Ok(())
- }
-
- #[test]
- fn test_diff_pretty_html() -> Result<(), crate::errors::Error> {
- let dmp = DiffMatchPatch::new();
- // Basic
- let diffs = [
- Diff::equal(b"a\n"),
- Diff::delete(b"<B>b</B>"),
- Diff::insert(b"c&d"),
- ];
- assert_eq!("<span>a&para;<br></span><del style=\"background:#ffe6e6;\">&lt;B&gt;b&lt;/B&gt;</del><ins style=\"background:#e6ffe6;\">c&amp;d</ins>", dmp.diff_pretty_html(&diffs)?);
-
- // Monkey business around Emoticons and extended utf-8 🤪🤩🤔
- // This gave me a lot of heart-burn
-
- // Case 1. Two similar emoticons
- // In bytes representation, these would have the last u8 different
- // Which means the diff should be an equality block of 3 bytes followed by an insert and a delete
- let old = "🤪"; // [240, 159, 164, 170]
- let new = "🤔"; // [240, 159, 164, 148]
- let diffs = dmp.diff_main(old, new)?;
- assert_eq!(
- "<span></span><del style=\"background:#ffe6e6;\">🤪</del><ins style=\"background:#e6ffe6;\">🤔</ins>",
- dmp.diff_pretty_html(&diffs)?
- );
-
- // Now Case 1. but with some text before and after
- let old = "I'm puzzled🤪 or am I?";
- let new = "I'm puzzled🤔 or thinking I guess!";
- let diffs = dmp.diff_main(old, new)?;
- assert_eq!(
- "<span>I'm puzzled</span><del style=\"background:#ffe6e6;\">🤪</del><ins style=\"background:#e6ffe6;\">🤔</ins><span> or </span><del style=\"background:#ffe6e6;\">am I?</del><ins style=\"background:#e6ffe6;\">thinking I guess!</ins>",
- dmp.diff_pretty_html(&diffs)?
- );
-
- // Case 2. Emoticons with the third position different
- let old = "🍊"; // [240, 159, 141, 138]
- let new = "🌊"; // [240, 159, 140, 138]
- let diffs = dmp.diff_main(old, new)?;
- assert_eq!(
- "<span></span><del style=\"background:#ffe6e6;\">🍊</del><ins style=\"background:#e6ffe6;\">🌊</ins>",
- dmp.diff_pretty_html(&diffs)?
- );
-
- // Now Case 2. but with some text, let's complicate this
- let old = "🍊, aah orange is the new black!"; // [240, 159, 141, 138]
- let new = "Aah orange!🌊is the new 🌊"; // [240, 159, 140, 138]
- let diffs = dmp.diff_main(old, new)?;
- assert_eq!(
- "<del style=\"background:#ffe6e6;\">🍊, a</del><ins style=\"background:#e6ffe6;\">A</ins><span>ah orange</span><del style=\"background:#ffe6e6;\"> </del><ins style=\"background:#e6ffe6;\">!🌊</ins><span>is the new </span><del style=\"background:#ffe6e6;\">black!</del><ins style=\"background:#e6ffe6;\">🌊</ins>",
- dmp.diff_pretty_html(&diffs)?
- );
+ // No match
+ assert!(dmp
+ .half_match("1234567890".as_bytes(), "abcdef".as_bytes())
+ .is_none());
+ assert!(dmp
+ .half_match("12345".as_bytes(), "23".as_bytes())
+ .is_none());
- // Case 3. with second and third different, but let's complicate this with an equality
- let old = "𠌊"; // [240, 160, 140, 138]
- let new = "𖠊"; // [240, 150, 160, 138]
- let diffs = dmp.diff_main(old, new)?;
+ // Single Match.
assert_eq!(
- "<span></span><ins style=\"background:#e6ffe6;\">𖠊</ins><del style=\"background:#ffe6e6;\">𠌊</del>",
- dmp.diff_pretty_html(&diffs)?
+ Some(HalfMatch {
+ prefix_long: "12".as_bytes(),
+ suffix_long: "90".as_bytes(),
+ prefix_short: "a".as_bytes(),
+ suffix_short: "z".as_bytes(),
+ common: "345678".as_bytes()
+ }),
+ dmp.half_match("1234567890".as_bytes(), "a345678z".as_bytes())
);
-
- // Case 3. but let there be a swap
- let old = "𞠄"; // [240, 158, 160, 132]
- let new = std::str::from_utf8(&[240, 160, 158, 132]).unwrap(); // basically an undefined element `𠞄`. Should still work
- let diffs = dmp.diff_main(old, new)?;
assert_eq!(
- "<span></span><del style=\"background:#ffe6e6;\">𞠄</del><ins style=\"background:#e6ffe6;\">𠞄</ins>",
- dmp.diff_pretty_html(&diffs)?
+ Some(HalfMatch {
+ prefix_long: "a".as_bytes(),
+ suffix_long: "z".as_bytes(),
+ prefix_short: "12".as_bytes(),
+ suffix_short: "90".as_bytes(),
+ common: "345678".as_bytes()
+ }),
+ dmp.half_match("a345678z".as_bytes(), "1234567890".as_bytes())
);
-
- // Case 4. swap at the last 2 positions
- let old = "🍌"; // [240, 159, 141, 140] -- FINALLY A BANANA
- let new = "🌍"; // [240, 159, 140, 141] -- interesting revelation - last 2 bytes swapped and 🍌 becomes 🌍. Guess the world is going `Bananas!!`
- let diffs = dmp.diff_main(old, new)?;
assert_eq!(
- "<span></span><del style=\"background:#ffe6e6;\">🍌</del><ins style=\"background:#e6ffe6;\">🌍</ins>",
- dmp.diff_pretty_html(&diffs)?
+ Some(HalfMatch {
+ prefix_long: "abc".as_bytes(),
+ suffix_long: "z".as_bytes(),
+ prefix_short: "1234".as_bytes(),
+ suffix_short: "0".as_bytes(),
+ common: "56789".as_bytes()
+ }),
+ dmp.half_match("abc56789z".as_bytes(), "1234567890".as_bytes())
);
-
- // Let's do this with a slightly longish string
- let old = "Now, let's explore some emotional extremes 🌊.\nWe've got your ecstatic face 🀩, your devastated face 😭, and your utterly confused face 🀯. But that's not all! πŸ€” We've also got some subtle emotions like 😐, πŸ™ƒ, and πŸ‘€.";
- let new = "Let's start with some basics 😊.\nWe've got your standard smiley face πŸ™‚, your sad face ☹️, and your angry face 😠. But wait, there's more! 🀩 We've also got some more complex emotions like 😍, 🀀, and πŸš€. And let's not forget about the classics: πŸ˜‰, πŸ‘, and πŸ‘.";
- let diffs = dmp.diff_main(old, new)?;
-
assert_eq!(
- "<del style=\"background:#ffe6e6;\">Now, let's explore some emotional extreme</del><ins style=\"background:#e6ffe6;\">Let's start with some basic</ins><span>s </span><del style=\"background:#ffe6e6;\">🌊</del><ins style=\"background:#e6ffe6;\">😊</ins><span>.&para;<br>We've got your </span><del style=\"background:#ffe6e6;\">ec</del><span>sta</span><del style=\"background:#ffe6e6;\">tic</del><ins style=\"background:#e6ffe6;\">ndard smiley</ins><span> face </span><del style=\"background:#ffe6e6;\">🀩</del><ins style=\"background:#e6ffe6;\">πŸ™‚</ins><span>, your </span><del style=\"background:#ffe6e6;\">devastate</del><ins style=\"background:#e6ffe6;\">sa</ins><span>d face </span><del style=\"background:#ffe6e6;\">😭</del><ins style=\"background:#e6ffe6;\">☹️</ins><span>, and your </span><del style=\"background:#ffe6e6;\">utterly confused</del><ins style=\"background:#e6ffe6;\">angry</ins><span> face </span><del style=\"background:#ffe6e6;\">🀯</del><ins style=\"background:#e6ffe6;\">😠</ins><span>. But </span><del style=\"background:#ffe6e6;\">that's not all</del><ins style=\"background:#e6ffe6;\">wait, there's more</ins><span>! </span><del style=\"background:#ffe6e6;\">πŸ€”</del><ins style=\"background:#e6ffe6;\">🀩</ins><span> We've also got some </span><del style=\"background:#ffe6e6;\">subt</del><ins style=\"background:#e6ffe6;\">more comp</ins><span>le</span><ins style=\"background:#e6ffe6;\">x</ins><span> emotions like </span><del style=\"background:#ffe6e6;\">😐</del><ins style=\"background:#e6ffe6;\">😍, 🀀, and πŸš€. And let's not forget about the classics: πŸ˜‰</ins><span>, </span><del style=\"background:#ffe6e6;\">πŸ™ƒ</del><ins style=\"background:#e6ffe6;\">πŸ‘</ins><span>, and </span><del style=\"background:#ffe6e6;\">πŸ‘€</del><ins style=\"background:#e6ffe6;\">πŸ‘</ins><span>.</span>",
- dmp.diff_pretty_html(&diffs)?
+ Some(HalfMatch {
+ prefix_long: "a".as_bytes(),
+ suffix_long: "xyz".as_bytes(),
+ prefix_short: "1".as_bytes(),
+ suffix_short: "7890".as_bytes(),
+ common: "23456".as_bytes()
+ }),
+ dmp.half_match("a23456xyz".as_bytes(), "1234567890".as_bytes())
);
- Ok(())
- }
-
- #[test]
- fn test_diff_main() -> Result<(), crate::errors::Error> {
- let mut dmp = DiffMatchPatch::default();
-
- // Perform a trivial diff.
- // Null case.
- assert!(dmp.diff_main("", "")?.is_empty());
-
- // Equality
- assert_eq!(vec![Diff::equal(b"abc")], dmp.diff_main("abc", "abc")?);
-
- // Simple insert
+ // Multiple Matches.
assert_eq!(
- vec![Diff::equal(b"ab"), Diff::insert(b"123"), Diff::equal(b"c")],
- dmp.diff_main("abc", "ab123c")?
+ Some(HalfMatch {
+ prefix_long: "12123".as_bytes(),
+ suffix_long: "123121".as_bytes(),
+ prefix_short: "a".as_bytes(),
+ suffix_short: "z".as_bytes(),
+ common: "1234123451234".as_bytes()
+ }),
+ dmp.half_match(
+ "121231234123451234123121".as_bytes(),
+ "a1234123451234z".as_bytes()
+ )
);
-
- // Simple delete
assert_eq!(
- vec![Diff::equal(b"a"), Diff::delete(b"123"), Diff::equal(b"bc")],
- dmp.diff_main("a123bc", "abc")?
+ Some(HalfMatch {
+ prefix_long: "".as_bytes(),
+ suffix_long: "-=-=-=-=-=".as_bytes(),
+ prefix_short: "x".as_bytes(),
+ suffix_short: "".as_bytes(),
+ common: "x-=-=-=-=-=-=-=".as_bytes()
+ }),
+ dmp.half_match(
+ "x-=-=-=-=-=-=-=-=-=-=-=-=".as_bytes(),
+ "xx-=-=-=-=-=-=-=".as_bytes()
+ )
);
-
- // Two insertions
assert_eq!(
- vec![
- Diff::equal(b"a"),
- Diff::insert(b"123"),
- Diff::equal(b"b"),
- Diff::insert(b"456"),
- Diff::equal(b"c"),
- ],
- dmp.diff_main("abc", "a123b456c")?
+ Some(HalfMatch {
+ prefix_long: "-=-=-=-=-=".as_bytes(),
+ suffix_long: "".as_bytes(),
+ prefix_short: "".as_bytes(),
+ suffix_short: "y".as_bytes(),
+ common: "-=-=-=-=-=-=-=y".as_bytes()
+ }),
+ dmp.half_match(
+ "-=-=-=-=-=-=-=-=-=-=-=-=y".as_bytes(),
+ "-=-=-=-=-=-=-=yy".as_bytes()
+ )
);
- // Two deletions.
+ // Non-optimal halfmatch.
+ // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy
assert_eq!(
- vec![
- Diff::equal(b"a"),
- Diff::delete(b"123"),
- Diff::equal(b"b"),
- Diff::delete(b"456"),
- Diff::equal(b"c"),
- ],
- dmp.diff_main("a123b456c", "abc")?
+ Some(HalfMatch {
+ prefix_long: "qHillo".as_bytes(),
+ suffix_long: "w".as_bytes(),
+ prefix_short: "x".as_bytes(),
+ suffix_short: "Hulloy".as_bytes(),
+ common: "HelloHe".as_bytes()
+ }),
+ dmp.half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes())
);
- // Perform a real diff.
- // Switch off the timeout.
+ // Optimal no halfmatch.
dmp.timeout = None;
- // Simple cases.
- assert_eq!(
- vec![Diff::delete(b"a"), Diff::insert(b"b"),],
- dmp.diff_main("a", "b")?
- );
-
- assert_eq!(
- vec![
- Diff::delete(b"Apple"),
- Diff::insert(b"Banana"),
- Diff::equal(b"s are a"),
- Diff::insert(b"lso"),
- Diff::equal(b" fruit.")
- ],
- dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.")?
- );
-
- assert_eq!(
- vec![
- Diff::delete(b"a"),
- Diff::insert("\u{0680}".as_bytes()),
- Diff::equal(b"x"),
- Diff::delete(b"\t"),
- Diff::insert(b"\0")
- ],
- dmp.diff_main("ax\t", "\u{0680}x\0")?
- );
-
- // Overlaps.
- assert_eq!(
- vec![
- Diff::delete(b"1"),
- Diff::equal(b"a"),
- Diff::delete(b"y"),
- Diff::equal(b"b"),
- Diff::delete(b"2"),
- Diff::insert(b"xab"),
- ],
- dmp.diff_main("1ayb2", "abxab")?
- );
-
- assert_eq!(
- vec![
- Diff::insert(b"xaxcx"),
- Diff::equal(b"abc"),
- Diff::delete(b"y"),
- ],
- dmp.diff_main("abcy", "xaxcxabc")?
- );
-
- assert_eq!(
- vec![
- Diff::delete(b"ABCD"),
- Diff::equal(b"a"),
- Diff::delete(b"="),
- Diff::insert(b"-"),
- Diff::equal(b"bcd"),
- Diff::delete(b"="),
- Diff::insert(b"-"),
- Diff::equal(b"efghijklmnopqrs"),
- Diff::delete(b"EFGHIJKLMNOefg"),
- ],
- dmp.diff_main(
- "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg",
- "a-bcd-efghijklmnopqrs"
- )?
- );
-
- // Large equality.
- assert_eq!(
- vec![
- Diff::insert(b" "),
- Diff::equal(b"a"),
- Diff::insert(b"nd"),
- Diff::equal(b" [[Hepatopancreatic]]"),
- Diff::delete(b" and [[New"),
- ],
- dmp.diff_main(
- "a [[Hepatopancreatic]] and [[New",
- " and [[Hepatopancreatic]]"
- )?
- );
-
- // Timeout.
- const LOW_TIMEOUT: u32 = 100;
- dmp.set_timeout(Some(LOW_TIMEOUT));
- let a = vec!["`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; 2048].join("");
- let b = vec!["I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; 2048].join("");
-
- let start = Utc::now().time();
- dmp.diff_main(&a, &b)?;
- let end = Utc::now().time();
- // Test that we took at least the timeout period (+ 5ms being generous).
- assert!((end - start).num_milliseconds() <= LOW_TIMEOUT as i64 + 5);
-
- // Test the linemode speedup.
- // Must be long to pass the 100 char cutoff.
- // Simple line-mode.
- dmp.timeout = Some(1000);
- let a = "12345678901234567890123456789 0123456 78901234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
- let b = "abcdefghij abcdefghij abcdefghij abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n";
- dmp.set_checklines(false);
- let res_no_lm = dmp.diff_main(a, b)?;
- dmp.set_checklines(true);
- let res_yes_lm = dmp.diff_main(a, b)?;
-
- // Now, we'll run 2 checks - one for result equality
- assert_eq!(res_no_lm, res_yes_lm);
-
- // Single line-mode.
- let a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890";
- let b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij";
- dmp.set_checklines(false);
- let yes_lm = dmp.diff_main(a, b)?;
- dmp.set_checklines(true);
- let no_lm = dmp.diff_main(a, b)?;
- assert_eq!(no_lm, yes_lm);
-
- // Overlap line-mode.
- let a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
- let b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n";
- dmp.set_checklines(false);
- let no_lm = dmp.diff_main(a, b)?;
- dmp.set_checklines(true);
- let yes_lm = dmp.diff_main(a, b)?;
- assert_eq!(rebuild_text(&yes_lm[..])?, rebuild_text(&no_lm[..])?);
-
- // Benefits of checklines can only be realized in text with many lines
- let mut dmp = DiffMatchPatch::default();
- let old = std::fs::read_to_string("testdata/txt_old.txt").unwrap();
- let new = std::fs::read_to_string("testdata/txt_new.txt").unwrap();
-
- let start = Instant::now();
- let diff_yes_lm = dmp.diff_main(&old, &new);
- let yes_lm_dur = Instant::now() - start;
- assert!(diff_yes_lm.is_ok());
-
- dmp.set_checklines(false);
- let start = Instant::now();
- let diff_no_lm = dmp.diff_main(&old, &new);
- let no_lm_dur = Instant::now() - start;
- assert!(diff_no_lm.is_ok());
-
- assert!(no_lm_dur > yes_lm_dur);
- Ok(())
- }
-
- #[test]
- fn test_diff_delta() -> Result<(), crate::errors::Error> {
- let diffs = vec![
- Diff::equal(b"jump"),
- Diff::delete(b"s"),
- Diff::insert(b"ed"),
- Diff::equal(b" over "),
- Diff::delete(b"the"),
- Diff::insert(b"a"),
- Diff::equal(b" lazy"),
- Diff::insert(b"old dog"),
- ];
- let txt_old = "jumps over the lazy".as_bytes();
- assert_eq!(
- txt_old,
- DiffMatchPatch::diff_text_old(&diffs)
- );
-
- let delta = DiffMatchPatch::to_delta(&diffs);
- assert_eq!("=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog".as_bytes(), &delta);
- // Convert delta string into a diff.
- assert_eq!(diffs, DiffMatchPatch::from_delta(txt_old, &delta)?);
-
- // Generates error (19 != 20).
- assert!(DiffMatchPatch::from_delta(&[txt_old, "+".as_bytes()].concat()[..], &delta).is_err());
-
- // Generates error (19 != 18).
- assert!(DiffMatchPatch::from_delta(&txt_old[1..], &delta).is_err());
-
- // Test deltas with special characters.
- let diffs = vec![
- Diff::equal("\u{0680} \x00 \t %".as_bytes()),
- Diff::delete("\u{0681} \x01 \n ^".as_bytes()),
- Diff::insert("\u{0682} \x02 \\ |".as_bytes())
- ];
- let txt_old = DiffMatchPatch::diff_text_old(&diffs);
- assert_eq!("\u{0680} \x00 \t %\u{0681} \x01 \n ^".as_bytes(), txt_old);
- let delta = DiffMatchPatch::to_delta(&diffs);
-
- assert_eq!(b"=8\t-8\t+%DA%82 %02 %5C %7C", &delta[..]);
- // Convert delta string into a diff.
- assert_eq!(&diffs, &DiffMatchPatch::from_delta(&txt_old, &delta)?);
-
- // Verify pool of unchanged characters.
- let diffs = vec![
- Diff::insert("A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ".as_bytes())
- ];
- let txt_new = DiffMatchPatch::diff_text_new(&diffs);
- assert_eq!("A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", std::str::from_utf8(&txt_new).unwrap());
-
- let delta = DiffMatchPatch::to_delta(&diffs);
- assert_eq!("+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", std::str::from_utf8(&delta).unwrap());
-
- // Convert delta string into a diff.
- assert_eq!(diffs, DiffMatchPatch::from_delta("".as_bytes(), &delta)?);
- Ok(())
- }
-
- // Helper to construct the two texts which made up the diff originally.
- fn rebuild_text(diffs: &[Diff<u8>]) -> Result<(String, String), crate::errors::Error> {
- let mut txt1 = vec![];
- let mut txt2 = vec![];
-
- diffs.iter().for_each(|d| {
- if d.op() != Ops::Insert {
- txt1.push(d.data());
- }
-
- if d.op() != Ops::Delete {
- txt2.push(d.data());
- }
- });
-
- Ok((
- String::from_utf8(txt1.concat()).map_err(|_| crate::errors::Error::Utf8Error)?,
- String::from_utf8(txt2.concat()).map_err(|_| crate::errors::Error::Utf8Error)?,
- ))
+ assert!(dmp
+ .half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes())
+ .is_none());
}
#[test]
@@ -4103,7 +3674,7 @@ mod tests {
}
#[test]
- fn test_patch_add_context() -> Result<(), crate::errors::Error> {
+ fn test_patch_add_context() -> Result<(), Error> {
let dmp = DiffMatchPatch::default();
let mut ps = dmp.patch_from_text("@@ -21,4 +21,10 @@\n-jump\n+somersault\n")?;
@@ -4145,115 +3716,6 @@ mod tests {
}
#[test]
- fn test_patch_from_text() -> Result<(), crate::errors::Error> {
- let dmp = DiffMatchPatch::new();
-
- assert!(dmp.patch_from_text("")?.is_empty());
-
- let strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n";
- assert_eq!(
- strp,
- dmp.patch_from_text(
- strp
- )?[0].to_string()
- );
-
- assert_eq!(
- "@@ -1 +1 @@\n-a\n+b\n",
- dmp.patch_from_text("@@ -1 +1 @@\n-a\n+b\n")?[0].to_string()
- );
-
- assert_eq!(
- "@@ -1,3 +0,0 @@\n-abc\n",
- dmp.patch_from_text("@@ -1,3 +0,0 @@\n-abc\n")?[0].to_string()
- );
-
- assert_eq!(
- "@@ -0,0 +1,3 @@\n+abc\n",
- dmp.patch_from_text("@@ -0,0 +1,3 @@\n+abc\n")?[0].to_string()
- );
-
- // Generates error.
- assert!(dmp.patch_from_text("Bad\nPatch\n").is_err());
-
- Ok(())
- }
-
- #[test]
- fn test_patch_to_text() -> Result<(), crate::errors::Error> {
- let dmp = DiffMatchPatch::new();
-
- let strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n";
- let patches = dmp.patch_from_text(strp)?;
- assert_eq!(strp, dmp.patch_to_text(&patches));
-
- let strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n";
- let patches = dmp.patch_from_text(strp)?;
- assert_eq!(strp, dmp.patch_to_text(&patches));
-
- Ok(())
- }
-
- #[test]
- fn test_patch_make() -> Result<(), crate::errors::Error> {
- let dmp = DiffMatchPatch::default();
- let patches = dmp.patch_make(super::PatchInput::Texts("", ""))?;
- assert!(patches.is_empty());
-
- let txt1 = "The quick brown fox jumps over the lazy dog.";
- let txt2 = "That quick brown fox jumped over a lazy dog.";
-
- // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context.
- let patches = dmp.patch_make(crate::dmp::PatchInput::Texts(txt2, txt1))?;
- assert_eq!("@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n", dmp.patch_to_text(&patches));
-
- // Text1+Text2 inputs.
- let patches = dmp.patch_make(crate::dmp::PatchInput::Texts(txt1, txt2))?;
- assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches));
-
- // Diff input.
- let diffs = dmp.diff_main(txt1, txt2)?;
- let patches = dmp.patch_make(crate::dmp::PatchInput::Diffs(&diffs[..]))?;
- assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches));
-
- // Text1+Diff inputs.
- let patches = dmp.patch_make(crate::dmp::PatchInput::TextDiffs(txt1, &diffs[..]))?;
- assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches));
-
- // Character encoding.
- let patches = dmp.patch_make(crate::dmp::PatchInput::Texts(
- "`1234567890-=[]\\;',./",
- "~!@#$%^&*()_+{}|:\"<>?",
- ))?;
-
- assert_eq!(
- "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n",
- dmp.patch_to_text(&patches)
- );
-
- // Character decoding.
- let diffs = vec![
- Diff::delete(b"`1234567890-=[]\\;',./"),
- Diff::insert(b"~!@#$%^&*()_+{}|:\"<>?"),
- ];
- assert_eq!(
- diffs,
- dmp.patch_from_text("@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n")?[0].diffs
- );
-
- // Long string with repeats.
- let txt1 = vec!["abcdef"; 100].join("");
- let txt2 = [&txt1, "123"].join("");
- let patches = dmp.patch_make(crate::dmp::PatchInput::Texts(&txt1, &txt2))?;
- assert_eq!(
- "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n",
- dmp.patch_to_text(&patches)
- );
-
- Ok(())
- }
-
- #[test]
fn test_parse_patch_header() {
assert_eq!(
Some((21, Some(4), 21, Some(10))),
@@ -4270,29 +3732,7 @@ mod tests {
}
#[test]
- fn test_diff_text() {
- let diffs = vec![
- Diff::equal(b"jump"),
- Diff::delete(b"s"),
- Diff::insert(b"ed"),
- Diff::equal(b" over "),
- Diff::delete(b"the"),
- Diff::insert(b"a"),
- Diff::equal(b" lazy"),
- ];
-
- assert_eq!(
- b"jumps over the lazy",
- &DiffMatchPatch::diff_text_old(&diffs[..])[..]
- );
- assert_eq!(
- b"jumped over a lazy",
- &DiffMatchPatch::diff_text_new(&diffs[..])[..]
- );
- }
-
- #[test]
- fn test_patch_add_padding() -> Result<(), crate::errors::Error> {
+ fn test_patch_add_padding() -> Result<(), Error> {
let dmp = DiffMatchPatch::default();
// Both edges full.
let mut patches = dmp.patch_make(PatchInput::Texts("", "test"))?;
@@ -4336,7 +3776,7 @@ mod tests {
}
#[test]
- fn test_patch_split_max() -> Result<(), crate::errors::Error> {
+ fn test_patch_split_max() -> Result<(), Error> {
let dmp = DiffMatchPatch::default();
// Assumes that dmp.Match_MaxBits is 32.
@@ -4380,228 +3820,4 @@ mod tests {
Ok(())
}
-
- #[test]
- fn test_patch_apply() -> Result<(), crate::errors::Error> {
- let mut dmp = DiffMatchPatch::default();
-
- let patches = dmp.patch_make(PatchInput::Texts("", ""))?;
- let (txt, results) = dmp.patch_apply_internal(&patches, b"Hello world.")?;
- assert_eq!(
- format!("{}\t{}", std::str::from_utf8(&txt).unwrap(), results.len()),
- "Hello world.\t0"
- );
-
- let patches = dmp.patch_make(PatchInput::Texts(
- "The quick brown fox jumps over the lazy dog.",
- "That quick brown fox jumped over a lazy dog.",
- ))?;
-
- // Exact match
- assert_eq!(
- (
- b"That quick brown fox jumped over a lazy dog.".to_vec(),
- vec![true, true]
- ),
- dmp.patch_apply_internal(&patches, b"The quick brown fox jumps over the lazy dog.")?
- );
-
- // Partial match
- assert_eq!(
- (
- b"That quick red rabbit jumped over a tired tiger.".to_vec(),
- vec![true, true]
- ),
- dmp.patch_apply_internal(
- &patches,
- b"The quick red rabbit jumps over the tired tiger."
- )?
- );
-
- // Failed match
- assert_eq!(
- (
- b"I am the very model of a modern major general.".to_vec(),
- vec![false, false]
- ),
- dmp.patch_apply_internal(&patches, b"I am the very model of a modern major general.")?
- );
-
- // Big delete, small change
- let patches = dmp.patch_make(PatchInput::Texts(
- "x1234567890123456789012345678901234567890123456789012345678901234567890y",
- "xabcy",
- ))?;
- assert_eq!((b"xabcy".to_vec(), vec![true, true]), dmp.patch_apply_internal(&patches, b"x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y")?);
-
- // Big delete, large change
- let patches = dmp.patch_make(PatchInput::Texts(
- "x1234567890123456789012345678901234567890123456789012345678901234567890y",
- "xabcy",
- ))?;
- assert_eq!((b"xabc12345678901234567890---------------++++++++++---------------12345678901234567890y".to_vec(), vec![false, true]), dmp.patch_apply_internal(&patches, b"x12345678901234567890---------------++++++++++---------------12345678901234567890y")?);
-
- dmp.delete_threshold = 0.6;
- let patches = dmp.patch_make(PatchInput::Texts(
- "x1234567890123456789012345678901234567890123456789012345678901234567890y",
- "xabcy",
- ))?;
- assert_eq!((b"xabcy".to_vec(), vec![true, true]), dmp.patch_apply_internal(&patches, b"x12345678901234567890---------------++++++++++---------------12345678901234567890y")?);
- dmp.delete_threshold = 0.5;
-
- // Compesate for failed patch
- dmp.match_threshold = 0.;
- dmp.match_distance = 0;
- let patches = dmp.patch_make(PatchInput::Texts(
- "abcdefghijklmnopqrstuvwxyz--------------------1234567890",
- "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890",
- ))?;
- assert_eq!(
- (
- b"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890".to_vec(),
- vec![false, true]
- ),
- dmp.patch_apply_internal(
- &patches,
- b"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890"
- )?
- );
- dmp.match_threshold = 0.5;
- dmp.match_distance = 1000;
-
- // No side-effects - kinds useless cos patches is not mutable in rust
- let patches = dmp.patch_make(PatchInput::Texts("", "test"))?;
- let srcstr = dmp.patch_to_text(&patches);
- dmp.patch_apply_internal(&patches, b"")?;
- assert_eq!(srcstr, dmp.patch_to_text(&patches));
-
- let patches = dmp.patch_make(PatchInput::Texts(
- "The quick brown fox jumps over the lazy dog.",
- "Woof",
- ))?;
- let srcstr = dmp.patch_to_text(&patches);
- dmp.patch_apply_internal(&patches, b"The quick brown fox jumps over the lazy dog.")?;
- assert_eq!(srcstr, dmp.patch_to_text(&patches));
-
- // Edge exact match
- let patches = dmp.patch_make(PatchInput::Texts("", "test"))?;
- assert_eq!(
- (b"test".to_vec(), vec![true]),
- dmp.patch_apply_internal(&patches, b"")?
- );
-
- // Near edge exact match
- let patches = dmp.patch_make(PatchInput::Texts("XY", "XtestY"))?;
- assert_eq!(
- (b"XtestY".to_vec(), vec![true]),
- dmp.patch_apply_internal(&patches, b"XY")?
- );
-
- // Edge partial match
- let patches = dmp.patch_make(PatchInput::Texts("y", "y123"))?;
- assert_eq!(
- (b"x123".to_vec(), vec![true]),
- dmp.patch_apply_internal(&patches, b"x")?
- );
-
- Ok(())
- }
-
- #[test]
- fn test_match_alphabet() {
- // Initialise the bitmasks for Bitap.
- // Unique.
- assert_eq!(
- HashMap::from([(b'a', 4), (b'b', 2), (b'c', 1)]),
- DiffMatchPatch::match_alphabet(b"abc")
- );
-
- // Duplicates.
- assert_eq!(
- HashMap::from([(b'a', 37), (b'b', 18), (b'c', 8)]),
- DiffMatchPatch::match_alphabet(b"abcaba")
- )
- }
-
- #[test]
- fn test_match_bitap() {
- // Bitap algorithm.
- let mut dmp = DiffMatchPatch {
- match_distance: 100,
- ..Default::default()
- };
-
- // Exact matches.
- assert_eq!(Some(5), dmp.match_bitap(b"abcdefghijk", b"fgh", 5));
- assert_eq!(Some(5), dmp.match_bitap(b"abcdefghijk", b"fgh", 0));
-
- // Fuzzy matches.
- assert_eq!(Some(4), dmp.match_bitap(b"abcdefghijk", b"efxhi", 0));
- assert_eq!(Some(2), dmp.match_bitap(b"abcdefghijk", b"cdefxyhijk", 5));
- assert_eq!(None, dmp.match_bitap(b"abcdefghijk", b"bxy", 1));
-
- // Overflow.
- assert_eq!(Some(2), dmp.match_bitap(b"123456789xx0", b"3456789x0", 2));
-
- // Threshold test.
- dmp.match_threshold = 0.4;
- assert_eq!(Some(4), dmp.match_bitap(b"abcdefghijk", b"efxyhi", 1));
-
- // dmp.`match_threshold` = 0.3;
- dmp.match_threshold = 0.3;
- assert_eq!(None, dmp.match_bitap(b"abcdefghijk", b"efxyhi", 1));
-
- dmp.match_threshold = 0.;
- assert_eq!(Some(1), dmp.match_bitap(b"abcdefghijk", b"bcdef", 1));
-
- dmp.match_threshold = 0.5;
-
- // Multiple select.
- assert_eq!(Some(0), dmp.match_bitap(b"abcdexyzabcde", b"abccde", 3));
- assert_eq!(Some(8), dmp.match_bitap(b"abcdexyzabcde", b"abccde", 5));
-
- // Distance test.
- dmp.match_distance = 10;
- assert_eq!(
- None,
- dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdefg", 24)
- );
- assert_eq!(
- Some(0),
- dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdxxefg", 1)
- );
-
- dmp.match_distance = 1000;
- assert_eq!(
- Some(0),
- dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdefg", 24)
- );
- }
-
- #[test]
- fn test_match_main() {
- let dmp = DiffMatchPatch::default();
- // Full match.
- // Shortcut matches.
- assert_eq!(Some(0), dmp.match_internal(b"abcdef", b"abcdef", 1000));
- assert_eq!(None, dmp.match_internal(b"", b"abcdef", 1));
- assert_eq!(Some(3), dmp.match_internal(b"abcdef", b"", 3));
- assert_eq!(Some(3), dmp.match_internal(b"abcdef", b"de", 3));
-
- // Beyond end match.
- assert_eq!(Some(3), dmp.match_internal(b"abcdef", b"defy", 4));
-
- // Oversized pattern.
- assert_eq!(Some(0), dmp.match_internal(b"abcdef", b"abcdefy", 0));
-
- // Complex match.
- assert_eq!(
- Some(4),
- dmp.match_internal(
- b"I am the very model of a modern major general.",
- b" that berry ",
- 5
- )
- );
- }
}
diff --git a/src/lib.rs b/src/lib.rs
index 02d1ed1..a41d6a3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,5 +2,5 @@ pub mod dmp;
pub mod errors;
pub mod traits;
-pub use dmp::{DiffMatchPatch, PatchInput};
+pub use dmp::{DiffMatchPatch, PatchInput, Ops, Patch, Patches};
pub use errors::Error;
\ No newline at end of file
diff --git a/tests/test.rs b/tests/test.rs
new file mode 100644
index 0000000..2c043b5
--- /dev/null
+++ b/tests/test.rs
@@ -0,0 +1,791 @@
+use std::{collections::HashMap, time::Instant};
+
+use chrono::Utc;
+
+use diff_match_patch_rs::dmp::Diff;
+
+use diff_match_patch_rs::{DiffMatchPatch, Error, Ops, Patch, PatchInput};
+
+// const tests = [
+// 'testDiffIsDestructurable',
+// 'testDiffCleanupEfficiency',
+// ];
+
+#[test]
+fn test_diff_levenshtein() {
+ let dmp = DiffMatchPatch::new();
+ let diffs = vec![
+ Diff::delete(b"abc"),
+ Diff::insert(b"1234"),
+ Diff::equal(b"xyz"),
+ ];
+ assert_eq!(4, dmp.diff_levenshtein(&diffs));
+
+ let diffs = vec![
+ Diff::equal(b"xyz"),
+ Diff::delete(b"abc"),
+ Diff::insert(b"1234"),
+ ];
+ assert_eq!(4, dmp.diff_levenshtein(&diffs));
+
+ let diffs = vec![
+ Diff::delete(b"abc"),
+ Diff::equal(b"xyz"),
+ Diff::insert(b"1234"),
+ ];
+ assert_eq!(7, dmp.diff_levenshtein(&diffs));
+}
+
+#[test]
+fn test_diff_bisect() -> Result<(), Error> {
+ let mut dmp = DiffMatchPatch::default();
+
+ // Normal.
+ // Since the resulting diff hasn't been normalized, it would be ok if
+ // the insertion and deletion pairs are swapped.
+ // If the order changes, tweak this test as required.
+ assert_eq!(
+ vec![
+ Diff::delete(b"c"),
+ Diff::insert(b"m"),
+ Diff::equal(b"a"),
+ Diff::delete(b"t"),
+ Diff::insert(b"p")
+ ],
+ dmp.bisect(b"cat", b"map", None)?
+ );
+
+ // Timeout.
+ dmp.set_timeout(Some(0));
+ let deadline = dmp.deadline();
+ assert_eq!(
+ vec![Diff::delete(b"cat"), Diff::insert(b"map"),],
+ dmp.bisect(b"cat", b"map", deadline)?
+ );
+
+ Ok(())
+}
+
+#[test]
+fn test_diff_pretty_html() -> Result<(), Error> {
+ let dmp = DiffMatchPatch::new();
+ // Basic
+ let diffs = [
+ Diff::equal(b"a\n"),
+ Diff::delete(b"<B>b</B>"),
+ Diff::insert(b"c&d"),
+ ];
+ assert_eq!("<span>a&para;<br></span><del style=\"background:#ffe6e6;\">&lt;B&gt;b&lt;/B&gt;</del><ins style=\"background:#e6ffe6;\">c&amp;d</ins>", dmp.diff_pretty_html(&diffs)?);
+
+ // Monkey business around emoticons and extended UTF-8 🤪🤩🤔
+ // This gave me a lot of heart-burn
+
+ // Case 1. Two similar emoticons
+ // In bytes representation, these would have the last u8 different
+ // Which means the diff should be an equality block of 3 bytes followed by insert and delete
+ let old = "🤪"; // [240, 159, 164, 170]
+ let new = "🤔"; // [240, 159, 164, 148]
+ let diffs = dmp.diff_main(old, new)?;
+ assert_eq!(
+ "<span></span><del style=\"background:#ffe6e6;\">🤪</del><ins style=\"background:#e6ffe6;\">🤔</ins>",
+ dmp.diff_pretty_html(&diffs)?
+ );
+
+ // Now Case 1. but with some text before and after
+ let old = "I'm puzzled🤪 or am I?";
+ let new = "I'm puzzled🤔 or thinking I guess!";
+ let diffs = dmp.diff_main(old, new)?;
+ assert_eq!(
+ "<span>I'm puzzled</span><del style=\"background:#ffe6e6;\">🤪</del><ins style=\"background:#e6ffe6;\">🤔</ins><span> or </span><del style=\"background:#ffe6e6;\">am I?</del><ins style=\"background:#e6ffe6;\">thinking I guess!</ins>",
+ dmp.diff_pretty_html(&diffs)?
+ );
+
+ // Case 2. Emoticons with the third position different
+ let old = "🍊"; // [240, 159, 141, 138]
+ let new = "🌊"; // [240, 159, 140, 138]
+ let diffs = dmp.diff_main(old, new)?;
+ assert_eq!(
+ "<span></span><del style=\"background:#ffe6e6;\">🍊</del><ins style=\"background:#e6ffe6;\">🌊</ins>",
+ dmp.diff_pretty_html(&diffs)?
+ );
+
+ // Now Case 2. but with some text, lets complicate this
+ let old = "🍊, aah orange is the new black!"; // [240, 159, 141, 138]
+ let new = "Aah orange!🌊is the new 🌊"; // [240, 159, 140, 138]
+ let diffs = dmp.diff_main(old, new)?;
+ assert_eq!(
+ "<del style=\"background:#ffe6e6;\">🍊, a</del><ins style=\"background:#e6ffe6;\">A</ins><span>ah orange</span><del style=\"background:#ffe6e6;\"> </del><ins style=\"background:#e6ffe6;\">!🌊</ins><span>is the new </span><del style=\"background:#ffe6e6;\">black!</del><ins style=\"background:#e6ffe6;\">🌊</ins>",
+ dmp.diff_pretty_html(&diffs)?
+ );
+
+ // Case 3. with second and third different, but lets complicate this with an equality
+ let old = "𠌊"; // [240, 160, 140, 138]
+ let new = "𖠊"; // [240, 150, 160, 138]
+ let diffs = dmp.diff_main(old, new)?;
+ assert_eq!(
+ "<span></span><ins style=\"background:#e6ffe6;\">𖠊</ins><del style=\"background:#ffe6e6;\">𠌊</del>",
+ dmp.diff_pretty_html(&diffs)?
+ );
+
+ // Case 3. but let there be a swap
+ let old = "𞠄"; // [240, 158, 160, 132]
+ let new = std::str::from_utf8(&[240, 160, 158, 132]).unwrap(); // basically an undefined element `𠞄`. Should still work
+ let diffs = dmp.diff_main(old, new)?;
+ assert_eq!(
+ "<span></span><del style=\"background:#ffe6e6;\">𞠄</del><ins style=\"background:#e6ffe6;\">𠞄</ins>",
+ dmp.diff_pretty_html(&diffs)?
+ );
+
+ // Case 4. swap at the last 2 positions
+ let old = "🍌"; // [240, 159, 141, 140] -- FINALLY A BANANA
+ let new = "🌍"; // [240, 159, 140, 141] -- interesting revelation - last 2 bytes swapped and 🍌 becomes 🌍. Guess the world is going `Bananas!!`
+ let diffs = dmp.diff_main(old, new)?;
+ assert_eq!(
+ "<span></span><del style=\"background:#ffe6e6;\">🍌</del><ins style=\"background:#e6ffe6;\">🌍</ins>",
+ dmp.diff_pretty_html(&diffs)?
+ );
+
+ // Let's do this with a slightly longish string
+ let old = "Now, let's explore some emotional extremes 🌊.\nWe've got your ecstatic face 🀩, your devastated face 😭, and your utterly confused face 🀯. But that's not all! πŸ€” We've also got some subtle emotions like 😐, πŸ™ƒ, and πŸ‘€.";
+ let new = "Let's start with some basics 😊.\nWe've got your standard smiley face πŸ™‚, your sad face ☹️, and your angry face 😠. But wait, there's more! 🀩 We've also got some more complex emotions like 😍, 🀀, and πŸš€. And let's not forget about the classics: πŸ˜‰, πŸ‘, and πŸ‘.";
+ let diffs = dmp.diff_main(old, new)?;
+
+ assert_eq!(
+ "<del style=\"background:#ffe6e6;\">Now, let's explore some emotional extreme</del><ins style=\"background:#e6ffe6;\">Let's start with some basic</ins><span>s </span><del style=\"background:#ffe6e6;\">🌊</del><ins style=\"background:#e6ffe6;\">😊</ins><span>.&para;<br>We've got your </span><del style=\"background:#ffe6e6;\">ec</del><span>sta</span><del style=\"background:#ffe6e6;\">tic</del><ins style=\"background:#e6ffe6;\">ndard smiley</ins><span> face </span><del style=\"background:#ffe6e6;\">🀩</del><ins style=\"background:#e6ffe6;\">πŸ™‚</ins><span>, your </span><del style=\"background:#ffe6e6;\">devastate</del><ins style=\"background:#e6ffe6;\">sa</ins><span>d face </span><del style=\"background:#ffe6e6;\">😭</del><ins style=\"background:#e6ffe6;\">☹️</ins><span>, and your </span><del style=\"background:#ffe6e6;\">utterly confused</del><ins style=\"background:#e6ffe6;\">angry</ins><span> face </span><del style=\"background:#ffe6e6;\">🀯</del><ins style=\"background:#e6ffe6;\">😠</ins><span>. But </span><del style=\"background:#ffe6e6;\">that's not all</del><ins style=\"background:#e6ffe6;\">wait, there's more</ins><span>! </span><del style=\"background:#ffe6e6;\">πŸ€”</del><ins style=\"background:#e6ffe6;\">🀩</ins><span> We've also got some </span><del style=\"background:#ffe6e6;\">subt</del><ins style=\"background:#e6ffe6;\">more comp</ins><span>le</span><ins style=\"background:#e6ffe6;\">x</ins><span> emotions like </span><del style=\"background:#ffe6e6;\">😐</del><ins style=\"background:#e6ffe6;\">😍, 🀀, and πŸš€. And let's not forget about the classics: πŸ˜‰</ins><span>, </span><del style=\"background:#ffe6e6;\">πŸ™ƒ</del><ins style=\"background:#e6ffe6;\">πŸ‘</ins><span>, and </span><del style=\"background:#ffe6e6;\">πŸ‘€</del><ins style=\"background:#e6ffe6;\">πŸ‘</ins><span>.</span>",
+ dmp.diff_pretty_html(&diffs)?
+ );
+
+ Ok(())
+}
+
+#[test]
+fn test_diff_main() -> Result<(), Error> {
+ let mut dmp = DiffMatchPatch::default();
+
+ // Perform a trivial diff.
+ // Null case.
+ assert!(dmp.diff_main("", "")?.is_empty());
+
+ // Equality
+ assert_eq!(vec![Diff::equal(b"abc")], dmp.diff_main("abc", "abc")?);
+
+ // Simple insert
+ assert_eq!(
+ vec![Diff::equal(b"ab"), Diff::insert(b"123"), Diff::equal(b"c")],
+ dmp.diff_main("abc", "ab123c")?
+ );
+
+ // Simple delete
+ assert_eq!(
+ vec![Diff::equal(b"a"), Diff::delete(b"123"), Diff::equal(b"bc")],
+ dmp.diff_main("a123bc", "abc")?
+ );
+
+ // Two insertions
+ assert_eq!(
+ vec![
+ Diff::equal(b"a"),
+ Diff::insert(b"123"),
+ Diff::equal(b"b"),
+ Diff::insert(b"456"),
+ Diff::equal(b"c"),
+ ],
+ dmp.diff_main("abc", "a123b456c")?
+ );
+
+ // Two deletions.
+ assert_eq!(
+ vec![
+ Diff::equal(b"a"),
+ Diff::delete(b"123"),
+ Diff::equal(b"b"),
+ Diff::delete(b"456"),
+ Diff::equal(b"c"),
+ ],
+ dmp.diff_main("a123b456c", "abc")?
+ );
+
+ // Perform a real diff.
+ // Switch off the timeout.
+ dmp.set_timeout(None);
+ // Simple cases.
+ assert_eq!(
+ vec![Diff::delete(b"a"), Diff::insert(b"b"),],
+ dmp.diff_main("a", "b")?
+ );
+
+ assert_eq!(
+ vec![
+ Diff::delete(b"Apple"),
+ Diff::insert(b"Banana"),
+ Diff::equal(b"s are a"),
+ Diff::insert(b"lso"),
+ Diff::equal(b" fruit.")
+ ],
+ dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.")?
+ );
+
+ assert_eq!(
+ vec![
+ Diff::delete(b"a"),
+ Diff::insert("\u{0680}".as_bytes()),
+ Diff::equal(b"x"),
+ Diff::delete(b"\t"),
+ Diff::insert(b"\0")
+ ],
+ dmp.diff_main("ax\t", "\u{0680}x\0")?
+ );
+
+ // Overlaps.
+ assert_eq!(
+ vec![
+ Diff::delete(b"1"),
+ Diff::equal(b"a"),
+ Diff::delete(b"y"),
+ Diff::equal(b"b"),
+ Diff::delete(b"2"),
+ Diff::insert(b"xab"),
+ ],
+ dmp.diff_main("1ayb2", "abxab")?
+ );
+
+ assert_eq!(
+ vec![
+ Diff::insert(b"xaxcx"),
+ Diff::equal(b"abc"),
+ Diff::delete(b"y"),
+ ],
+ dmp.diff_main("abcy", "xaxcxabc")?
+ );
+
+ assert_eq!(
+ vec![
+ Diff::delete(b"ABCD"),
+ Diff::equal(b"a"),
+ Diff::delete(b"="),
+ Diff::insert(b"-"),
+ Diff::equal(b"bcd"),
+ Diff::delete(b"="),
+ Diff::insert(b"-"),
+ Diff::equal(b"efghijklmnopqrs"),
+ Diff::delete(b"EFGHIJKLMNOefg"),
+ ],
+ dmp.diff_main(
+ "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg",
+ "a-bcd-efghijklmnopqrs"
+ )?
+ );
+
+ // Large equality.
+ assert_eq!(
+ vec![
+ Diff::insert(b" "),
+ Diff::equal(b"a"),
+ Diff::insert(b"nd"),
+ Diff::equal(b" [[Hepatopancreatic]]"),
+ Diff::delete(b" and [[New"),
+ ],
+ dmp.diff_main(
+ "a [[Hepatopancreatic]] and [[New",
+ " and [[Hepatopancreatic]]"
+ )?
+ );
+
+ // Timeout.
+ const LOW_TIMEOUT: u32 = 100;
+ dmp.set_timeout(Some(LOW_TIMEOUT));
+ let a = vec!["`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; 2048].join("");
+ let b = vec!["I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; 2048].join("");
+
+ let start = Utc::now().time();
+ dmp.diff_main(&a, &b)?;
+ let end = Utc::now().time();
+ // Test that we took at most the timeout period (+ 5ms being generous).
+ assert!((end - start).num_milliseconds() <= LOW_TIMEOUT as i64 + 5);
+
+ // Test the linemode speedup.
+ // Must be long to pass the 100 char cutoff.
+ // Simple line-mode.
+ dmp.set_timeout(Some(1000));
+ let a = "12345678901234567890123456789 0123456 78901234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
+ let b = "abcdefghij abcdefghij abcdefghij abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n";
+ dmp.set_checklines(false);
+ let res_no_lm = dmp.diff_main(a, b)?;
+ dmp.set_checklines(true);
+ let res_yes_lm = dmp.diff_main(a, b)?;
+
+ // Line mode and character mode must produce identical diffs here.
+ assert_eq!(res_no_lm, res_yes_lm);
+
+ // Single line-mode.
+ let a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890";
+ let b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij";
+ dmp.set_checklines(false);
+ let yes_lm = dmp.diff_main(a, b)?;
+ dmp.set_checklines(true);
+ let no_lm = dmp.diff_main(a, b)?;
+ assert_eq!(no_lm, yes_lm);
+
+ // Overlap line-mode.
+ let a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
+ let b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n";
+ dmp.set_checklines(false);
+ let no_lm = dmp.diff_main(a, b)?;
+ dmp.set_checklines(true);
+ let yes_lm = dmp.diff_main(a, b)?;
+ assert_eq!(rebuild_text(&yes_lm[..])?, rebuild_text(&no_lm[..])?);
+
+ // Benefits of checklines can only be realized in text with many lines
+ let mut dmp = DiffMatchPatch::default();
+ let old = std::fs::read_to_string("testdata/txt_old.txt").unwrap();
+ let new = std::fs::read_to_string("testdata/txt_new.txt").unwrap();
+
+ let start = Instant::now();
+ let diff_yes_lm = dmp.diff_main(&old, &new);
+ let yes_lm_dur = Instant::now() - start;
+ assert!(diff_yes_lm.is_ok());
+
+ dmp.set_checklines(false);
+ let start = Instant::now();
+ let diff_no_lm = dmp.diff_main(&old, &new);
+ let no_lm_dur = Instant::now() - start;
+ assert!(diff_no_lm.is_ok());
+
+ assert!(no_lm_dur > yes_lm_dur);
+ Ok(())
+}
+
+#[test]
+fn test_diff_delta() -> Result<(), Error> {
+ let diffs = vec![
+ Diff::equal(b"jump"),
+ Diff::delete(b"s"),
+ Diff::insert(b"ed"),
+ Diff::equal(b" over "),
+ Diff::delete(b"the"),
+ Diff::insert(b"a"),
+ Diff::equal(b" lazy"),
+ Diff::insert(b"old dog"),
+ ];
+ let txt_old = "jumps over the lazy".as_bytes();
+ assert_eq!(
+ txt_old,
+ DiffMatchPatch::diff_text_old(&diffs)
+ );
+
+ let delta = DiffMatchPatch::to_delta(&diffs);
+ assert_eq!("=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog".as_bytes(), &delta);
+ // Convert delta string into a diff.
+ assert_eq!(diffs, DiffMatchPatch::from_delta(txt_old, &delta)?);
+
+ // Generates error (19 != 20).
+ assert!(DiffMatchPatch::from_delta(&[txt_old, "+".as_bytes()].concat()[..], &delta).is_err());
+
+ // Generates error (19 != 18).
+ assert!(DiffMatchPatch::from_delta(&txt_old[1..], &delta).is_err());
+
+ // Test deltas with special characters.
+ let diffs = vec![
+ Diff::equal("\u{0680} \x00 \t %".as_bytes()),
+ Diff::delete("\u{0681} \x01 \n ^".as_bytes()),
+ Diff::insert("\u{0682} \x02 \\ |".as_bytes())
+ ];
+ let txt_old = DiffMatchPatch::diff_text_old(&diffs);
+ assert_eq!("\u{0680} \x00 \t %\u{0681} \x01 \n ^".as_bytes(), txt_old);
+ let delta = DiffMatchPatch::to_delta(&diffs);
+
+ assert_eq!(b"=8\t-8\t+%DA%82 %02 %5C %7C", &delta[..]);
+ // Convert delta string into a diff.
+ assert_eq!(&diffs, &DiffMatchPatch::from_delta(&txt_old, &delta)?);
+
+ // Verify pool of unchanged characters.
+ let diffs = vec![
+ Diff::insert("A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ".as_bytes())
+ ];
+ let txt_new = DiffMatchPatch::diff_text_new(&diffs);
+ assert_eq!("A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", std::str::from_utf8(&txt_new).unwrap());
+
+ let delta = DiffMatchPatch::to_delta(&diffs);
+ assert_eq!("+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", std::str::from_utf8(&delta).unwrap());
+
+ // Convert delta string into a diff.
+ assert_eq!(diffs, DiffMatchPatch::from_delta("".as_bytes(), &delta)?);
+ Ok(())
+}
+
+// Helper to construct the two texts which made up the diff originally.
+fn rebuild_text(diffs: &[Diff<u8>]) -> Result<(String, String), Error> {
+ let mut txt1 = vec![];
+ let mut txt2 = vec![];
+
+ diffs.iter().for_each(|d| {
+ if d.op() != Ops::Insert {
+ txt1.push(d.data());
+ }
+
+ if d.op() != Ops::Delete {
+ txt2.push(d.data());
+ }
+ });
+
+ Ok((
+ String::from_utf8(txt1.concat()).map_err(|_| Error::Utf8Error)?,
+ String::from_utf8(txt2.concat()).map_err(|_| Error::Utf8Error)?,
+ ))
+}
+
+#[test]
+fn test_patch_from_text() -> Result<(), Error> {
+ let dmp = DiffMatchPatch::new();
+
+ assert!(dmp.patch_from_text("")?.is_empty());
+
+ let strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n";
+ assert_eq!(
+ strp,
+ dmp.patch_from_text(
+ strp
+ )?[0].to_string()
+ );
+
+ assert_eq!(
+ "@@ -1 +1 @@\n-a\n+b\n",
+ dmp.patch_from_text("@@ -1 +1 @@\n-a\n+b\n")?[0].to_string()
+ );
+
+ assert_eq!(
+ "@@ -1,3 +0,0 @@\n-abc\n",
+ dmp.patch_from_text("@@ -1,3 +0,0 @@\n-abc\n")?[0].to_string()
+ );
+
+ assert_eq!(
+ "@@ -0,0 +1,3 @@\n+abc\n",
+ dmp.patch_from_text("@@ -0,0 +1,3 @@\n+abc\n")?[0].to_string()
+ );
+
+ // Generates error.
+ assert!(dmp.patch_from_text("Bad\nPatch\n").is_err());
+
+ Ok(())
+}
+
+#[test]
+fn test_patch_to_text() -> Result<(), Error> {
+ let dmp = DiffMatchPatch::new();
+
+ let strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n";
+ let patches = dmp.patch_from_text(strp)?;
+ assert_eq!(strp, dmp.patch_to_text(&patches));
+
+ let strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n";
+ let patches = dmp.patch_from_text(strp)?;
+ assert_eq!(strp, dmp.patch_to_text(&patches));
+
+ Ok(())
+}
+
+#[test]
+fn test_patch_make() -> Result<(), Error> {
+ let dmp = DiffMatchPatch::default();
+ let patches = dmp.patch_make(PatchInput::Texts("", ""))?;
+ assert!(patches.is_empty());
+
+ let txt1 = "The quick brown fox jumps over the lazy dog.";
+ let txt2 = "That quick brown fox jumped over a lazy dog.";
+
+ // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context.
+ let patches = dmp.patch_make(PatchInput::Texts(txt2, txt1))?;
+ assert_eq!("@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n", dmp.patch_to_text(&patches));
+
+ // Text1+Text2 inputs.
+ let patches = dmp.patch_make(PatchInput::Texts(txt1, txt2))?;
+ assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches));
+
+ // Diff input.
+ let diffs = dmp.diff_main(txt1, txt2)?;
+ let patches = dmp.patch_make(PatchInput::Diffs(&diffs[..]))?;
+ assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches));
+
+ // Text1+Diff inputs.
+ let patches = dmp.patch_make(PatchInput::TextDiffs(txt1, &diffs[..]))?;
+ assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches));
+
+ // Character encoding.
+ let patches = dmp.patch_make(PatchInput::Texts(
+ "`1234567890-=[]\\;',./",
+ "~!@#$%^&*()_+{}|:\"<>?",
+ ))?;
+
+ assert_eq!(
+ "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n",
+ dmp.patch_to_text(&patches)
+ );
+
+ // Character decoding.
+ let diffs = vec![
+ Diff::delete(b"`1234567890-=[]\\;',./"),
+ Diff::insert(b"~!@#$%^&*()_+{}|:\"<>?"),
+ ];
+ assert_eq!(
+ diffs,
+ dmp.patch_from_text("@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n")?[0].diffs()
+ );
+
+ // Long string with repeats.
+ let txt1 = vec!["abcdef"; 100].join("");
+ let txt2 = [&txt1, "123"].join("");
+ let patches = dmp.patch_make(PatchInput::Texts(&txt1, &txt2))?;
+ assert_eq!(
+ "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n",
+ dmp.patch_to_text(&patches)
+ );
+
+ Ok(())
+}
+
+#[test]
+fn test_diff_text() {
+ let diffs = vec![
+ Diff::equal(b"jump"),
+ Diff::delete(b"s"),
+ Diff::insert(b"ed"),
+ Diff::equal(b" over "),
+ Diff::delete(b"the"),
+ Diff::insert(b"a"),
+ Diff::equal(b" lazy"),
+ ];
+
+ assert_eq!(
+ b"jumps over the lazy",
+ &DiffMatchPatch::diff_text_old(&diffs[..])[..]
+ );
+ assert_eq!(
+ b"jumped over a lazy",
+ &DiffMatchPatch::diff_text_new(&diffs[..])[..]
+ );
+}
+
+
+#[test]
+fn test_patch_apply() -> Result<(), Error> {
+ let mut dmp = DiffMatchPatch::default();
+
+ let patches = dmp.patch_make(PatchInput::Texts("", ""))?;
+ let (txt, results) = dmp.patch_apply_internal(&patches, b"Hello world.")?;
+ assert_eq!(
+ format!("{}\t{}", std::str::from_utf8(&txt).unwrap(), results.len()),
+ "Hello world.\t0"
+ );
+
+ let patches = dmp.patch_make(PatchInput::Texts(
+ "The quick brown fox jumps over the lazy dog.",
+ "That quick brown fox jumped over a lazy dog.",
+ ))?;
+
+ // Exact match
+ assert_eq!(
+ (
+ b"That quick brown fox jumped over a lazy dog.".to_vec(),
+ vec![true, true]
+ ),
+ dmp.patch_apply_internal(&patches, b"The quick brown fox jumps over the lazy dog.")?
+ );
+
+ // Partial match
+ assert_eq!(
+ (
+ b"That quick red rabbit jumped over a tired tiger.".to_vec(),
+ vec![true, true]
+ ),
+ dmp.patch_apply_internal(
+ &patches,
+ b"The quick red rabbit jumps over the tired tiger."
+ )?
+ );
+
+ // Failed match
+ assert_eq!(
+ (
+ b"I am the very model of a modern major general.".to_vec(),
+ vec![false, false]
+ ),
+ dmp.patch_apply_internal(&patches, b"I am the very model of a modern major general.")?
+ );
+
+ // Big delete, small change
+ let patches = dmp.patch_make(PatchInput::Texts(
+ "x1234567890123456789012345678901234567890123456789012345678901234567890y",
+ "xabcy",
+ ))?;
+ assert_eq!((b"xabcy".to_vec(), vec![true, true]), dmp.patch_apply_internal(&patches, b"x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y")?);
+
+ // Big delete, large change
+ let patches = dmp.patch_make(PatchInput::Texts(
+ "x1234567890123456789012345678901234567890123456789012345678901234567890y",
+ "xabcy",
+ ))?;
+ assert_eq!((b"xabc12345678901234567890---------------++++++++++---------------12345678901234567890y".to_vec(), vec![false, true]), dmp.patch_apply_internal(&patches, b"x12345678901234567890---------------++++++++++---------------12345678901234567890y")?);
+
+ dmp.delete_threshold = 0.6;
+ let patches = dmp.patch_make(PatchInput::Texts(
+ "x1234567890123456789012345678901234567890123456789012345678901234567890y",
+ "xabcy",
+ ))?;
+ assert_eq!((b"xabcy".to_vec(), vec![true, true]), dmp.patch_apply_internal(&patches, b"x12345678901234567890---------------++++++++++---------------12345678901234567890y")?);
+ dmp.delete_threshold = 0.5;
+
+ // Compensate for failed patch
+ dmp.match_threshold = 0.;
+ dmp.match_distance = 0;
+ let patches = dmp.patch_make(PatchInput::Texts(
+ "abcdefghijklmnopqrstuvwxyz--------------------1234567890",
+ "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890",
+ ))?;
+ assert_eq!(
+ (
+ b"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890".to_vec(),
+ vec![false, true]
+ ),
+ dmp.patch_apply_internal(
+ &patches,
+ b"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890"
+ )?
+ );
+ dmp.match_threshold = 0.5;
+ dmp.match_distance = 1000;
+
+ // No side-effects - kind of useless because patches is not mutable in Rust
+ let patches = dmp.patch_make(PatchInput::Texts("", "test"))?;
+ let srcstr = dmp.patch_to_text(&patches);
+ dmp.patch_apply_internal(&patches, b"")?;
+ assert_eq!(srcstr, dmp.patch_to_text(&patches));
+
+ let patches = dmp.patch_make(PatchInput::Texts(
+ "The quick brown fox jumps over the lazy dog.",
+ "Woof",
+ ))?;
+ let srcstr = dmp.patch_to_text(&patches);
+ dmp.patch_apply_internal(&patches, b"The quick brown fox jumps over the lazy dog.")?;
+ assert_eq!(srcstr, dmp.patch_to_text(&patches));
+
+ // Edge exact match
+ let patches = dmp.patch_make(PatchInput::Texts("", "test"))?;
+ assert_eq!(
+ (b"test".to_vec(), vec![true]),
+ dmp.patch_apply_internal(&patches, b"")?
+ );
+
+ // Near edge exact match
+ let patches = dmp.patch_make(PatchInput::Texts("XY", "XtestY"))?;
+ assert_eq!(
+ (b"XtestY".to_vec(), vec![true]),
+ dmp.patch_apply_internal(&patches, b"XY")?
+ );
+
+ // Edge partial match
+ let patches = dmp.patch_make(PatchInput::Texts("y", "y123"))?;
+ assert_eq!(
+ (b"x123".to_vec(), vec![true]),
+ dmp.patch_apply_internal(&patches, b"x")?
+ );
+
+ Ok(())
+}
+
+#[test]
+fn test_match_alphabet() {
+    // Verifies the per-byte bitmask table built for the Bitap matcher.
+    // In the expected tables below, a byte's mask has bit `len - 1 - i` set
+    // for every index `i` at which that byte occurs in the pattern.
+
+    // Every byte occurs once: each mask is a single bit.
+    let unique_masks = HashMap::from([(b'a', 4), (b'b', 2), (b'c', 1)]);
+    assert_eq!(unique_masks, DiffMatchPatch::match_alphabet(b"abc"));
+
+    // Repeated bytes: the bits of all occurrences are OR-ed together
+    // (e.g. 'a' at indices 0, 3 and 5 of a 6-byte pattern gives 32 + 4 + 1).
+    let repeated_masks = HashMap::from([(b'a', 37), (b'b', 18), (b'c', 8)]);
+    assert_eq!(repeated_masks, DiffMatchPatch::match_alphabet(b"abcaba"));
+}
+
+#[test]
+fn test_match_bitap() {
+    // Drives the Bitap matcher directly, changing `match_threshold` and
+    // `match_distance` between groups of assertions. Each call passes the
+    // text, the pattern and the location to search around.
+    let short_text = b"abcdefghijk";
+    let repeated_text = b"abcdexyzabcde";
+    let alphabet_text = b"abcdefghijklmnopqrstuvwxyz";
+
+    let mut dmp = DiffMatchPatch {
+        match_distance: 100,
+        ..Default::default()
+    };
+
+    // Exact matches: the same hit is expected whether the search starts on
+    // the match or at the beginning of the text.
+    let exact_pattern = b"fgh";
+    assert_eq!(Some(5), dmp.match_bitap(short_text, exact_pattern, 5));
+    assert_eq!(Some(5), dmp.match_bitap(short_text, exact_pattern, 0));
+
+    // Fuzzy matches: patterns containing bytes that are absent from the text.
+    assert_eq!(Some(4), dmp.match_bitap(short_text, b"efxhi", 0));
+    assert_eq!(Some(2), dmp.match_bitap(short_text, b"cdefxyhijk", 5));
+    assert_eq!(None, dmp.match_bitap(short_text, b"bxy", 1));
+
+    // Overflow.
+    assert_eq!(Some(2), dmp.match_bitap(b"123456789xx0", b"3456789x0", 2));
+
+    // Threshold test: the same fuzzy pattern is accepted at 0.4 ...
+    let fuzzy_pattern = b"efxyhi";
+    dmp.match_threshold = 0.4;
+    assert_eq!(Some(4), dmp.match_bitap(short_text, fuzzy_pattern, 1));
+
+    // ... and rejected once the threshold is lowered to 0.3.
+    dmp.match_threshold = 0.3;
+    assert_eq!(None, dmp.match_bitap(short_text, fuzzy_pattern, 1));
+
+    // A threshold of zero still finds a pattern that occurs verbatim.
+    dmp.match_threshold = 0.;
+    assert_eq!(Some(1), dmp.match_bitap(short_text, b"bcdef", 1));
+
+    // Back to 0.5 for the remaining groups.
+    dmp.match_threshold = 0.5;
+
+    // Multiple select: the text holds two candidates, and the expected hit
+    // changes with the requested location.
+    let doubled_pattern = b"abccde";
+    assert_eq!(Some(0), dmp.match_bitap(repeated_text, doubled_pattern, 3));
+    assert_eq!(Some(8), dmp.match_bitap(repeated_text, doubled_pattern, 5));
+
+    // Distance test: with a distance of 10, a verbatim occurrence at index 0
+    // is not reported when searching around location 24 ...
+    dmp.match_distance = 10;
+    assert_eq!(None, dmp.match_bitap(alphabet_text, b"abcdefg", 24));
+    assert_eq!(Some(0), dmp.match_bitap(alphabet_text, b"abcdxxefg", 1));
+
+    // ... whereas a distance of 1000 lets the same search succeed.
+    dmp.match_distance = 1000;
+    assert_eq!(Some(0), dmp.match_bitap(alphabet_text, b"abcdefg", 24));
+}
+
+#[test]
+fn test_match_main() {
+    // Exercises `match_internal` with the default configuration.
+    let dmp = DiffMatchPatch::default();
+    let text = b"abcdef";
+
+    // Shortcut matches: identical text and pattern, empty text, empty
+    // pattern, and a pattern sitting exactly at the requested location.
+    assert_eq!(Some(0), dmp.match_internal(text, text, 1000));
+    assert_eq!(None, dmp.match_internal(b"", text, 1));
+    assert_eq!(Some(3), dmp.match_internal(text, b"", 3));
+    assert_eq!(Some(3), dmp.match_internal(text, b"de", 3));
+
+    // Beyond end match: the pattern runs past the end of the text.
+    assert_eq!(Some(3), dmp.match_internal(text, b"defy", 4));
+
+    // Oversized pattern: the pattern is longer than the whole text.
+    assert_eq!(Some(0), dmp.match_internal(text, b"abcdefy", 0));
+
+    // Complex match: a fuzzy hit inside a longer sentence.
+    let sentence = b"I am the very model of a modern major general.";
+    let needle = b" that berry ";
+    assert_eq!(Some(4), dmp.match_internal(sentence, needle, 5));
+}
\ No newline at end of file