my fork of dmp
| -rw-r--r-- | benches/diff.rs | 2 | ||||
| -rw-r--r-- | src/dmp.rs | 123 | ||||
| -rw-r--r-- | src/lib.rs | 3 | ||||
| -rw-r--r-- | src/traits.rs | 114 | ||||
| -rw-r--r-- | tests/test.rs | 198 |
5 files changed, 388 insertions, 52 deletions
diff --git a/benches/diff.rs b/benches/diff.rs index dcd9330..4c58d5a 100644 --- a/benches/diff.rs +++ b/benches/diff.rs @@ -11,7 +11,7 @@ fn diff_main(c: &mut Criterion) { let dmp = DiffMatchPatch::default(); c.bench_function("diff-match-patch", |bencher| { - bencher.iter(|| dmp.diff_main(&old, &new).unwrap()); + bencher.iter(|| dmp.diff_main_compat(&old, &new).unwrap()); }); } @@ -4,7 +4,7 @@ use std::{char, collections::HashMap, fmt::Display}; use chrono::{NaiveTime, TimeDelta, Utc}; use percent_encoding::{percent_decode, percent_encode, AsciiSet, CONTROLS}; -use crate::{errors::Error, traits::BisectSplit}; +use crate::{errors::Error, DType}; // Appending controls to ensure exact same encoding as cpp variant pub const ENCODE_SET: &AsciiSet = &CONTROLS @@ -34,7 +34,7 @@ pub enum Ops { /// (Ops::Insert, String::new("Goodbye")) means add `Goodbye` /// (Ops::Equal, String::new("World")) means keep world #[derive(Debug, Clone, PartialEq, Eq)] -pub struct Diff<T: Copy + Ord + Eq>(Ops, Vec<T>); +pub struct Diff<T: DType>(Ops, Vec<T>); impl Display for Diff<u8> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -47,7 +47,18 @@ impl Display for Diff<u8> { } } -impl<T: Copy + Ord + Eq> Diff<T> { +impl Display for Diff<char> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "({:?}, {})", + self.op(), + self.data().iter().collect::<String>() + ) + } +} + +impl<T: DType> Diff<T> { /// Create a new diff object pub fn new(op: Ops, data: &[T]) -> Self { Self(op, data.to_vec()) @@ -89,6 +100,11 @@ pub struct DiffMatchPatch { checklines: bool, /// A default timeout in num milliseconda, defaults to 1000 (1 second) timeout: Option<u32>, + /// enable/ disable `compatibility mode` + /// If you are preparing `patches` that need to be compatible across other `diff-match-patch` libraries enable `compatibility` mode + /// Compatibility mode adds some extra overhead of preparing diffs cohereant with `char` representation instead of bytes + /// defaults to `false` + compat_mode: bool, /// At what point is no match declared (0.0 = perfection, 1.0 = very loose). match_threshold: f32, /// How far to search for a match (0 = exact location, 1000+ = broad match). @@ -96,6 +112,8 @@ pub struct DiffMatchPatch { /// 1.0 to the score (0.0 is a perfect match). /// int Match_Distance; match_distance: usize, + /// The number of bits in an int. + match_max_bits: usize, /// When deleting a large block of text (over ~64 characters), how close does /// the contents have to match the expected contents. (0.0 = perfection, /// 1.0 = very loose). Note that `match_threshold` controls how closely the @@ -103,8 +121,6 @@ pub struct DiffMatchPatch { delete_threshold: f32, /// Chunk size for context length. patch_margin: u16, - /// The number of bits in an int. - match_max_bits: usize, } impl Default for DiffMatchPatch { @@ -112,6 +128,7 @@ impl Default for DiffMatchPatch { Self { checklines: true, timeout: Some(1000), + compat_mode: false, match_threshold: 0.5, match_distance: 1000, match_max_bits: 32, @@ -122,7 +139,7 @@ impl Default for DiffMatchPatch { } #[derive(Debug, PartialEq, Eq)] -struct HalfMatch<'a, T: Copy + Ord + Eq> { +struct HalfMatch<'a, T: DType> { prefix_long: &'a [T], suffix_long: &'a [T], prefix_short: &'a [T], @@ -215,13 +232,13 @@ impl DiffMatchPatch { self.match_distance = distance } - pub(crate) fn diff_internal<'a>( + pub(crate) fn diff_internal<'a, T: DType>( &self, - old_bytes: &'a [u8], - new_bytes: &'a [u8], + old_bytes: &'a [T], + new_bytes: &'a [T], linemode: bool, deadline: Option<NaiveTime>, - ) -> Result<Vec<Diff<u8>>, crate::errors::Error> { + ) -> Result<Vec<Diff<T>>, crate::errors::Error> { // First, check if lhs and rhs are equal if old_bytes == new_bytes { if old_bytes.is_empty() { @@ -273,13 +290,13 @@ impl DiffMatchPatch { Ok(diffs) } - fn compute<'a>( + fn compute<'a, T: DType>( &self, - old: &'a [u8], - new: &'a [u8], + old: &'a [T], + new: &'a [T], linemode: bool, deadline: Option<NaiveTime>, - ) -> Result<Vec<Diff<u8>>, crate::errors::Error> { + ) -> Result<Vec<Diff<T>>, crate::errors::Error> { // returning all of the new part if old.is_empty() { return Ok(vec![Diff::insert(new)]); @@ -355,7 +372,7 @@ impl DiffMatchPatch { } } - fn half_match<'a, T: Copy + Ord + Eq>( + fn half_match<'a, T: DType>( &self, old: &'a [T], new: &'a [T], @@ -424,12 +441,12 @@ impl DiffMatchPatch { // Quick line-level diff on both strings, then rediff the parts for greater accuracy // This speedup can produce non-minimal diffs - fn line_mode<'a>( + fn line_mode<'a, T: DType>( &self, - old: &'a [u8], - new: &'a [u8], + old: &'a [T], + new: &'a [T], deadline: Option<NaiveTime>, - ) -> Result<Vec<Diff<u8>>, crate::errors::Error> { + ) -> Result<Vec<Diff<T>>, crate::errors::Error> { let mut diffs = { let to_chars = Self::lines_to_chars(old, new); let diffs = @@ -629,7 +646,7 @@ impl DiffMatchPatch { // Find the 'middle snake' of a diff, split the problem in two // and return the recursively constructed diff. // See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. - pub fn bisect<'a, T: BisectSplit>( + pub fn bisect<'a, T: DType>( &self, old: &'a [T], new: &'a [T], @@ -799,7 +816,7 @@ impl DiffMatchPatch { // is at least half the length of longtext? //idx Start index of quarter length substring within longtext. #[inline] - fn half_match_i<'a, T: Copy + Ord + Eq>( + fn half_match_i<'a, T: DType>( long: &'a [T], short: &'a [T], idx: usize, @@ -858,7 +875,7 @@ impl DiffMatchPatch { // Reverse prefix is suffix // TODO: investigate this further #[inline] - fn common_prefix<T: Copy + Ord + Eq>(lhs: &[T], rhs: &[T], reverse: bool) -> usize { + fn common_prefix<T: DType>(lhs: &[T], rhs: &[T], reverse: bool) -> usize { if lhs.is_empty() || rhs.is_empty() || (!reverse && (lhs.first() != rhs.first())) @@ -897,7 +914,7 @@ impl DiffMatchPatch { } #[inline] - fn common_overlap(lhs: &[u8], rhs: &[u8]) -> usize { + fn common_overlap<T: DType>(lhs: &[T], rhs: &[T]) -> usize { if lhs.is_empty() || rhs.is_empty() { return 0; } @@ -949,7 +966,7 @@ impl DiffMatchPatch { // Reduce the number of edits by eliminating semantically trivial equalities #[inline] - fn cleanup_semantic(diffs: &mut Vec<Diff<u8>>) { + fn cleanup_semantic<T: DType>(diffs: &mut Vec<Diff<T>>) { let mut changes = false; let mut pointer = 0_usize; @@ -1087,7 +1104,7 @@ impl DiffMatchPatch { // Look for single edits surrounded on both sides by equalities // e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came. #[inline] - fn cleanup_semantic_lossless(diffs: &mut Vec<Diff<u8>>) { + fn cleanup_semantic_lossless<T: DType>(diffs: &mut Vec<Diff<T>>) { let mut pointer = 1_usize; let mut difflen = diffs.len(); @@ -1182,9 +1199,9 @@ impl DiffMatchPatch { // boundary falls on logical boundaries // Scores range from 6 (best) to 0 (worst) #[inline] - fn cleanup_semantic_score(one: &[u8], two: &[u8]) -> u8 { - let (char1, char2) = if let (Some(&char1), Some(&char2)) = (one.last(), two.first()) { - (char1 as char, char2 as char) + fn cleanup_semantic_score<T: DType>(one: &[T], two: &[T]) -> u8 { + let (char1, char2) = if let (Some(&char1), Some(&char2)) = (one.last(), two.first()) && let (Some(c1), Some(c2)) = (char1.as_char(), char2.as_char()) { + (c1, c2) } else { return 6; }; @@ -1201,12 +1218,8 @@ impl DiffMatchPatch { let linebreak_1 = whitespace_1 && (char1 == '\n' || char1 == '\r'); let linebreak_2 = whitespace_2 && (char2 == '\n' || char2 == '\r'); - let blankline_1 = linebreak_1 && (one.ends_with(b"\n\n") || (one.ends_with(b"\n\r\n"))); - let blankline_2 = linebreak_2 - && (two.starts_with(b"\r\n\n") - || two.starts_with(b"\r\n\r\n") - || two.starts_with(b"\n\r\n") - || two.starts_with(b"\n\n")); + let blankline_1 = linebreak_1 && T::is_linebreak_end(one); + let blankline_2 = linebreak_2 && T::is_linebreak_start(two); if blankline_1 || blankline_2 { // 5 for blank lines @@ -1231,7 +1244,7 @@ impl DiffMatchPatch { // Reorder and merge like edit sections. Merge equalities. // Any edit section can move as long as it doesn't cross an equality. #[inline] - fn cleanup_merge<T: BisectSplit>(diffs: &mut Vec<Diff<T>>) { + fn cleanup_merge<T: DType>(diffs: &mut Vec<Diff<T>>) { // Push a dummy diff ... this triggers the equality as a last step diffs.push(Diff::equal(&[])); @@ -1624,7 +1637,7 @@ impl DiffMatchPatch { } #[inline] - fn x_index<T: Copy + Eq + Ord>(diffs: &[Diff<T>], loc: usize) -> usize { + fn x_index<T: DType>(diffs: &[Diff<T>], loc: usize) -> usize { let mut char1 = 0; let mut char2 = 0; @@ -1834,17 +1847,17 @@ impl DiffMatchPatch { } #[derive(Debug, Eq, PartialEq)] -struct LineToChars<'a> { +struct LineToChars<'a, T: DType> { chars_old: Vec<usize>, chars_new: Vec<usize>, - lines: Vec<&'a [u8]>, + lines: Vec<&'a [T]>, } impl DiffMatchPatch { #[inline] - fn lines_to_chars<'a>(old: &'a [u8], new: &'a [u8]) -> LineToChars<'a> { - let mut lines: Vec<&'a [u8]> = vec![]; - let mut linehash: HashMap<&'a [u8], usize> = HashMap::new(); + fn lines_to_chars<'a, T: DType>(old: &'a [T], new: &'a [T]) -> LineToChars<'a, T> { + let mut lines: Vec<&'a [T]> = vec![]; + let mut linehash: HashMap<&'a [T], usize> = HashMap::new(); // Allocate 2/3rds of the UTF16::MAX (65535) value space for text1, the rest for text2. // let mut maxlines = 5; @@ -1864,10 +1877,10 @@ impl DiffMatchPatch { } #[inline] - fn lines_to_chars_internal<'a>( - text: &'a [u8], - array: &mut Vec<&'a [u8]>, - hash: &mut HashMap<&'a [u8], usize>, + fn lines_to_chars_internal<'a, T: DType>( + text: &'a [T], + array: &mut Vec<&'a [T]>, + hash: &mut HashMap<&'a [T], usize>, maxlines: usize, ) -> Vec<usize> { let take = maxlines - array.len(); @@ -1878,7 +1891,7 @@ impl DiffMatchPatch { let mut broke = false; let mut cursor = 0; - text.split_inclusive(|u| *u == b'\n') + text.split_inclusive(|u| *u == T::from_char('\n')) .enumerate() .take(take) .for_each(|(idx, line)| { @@ -1913,7 +1926,7 @@ impl DiffMatchPatch { } #[inline] - fn chars_to_lines(diffs: &[Diff<usize>], lines: &[&[u8]]) -> Vec<Diff<u8>> { + fn chars_to_lines<T: DType>(diffs: &[Diff<usize>], lines: &[&[T]]) -> Vec<Diff<T>> { diffs .iter() .map(|d| { @@ -2653,6 +2666,22 @@ impl DiffMatchPatch { ) } + pub fn diff_main_compat(&self, old: &str, new: &str) -> Result<Vec<Diff<char>>, crate::errors::Error> { + let (old, new) = ( + char::from_str(old), + char::from_str(new) + ); + + self.diff_internal( + &old[..], + &new[..], + self.checklines(), + self.deadline(), + ) + } + + + /// A diff of two unrelated texts can be filled with coincidental matches. /// For example, the diff of "mouse" and "sofas" is [(-1, "m"), (1, "s"), (0, "o"), (-1, "u"), (1, "fa"), (0, "s"), (-1, "e")]. /// While this is the optimum diff, it is difficult for humans to understand. Semantic cleanup rewrites the diff, expanding it into a more intelligible format. @@ -1,6 +1,9 @@ +#![feature(trait_alias, let_chains)] + pub mod dmp; pub mod errors; pub mod traits; +pub(crate) use traits::DType; pub use dmp::{DiffMatchPatch, Ops, Patch, PatchInput, Patches}; pub use errors::Error; diff --git a/src/traits.rs b/src/traits.rs index 8a5c9bb..20e0025 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -1,8 +1,11 @@ +use std::hash::Hash; + use chrono::NaiveTime; use crate::dmp::{Diff, DiffMatchPatch}; -pub trait BisectSplit: Copy + Ord + Eq { + +pub trait DType: Copy + Ord + Eq + Hash { fn bisect_split( dmp: &DiffMatchPatch, old: &[Self], @@ -11,9 +14,18 @@ pub trait BisectSplit: Copy + Ord + Eq { y: usize, deadline: Option<NaiveTime>, ) -> Result<Vec<Diff<Self>>, crate::errors::Error>; + + fn from_char(c: char) -> Self; + + fn as_char(&self) -> Option<char>; + + fn from_str(str: &str) -> Vec<Self>; + + fn is_linebreak_end(input: &[Self]) -> bool; + fn is_linebreak_start(input: &[Self]) -> bool; } -impl BisectSplit for u8 { +impl DType for u8 { fn bisect_split( dmp: &DiffMatchPatch, old: &[u8], @@ -34,9 +46,82 @@ impl BisectSplit for u8 { Ok(diffs_a) } + + fn from_char(c: char) -> Self { + c as u8 + } + + fn as_char(&self) -> Option<char> { + Some(*self as char) + } + + fn from_str(str: &str) -> Vec<Self> { + str.as_bytes().to_vec() + } + + #[inline] + fn is_linebreak_end(input: &[Self]) -> bool { + input.ends_with(b"\n\n") || input.ends_with(b"\n\r\n") + } + + #[inline] + fn is_linebreak_start(input: &[Self]) -> bool { + input.starts_with(b"\r\n\n") + || input.starts_with(b"\r\n\r\n") + || input.starts_with(b"\n\r\n") + || input.starts_with(b"\n\n") + } +} + +impl DType for char { + fn bisect_split( + dmp: &DiffMatchPatch, + old: &[char], + new: &[char], + x: usize, + y: usize, + deadline: Option<NaiveTime>, + ) -> Result<Vec<Diff<char>>, crate::errors::Error> { + let old_a = &old[..x]; + let new_a = &new[..y]; + + let old_b = &old[x..]; + let new_b = &new[y..]; + + // Compute both diffs serially. + let mut diffs_a = dmp.diff_internal(old_a, new_a, false, deadline)?; + diffs_a.append(&mut dmp.diff_internal(old_b, new_b, false, deadline)?); + + Ok(diffs_a) + } + + fn from_char(c: char) -> Self { + c + } + + fn as_char(&self) -> Option<char> { + Some(*self) + } + + fn from_str(str: &str) -> Vec<Self> { + str.chars().collect::<Vec<_>>() + } + + #[inline] + fn is_linebreak_end(input: &[Self]) -> bool { + input.ends_with(&['\n', '\n']) || input.ends_with(&['\n', '\r', '\n']) + } + + #[inline] + fn is_linebreak_start(input: &[Self]) -> bool { + input.starts_with(&['\r', '\n', '\n']) + || input.starts_with(&['\r', '\n', '\r', '\n']) + || input.starts_with(&['\n', '\r', '\n']) + || input.starts_with(&['\n', '\n']) + } } -impl BisectSplit for usize { +impl DType for usize { fn bisect_split( dmp: &DiffMatchPatch, old: &[usize], @@ -57,4 +142,25 @@ impl BisectSplit for usize { Ok(diffs_a) } -} + + fn from_char(c: char) -> Self { + (c as u8) as usize + } + + fn as_char(&self) -> Option<char> { + char::from_digit(*self as u32, 10) + } + + fn from_str(_: &str) -> Vec<Self> { + unimplemented!() + } + + fn is_linebreak_end(_: &[Self]) -> bool { + unimplemented!() + } + + #[inline] + fn is_linebreak_start(_: &[Self]) -> bool { + unimplemented!() + } +}
\ No newline at end of file diff --git a/tests/test.rs b/tests/test.rs index fd00a4e..ce839cd 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -357,6 +357,204 @@ fn test_diff_main() -> Result<(), Error> { } #[test] +fn test_diff_main_compat() -> Result<(), Error> { + let mut dmp = DiffMatchPatch::default(); + + // Perform a trivial diff. + // Null case. + assert!(dmp.diff_main_compat("", "")?.is_empty()); + + // Equality + // assert_eq!(vec![Diff::equal(b"abc")], dmp.diff_main_compat("abc", "abc")?); + + // // Simple insert + // assert_eq!( + // vec![Diff::equal(b"ab"), Diff::insert(b"123"), Diff::equal(b"c")], + // dmp.diff_main_compat("abc", "ab123c")? + // ); + + // // Simple delete + // assert_eq!( + // vec![Diff::equal(b"a"), Diff::delete(b"123"), Diff::equal(b"bc")], + // dmp.diff_main_compat("a123bc", "abc")? + // ); + + // // Two insertions + // assert_eq!( + // vec![ + // Diff::equal(b"a"), + // Diff::insert(b"123"), + // Diff::equal(b"b"), + // Diff::insert(b"456"), + // Diff::equal(b"c"), + // ], + // dmp.diff_main_compat("abc", "a123b456c")? + // ); + + // // Two deletions. + // assert_eq!( + // vec![ + // Diff::equal(b"a"), + // Diff::delete(b"123"), + // Diff::equal(b"b"), + // Diff::delete(b"456"), + // Diff::equal(b"c"), + // ], + // dmp.diff_main_compat("a123b456c", "abc")? + // ); + + // // Perform a real diff. + // // Switch off the timeout. + // dmp.set_timeout(None); + // // Simple cases. + // assert_eq!( + // vec![Diff::delete(b"a"), Diff::insert(b"b"),], + // dmp.diff_main_compat("a", "b")? + // ); + + // assert_eq!( + // vec![ + // Diff::delete(b"Apple"), + // Diff::insert(b"Banana"), + // Diff::equal(b"s are a"), + // Diff::insert(b"lso"), + // Diff::equal(b" fruit.") + // ], + // dmp.diff_main_compat("Apples are a fruit.", "Bananas are also fruit.")? + // ); + + // assert_eq!( + // vec![ + // Diff::delete(b"a"), + // Diff::insert("\u{0680}".as_bytes()), + // Diff::equal(b"x"), + // Diff::delete(b"\t"), + // Diff::insert(b"\0") + // ], + // dmp.diff_main_compat("ax\t", "\u{0680}x\0")? + // ); + + // // Overlaps. + // assert_eq!( + // vec![ + // Diff::delete(b"1"), + // Diff::equal(b"a"), + // Diff::delete(b"y"), + // Diff::equal(b"b"), + // Diff::delete(b"2"), + // Diff::insert(b"xab"), + // ], + // dmp.diff_main_compat("1ayb2", "abxab")? + // ); + + // assert_eq!( + // vec![ + // Diff::insert(b"xaxcx"), + // Diff::equal(b"abc"), + // Diff::delete(b"y"), + // ], + // dmp.diff_main_compat("abcy", "xaxcxabc")? + // ); + + // assert_eq!( + // vec![ + // Diff::delete(b"ABCD"), + // Diff::equal(b"a"), + // Diff::delete(b"="), + // Diff::insert(b"-"), + // Diff::equal(b"bcd"), + // Diff::delete(b"="), + // Diff::insert(b"-"), + // Diff::equal(b"efghijklmnopqrs"), + // Diff::delete(b"EFGHIJKLMNOefg"), + // ], + // dmp.diff_main_compat( + // "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", + // "a-bcd-efghijklmnopqrs" + // )? + // ); + + // Large equality. + assert_eq!( + vec![ + Diff::insert(&[' ']), + Diff::equal(&['a']), + Diff::insert(&['n','d']), + Diff::equal(&[' ','[','[','H','e','p','a','t','o','p','a','n','c','r','e','a','t','i','c',']',']']), + Diff::delete(&" and [[New".chars().collect::<Vec<_>>()[..]), + ], + dmp.diff_main_compat( + "a [[Hepatopancreatic]] and [[New", + " and [[Hepatopancreatic]]" + )? + ); + + // Timeout. + const LOW_TIMEOUT: u32 = 100; + dmp.set_timeout(Some(LOW_TIMEOUT)); + let a = vec!["`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; 2048].join(""); + let b = vec!["I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; 2048].join(""); + + let start = Utc::now().time(); + dmp.diff_main(&a, &b)?; + let end = Utc::now().time(); + // Test that we took at least the timeout period (+ 5ms being generous). + assert!((end - start).num_milliseconds() <= LOW_TIMEOUT as i64 + 5); + + // Test the linemode speedup. + // Must be long to pass the 100 char cutoff. + // Simple line-mode. + dmp.set_timeout(Some(1000)); + let a = "12345678901234567890123456789 0123456 78901234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + let b = "abcdefghij abcdefghij abcdefghij abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; + dmp.set_checklines(false); + let res_no_lm = dmp.diff_main(a, b)?; + dmp.set_checklines(true); + let res_yes_lm = dmp.diff_main(a, b)?; + + // Now, we'll run 2 checks - one for result equality + assert_eq!(res_no_lm, res_yes_lm); + + // Single line-mode. + let a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; + let b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; + dmp.set_checklines(false); + let yes_lm = dmp.diff_main(a, b)?; + dmp.set_checklines(true); + let no_lm = dmp.diff_main(a, b)?; + assert_eq!(no_lm, yes_lm); + + // Overlap line-mode. + let a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + let b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; + dmp.set_checklines(false); + let no_lm = dmp.diff_main(a, b)?; + dmp.set_checklines(true); + let yes_lm = dmp.diff_main(a, b)?; + assert_eq!(rebuild_text(&yes_lm[..])?, rebuild_text(&no_lm[..])?); + + // Benefits of checklines can only be realized in text with many lines + let mut dmp = DiffMatchPatch::default(); + let old = std::fs::read_to_string("testdata/txt_old.txt").unwrap(); + let new = std::fs::read_to_string("testdata/txt_new.txt").unwrap(); + + let start = Instant::now(); + let diff_yes_lm = dmp.diff_main(&old, &new); + let yes_lm_dur = Instant::now() - start; + assert!(diff_yes_lm.is_ok()); + + dmp.set_checklines(false); + let start = Instant::now(); + let diff_no_lm = dmp.diff_main(&old, &new); + let no_lm_dur = Instant::now() - start; + assert!(diff_no_lm.is_ok()); + + assert!(no_lm_dur > yes_lm_dur); + + Ok(()) +} + +#[test] fn test_diff_delta() -> Result<(), Error> { let diffs = vec![ Diff::equal(b"jump"), |