my fork of dmp
WIP: with chars
Anubhab Bandyopadhyay 2024-08-25
parent daa2da3 · commit a443403
-rw-r--r--benches/diff.rs2
-rw-r--r--src/dmp.rs123
-rw-r--r--src/lib.rs3
-rw-r--r--src/traits.rs114
-rw-r--r--tests/test.rs198
5 files changed, 388 insertions, 52 deletions
diff --git a/benches/diff.rs b/benches/diff.rs
index dcd9330..4c58d5a 100644
--- a/benches/diff.rs
+++ b/benches/diff.rs
@@ -11,7 +11,7 @@ fn diff_main(c: &mut Criterion) {
let dmp = DiffMatchPatch::default();
c.bench_function("diff-match-patch", |bencher| {
- bencher.iter(|| dmp.diff_main(&old, &new).unwrap());
+ bencher.iter(|| dmp.diff_main_compat(&old, &new).unwrap());
});
}
diff --git a/src/dmp.rs b/src/dmp.rs
index e72b14b..c166be5 100644
--- a/src/dmp.rs
+++ b/src/dmp.rs
@@ -4,7 +4,7 @@ use std::{char, collections::HashMap, fmt::Display};
use chrono::{NaiveTime, TimeDelta, Utc};
use percent_encoding::{percent_decode, percent_encode, AsciiSet, CONTROLS};
-use crate::{errors::Error, traits::BisectSplit};
+use crate::{errors::Error, DType};
// Appending controls to ensure exact same encoding as cpp variant
pub const ENCODE_SET: &AsciiSet = &CONTROLS
@@ -34,7 +34,7 @@ pub enum Ops {
/// (Ops::Insert, String::new("Goodbye")) means add `Goodbye`
/// (Ops::Equal, String::new("World")) means keep world
#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct Diff<T: Copy + Ord + Eq>(Ops, Vec<T>);
+pub struct Diff<T: DType>(Ops, Vec<T>);
impl Display for Diff<u8> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@@ -47,7 +47,18 @@ impl Display for Diff<u8> {
}
}
-impl<T: Copy + Ord + Eq> Diff<T> {
+/// Formats a character-based diff as `(<op>, <text>)`: the operation is written with
+/// its `Debug` representation and the diff's characters are joined into one string.
+impl Display for Diff<char> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let op = self.op();
+        let text = self.data().iter().collect::<String>();
+
+        write!(f, "({:?}, {})", op, text)
+    }
+}
+
+impl<T: DType> Diff<T> {
/// Create a new diff object
pub fn new(op: Ops, data: &[T]) -> Self {
Self(op, data.to_vec())
@@ -89,6 +100,11 @@ pub struct DiffMatchPatch {
checklines: bool,
/// A default timeout in num milliseconda, defaults to 1000 (1 second)
timeout: Option<u32>,
+ /// Enable/disable `compatibility mode`.
+ /// If you are preparing `patches` that need to be compatible with other `diff-match-patch` libraries, enable `compatibility` mode.
+ /// Compatibility mode adds some extra overhead of preparing diffs coherent with a `char` representation instead of bytes.
+ /// Defaults to `false`.
+ compat_mode: bool,
/// At what point is no match declared (0.0 = perfection, 1.0 = very loose).
match_threshold: f32,
/// How far to search for a match (0 = exact location, 1000+ = broad match).
@@ -96,6 +112,8 @@ pub struct DiffMatchPatch {
/// 1.0 to the score (0.0 is a perfect match).
/// int Match_Distance;
match_distance: usize,
+ /// The number of bits in an int.
+ match_max_bits: usize,
/// When deleting a large block of text (over ~64 characters), how close does
/// the contents have to match the expected contents. (0.0 = perfection,
/// 1.0 = very loose). Note that `match_threshold` controls how closely the
@@ -103,8 +121,6 @@ pub struct DiffMatchPatch {
delete_threshold: f32,
/// Chunk size for context length.
patch_margin: u16,
- /// The number of bits in an int.
- match_max_bits: usize,
}
impl Default for DiffMatchPatch {
@@ -112,6 +128,7 @@ impl Default for DiffMatchPatch {
Self {
checklines: true,
timeout: Some(1000),
+ compat_mode: false,
match_threshold: 0.5,
match_distance: 1000,
match_max_bits: 32,
@@ -122,7 +139,7 @@ impl Default for DiffMatchPatch {
}
#[derive(Debug, PartialEq, Eq)]
-struct HalfMatch<'a, T: Copy + Ord + Eq> {
+struct HalfMatch<'a, T: DType> {
prefix_long: &'a [T],
suffix_long: &'a [T],
prefix_short: &'a [T],
@@ -215,13 +232,13 @@ impl DiffMatchPatch {
self.match_distance = distance
}
- pub(crate) fn diff_internal<'a>(
+ pub(crate) fn diff_internal<'a, T: DType>(
&self,
- old_bytes: &'a [u8],
- new_bytes: &'a [u8],
+ old_bytes: &'a [T],
+ new_bytes: &'a [T],
linemode: bool,
deadline: Option<NaiveTime>,
- ) -> Result<Vec<Diff<u8>>, crate::errors::Error> {
+ ) -> Result<Vec<Diff<T>>, crate::errors::Error> {
// First, check if lhs and rhs are equal
if old_bytes == new_bytes {
if old_bytes.is_empty() {
@@ -273,13 +290,13 @@ impl DiffMatchPatch {
Ok(diffs)
}
- fn compute<'a>(
+ fn compute<'a, T: DType>(
&self,
- old: &'a [u8],
- new: &'a [u8],
+ old: &'a [T],
+ new: &'a [T],
linemode: bool,
deadline: Option<NaiveTime>,
- ) -> Result<Vec<Diff<u8>>, crate::errors::Error> {
+ ) -> Result<Vec<Diff<T>>, crate::errors::Error> {
// returning all of the new part
if old.is_empty() {
return Ok(vec![Diff::insert(new)]);
@@ -355,7 +372,7 @@ impl DiffMatchPatch {
}
}
- fn half_match<'a, T: Copy + Ord + Eq>(
+ fn half_match<'a, T: DType>(
&self,
old: &'a [T],
new: &'a [T],
@@ -424,12 +441,12 @@ impl DiffMatchPatch {
// Quick line-level diff on both strings, then rediff the parts for greater accuracy
// This speedup can produce non-minimal diffs
- fn line_mode<'a>(
+ fn line_mode<'a, T: DType>(
&self,
- old: &'a [u8],
- new: &'a [u8],
+ old: &'a [T],
+ new: &'a [T],
deadline: Option<NaiveTime>,
- ) -> Result<Vec<Diff<u8>>, crate::errors::Error> {
+ ) -> Result<Vec<Diff<T>>, crate::errors::Error> {
let mut diffs = {
let to_chars = Self::lines_to_chars(old, new);
let diffs =
@@ -629,7 +646,7 @@ impl DiffMatchPatch {
// Find the 'middle snake' of a diff, split the problem in two
// and return the recursively constructed diff.
// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
- pub fn bisect<'a, T: BisectSplit>(
+ pub fn bisect<'a, T: DType>(
&self,
old: &'a [T],
new: &'a [T],
@@ -799,7 +816,7 @@ impl DiffMatchPatch {
// is at least half the length of longtext?
//idx Start index of quarter length substring within longtext.
#[inline]
- fn half_match_i<'a, T: Copy + Ord + Eq>(
+ fn half_match_i<'a, T: DType>(
long: &'a [T],
short: &'a [T],
idx: usize,
@@ -858,7 +875,7 @@ impl DiffMatchPatch {
// Reverse prefix is suffix
// TODO: investigate this further
#[inline]
- fn common_prefix<T: Copy + Ord + Eq>(lhs: &[T], rhs: &[T], reverse: bool) -> usize {
+ fn common_prefix<T: DType>(lhs: &[T], rhs: &[T], reverse: bool) -> usize {
if lhs.is_empty()
|| rhs.is_empty()
|| (!reverse && (lhs.first() != rhs.first()))
@@ -897,7 +914,7 @@ impl DiffMatchPatch {
}
#[inline]
- fn common_overlap(lhs: &[u8], rhs: &[u8]) -> usize {
+ fn common_overlap<T: DType>(lhs: &[T], rhs: &[T]) -> usize {
if lhs.is_empty() || rhs.is_empty() {
return 0;
}
@@ -949,7 +966,7 @@ impl DiffMatchPatch {
// Reduce the number of edits by eliminating semantically trivial equalities
#[inline]
- fn cleanup_semantic(diffs: &mut Vec<Diff<u8>>) {
+ fn cleanup_semantic<T: DType>(diffs: &mut Vec<Diff<T>>) {
let mut changes = false;
let mut pointer = 0_usize;
@@ -1087,7 +1104,7 @@ impl DiffMatchPatch {
// Look for single edits surrounded on both sides by equalities
// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
#[inline]
- fn cleanup_semantic_lossless(diffs: &mut Vec<Diff<u8>>) {
+ fn cleanup_semantic_lossless<T: DType>(diffs: &mut Vec<Diff<T>>) {
let mut pointer = 1_usize;
let mut difflen = diffs.len();
@@ -1182,9 +1199,9 @@ impl DiffMatchPatch {
// boundary falls on logical boundaries
// Scores range from 6 (best) to 0 (worst)
#[inline]
- fn cleanup_semantic_score(one: &[u8], two: &[u8]) -> u8 {
- let (char1, char2) = if let (Some(&char1), Some(&char2)) = (one.last(), two.first()) {
- (char1 as char, char2 as char)
+ fn cleanup_semantic_score<T: DType>(one: &[T], two: &[T]) -> u8 {
+ let (char1, char2) = if let (Some(&char1), Some(&char2)) = (one.last(), two.first()) && let (Some(c1), Some(c2)) = (char1.as_char(), char2.as_char()) {
+ (c1, c2)
} else {
return 6;
};
@@ -1201,12 +1218,8 @@ impl DiffMatchPatch {
let linebreak_1 = whitespace_1 && (char1 == '\n' || char1 == '\r');
let linebreak_2 = whitespace_2 && (char2 == '\n' || char2 == '\r');
- let blankline_1 = linebreak_1 && (one.ends_with(b"\n\n") || (one.ends_with(b"\n\r\n")));
- let blankline_2 = linebreak_2
- && (two.starts_with(b"\r\n\n")
- || two.starts_with(b"\r\n\r\n")
- || two.starts_with(b"\n\r\n")
- || two.starts_with(b"\n\n"));
+ let blankline_1 = linebreak_1 && T::is_linebreak_end(one);
+ let blankline_2 = linebreak_2 && T::is_linebreak_start(two);
if blankline_1 || blankline_2 {
// 5 for blank lines
@@ -1231,7 +1244,7 @@ impl DiffMatchPatch {
// Reorder and merge like edit sections. Merge equalities.
// Any edit section can move as long as it doesn't cross an equality.
#[inline]
- fn cleanup_merge<T: BisectSplit>(diffs: &mut Vec<Diff<T>>) {
+ fn cleanup_merge<T: DType>(diffs: &mut Vec<Diff<T>>) {
// Push a dummy diff ... this triggers the equality as a last step
diffs.push(Diff::equal(&[]));
@@ -1624,7 +1637,7 @@ impl DiffMatchPatch {
}
#[inline]
- fn x_index<T: Copy + Eq + Ord>(diffs: &[Diff<T>], loc: usize) -> usize {
+ fn x_index<T: DType>(diffs: &[Diff<T>], loc: usize) -> usize {
let mut char1 = 0;
let mut char2 = 0;
@@ -1834,17 +1847,17 @@ impl DiffMatchPatch {
}
#[derive(Debug, Eq, PartialEq)]
-struct LineToChars<'a> {
+struct LineToChars<'a, T: DType> {
chars_old: Vec<usize>,
chars_new: Vec<usize>,
- lines: Vec<&'a [u8]>,
+ lines: Vec<&'a [T]>,
}
impl DiffMatchPatch {
#[inline]
- fn lines_to_chars<'a>(old: &'a [u8], new: &'a [u8]) -> LineToChars<'a> {
- let mut lines: Vec<&'a [u8]> = vec![];
- let mut linehash: HashMap<&'a [u8], usize> = HashMap::new();
+ fn lines_to_chars<'a, T: DType>(old: &'a [T], new: &'a [T]) -> LineToChars<'a, T> {
+ let mut lines: Vec<&'a [T]> = vec![];
+ let mut linehash: HashMap<&'a [T], usize> = HashMap::new();
// Allocate 2/3rds of the UTF16::MAX (65535) value space for text1, the rest for text2.
// let mut maxlines = 5;
@@ -1864,10 +1877,10 @@ impl DiffMatchPatch {
}
#[inline]
- fn lines_to_chars_internal<'a>(
- text: &'a [u8],
- array: &mut Vec<&'a [u8]>,
- hash: &mut HashMap<&'a [u8], usize>,
+ fn lines_to_chars_internal<'a, T: DType>(
+ text: &'a [T],
+ array: &mut Vec<&'a [T]>,
+ hash: &mut HashMap<&'a [T], usize>,
maxlines: usize,
) -> Vec<usize> {
let take = maxlines - array.len();
@@ -1878,7 +1891,7 @@ impl DiffMatchPatch {
let mut broke = false;
let mut cursor = 0;
- text.split_inclusive(|u| *u == b'\n')
+ text.split_inclusive(|u| *u == T::from_char('\n'))
.enumerate()
.take(take)
.for_each(|(idx, line)| {
@@ -1913,7 +1926,7 @@ impl DiffMatchPatch {
}
#[inline]
- fn chars_to_lines(diffs: &[Diff<usize>], lines: &[&[u8]]) -> Vec<Diff<u8>> {
+ fn chars_to_lines<T: DType>(diffs: &[Diff<usize>], lines: &[&[T]]) -> Vec<Diff<T>> {
diffs
.iter()
.map(|d| {
@@ -2653,6 +2666,22 @@ impl DiffMatchPatch {
)
}
+ /// Diffs `old` against `new` as sequences of `char` rather than bytes.
+ ///
+ /// Both inputs are converted with `char::from_str` and handed to `diff_internal`
+ /// together with the configured `checklines()` and `deadline()` values.
+ ///
+ /// # Errors
+ /// Propagates any error returned by `diff_internal`.
+ pub fn diff_main_compat(&self, old: &str, new: &str) -> Result<Vec<Diff<char>>, crate::errors::Error> {
+     let old_chars = char::from_str(old);
+     let new_chars = char::from_str(new);
+
+     self.diff_internal(
+         old_chars.as_slice(),
+         new_chars.as_slice(),
+         self.checklines(),
+         self.deadline(),
+     )
+ }
+
+
+
/// A diff of two unrelated texts can be filled with coincidental matches.
/// For example, the diff of "mouse" and "sofas" is [(-1, "m"), (1, "s"), (0, "o"), (-1, "u"), (1, "fa"), (0, "s"), (-1, "e")].
/// While this is the optimum diff, it is difficult for humans to understand. Semantic cleanup rewrites the diff, expanding it into a more intelligible format.
diff --git a/src/lib.rs b/src/lib.rs
index 684cf21..1b0decc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,6 +1,9 @@
+#![feature(trait_alias, let_chains)]
+
pub mod dmp;
pub mod errors;
pub mod traits;
+pub(crate) use traits::DType;
pub use dmp::{DiffMatchPatch, Ops, Patch, PatchInput, Patches};
pub use errors::Error;
diff --git a/src/traits.rs b/src/traits.rs
index 8a5c9bb..20e0025 100644
--- a/src/traits.rs
+++ b/src/traits.rs
@@ -1,8 +1,11 @@
+use std::hash::Hash;
+
use chrono::NaiveTime;
use crate::dmp::{Diff, DiffMatchPatch};
-pub trait BisectSplit: Copy + Ord + Eq {
+
+pub trait DType: Copy + Ord + Eq + Hash {
fn bisect_split(
dmp: &DiffMatchPatch,
old: &[Self],
@@ -11,9 +14,18 @@ pub trait BisectSplit: Copy + Ord + Eq {
y: usize,
deadline: Option<NaiveTime>,
) -> Result<Vec<Diff<Self>>, crate::errors::Error>;
+
+ fn from_char(c: char) -> Self;
+
+ fn as_char(&self) -> Option<char>;
+
+ fn from_str(str: &str) -> Vec<Self>;
+
+ fn is_linebreak_end(input: &[Self]) -> bool;
+ fn is_linebreak_start(input: &[Self]) -> bool;
}
-impl BisectSplit for u8 {
+impl DType for u8 {
fn bisect_split(
dmp: &DiffMatchPatch,
old: &[u8],
@@ -34,9 +46,82 @@ impl BisectSplit for u8 {
Ok(diffs_a)
}
+
+ /// Converts `c` to a byte with an `as` cast, which keeps only the low 8 bits of the
+ /// code point: characters above U+00FF are truncated, not rejected.
+ fn from_char(c: char) -> Self {
+ c as u8
+ }
+
+ /// Interprets the byte as the code point with the same numeric value
+ /// (U+0000..=U+00FF); the result is always `Some`.
+ fn as_char(&self) -> Option<char> {
+ Some(*self as char)
+ }
+
+ /// Copies the UTF-8 bytes of `str` into a freshly allocated vector.
+ fn from_str(str: &str) -> Vec<Self> {
+     str.bytes().collect()
+ }
+
+ /// Reports whether `input` ends with a blank line, i.e. with `\n\n` or `\n\r\n`.
+ #[inline]
+ fn is_linebreak_end(input: &[Self]) -> bool {
+     matches!(input, [.., b'\n', b'\n'] | [.., b'\n', b'\r', b'\n'])
+ }
+
+ /// Reports whether `input` starts with a blank line: `\r\n\n`, `\r\n\r\n`,
+ /// `\n\r\n` or `\n\n`.
+ #[inline]
+ fn is_linebreak_start(input: &[Self]) -> bool {
+     matches!(
+         input,
+         [b'\r', b'\n', b'\n', ..]
+             | [b'\r', b'\n', b'\r', b'\n', ..]
+             | [b'\n', b'\r', b'\n', ..]
+             | [b'\n', b'\n', ..]
+     )
+ }
+}
+
+impl DType for char {
+    /// Splits both inputs at the bisection point (`x` in `old`, `y` in `new`), diffs
+    /// the two halves one after the other and returns the concatenated result.
+    ///
+    /// Panics if `x > old.len()` or `y > new.len()`.
+    fn bisect_split(
+        dmp: &DiffMatchPatch,
+        old: &[char],
+        new: &[char],
+        x: usize,
+        y: usize,
+        deadline: Option<NaiveTime>,
+    ) -> Result<Vec<Diff<char>>, crate::errors::Error> {
+        let (old_head, old_tail) = old.split_at(x);
+        let (new_head, new_tail) = new.split_at(y);
+
+        // Both halves are diffed serially with line mode switched off, head first.
+        let mut diffs = dmp.diff_internal(old_head, new_head, false, deadline)?;
+        let mut tail_diffs = dmp.diff_internal(old_tail, new_tail, false, deadline)?;
+        diffs.append(&mut tail_diffs);
+
+        Ok(diffs)
+    }
+
+    /// Identity conversion: a `char` element is the character itself.
+    fn from_char(c: char) -> Self {
+        c
+    }
+
+    /// Returns the character itself; always `Some`.
+    fn as_char(&self) -> Option<char> {
+        Some(*self)
+    }
+
+    /// Collects the characters of `str` into a vector.
+    fn from_str(str: &str) -> Vec<Self> {
+        str.chars().collect()
+    }
+
+    /// Reports whether `input` ends with a blank line, i.e. with `\n\n` or `\n\r\n`.
+    #[inline]
+    fn is_linebreak_end(input: &[Self]) -> bool {
+        matches!(input, [.., '\n', '\n'] | [.., '\n', '\r', '\n'])
+    }
+
+    /// Reports whether `input` starts with a blank line: `\r\n\n`, `\r\n\r\n`,
+    /// `\n\r\n` or `\n\n`.
+    #[inline]
+    fn is_linebreak_start(input: &[Self]) -> bool {
+        matches!(
+            input,
+            ['\r', '\n', '\n', ..]
+                | ['\r', '\n', '\r', '\n', ..]
+                | ['\n', '\r', '\n', ..]
+                | ['\n', '\n', ..]
+        )
+    }
+}
-impl BisectSplit for usize {
+impl DType for usize {
fn bisect_split(
dmp: &DiffMatchPatch,
old: &[usize],
@@ -57,4 +142,25 @@ impl BisectSplit for usize {
Ok(diffs_a)
}
-}
+
+ /// Truncates the code point of `c` to its low 8 bits (`as u8`) and widens the
+ /// result to `usize`; characters above U+00FF therefore lose information.
+ fn from_char(c: char) -> Self {
+ (c as u8) as usize
+ }
+
+ /// Returns the decimal digit character for values `0..=9` and `None` for every
+ /// other value (`char::from_digit` with radix 10, applied to `self` cast to `u32`).
+ ///
+ /// NOTE(review): this is not the inverse of `from_char`, which produces code-point
+ /// values rather than digits — confirm whether `char::from_u32` was intended.
+ fn as_char(&self) -> Option<char> {
+ char::from_digit(*self as u32, 10)
+ }
+
+ /// Not implemented for `usize`: calling this panics via `unimplemented!()`.
+ fn from_str(_: &str) -> Vec<Self> {
+ unimplemented!()
+ }
+
+ /// Not implemented for `usize`: calling this panics via `unimplemented!()`.
+ fn is_linebreak_end(_: &[Self]) -> bool {
+ unimplemented!()
+ }
+
+ /// Not implemented for `usize`: calling this panics via `unimplemented!()`.
+ #[inline]
+ fn is_linebreak_start(_: &[Self]) -> bool {
+ unimplemented!()
+ }
+} \ No newline at end of file
diff --git a/tests/test.rs b/tests/test.rs
index fd00a4e..ce839cd 100644
--- a/tests/test.rs
+++ b/tests/test.rs
@@ -357,6 +357,204 @@ fn test_diff_main() -> Result<(), Error> {
}
#[test]
+fn test_diff_main_compat() -> Result<(), Error> {
+ let mut dmp = DiffMatchPatch::default();
+
+ // Perform a trivial diff.
+ // Null case.
+ assert!(dmp.diff_main_compat("", "")?.is_empty());
+
+ // Equality
+ // assert_eq!(vec![Diff::equal(b"abc")], dmp.diff_main_compat("abc", "abc")?);
+
+ // // Simple insert
+ // assert_eq!(
+ // vec![Diff::equal(b"ab"), Diff::insert(b"123"), Diff::equal(b"c")],
+ // dmp.diff_main_compat("abc", "ab123c")?
+ // );
+
+ // // Simple delete
+ // assert_eq!(
+ // vec![Diff::equal(b"a"), Diff::delete(b"123"), Diff::equal(b"bc")],
+ // dmp.diff_main_compat("a123bc", "abc")?
+ // );
+
+ // // Two insertions
+ // assert_eq!(
+ // vec![
+ // Diff::equal(b"a"),
+ // Diff::insert(b"123"),
+ // Diff::equal(b"b"),
+ // Diff::insert(b"456"),
+ // Diff::equal(b"c"),
+ // ],
+ // dmp.diff_main_compat("abc", "a123b456c")?
+ // );
+
+ // // Two deletions.
+ // assert_eq!(
+ // vec![
+ // Diff::equal(b"a"),
+ // Diff::delete(b"123"),
+ // Diff::equal(b"b"),
+ // Diff::delete(b"456"),
+ // Diff::equal(b"c"),
+ // ],
+ // dmp.diff_main_compat("a123b456c", "abc")?
+ // );
+
+ // // Perform a real diff.
+ // // Switch off the timeout.
+ // dmp.set_timeout(None);
+ // // Simple cases.
+ // assert_eq!(
+ // vec![Diff::delete(b"a"), Diff::insert(b"b"),],
+ // dmp.diff_main_compat("a", "b")?
+ // );
+
+ // assert_eq!(
+ // vec![
+ // Diff::delete(b"Apple"),
+ // Diff::insert(b"Banana"),
+ // Diff::equal(b"s are a"),
+ // Diff::insert(b"lso"),
+ // Diff::equal(b" fruit.")
+ // ],
+ // dmp.diff_main_compat("Apples are a fruit.", "Bananas are also fruit.")?
+ // );
+
+ // assert_eq!(
+ // vec![
+ // Diff::delete(b"a"),
+ // Diff::insert("\u{0680}".as_bytes()),
+ // Diff::equal(b"x"),
+ // Diff::delete(b"\t"),
+ // Diff::insert(b"\0")
+ // ],
+ // dmp.diff_main_compat("ax\t", "\u{0680}x\0")?
+ // );
+
+ // // Overlaps.
+ // assert_eq!(
+ // vec![
+ // Diff::delete(b"1"),
+ // Diff::equal(b"a"),
+ // Diff::delete(b"y"),
+ // Diff::equal(b"b"),
+ // Diff::delete(b"2"),
+ // Diff::insert(b"xab"),
+ // ],
+ // dmp.diff_main_compat("1ayb2", "abxab")?
+ // );
+
+ // assert_eq!(
+ // vec![
+ // Diff::insert(b"xaxcx"),
+ // Diff::equal(b"abc"),
+ // Diff::delete(b"y"),
+ // ],
+ // dmp.diff_main_compat("abcy", "xaxcxabc")?
+ // );
+
+ // assert_eq!(
+ // vec![
+ // Diff::delete(b"ABCD"),
+ // Diff::equal(b"a"),
+ // Diff::delete(b"="),
+ // Diff::insert(b"-"),
+ // Diff::equal(b"bcd"),
+ // Diff::delete(b"="),
+ // Diff::insert(b"-"),
+ // Diff::equal(b"efghijklmnopqrs"),
+ // Diff::delete(b"EFGHIJKLMNOefg"),
+ // ],
+ // dmp.diff_main_compat(
+ // "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg",
+ // "a-bcd-efghijklmnopqrs"
+ // )?
+ // );
+
+ // Large equality.
+ assert_eq!(
+ vec![
+ Diff::insert(&[' ']),
+ Diff::equal(&['a']),
+ Diff::insert(&['n','d']),
+ Diff::equal(&[' ','[','[','H','e','p','a','t','o','p','a','n','c','r','e','a','t','i','c',']',']']),
+ Diff::delete(&" and [[New".chars().collect::<Vec<_>>()[..]),
+ ],
+ dmp.diff_main_compat(
+ "a [[Hepatopancreatic]] and [[New",
+ " and [[Hepatopancreatic]]"
+ )?
+ );
+
+ // Timeout.
+ const LOW_TIMEOUT: u32 = 100;
+ dmp.set_timeout(Some(LOW_TIMEOUT));
+ let a = vec!["`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; 2048].join("");
+ let b = vec!["I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; 2048].join("");
+
+ let start = Utc::now().time();
+ dmp.diff_main(&a, &b)?;
+ let end = Utc::now().time();
+ // Test that we took no longer than the timeout period (+ 5ms being generous).
+ assert!((end - start).num_milliseconds() <= LOW_TIMEOUT as i64 + 5);
+
+ // Test the linemode speedup.
+ // Must be long to pass the 100 char cutoff.
+ // Simple line-mode.
+ dmp.set_timeout(Some(1000));
+ let a = "12345678901234567890123456789 0123456 78901234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
+ let b = "abcdefghij abcdefghij abcdefghij abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n";
+ dmp.set_checklines(false);
+ let res_no_lm = dmp.diff_main(a, b)?;
+ dmp.set_checklines(true);
+ let res_yes_lm = dmp.diff_main(a, b)?;
+
+ // Now, we'll run 2 checks - one for result equality
+ assert_eq!(res_no_lm, res_yes_lm);
+
+ // Single line-mode.
+ let a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890";
+ let b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij";
+ dmp.set_checklines(false);
+ let yes_lm = dmp.diff_main(a, b)?;
+ dmp.set_checklines(true);
+ let no_lm = dmp.diff_main(a, b)?;
+ assert_eq!(no_lm, yes_lm);
+
+ // Overlap line-mode.
+ let a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
+ let b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n";
+ dmp.set_checklines(false);
+ let no_lm = dmp.diff_main(a, b)?;
+ dmp.set_checklines(true);
+ let yes_lm = dmp.diff_main(a, b)?;
+ assert_eq!(rebuild_text(&yes_lm[..])?, rebuild_text(&no_lm[..])?);
+
+ // Benefits of checklines can only be realized in text with many lines
+ let mut dmp = DiffMatchPatch::default();
+ let old = std::fs::read_to_string("testdata/txt_old.txt").unwrap();
+ let new = std::fs::read_to_string("testdata/txt_new.txt").unwrap();
+
+ let start = Instant::now();
+ let diff_yes_lm = dmp.diff_main(&old, &new);
+ let yes_lm_dur = Instant::now() - start;
+ assert!(diff_yes_lm.is_ok());
+
+ dmp.set_checklines(false);
+ let start = Instant::now();
+ let diff_no_lm = dmp.diff_main(&old, &new);
+ let no_lm_dur = Instant::now() - start;
+ assert!(diff_no_lm.is_ok());
+
+ assert!(no_lm_dur > yes_lm_dur);
+
+ Ok(())
+}
+
+#[test]
fn test_diff_delta() -> Result<(), Error> {
let diffs = vec![
Diff::equal(b"jump"),