my fork of dmp
| -rw-r--r-- | benches/diff.rs | 4 | ||||
| -rw-r--r-- | src/dmp.rs | 423 | ||||
| -rw-r--r-- | src/lib.rs | 4 | ||||
| -rw-r--r-- | src/traits.rs | 200 | ||||
| -rw-r--r-- | tests/test.rs | 547 |
5 files changed, 912 insertions, 266 deletions
diff --git a/benches/diff.rs b/benches/diff.rs index dcd9330..f426c3c 100644 --- a/benches/diff.rs +++ b/benches/diff.rs @@ -1,7 +1,7 @@ use std::path::Path; use criterion::{criterion_group, criterion_main, Criterion}; -use diff_match_patch_rs::dmp::DiffMatchPatch; +use diff_match_patch_rs::{dmp::DiffMatchPatch, Efficient}; fn diff_main(c: &mut Criterion) { let basedir = Path::new("testdata"); @@ -11,7 +11,7 @@ fn diff_main(c: &mut Criterion) { let dmp = DiffMatchPatch::default(); c.bench_function("diff-match-patch", |bencher| { - bencher.iter(|| dmp.diff_main(&old, &new).unwrap()); + bencher.iter(|| dmp.diff_main::<Efficient>(&old, &new).unwrap()); }); } @@ -2,24 +2,8 @@ use core::str; use std::{char, collections::HashMap, fmt::Display}; use chrono::{NaiveTime, TimeDelta, Utc}; -use percent_encoding::{percent_decode, percent_encode, AsciiSet, CONTROLS}; - -use crate::{errors::Error, traits::BisectSplit}; - -// Appending controls to ensure exact same encoding as cpp variant -pub const ENCODE_SET: &AsciiSet = &CONTROLS - .add(b'"') - .add(b'<') - .add(b'>') - .add(b'`') - .add(b'{') - .add(b'}') - .add(b'%') - .add(b'[') - .add(b'\\') - .add(b']') - .add(b'^') - .add(b'|'); + +use crate::{errors::Error, DType}; /// Enum representing the different ops of diff #[derive(Debug, PartialEq, Eq, Clone, Copy)] @@ -34,7 +18,7 @@ pub enum Ops { /// (Ops::Insert, String::new("Goodbye")) means add `Goodbye` /// (Ops::Equal, String::new("World")) means keep world #[derive(Debug, Clone, PartialEq, Eq)] -pub struct Diff<T: Copy + Ord + Eq>(Ops, Vec<T>); +pub struct Diff<T: DType>(Ops, Vec<T>); impl Display for Diff<u8> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -47,7 +31,18 @@ impl Display for Diff<u8> { } } -impl<T: Copy + Ord + Eq> Diff<T> { +impl Display for Diff<char> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "({:?}, {})", + self.op(), + self.data().iter().collect::<String>() + ) + } +} + +impl<T: DType> Diff<T> { /// Create a new diff object pub fn new(op: Ops, data: &[T]) -> Self { Self(op, data.to_vec()) @@ -96,15 +91,15 @@ pub struct DiffMatchPatch { /// 1.0 to the score (0.0 is a perfect match). /// int Match_Distance; match_distance: usize, + /// The number of bits in an int. + match_max_bits: usize, /// When deleting a large block of text (over ~64 characters), how close does /// the contents have to match the expected contents. (0.0 = perfection, /// 1.0 = very loose). Note that `match_threshold` controls how closely the /// end points of a delete need to match. delete_threshold: f32, /// Chunk size for context length. - patch_margin: u16, - /// The number of bits in an int. - match_max_bits: usize, + patch_margin: u8, } impl Default for DiffMatchPatch { @@ -122,7 +117,7 @@ impl Default for DiffMatchPatch { } #[derive(Debug, PartialEq, Eq)] -struct HalfMatch<'a, T: Copy + Ord + Eq> { +struct HalfMatch<'a, T: DType> { prefix_long: &'a [T], suffix_long: &'a [T], prefix_short: &'a [T], @@ -180,7 +175,7 @@ impl DiffMatchPatch { } // returns the current patch margin - fn patch_margin(&self) -> u16 { + fn patch_margin(&self) -> u8 { self.patch_margin } @@ -215,13 +210,13 @@ impl DiffMatchPatch { self.match_distance = distance } - pub(crate) fn diff_internal<'a>( + pub(crate) fn diff_internal<'a, T: DType>( &self, - old_bytes: &'a [u8], - new_bytes: &'a [u8], + old_bytes: &'a [T], + new_bytes: &'a [T], linemode: bool, deadline: Option<NaiveTime>, - ) -> Result<Vec<Diff<u8>>, crate::errors::Error> { + ) -> Result<Vec<Diff<T>>, crate::errors::Error> { // First, check if lhs and rhs are equal if old_bytes == new_bytes { if old_bytes.is_empty() { @@ -273,13 +268,13 @@ impl DiffMatchPatch { Ok(diffs) } - fn compute<'a>( + fn compute<'a, T: DType>( &self, - old: &'a [u8], - new: &'a [u8], + old: &'a [T], + new: &'a [T], linemode: bool, deadline: Option<NaiveTime>, - ) -> Result<Vec<Diff<u8>>, crate::errors::Error> { + ) -> Result<Vec<Diff<T>>, crate::errors::Error> { // returning all of the new part if old.is_empty() { return Ok(vec![Diff::insert(new)]); @@ -355,11 +350,7 @@ impl DiffMatchPatch { } } - fn half_match<'a, T: Copy + Ord + Eq>( - &self, - old: &'a [T], - new: &'a [T], - ) -> Option<HalfMatch<'a, T>> { + fn half_match<'a, T: DType>(&self, old: &'a [T], new: &'a [T]) -> Option<HalfMatch<'a, T>> { // Don't risk returning a suboptimal diff when we have unlimited time self.timeout()?; @@ -424,12 +415,12 @@ impl DiffMatchPatch { // Quick line-level diff on both strings, then rediff the parts for greater accuracy // This speedup can produce non-minimal diffs - fn line_mode<'a>( + fn line_mode<'a, T: DType>( &self, - old: &'a [u8], - new: &'a [u8], + old: &'a [T], + new: &'a [T], deadline: Option<NaiveTime>, - ) -> Result<Vec<Diff<u8>>, crate::errors::Error> { + ) -> Result<Vec<Diff<T>>, crate::errors::Error> { let mut diffs = { let to_chars = Self::lines_to_chars(old, new); let diffs = @@ -629,7 +620,7 @@ impl DiffMatchPatch { // Find the 'middle snake' of a diff, split the problem in two // and return the recursively constructed diff. // See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. - pub fn bisect<'a, T: BisectSplit>( + pub fn bisect<'a, T: DType>( &self, old: &'a [T], new: &'a [T], @@ -799,7 +790,7 @@ impl DiffMatchPatch { // is at least half the length of longtext? //idx Start index of quarter length substring within longtext. #[inline] - fn half_match_i<'a, T: Copy + Ord + Eq>( + fn half_match_i<'a, T: DType>( long: &'a [T], short: &'a [T], idx: usize, @@ -858,7 +849,7 @@ impl DiffMatchPatch { // Reverse prefix is suffix // TODO: investigate this further #[inline] - fn common_prefix<T: Copy + Ord + Eq>(lhs: &[T], rhs: &[T], reverse: bool) -> usize { + fn common_prefix<T: DType>(lhs: &[T], rhs: &[T], reverse: bool) -> usize { if lhs.is_empty() || rhs.is_empty() || (!reverse && (lhs.first() != rhs.first())) @@ -897,7 +888,7 @@ impl DiffMatchPatch { } #[inline] - fn common_overlap(lhs: &[u8], rhs: &[u8]) -> usize { + fn common_overlap<T: DType>(lhs: &[T], rhs: &[T]) -> usize { if lhs.is_empty() || rhs.is_empty() { return 0; } @@ -949,7 +940,7 @@ impl DiffMatchPatch { // Reduce the number of edits by eliminating semantically trivial equalities #[inline] - fn cleanup_semantic(diffs: &mut Vec<Diff<u8>>) { + fn cleanup_semantic<T: DType>(diffs: &mut Vec<Diff<T>>) { let mut changes = false; let mut pointer = 0_usize; @@ -1087,7 +1078,7 @@ impl DiffMatchPatch { // Look for single edits surrounded on both sides by equalities // e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came. #[inline] - fn cleanup_semantic_lossless(diffs: &mut Vec<Diff<u8>>) { + fn cleanup_semantic_lossless<T: DType>(diffs: &mut Vec<Diff<T>>) { let mut pointer = 1_usize; let mut difflen = diffs.len(); @@ -1182,9 +1173,11 @@ impl DiffMatchPatch { // boundary falls on logical boundaries // Scores range from 6 (best) to 0 (worst) #[inline] - fn cleanup_semantic_score(one: &[u8], two: &[u8]) -> u8 { - let (char1, char2) = if let (Some(&char1), Some(&char2)) = (one.last(), two.first()) { - (char1 as char, char2 as char) + fn cleanup_semantic_score<T: DType>(one: &[T], two: &[T]) -> u8 { + let (char1, char2) = if let (Some(&char1), Some(&char2)) = (one.last(), two.first()) + && let (Some(c1), Some(c2)) = (char1.as_char(), char2.as_char()) + { + (c1, c2) } else { return 6; }; @@ -1201,12 +1194,8 @@ impl DiffMatchPatch { let linebreak_1 = whitespace_1 && (char1 == '\n' || char1 == '\r'); let linebreak_2 = whitespace_2 && (char2 == '\n' || char2 == '\r'); - let blankline_1 = linebreak_1 && (one.ends_with(b"\n\n") || (one.ends_with(b"\n\r\n"))); - let blankline_2 = linebreak_2 - && (two.starts_with(b"\r\n\n") - || two.starts_with(b"\r\n\r\n") - || two.starts_with(b"\n\r\n") - || two.starts_with(b"\n\n")); + let blankline_1 = linebreak_1 && T::is_linebreak_end(one); + let blankline_2 = linebreak_2 && T::is_linebreak_start(two); if blankline_1 || blankline_2 { // 5 for blank lines @@ -1231,7 +1220,7 @@ impl DiffMatchPatch { // Reorder and merge like edit sections. Merge equalities. // Any edit section can move as long as it doesn't cross an equality. #[inline] - fn cleanup_merge<T: BisectSplit>(diffs: &mut Vec<Diff<T>>) { + fn cleanup_merge<T: DType>(diffs: &mut Vec<Diff<T>>) { // Push a dummy diff ... this triggers the equality as a last step diffs.push(Diff::equal(&[])); @@ -1407,26 +1396,28 @@ impl DiffMatchPatch { } } - pub fn to_delta(diffs: &[Diff<u8>]) -> Vec<u8> { + pub fn to_delta<T: DType>(diffs: &[Diff<T>]) -> Vec<T> { let mut data = diffs .iter() .map(|diff| { match diff.op() { Ops::Insert => { - let encoded = percent_encode(diff.data(), ENCODE_SET) - .map(|v| v.as_bytes()) - .collect::<Vec<_>>() - .concat(); + let encoded = T::percent_encode(diff.data()); // format!("+{encoded}") - ["+".as_bytes(), &encoded, "\t".as_bytes()].concat() - } - Ops::Delete => { - [b"-", diff.size().to_string().as_bytes(), "\t".as_bytes()].concat() - } - Ops::Equal => { - // format!("={}", diff.size()) - [b"=", diff.size().to_string().as_bytes(), "\t".as_bytes()].concat() + [&[T::from_char('+')], &encoded[..], &[T::from_char('\t')]].concat() } + Ops::Delete => [ + &[T::from_char('-')], + &T::from_str(diff.size().to_string().as_str())[..], + &[T::from_char('\t')], + ] + .concat(), + Ops::Equal => [ + &[T::from_char('=')], + &T::from_str(diff.size().to_string().as_str())[..], + &[T::from_char('\t')], + ] + .concat(), } }) .collect::<Vec<_>>() @@ -1437,11 +1428,14 @@ impl DiffMatchPatch { data } - pub fn from_delta(old: &[u8], delta: &[u8]) -> Result<Vec<Diff<u8>>, crate::errors::Error> { + pub fn from_delta<T: DType>( + old: &[T], + delta: &[T], + ) -> Result<Vec<Diff<T>>, crate::errors::Error> { let mut pointer = 0; // cursor to text let mut diffs = vec![]; - for token in delta.split(|&k| k == b'\t') { + for token in delta.split(|&k| k == T::from_char('\t')) { if token.is_empty() { continue; } @@ -1451,22 +1445,13 @@ impl DiffMatchPatch { let opcode = token.first(); let param = &token[1..]; - if opcode == Some(&b'+') { - let param = percent_decode(param).collect::<Vec<_>>(); + if opcode == Some(&T::from_char('+')) { + let param = T::percent_decode(param); diffs.push(Diff::insert(¶m)); - } else if opcode == Some(&b'-') || opcode == Some(&b'=') { - let n = match std::str::from_utf8(param) - .map_err(|_| crate::errors::Error::Utf8Error) - .and_then(|t| { - t.parse::<isize>() - .map_err(|_| crate::errors::Error::InvalidInput) - }) { - Ok(n) => n, - Err(_) => { - return Err(crate::errors::Error::InvalidInput); - } - }; - + } else if opcode == Some(&T::from_char('-')) || opcode == Some(&T::from_char('=')) { + let n = T::to_string(param)? + .parse::<isize>() + .map_err(|_| Error::Utf8Error)?; if n < 0 { return Err(crate::errors::Error::InvalidInput); } @@ -1480,7 +1465,7 @@ impl DiffMatchPatch { let txt = &old[pointer..new_pointer]; pointer = new_pointer; - if opcode == Some(&b'=') { + if opcode == Some(&T::from_char('=')) { diffs.push(Diff::equal(txt)) } else { diffs.push(Diff::delete(txt)) @@ -1624,7 +1609,7 @@ impl DiffMatchPatch { } #[inline] - fn x_index<T: Copy + Eq + Ord>(diffs: &[Diff<T>], loc: usize) -> usize { + fn x_index<T: DType>(diffs: &[Diff<T>], loc: usize) -> usize { let mut char1 = 0; let mut char2 = 0; @@ -1666,7 +1651,7 @@ impl DiffMatchPatch { } #[inline] - pub fn diff_text_old(diffs: &[Diff<u8>]) -> Vec<u8> { + pub fn diff_text_old<T: DType>(diffs: &[Diff<T>]) -> Vec<T> { diffs .iter() .filter_map(|diff| { @@ -1680,8 +1665,7 @@ impl DiffMatchPatch { .concat() } - - pub fn diff_text_new(diffs: &[Diff<u8>]) -> Vec<u8> { + pub fn diff_text_new<T: DType>(diffs: &[Diff<T>]) -> Vec<T> { diffs .iter() .filter_map(|diff| { @@ -1699,7 +1683,7 @@ impl DiffMatchPatch { // limit of the match algorithm. // Intended to be called only from within patch_apply. #[inline] - fn split_max(&self, patches: &mut Patches) { + fn split_max<T: DType>(&self, patches: &mut Patches<T>) { let max_bit = self.match_max_bits(); let patch_margin = self.patch_margin() as usize; @@ -1834,17 +1818,17 @@ impl DiffMatchPatch { } #[derive(Debug, Eq, PartialEq)] -struct LineToChars<'a> { +struct LineToChars<'a, T: DType> { chars_old: Vec<usize>, chars_new: Vec<usize>, - lines: Vec<&'a [u8]>, + lines: Vec<&'a [T]>, } impl DiffMatchPatch { #[inline] - fn lines_to_chars<'a>(old: &'a [u8], new: &'a [u8]) -> LineToChars<'a> { - let mut lines: Vec<&'a [u8]> = vec![]; - let mut linehash: HashMap<&'a [u8], usize> = HashMap::new(); + fn lines_to_chars<'a, T: DType>(old: &'a [T], new: &'a [T]) -> LineToChars<'a, T> { + let mut lines: Vec<&'a [T]> = vec![]; + let mut linehash: HashMap<&'a [T], usize> = HashMap::new(); // Allocate 2/3rds of the UTF16::MAX (65535) value space for text1, the rest for text2. // let mut maxlines = 5; @@ -1864,10 +1848,10 @@ impl DiffMatchPatch { } #[inline] - fn lines_to_chars_internal<'a>( - text: &'a [u8], - array: &mut Vec<&'a [u8]>, - hash: &mut HashMap<&'a [u8], usize>, + fn lines_to_chars_internal<'a, T: DType>( + text: &'a [T], + array: &mut Vec<&'a [T]>, + hash: &mut HashMap<&'a [T], usize>, maxlines: usize, ) -> Vec<usize> { let take = maxlines - array.len(); @@ -1878,7 +1862,7 @@ impl DiffMatchPatch { let mut broke = false; let mut cursor = 0; - text.split_inclusive(|u| *u == b'\n') + text.split_inclusive(|u| *u == T::from_char('\n')) .enumerate() .take(take) .for_each(|(idx, line)| { @@ -1913,7 +1897,7 @@ impl DiffMatchPatch { } #[inline] - fn chars_to_lines(diffs: &[Diff<usize>], lines: &[&[u8]]) -> Vec<Diff<u8>> { + fn chars_to_lines<T: DType>(diffs: &[Diff<usize>], lines: &[&[T]]) -> Vec<Diff<T>> { diffs .iter() .map(|d| { @@ -1936,7 +1920,7 @@ impl DiffMatchPatch { // Match methods impl DiffMatchPatch { - fn match_internal(&self, text: &[u8], pattern: &[u8], loc: usize) -> Option<usize> { + fn match_internal<T: DType>(&self, text: &[T], pattern: &[T], loc: usize) -> Option<usize> { // Check for null inputs. // Nothing to match. if text.is_empty() { @@ -1959,7 +1943,7 @@ impl DiffMatchPatch { } } - fn match_bitap(&self, text: &[u8], pattern: &[u8], loc: usize) -> Option<usize> { + fn match_bitap<T: DType>(&self, text: &[T], pattern: &[T], loc: usize) -> Option<usize> { if pattern.len() > self.match_max_bits() { todo!("Throw error"); } @@ -2085,7 +2069,7 @@ impl DiffMatchPatch { best_loc } - fn match_alphabet(pattern: &[u8]) -> HashMap<u8, usize> { + fn match_alphabet<T: DType>(pattern: &[T]) -> HashMap<T, usize> { let mut map = HashMap::with_capacity(pattern.len()); pattern.iter().enumerate().for_each(|(i, &p)| { @@ -2115,16 +2099,28 @@ impl DiffMatchPatch { } // Patch Methods -#[derive(Debug, Default, Clone)] -pub struct Patch { - diffs: Vec<Diff<u8>>, +#[derive(Debug, Clone)] +pub struct Patch<T: DType> { + diffs: Vec<Diff<T>>, start1: usize, start2: usize, length1: usize, length2: usize, } -impl Display for Patch { +impl<T: DType> Default for Patch<T> { + fn default() -> Self { + Self { + diffs: Vec::new(), + start1: 0, + start2: 0, + length1: 0, + length2: 0, + } + } +} + +impl<T: DType> Display for Patch<T> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let coord1 = if self.length1 == 0 { format!("{},0", self.start1) @@ -2161,37 +2157,43 @@ impl Display for Patch { coord2, " @@\n".to_string(), ]; - self.diffs.iter().for_each(|diff| { + for diff in self.diffs.iter() { let sign = match diff.op() { Ops::Insert => '+', Ops::Delete => '-', Ops::Equal => ' ', }; - let segment = format!("{sign}{}\n", percent_encode(diff.data(), ENCODE_SET)); + let enc = T::percent_encode(diff.data()); + let segment = format!( + "{sign}{}\n", + T::to_string(&enc).map_err(|_| std::fmt::Error)? + ); segments.push(segment) - }); + } write!(f, "{}", segments.join("")) } } -impl Patch { - pub fn diffs(&self) -> &[Diff<u8>] { +impl<T: DType> Patch<T> { + pub fn diffs(&self) -> &[Diff<T>] { &self.diffs[..] } } -pub enum PatchInput<'a> { +pub enum PatchInput<'a, T: DType> { Texts(&'a str, &'a str), - Diffs(&'a [Diff<u8>]), - TextDiffs(&'a str, &'a [Diff<u8>]), + Diffs(&'a [Diff<T>]), + TextDiffs(&'a str, &'a [Diff<T>]), } -pub type Patches = Vec<Patch>; +pub type Patches<T> = Vec<Patch<T>>; impl DiffMatchPatch { - fn parse_patch_header(s: &[u8]) -> Option<(usize, Option<usize>, usize, Option<usize>)> { + fn parse_patch_header<T: DType>( + s: &[T], + ) -> Option<(usize, Option<usize>, usize, Option<usize>)> { let mut section = Vec::with_capacity(64); let mut current_sect = 0; @@ -2201,10 +2203,10 @@ impl DiffMatchPatch { let mut new_cols = None; for &c in s.iter() { - if c == b' ' { + if c == T::from_char(' ') { match current_sect { 0 => { - if §ion != b"@@" { + if section != T::from_str("@@") { return None; } } @@ -2213,23 +2215,29 @@ impl DiffMatchPatch { return None; } - let splits = section[1..].split(|&p| p == b',').collect::<Vec<_>>(); + let splits = section[1..] + .split(|&p| p == T::from_char(',')) + .collect::<Vec<_>>(); let ol = splits.first()?; - old_line = std::str::from_utf8(ol).ok()?.parse::<usize>().ok()?; + old_line = T::to_string(ol).ok()?.parse::<usize>().ok()?; if let Some(&oc) = splits.get(1) { - old_cols = Some(std::str::from_utf8(oc).ok()?.parse::<usize>().ok()?); + old_cols = Some(T::to_string(oc).ok()?.parse::<usize>().ok()?); } } 2 => { - let splits = section[if *section.first()? == b'+' { 1 } else { 0 }..] - .split(|&p| p == b',') + let splits = section[if *section.first()? == T::from_char('+') { + 1 + } else { + 0 + }..] + .split(|&p| p == T::from_char(',')) .collect::<Vec<_>>(); let nl = splits.first()?; - new_line = std::str::from_utf8(nl).ok()?.parse::<usize>().ok()?; + new_line = T::to_string(nl).ok()?.parse::<usize>().ok()?; if let Some(&nc) = splits.get(1) { - new_cols = Some(std::str::from_utf8(nc).ok()?.parse::<usize>().ok()?); + new_cols = Some(T::to_string(nc).ok()?.parse::<usize>().ok()?); } } _ => { @@ -2243,25 +2251,25 @@ impl DiffMatchPatch { continue; } - if current_sect == 1 && section.is_empty() && c != b'-' { + if current_sect == 1 && section.is_empty() && c != T::from_char('-') { return None; } section.push(c); } - if §ion != b"@@" { + if section != T::from_str("@@") { return None; } Some((old_line, old_cols, new_line, new_cols)) } - fn patch_make_internal( + fn patch_make_internal<T: DType>( &self, - txt: &[u8], - diffs: &[Diff<u8>], - ) -> Result<Patches, crate::errors::Error> { + txt: &[T], + diffs: &[Diff<T>], + ) -> Result<Patches<T>, crate::errors::Error> { // No diffs -> no patches if diffs.is_empty() { return Ok(Vec::new()); @@ -2275,8 +2283,8 @@ impl DiffMatchPatch { let mut char_n1 = 0; let mut char_n2 = 0; - let mut prepatch: Vec<u8> = txt.to_vec(); - let mut postpatch: Vec<u8> = prepatch.clone(); + let mut prepatch: Vec<T> = txt.to_vec(); + let mut postpatch: Vec<T> = prepatch.clone(); diffs.iter().enumerate().for_each(|(idx, diff)| { // a new patch starts here @@ -2342,7 +2350,7 @@ impl DiffMatchPatch { Ok(patches) } - fn patch_add_context(&self, patch: &mut Patch, text: &[u8]) { + fn patch_add_context<T: DType>(&self, patch: &mut Patch<T>, text: &[T]) { if text.is_empty() { return; } @@ -2409,11 +2417,11 @@ impl DiffMatchPatch { patch.length2 += prefix.len() + suffix.len(); } - fn patch_apply_internal( + fn patch_apply_internal<T: DType>( &self, - patches: &Patches, - source: &[u8], - ) -> Result<(Vec<u8>, Vec<bool>), crate::errors::Error> { + patches: &Patches<T>, + source: &[T], + ) -> Result<(Vec<T>, Vec<bool>), crate::errors::Error> { if patches.is_empty() { return Ok((source.to_vec(), vec![])); } @@ -2533,13 +2541,13 @@ impl DiffMatchPatch { Ok((source, results)) } - fn patch_add_padding(&self, patches: &mut Patches) -> Vec<u8> { - let pad_len = self.patch_margin() as usize; - - let null_pad = (1..pad_len + 1) - .filter_map(|c| char::from_u32(c as u32).map(|c_| c_ as u8)) + fn patch_add_padding<T: DType>(&self, patches: &mut Patches<T>) -> Vec<T> { + let null_pad = (1..self.patch_margin() + 1) + .filter_map(|c| c.as_char().map(|c| T::from_char(c))) .collect::<Vec<_>>(); + let pad_len = self.patch_margin() as usize; + // Bump all the patches forward. patches.iter_mut().for_each(|p| { p.start1 += pad_len; @@ -2607,14 +2615,14 @@ impl DiffMatchPatch { /// Create a new instance of the struct with default settings /// # Example /// ``` - /// use diff_match_patch_rs::{DiffMatchPatch, Error}; + /// use diff_match_patch_rs::{DiffMatchPatch, Error, Efficient}; /// /// # fn main() -> Result<(), Error> { /// let mut dmp = DiffMatchPatch::new(); /// // change some settings, e.g. set `line mode` optimization to `false` because you know you have a small text and not many lines /// dmp.set_checklines(false); /// // do the diffing - /// let diffs = dmp.diff_main("Fast enough", "Blazing fast")?; + /// let diffs = dmp.diff_main::<Efficient>("Fast enough", "Blazing fast")?; /// # Ok(()) /// # } /// ``` @@ -2627,14 +2635,14 @@ impl DiffMatchPatch { /// Vec of changes (Diff). /// # Example /// ``` - /// use diff_match_patch_rs::{DiffMatchPatch, Error}; + /// use diff_match_patch_rs::{DiffMatchPatch, Error, Efficient}; /// /// # fn main() -> Result<(), Error> { /// let mut dmp = DiffMatchPatch::new(); /// // change some settings, e.g. set `line mode` optimization to `false` because you know you have a small text and not many lines /// dmp.set_checklines(false); /// // do the diffing - /// let diffs = dmp.diff_main("Fast enough", "Blazing fast")?; + /// let diffs = dmp.diff_main::<Efficient>("Fast enough", "Blazing fast")?; /// println!("{}", diffs.iter().map(|d| format!("d")).collect::<Vec<_>>().join("\n")); /// // You should see the following output /// // (Delete, F) @@ -2644,13 +2652,15 @@ impl DiffMatchPatch { /// # Ok(()) /// # } /// ``` - pub fn diff_main(&self, old: &str, new: &str) -> Result<Vec<Diff<u8>>, crate::errors::Error> { - self.diff_internal( - old.as_bytes(), - new.as_bytes(), - self.checklines(), - self.deadline(), - ) + pub fn diff_main<T: DType>( + &self, + old: &str, + new: &str, + ) -> Result<Vec<Diff<T>>, crate::errors::Error> { + let old = T::from_str(old); + let new = T::from_str(new); + + self.diff_internal(&old, &new, self.checklines(), self.deadline()) } /// A diff of two unrelated texts can be filled with coincidental matches. @@ -2674,7 +2684,7 @@ impl DiffMatchPatch { /// Given a diff, measure its Levenshtein distance in terms of the number of inserted, deleted or substituted characters. /// The minimum distance is 0 which means equality, the maximum distance is the length of the longer string. - pub fn diff_levenshtein(&self, diffs: &[Diff<u8>]) -> usize { + pub fn diff_levenshtein<T: DType>(&self, diffs: &[Diff<T>]) -> usize { let mut levenshtein = 0; let mut insert = 0; let mut delete = 0; @@ -2853,46 +2863,53 @@ impl DiffMatchPatch { /// Given two texts, or an already computed list of differences, return an array of patch objects. /// The third form PatchInput::TextDiffs(...) is preferred, use it if you happen to have that data available, otherwise this function will compute the missing pieces. /// TODO: add example - pub fn patch_make(&self, input: PatchInput) -> Result<Patches, crate::errors::Error> { + pub fn patch_make<T: DType>( + &self, + input: PatchInput<T>, + ) -> Result<Patches<T>, crate::errors::Error> { let mut diff_input; let txt_old; let (txt, diffs) = match input { // No diffs provided, lets make our own PatchInput::Texts(txt1, txt2) => { - let dmp = DiffMatchPatch::default(); - diff_input = dmp.diff_main(txt1, txt2)?; + diff_input = self.diff_main(txt1, txt2)?; if diff_input.len() > 2 { Self::cleanup_semantic(&mut diff_input); } - (txt1.as_bytes(), &diff_input[..]) + (T::from_str(txt1), &diff_input[..]) } PatchInput::Diffs(diffs) => { // No origin string provided, compute our own. - txt_old = Self::diff_text_old(diffs); - (&txt_old[..], diffs) + (Self::diff_text_old(diffs), diffs) + } + PatchInput::TextDiffs(txt, diffs) => { + txt_old = T::from_str(txt); + (txt_old, diffs) } - PatchInput::TextDiffs(txt, diffs) => (txt.as_bytes(), diffs), }; - self.patch_make_internal(txt, diffs) + self.patch_make_internal(&txt, diffs) } /// Reduces an array of patch objects to a block of text which looks extremely similar to the standard GNU diff/patch format. This text may be stored or transmitted. /// TODO: add example - pub fn patch_to_text(&self, patches: &Patches) -> String { + pub fn patch_to_text<T: DType>(&self, patches: &Patches<T>) -> String { patches.iter().map(|p| p.to_string()).collect::<String>() } /// Parses a block of text (which was presumably created by the patch_toText function) and returns an array of patch objects. /// TODO: add example - pub fn patch_from_text(&self, text: &str) -> Result<Patches, Error> { + pub fn patch_from_text<T: DType>(&self, text: &str) -> Result<Patches<T>, Error> { if text.is_empty() { return Ok(vec![]); } - let mut text = text.as_bytes().split(|&p| p == b'\n').collect::<Vec<_>>(); + let txt_t = T::from_str(text); + let mut text = txt_t + .split(|&p| p == T::from_char('\n')) + .collect::<Vec<_>>(); let mut patches = vec![]; @@ -2946,27 +2963,21 @@ impl DiffMatchPatch { }; // Should never panic, already checked for `empty` - let sign = txt.first().unwrap(); - - let line = percent_decode(&txt[1..]).collect::<Vec<_>>(); - - match sign { - b'-' => { - patch.diffs.push(Diff::delete(&line)); - } - b'+' => { - patch.diffs.push(Diff::insert(&line)); - } - b' ' => { - patch.diffs.push(Diff::equal(&line)); - } - b'@' => { - // next patch, break - break; - } - _ => { - return Err(Error::InvalidInput); - } + let &sign = txt.first().unwrap(); + + let line = T::percent_decode(&txt[1..]); + + if sign == T::from_char('-') { + patch.diffs.push(Diff::delete(&line)); + } else if sign == T::from_char('+') { + patch.diffs.push(Diff::insert(&line)); + } else if sign == T::from_char(' ') { + patch.diffs.push(Diff::equal(&line)); + } else if sign == T::from_char('@') { + // next patch, break + break; + } else { + return Err(Error::InvalidInput); } text.remove(0); @@ -2989,15 +3000,15 @@ impl DiffMatchPatch { /// If patch_delete_threshold is closer to 1, then the deleted text may contain anything. /// In most use cases Patch_DeleteThreshold should just be set to the same value as match_threshold. /// TODO: add example - pub fn patch_apply( + pub fn patch_apply<T: DType>( &self, - patches: &Patches, + patches: &Patches<T>, source_txt: &str, ) -> Result<(String, Vec<bool>), crate::errors::Error> { - let (str_bytes, results) = self.patch_apply_internal(patches, source_txt.as_bytes())?; + let (str_data, results) = self.patch_apply_internal(patches, &T::from_str(source_txt))?; Ok(( - String::from_utf8(str_bytes).map_err(|_| crate::errors::Error::Utf8Error)?, + T::to_string(&str_data).map_err(|_| crate::errors::Error::Utf8Error)?, results, )) } @@ -3009,7 +3020,7 @@ mod tests { use crate::{ dmp::{Diff, HalfMatch, LineToChars}, - DiffMatchPatch, Error, Patch, PatchInput, + DiffMatchPatch, Efficient, Error, Patch, PatchInput, }; #[test] @@ -3757,8 +3768,9 @@ mod tests { fn test_patch_add_padding() -> Result<(), Error> { let dmp = DiffMatchPatch::default(); // Both edges full. - let mut patches = dmp.patch_make(PatchInput::Texts("", "test"))?; + let mut patches = dmp.patch_make(PatchInput::Texts::<Efficient>("", "test"))?; assert_eq!("@@ -0,0 +1,4 @@\n+test\n", dmp.patch_to_text(&patches)); + dmp.patch_add_padding(&mut patches); assert_eq!( "@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", @@ -3766,7 +3778,7 @@ mod tests { ); // Both edges partial. - let mut patches = dmp.patch_make(PatchInput::Texts("XY", "XtestY"))?; + let mut patches = dmp.patch_make(PatchInput::Texts::<Efficient>("XY", "XtestY"))?; assert_eq!( "@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_to_text(&patches) @@ -3778,7 +3790,8 @@ mod tests { ); // Both edges none. - let mut patches = dmp.patch_make(PatchInput::Texts("XXXXYYYY", "XXXXtestYYYY"))?; + let mut patches = + dmp.patch_make(PatchInput::Texts::<Efficient>("XXXXYYYY", "XXXXtestYYYY"))?; assert_eq!( "@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_to_text(&patches) @@ -3799,7 +3812,7 @@ mod tests { let dmp = DiffMatchPatch::default(); // Assumes that dmp.Match_MaxBits is 32. - let mut patches = dmp.patch_make(PatchInput::Texts( + let mut patches = dmp.patch_make(PatchInput::Texts::<char>( "abcdefghijklmnopqrstuvwxyz01234567890", "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0", ))?; @@ -3809,7 +3822,7 @@ mod tests { dmp.patch_to_text(&patches) ); - let mut patches = dmp.patch_make(PatchInput::Texts( + let mut patches = dmp.patch_make(PatchInput::Texts::<char>( "abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", "abcdefuvwxyz", ))?; @@ -3817,7 +3830,7 @@ mod tests { dmp.split_max(&mut patches); assert_eq!(p2t, dmp.patch_to_text(&patches)); - let mut patches = dmp.patch_make(PatchInput::Texts( + let mut patches = dmp.patch_make(PatchInput::Texts::<u8>( "1234567890123456789012345678901234567890123456789012345678901234567890", "abc", ))?; @@ -3827,7 +3840,7 @@ mod tests { dmp.patch_to_text(&patches) ); - let mut patches = dmp.patch_make(PatchInput::Texts( + let mut patches = dmp.patch_make(PatchInput::Texts::<char>( "abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1", ))?; @@ -1,6 +1,10 @@ +#![feature(trait_alias, let_chains)] + pub mod dmp; pub mod errors; pub mod traits; pub use dmp::{DiffMatchPatch, Ops, Patch, PatchInput, Patches}; pub use errors::Error; +pub(crate) use traits::DType; +pub use traits::{Compat, Efficient}; diff --git a/src/traits.rs b/src/traits.rs index 8a5c9bb..3818837 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -1,8 +1,30 @@ +use std::hash::Hash; + use chrono::NaiveTime; +use percent_encoding::{percent_decode, AsciiSet, CONTROLS}; use crate::dmp::{Diff, DiffMatchPatch}; -pub trait BisectSplit: Copy + Ord + Eq { +pub type Efficient = u8; +pub type Compat = char; + +// Appending controls to ensure exact same encoding as cpp variant +const ENCODE_SET: &AsciiSet = &CONTROLS + .add(b'"') + .add(b'<') + .add(b'>') + .add(b'`') + .add(b'{') + .add(b'}') + .add(b'%') + .add(b'[') + .add(b'\\') + .add(b']') + .add(b'^') + .add(b'|'); + +pub trait DType: Copy + Ord + Eq + Hash { + // fn differ(dmp: &DiffMatchPatch, txt_old: &str, txt_new: &str) -> Result<Vec<Diff<Self>>, crate::errors::Error>; fn bisect_split( dmp: &DiffMatchPatch, old: &[Self], @@ -11,9 +33,20 @@ pub trait BisectSplit: Copy + Ord + Eq { y: usize, deadline: Option<NaiveTime>, ) -> Result<Vec<Diff<Self>>, crate::errors::Error>; + + fn from_char(c: char) -> Self; + fn as_char(&self) -> Option<char>; + fn from_str(str: &str) -> Vec<Self>; + fn to_string(data: &[Self]) -> Result<String, crate::Error>; + + fn is_linebreak_end(input: &[Self]) -> bool; + fn is_linebreak_start(input: &[Self]) -> bool; + + fn percent_encode(input: &[Self]) -> Vec<Self>; + fn percent_decode(input: &[Self]) -> Vec<Self>; } -impl BisectSplit for u8 { +impl DType for u8 { fn bisect_split( dmp: &DiffMatchPatch, old: &[u8], @@ -34,9 +67,135 @@ impl BisectSplit for u8 { Ok(diffs_a) } + + fn from_char(c: char) -> Self { + c as u8 + } + + fn as_char(&self) -> Option<char> { + Some(*self as char) + } + + fn from_str(str: &str) -> Vec<Self> { + str.as_bytes().to_vec() + } + + #[inline] + fn to_string(data: &[Self]) -> Result<String, crate::Error> { + std::str::from_utf8(data) + .map_err(|_| crate::Error::Utf8Error) + .map(|s| s.to_string()) + } + + #[inline] + fn is_linebreak_end(input: &[Self]) -> bool { + input.ends_with(b"\n\n") || input.ends_with(b"\n\r\n") + } + + #[inline] + fn is_linebreak_start(input: &[Self]) -> bool { + input.starts_with(b"\r\n\n") + || input.starts_with(b"\r\n\r\n") + || input.starts_with(b"\n\r\n") + || input.starts_with(b"\n\n") + } + + #[inline] + fn percent_encode(input: &[Self]) -> Vec<Self> { + percent_encoding::percent_encode(input, ENCODE_SET) + .collect::<String>() + .as_bytes() + .to_vec() + } + + #[inline] + fn percent_decode(input: &[Self]) -> Vec<Self> { + percent_decode(input).collect() + } +} + +impl DType for char { + fn bisect_split( + dmp: &DiffMatchPatch, + old: &[char], + new: &[char], + x: usize, + y: usize, + deadline: Option<NaiveTime>, + ) -> Result<Vec<Diff<char>>, crate::errors::Error> { + let old_a = &old[..x]; + let new_a = &new[..y]; + + let old_b = &old[x..]; + let new_b = &new[y..]; + + // Compute both diffs serially. + let mut diffs_a = dmp.diff_internal(old_a, new_a, false, deadline)?; + diffs_a.append(&mut dmp.diff_internal(old_b, new_b, false, deadline)?); + + Ok(diffs_a) + } + + fn from_char(c: char) -> Self { + c + } + + fn as_char(&self) -> Option<char> { + Some(*self) + } + + fn from_str(str: &str) -> Vec<Self> { + str.chars().collect::<Vec<_>>() + } + + #[inline] + fn to_string(data: &[Self]) -> Result<String, crate::Error> { + Ok(data.iter().collect::<String>()) + } + + #[inline] + fn is_linebreak_end(input: &[Self]) -> bool { + input.ends_with(&['\n', '\n']) || input.ends_with(&['\n', '\r', '\n']) + } + + #[inline] + fn is_linebreak_start(input: &[Self]) -> bool { + input.starts_with(&['\r', '\n', '\n']) + || input.starts_with(&['\r', '\n', '\r', '\n']) + || input.starts_with(&['\n', '\r', '\n']) + || input.starts_with(&['\n', '\n']) + } + + #[inline] + fn percent_encode(input: &[Self]) -> Vec<Self> { + let d = input + .iter() + .map(|c| { + let mut b = vec![0; c.len_utf8()]; + c.encode_utf8(&mut b); + + b + }) + .collect::<Vec<_>>() + .concat(); + + let encoded = percent_encoding::percent_encode(&d[..], ENCODE_SET).collect::<String>(); + + Self::from_str(&encoded) + } + + #[inline] + fn percent_decode(input: &[Self]) -> Vec<Self> { + let ip = input.iter().collect::<String>(); + percent_decode(ip.as_bytes()) + .decode_utf8() + .unwrap() + .chars() + .collect() + } } -impl BisectSplit for usize { +impl DType for usize { fn bisect_split( dmp: &DiffMatchPatch, old: &[usize], @@ -57,4 +216,39 @@ impl BisectSplit for usize { Ok(diffs_a) } + + fn from_char(c: char) -> Self { + (c as u8) as usize + } + + fn as_char(&self) -> Option<char> { + char::from_digit(*self as u32, 10) + } + + fn from_str(_: &str) -> Vec<Self> { + unimplemented!() + } + + fn to_string(_: &[Self]) -> Result<String, crate::Error> { + unimplemented!() + } + + fn is_linebreak_end(_: &[Self]) -> bool { + unimplemented!() + } + + #[inline] + fn is_linebreak_start(_: &[Self]) -> bool { + unimplemented!() + } + + #[inline] + fn percent_encode(_: &[Self]) -> Vec<Self> { + unimplemented!() + } + + #[inline] + fn percent_decode(_: &[Self]) -> Vec<Self> { + unimplemented!() + } } diff --git a/tests/test.rs b/tests/test.rs index fd00a4e..bc8262b 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -4,7 +4,7 @@ use chrono::Utc; use diff_match_patch_rs::dmp::Diff; -use diff_match_patch_rs::{DiffMatchPatch, Error, Ops, PatchInput}; +use diff_match_patch_rs::{Compat, DiffMatchPatch, Efficient, Error, Ops, PatchInput}; // const tests = [ // 'testDiffIsDestructurable', @@ -85,7 +85,7 @@ fn test_diff_pretty_html() -> Result<(), Error> { // Which means the the diff should an equality block of 3 bytes folloed by insert and delete let old = "π€ͺ"; // [240, 159, 164, 170] let new = "π€"; // [240, 159, 164, 148] - let diffs = dmp.diff_main(old, new)?; + let diffs = dmp.diff_main::<Efficient>(old, new)?; assert_eq!( "<span></span><del style=\"background:#ffe6e6;\">π€ͺ</del><ins style=\"background:#e6ffe6;\">π€</ins>", dmp.diff_pretty_html(&diffs)? @@ -94,7 +94,7 @@ fn test_diff_pretty_html() -> Result<(), Error> { // Now Case 1. but with some text before and after let old = "I'm puzzledπ€ͺ or am I?"; let new = "I'm puzzledπ€ or thinking I guess!"; - let diffs = dmp.diff_main(old, new)?; + let diffs = dmp.diff_main::<Efficient>(old, new)?; assert_eq!( "<span>I'm puzzled</span><del style=\"background:#ffe6e6;\">π€ͺ</del><ins style=\"background:#e6ffe6;\">π€</ins><span> or </span><del style=\"background:#ffe6e6;\">am I?</del><ins style=\"background:#e6ffe6;\">thinking I guess!</ins>", dmp.diff_pretty_html(&diffs)? @@ -103,7 +103,7 @@ fn test_diff_pretty_html() -> Result<(), Error> { // Case 2. Emoticons with the third position different let old = "π"; // [240, 159, 141, 138] let new = "π"; // [240, 159, 140, 138] - let diffs = dmp.diff_main(old, new)?; + let diffs = dmp.diff_main::<Efficient>(old, new)?; assert_eq!( "<span></span><del style=\"background:#ffe6e6;\">π</del><ins style=\"background:#e6ffe6;\">π</ins>", dmp.diff_pretty_html(&diffs)? @@ -112,7 +112,7 @@ fn test_diff_pretty_html() -> Result<(), Error> { // Now Case 2. but with some text, lets complicate this let old = "π, aah orange is the new black!"; // [240, 159, 141, 138] let new = "Aah orange!πis the new π"; // [240, 159, 140, 138] - let diffs = dmp.diff_main(old, new)?; + let diffs = dmp.diff_main::<Efficient>(old, new)?; assert_eq!( "<del style=\"background:#ffe6e6;\">π, a</del><ins style=\"background:#e6ffe6;\">A</ins><span>ah orange</span><del style=\"background:#ffe6e6;\"> </del><ins style=\"background:#e6ffe6;\">!π</ins><span>is the new </span><del style=\"background:#ffe6e6;\">black!</del><ins style=\"background:#e6ffe6;\">π</ins>", dmp.diff_pretty_html(&diffs)? @@ -121,7 +121,7 @@ fn test_diff_pretty_html() -> Result<(), Error> { // Case 3. with second and third different, but lets complicate this with an equality let old = "π "; // [240, 160, 140, 138] let new = "π "; // [240, 150, 160, 138] - let diffs = dmp.diff_main(old, new)?; + let diffs = dmp.diff_main::<Efficient>(old, new)?; assert_eq!( "<span></span><ins style=\"background:#e6ffe6;\">π </ins><del style=\"background:#ffe6e6;\">π </del>", dmp.diff_pretty_html(&diffs)? @@ -130,7 +130,7 @@ fn test_diff_pretty_html() -> Result<(), Error> { // Case 3. but let there be a swap let old = "π "; // [240, 158, 160, 132] let new = std::str::from_utf8(&[240, 160, 158, 132]).unwrap(); // basically an undefined element `π `. Should still work - let diffs = dmp.diff_main(old, new)?; + let diffs = dmp.diff_main::<Efficient>(old, new)?; assert_eq!( "<span></span><del style=\"background:#ffe6e6;\">π </del><ins style=\"background:#e6ffe6;\">π </ins>", dmp.diff_pretty_html(&diffs)? @@ -139,7 +139,7 @@ fn test_diff_pretty_html() -> Result<(), Error> { // Case 4. swap at the last 2 positions let old = "π"; // [240, 159, 141, 140] -- FINALLY A BANANA let new = "π"; // [240, 159, 140, 141] -- interesting revelation - last 2 bytes swapped and π becomes π. Guess the world is going `Bananas!!` - let diffs = dmp.diff_main(old, new)?; + let diffs = dmp.diff_main::<Efficient>(old, new)?; assert_eq!( "<span></span><del style=\"background:#ffe6e6;\">π</del><ins style=\"background:#e6ffe6;\">π</ins>", dmp.diff_pretty_html(&diffs)? @@ -148,7 +148,7 @@ fn test_diff_pretty_html() -> Result<(), Error> { // Let's do this with a slightly longish string let old = "Now, let's explore some emotional extremes π.\nWe've got your ecstatic face π€©, your devastated face π, and your utterly confused face π€―. But that's not all! π€ We've also got some subtle emotions like π, π, and π."; let new = "Let's start with some basics π.\nWe've got your standard smiley face π, your sad face βΉοΈ, and your angry face π . But wait, there's more! π€© We've also got some more complex emotions like π, π€€, and π. And let's not forget about the classics: π, π, and π."; - let diffs = dmp.diff_main(old, new)?; + let diffs = dmp.diff_main::<Efficient>(old, new)?; assert_eq!( "<del style=\"background:#ffe6e6;\">Now, let's explore some emotional extreme</del><ins style=\"background:#e6ffe6;\">Let's start with some basic</ins><span>s </span><del style=\"background:#ffe6e6;\">π</del><ins style=\"background:#e6ffe6;\">π</ins><span>.¶<br>We've got your </span><del style=\"background:#ffe6e6;\">ec</del><span>sta</span><del style=\"background:#ffe6e6;\">tic</del><ins style=\"background:#e6ffe6;\">ndard smiley</ins><span> face </span><del style=\"background:#ffe6e6;\">π€©</del><ins style=\"background:#e6ffe6;\">π</ins><span>, your </span><del style=\"background:#ffe6e6;\">devastate</del><ins style=\"background:#e6ffe6;\">sa</ins><span>d face </span><del style=\"background:#ffe6e6;\">π</del><ins style=\"background:#e6ffe6;\">βΉοΈ</ins><span>, and your </span><del style=\"background:#ffe6e6;\">utterly confused</del><ins style=\"background:#e6ffe6;\">angry</ins><span> face </span><del style=\"background:#ffe6e6;\">π€―</del><ins style=\"background:#e6ffe6;\">π </ins><span>. But </span><del style=\"background:#ffe6e6;\">that's not all</del><ins style=\"background:#e6ffe6;\">wait, there's more</ins><span>! </span><del style=\"background:#ffe6e6;\">π€</del><ins style=\"background:#e6ffe6;\">π€©</ins><span> We've also got some </span><del style=\"background:#ffe6e6;\">subt</del><ins style=\"background:#e6ffe6;\">more comp</ins><span>le</span><ins style=\"background:#e6ffe6;\">x</ins><span> emotions like </span><del style=\"background:#ffe6e6;\">π</del><ins style=\"background:#e6ffe6;\">π, π€€, and π. And let's not forget about the classics: π</ins><span>, </span><del style=\"background:#ffe6e6;\">π</del><ins style=\"background:#e6ffe6;\">π</ins><span>, and </span><del style=\"background:#ffe6e6;\">π</del><ins style=\"background:#e6ffe6;\">π</ins><span>.</span>", @@ -164,21 +164,24 @@ fn test_diff_main() -> Result<(), Error> { // Perform a trivial diff. // Null case. - assert!(dmp.diff_main("", "")?.is_empty()); + assert!(dmp.diff_main::<Efficient>("", "")?.is_empty()); // Equality - assert_eq!(vec![Diff::equal(b"abc")], dmp.diff_main("abc", "abc")?); + assert_eq!( + vec![Diff::equal(b"abc")], + dmp.diff_main::<Efficient>("abc", "abc")? + ); // Simple insert assert_eq!( vec![Diff::equal(b"ab"), Diff::insert(b"123"), Diff::equal(b"c")], - dmp.diff_main("abc", "ab123c")? + dmp.diff_main::<Efficient>("abc", "ab123c")? ); // Simple delete assert_eq!( vec![Diff::equal(b"a"), Diff::delete(b"123"), Diff::equal(b"bc")], - dmp.diff_main("a123bc", "abc")? + dmp.diff_main::<Efficient>("a123bc", "abc")? ); // Two insertions @@ -190,7 +193,7 @@ fn test_diff_main() -> Result<(), Error> { Diff::insert(b"456"), Diff::equal(b"c"), ], - dmp.diff_main("abc", "a123b456c")? + dmp.diff_main::<Efficient>("abc", "a123b456c")? ); // Two deletions. @@ -202,7 +205,7 @@ fn test_diff_main() -> Result<(), Error> { Diff::delete(b"456"), Diff::equal(b"c"), ], - dmp.diff_main("a123b456c", "abc")? + dmp.diff_main::<Efficient>("a123b456c", "abc")? ); // Perform a real diff. @@ -211,7 +214,7 @@ fn test_diff_main() -> Result<(), Error> { // Simple cases. assert_eq!( vec![Diff::delete(b"a"), Diff::insert(b"b"),], - dmp.diff_main("a", "b")? + dmp.diff_main::<Efficient>("a", "b")? ); assert_eq!( @@ -222,7 +225,7 @@ fn test_diff_main() -> Result<(), Error> { Diff::insert(b"lso"), Diff::equal(b" fruit.") ], - dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.")? + dmp.diff_main::<Efficient>("Apples are a fruit.", "Bananas are also fruit.")? ); assert_eq!( @@ -233,7 +236,7 @@ fn test_diff_main() -> Result<(), Error> { Diff::delete(b"\t"), Diff::insert(b"\0") ], - dmp.diff_main("ax\t", "\u{0680}x\0")? + dmp.diff_main::<Efficient>("ax\t", "\u{0680}x\0")? ); // Overlaps. @@ -246,7 +249,7 @@ fn test_diff_main() -> Result<(), Error> { Diff::delete(b"2"), Diff::insert(b"xab"), ], - dmp.diff_main("1ayb2", "abxab")? + dmp.diff_main::<Efficient>("1ayb2", "abxab")? ); assert_eq!( @@ -255,7 +258,7 @@ fn test_diff_main() -> Result<(), Error> { Diff::equal(b"abc"), Diff::delete(b"y"), ], - dmp.diff_main("abcy", "xaxcxabc")? + dmp.diff_main::<Efficient>("abcy", "xaxcxabc")? ); assert_eq!( @@ -270,7 +273,7 @@ fn test_diff_main() -> Result<(), Error> { Diff::equal(b"efghijklmnopqrs"), Diff::delete(b"EFGHIJKLMNOefg"), ], - dmp.diff_main( + dmp.diff_main::<Efficient>( "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs" )? @@ -285,7 +288,219 @@ fn test_diff_main() -> Result<(), Error> { Diff::equal(b" [[Hepatopancreatic]]"), Diff::delete(b" and [[New"), ], - dmp.diff_main( + dmp.diff_main::<Efficient>( + "a [[Hepatopancreatic]] and [[New", + " and [[Hepatopancreatic]]" + )? + ); + + // Timeout. + const LOW_TIMEOUT: u32 = 100; + dmp.set_timeout(Some(LOW_TIMEOUT)); + let a = vec!["`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; 2048].join(""); + let b = vec!["I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; 2048].join(""); + + let start = Utc::now().time(); + dmp.diff_main::<Efficient>(&a, &b)?; + let end = Utc::now().time(); + // Test that we took at least the timeout period (+ 5ms being generous). + assert!((end - start).num_milliseconds() <= LOW_TIMEOUT as i64 + 5); + + // Test the linemode speedup. + // Must be long to pass the 100 char cutoff. + // Simple line-mode. + dmp.set_timeout(Some(1000)); + let a = "12345678901234567890123456789 0123456 78901234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + let b = "abcdefghij abcdefghij abcdefghij abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; + dmp.set_checklines(false); + let res_no_lm = dmp.diff_main::<Efficient>(a, b)?; + dmp.set_checklines(true); + let res_yes_lm = dmp.diff_main::<Efficient>(a, b)?; + + // Now, we'll run 2 checks - one for result equality + assert_eq!(res_no_lm, res_yes_lm); + + // Single line-mode. + let a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; + let b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; + dmp.set_checklines(false); + let yes_lm = dmp.diff_main::<Efficient>(a, b)?; + dmp.set_checklines(true); + let no_lm = dmp.diff_main::<Efficient>(a, b)?; + assert_eq!(no_lm, yes_lm); + + // Overlap line-mode. + let a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + let b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; + dmp.set_checklines(false); + let no_lm = dmp.diff_main::<Efficient>(a, b)?; + dmp.set_checklines(true); + let yes_lm = dmp.diff_main::<Efficient>(a, b)?; + assert_eq!(rebuild_text(&yes_lm[..])?, rebuild_text(&no_lm[..])?); + + // Benefits of checklines can only be realized in text with many lines + let mut dmp = DiffMatchPatch::default(); + let old = std::fs::read_to_string("testdata/txt_old.txt").unwrap(); + let new = std::fs::read_to_string("testdata/txt_new.txt").unwrap(); + + let start = Instant::now(); + let diff_yes_lm = dmp.diff_main::<Efficient>(&old, &new); + let yes_lm_dur = Instant::now() - start; + assert!(diff_yes_lm.is_ok()); + + dmp.set_checklines(false); + let start = Instant::now(); + let diff_no_lm = dmp.diff_main::<Efficient>(&old, &new); + let no_lm_dur = Instant::now() - start; + assert!(diff_no_lm.is_ok()); + + assert!(no_lm_dur > yes_lm_dur); + + Ok(()) +} + +#[test] +fn test_diff_main_compat() -> Result<(), Error> { + let mut dmp = DiffMatchPatch::default(); + + // Perform a trivial diff. + // Null case. + assert!(dmp.diff_main::<Compat>("", "")?.is_empty()); + + // Equality + assert_eq!( + vec![Diff::equal(&"abc".chars().collect::<Vec<_>>()[..])], + dmp.diff_main::<Compat>("abc", "abc")? + ); + + // Simple insert + assert_eq!( + vec![ + Diff::equal(&"ab".chars().collect::<Vec<_>>()[..]), + Diff::insert(&"123".chars().collect::<Vec<_>>()[..]), + Diff::equal(&['c']) + ], + dmp.diff_main::<Compat>("abc", "ab123c")? + ); + + // Simple delete + assert_eq!( + vec![ + Diff::equal(&['a']), + Diff::delete(&"123".chars().collect::<Vec<_>>()[..]), + Diff::equal(&['b', 'c']) + ], + dmp.diff_main::<Compat>("a123bc", "abc")? + ); + + // Two insertions + assert_eq!( + vec![ + Diff::equal(&['a']), + Diff::insert(&"123".chars().collect::<Vec<_>>()[..]), + Diff::equal(&['b']), + Diff::insert(&['4', '5', '6']), + Diff::equal(&['c']), + ], + dmp.diff_main::<Compat>("abc", "a123b456c")? + ); + + // Two deletions. + assert_eq!( + vec![ + Diff::equal(&['a']), + Diff::delete(&"123".chars().collect::<Vec<_>>()[..]), + Diff::equal(&['b']), + Diff::delete(&"456".chars().collect::<Vec<_>>()[..]), + Diff::equal(&['c']), + ], + dmp.diff_main::<Compat>("a123b456c", "abc")? + ); + + // Perform a real diff. + // Switch off the timeout. + dmp.set_timeout(None); + // Simple cases. + assert_eq!( + vec![Diff::delete(&['a']), Diff::insert(&['b']),], + dmp.diff_main::<Compat>("a", "b")? + ); + + assert_eq!( + vec![ + Diff::delete(&"Apple".chars().collect::<Vec<_>>()[..]), + Diff::insert(&"Banana".chars().collect::<Vec<_>>()[..]), + Diff::equal(&"s are a".chars().collect::<Vec<_>>()[..]), + Diff::insert(&"lso".chars().collect::<Vec<_>>()[..]), + Diff::equal(&" fruit.".chars().collect::<Vec<_>>()[..]) + ], + dmp.diff_main::<Compat>("Apples are a fruit.", "Bananas are also fruit.")? + ); + + assert_eq!( + vec![ + Diff::delete(&['a']), + Diff::insert(&"\u{0680}".chars().collect::<Vec<_>>()[..]), + Diff::equal(&['x']), + Diff::delete(&['\t']), + Diff::insert(&['\0']) + ], + dmp.diff_main::<Compat>("ax\t", "\u{0680}x\0")? + ); + + // Overlaps. + assert_eq!( + vec![ + Diff::delete(&['1']), + Diff::equal(&['a']), + Diff::delete(&['y']), + Diff::equal(&['b']), + Diff::delete(&['2']), + Diff::insert(&"xab".chars().collect::<Vec<_>>()[..]), + ], + dmp.diff_main::<Compat>("1ayb2", "abxab")? + ); + + assert_eq!( + vec![ + Diff::insert(&"xaxcx".chars().collect::<Vec<_>>()[..]), + Diff::equal(&"abc".chars().collect::<Vec<_>>()[..]), + Diff::delete(&['y']), + ], + dmp.diff_main::<Compat>("abcy", "xaxcxabc")? + ); + + assert_eq!( + vec![ + Diff::delete(&"ABCD".chars().collect::<Vec<_>>()[..]), + Diff::equal(&"a".chars().collect::<Vec<_>>()[..]), + Diff::delete(&"=".chars().collect::<Vec<_>>()[..]), + Diff::insert(&"-".chars().collect::<Vec<_>>()[..]), + Diff::equal(&"bcd".chars().collect::<Vec<_>>()[..]), + Diff::delete(&"=".chars().collect::<Vec<_>>()[..]), + Diff::insert(&"-".chars().collect::<Vec<_>>()[..]), + Diff::equal(&"efghijklmnopqrs".chars().collect::<Vec<_>>()[..]), + Diff::delete(&"EFGHIJKLMNOefg".chars().collect::<Vec<_>>()[..]), + ], + dmp.diff_main::<Compat>( + "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", + "a-bcd-efghijklmnopqrs" + )? + ); + + // Large equality. + assert_eq!( + vec![ + Diff::insert(&[' ']), + Diff::equal(&['a']), + Diff::insert(&['n', 'd']), + Diff::equal(&[ + ' ', '[', '[', 'H', 'e', 'p', 'a', 't', 'o', 'p', 'a', 'n', 'c', 'r', 'e', 'a', + 't', 'i', 'c', ']', ']' + ]), + Diff::delete(&" and [[New".chars().collect::<Vec<_>>()[..]), + ], + dmp.diff_main::<Compat>( "a [[Hepatopancreatic]] and [[New", " and [[Hepatopancreatic]]" )? @@ -298,7 +513,7 @@ fn test_diff_main() -> Result<(), Error> { let b = vec!["I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; 2048].join(""); let start = Utc::now().time(); - dmp.diff_main(&a, &b)?; + dmp.diff_main::<Efficient>(&a, &b)?; let end = Utc::now().time(); // Test that we took at least the timeout period (+ 5ms being generous). assert!((end - start).num_milliseconds() <= LOW_TIMEOUT as i64 + 5); @@ -310,9 +525,9 @@ fn test_diff_main() -> Result<(), Error> { let a = "12345678901234567890123456789 0123456 78901234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; let b = "abcdefghij abcdefghij abcdefghij abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; dmp.set_checklines(false); - let res_no_lm = dmp.diff_main(a, b)?; + let res_no_lm = dmp.diff_main::<Efficient>(a, b)?; dmp.set_checklines(true); - let res_yes_lm = dmp.diff_main(a, b)?; + let res_yes_lm = dmp.diff_main::<Efficient>(a, b)?; // Now, we'll run 2 checks - one for result equality assert_eq!(res_no_lm, res_yes_lm); @@ -321,18 +536,18 @@ fn test_diff_main() -> Result<(), Error> { let a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; let b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; dmp.set_checklines(false); - let yes_lm = dmp.diff_main(a, b)?; + let yes_lm = dmp.diff_main::<Efficient>(a, b)?; dmp.set_checklines(true); - let no_lm = dmp.diff_main(a, b)?; + let no_lm = dmp.diff_main::<Efficient>(a, b)?; assert_eq!(no_lm, yes_lm); // Overlap line-mode. let a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; let b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; dmp.set_checklines(false); - let no_lm = dmp.diff_main(a, b)?; + let no_lm = dmp.diff_main::<Efficient>(a, b)?; dmp.set_checklines(true); - let yes_lm = dmp.diff_main(a, b)?; + let yes_lm = dmp.diff_main::<Efficient>(a, b)?; assert_eq!(rebuild_text(&yes_lm[..])?, rebuild_text(&no_lm[..])?); // Benefits of checklines can only be realized in text with many lines @@ -341,13 +556,13 @@ fn test_diff_main() -> Result<(), Error> { let new = std::fs::read_to_string("testdata/txt_new.txt").unwrap(); let start = Instant::now(); - let diff_yes_lm = dmp.diff_main(&old, &new); + let diff_yes_lm = dmp.diff_main::<Efficient>(&old, &new); let yes_lm_dur = Instant::now() - start; assert!(diff_yes_lm.is_ok()); dmp.set_checklines(false); let start = Instant::now(); - let diff_no_lm = dmp.diff_main(&old, &new); + let diff_no_lm = dmp.diff_main::<Efficient>(&old, &new); let no_lm_dur = Instant::now() - start; assert!(diff_no_lm.is_ok()); @@ -442,28 +657,51 @@ fn rebuild_text(diffs: &[Diff<u8>]) -> Result<(String, String), Error> { fn test_patch_from_text() -> Result<(), Error> { let dmp = DiffMatchPatch::new(); - assert!(dmp.patch_from_text("")?.is_empty()); + assert!(dmp.patch_from_text::<Efficient>("")?.is_empty()); let strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; - assert_eq!(strp, dmp.patch_from_text(strp)?[0].to_string()); + assert_eq!(strp, dmp.patch_from_text::<Efficient>(strp)?[0].to_string()); assert_eq!( "@@ -1 +1 @@\n-a\n+b\n", - dmp.patch_from_text("@@ -1 +1 @@\n-a\n+b\n")?[0].to_string() + dmp.patch_from_text::<Efficient>("@@ -1 +1 @@\n-a\n+b\n")?[0].to_string() ); assert_eq!( "@@ -1,3 +0,0 @@\n-abc\n", - dmp.patch_from_text("@@ -1,3 +0,0 @@\n-abc\n")?[0].to_string() + dmp.patch_from_text::<Efficient>("@@ -1,3 +0,0 @@\n-abc\n")?[0].to_string() ); assert_eq!( "@@ -0,0 +1,3 @@\n+abc\n", - dmp.patch_from_text("@@ -0,0 +1,3 @@\n+abc\n")?[0].to_string() + dmp.patch_from_text::<Efficient>("@@ -0,0 +1,3 @@\n+abc\n")?[0].to_string() ); // Generates error. - assert!(dmp.patch_from_text("Bad\nPatch\n").is_err()); + assert!(dmp.patch_from_text::<Efficient>("Bad\nPatch\n").is_err()); + + assert!(dmp.patch_from_text::<Compat>("")?.is_empty()); + + let strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assert_eq!(strp, dmp.patch_from_text::<Compat>(strp)?[0].to_string()); + + assert_eq!( + "@@ -1 +1 @@\n-a\n+b\n", + dmp.patch_from_text::<Compat>("@@ -1 +1 @@\n-a\n+b\n")?[0].to_string() + ); + + assert_eq!( + "@@ -1,3 +0,0 @@\n-abc\n", + dmp.patch_from_text::<Compat>("@@ -1,3 +0,0 @@\n-abc\n")?[0].to_string() + ); + + assert_eq!( + "@@ -0,0 +1,3 @@\n+abc\n", + dmp.patch_from_text::<Compat>("@@ -0,0 +1,3 @@\n+abc\n")?[0].to_string() + ); + + // Generates error. + assert!(dmp.patch_from_text::<Compat>("Bad\nPatch\n").is_err()); Ok(()) } @@ -473,11 +711,19 @@ fn test_patch_to_text() -> Result<(), Error> { let dmp = DiffMatchPatch::new(); let strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; - let patches = dmp.patch_from_text(strp)?; + let patches = dmp.patch_from_text::<Efficient>(strp)?; + assert_eq!(strp, dmp.patch_to_text(&patches)); + + let strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n"; + let patches = dmp.patch_from_text::<Efficient>(strp)?; + assert_eq!(strp, dmp.patch_to_text(&patches)); + + let strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + let patches = dmp.patch_from_text::<Compat>(strp)?; assert_eq!(strp, dmp.patch_to_text(&patches)); let strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n"; - let patches = dmp.patch_from_text(strp)?; + let patches = dmp.patch_from_text::<Compat>(strp)?; assert_eq!(strp, dmp.patch_to_text(&patches)); Ok(()) @@ -486,22 +732,75 @@ fn test_patch_to_text() -> Result<(), Error> { #[test] fn test_patch_make() -> Result<(), Error> { let dmp = DiffMatchPatch::default(); - let patches = dmp.patch_make(PatchInput::Texts("", ""))?; + let patches = dmp.patch_make(PatchInput::Texts::<Efficient>("", ""))?; + assert!(patches.is_empty()); + + let txt1 = "The quick brown fox jumps over the lazy dog."; + let txt2 = "That quick brown fox jumped over a lazy dog."; + + // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context. + let patches = dmp.patch_make(PatchInput::Texts::<Efficient>(txt2, txt1))?; + assert_eq!("@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n", dmp.patch_to_text(&patches)); + + // Text1+Text2 inputs. + let patches = dmp.patch_make(PatchInput::Texts::<Efficient>(txt1, txt2))?; + assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches)); + + // Diff input. + let diffs = dmp.diff_main::<Efficient>(txt1, txt2)?; + let patches = dmp.patch_make(PatchInput::Diffs(&diffs[..]))?; + assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches)); + + // Text1+Diff inputs. + let patches = dmp.patch_make(PatchInput::TextDiffs(txt1, &diffs[..]))?; + assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches)); + + // Character encoding. + let patches = dmp.patch_make(PatchInput::Texts::<Efficient>( + "`1234567890-=[]\\;',./", + "~!@#$%^&*()_+{}|:\"<>?", + ))?; + + assert_eq!( + "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", + dmp.patch_to_text(&patches) + ); + + // Character decoding. + let diffs = vec![ + Diff::delete(b"`1234567890-=[]\\;',./"), + Diff::insert(b"~!@#$%^&*()_+{}|:\"<>?"), + ]; + assert_eq!( + diffs, + dmp.patch_from_text("@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n")?[0].diffs() + ); + + // Long string with repeats. + let txt1 = vec!["abcdef"; 100].join(""); + let txt2 = [&txt1, "123"].join(""); + let patches = dmp.patch_make(PatchInput::Texts::<Efficient>(&txt1, &txt2))?; + assert_eq!( + "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n", + dmp.patch_to_text(&patches) + ); + + let patches = dmp.patch_make(PatchInput::Texts::<Compat>("", ""))?; assert!(patches.is_empty()); let txt1 = "The quick brown fox jumps over the lazy dog."; let txt2 = "That quick brown fox jumped over a lazy dog."; // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context. - let patches = dmp.patch_make(PatchInput::Texts(txt2, txt1))?; + let patches = dmp.patch_make(PatchInput::Texts::<Compat>(txt2, txt1))?; assert_eq!("@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n", dmp.patch_to_text(&patches)); // Text1+Text2 inputs. - let patches = dmp.patch_make(PatchInput::Texts(txt1, txt2))?; + let patches = dmp.patch_make(PatchInput::Texts::<Compat>(txt1, txt2))?; assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches)); // Diff input. - let diffs = dmp.diff_main(txt1, txt2)?; + let diffs = dmp.diff_main::<Efficient>(txt1, txt2)?; let patches = dmp.patch_make(PatchInput::Diffs(&diffs[..]))?; assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches)); @@ -510,7 +809,7 @@ fn test_patch_make() -> Result<(), Error> { assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches)); // Character encoding. - let patches = dmp.patch_make(PatchInput::Texts( + let patches = dmp.patch_make(PatchInput::Texts::<Compat>( "`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?", ))?; @@ -533,7 +832,7 @@ fn test_patch_make() -> Result<(), Error> { // Long string with repeats. let txt1 = vec!["abcdef"; 100].join(""); let txt2 = [&txt1, "123"].join(""); - let patches = dmp.patch_make(PatchInput::Texts(&txt1, &txt2))?; + let patches = dmp.patch_make(PatchInput::Texts::<Compat>(&txt1, &txt2))?; assert_eq!( "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n", dmp.patch_to_text(&patches) @@ -568,11 +867,147 @@ fn test_diff_text() { fn test_patch_apply() -> Result<(), Error> { let mut dmp = DiffMatchPatch::default(); - let patches = dmp.patch_make(PatchInput::Texts("", ""))?; + let patches = dmp.patch_make(PatchInput::Texts::<Efficient>("", ""))?; + let (txt, results) = dmp.patch_apply(&patches, "Hello world.")?; + assert_eq!(format!("{}\t{}", txt, results.len()), "Hello world.\t0"); + + let patches = dmp.patch_make(PatchInput::Texts::<Efficient>( + "The quick brown fox jumps over the lazy dog.", + "That quick brown fox jumped over a lazy dog.", + ))?; + + // Exact match + assert_eq!( + ( + "That quick brown fox jumped over a lazy dog.".to_string(), + vec![true, true] + ), + dmp.patch_apply(&patches, "The quick brown fox jumps over the lazy dog.")? + ); + + // Partial match + assert_eq!( + ( + "That quick red rabbit jumped over a tired tiger.".to_string(), + vec![true, true] + ), + dmp.patch_apply(&patches, "The quick red rabbit jumps over the tired tiger.")? + ); + + // Failed match + assert_eq!( + ( + "I am the very model of a modern major general.".to_string(), + vec![false, false] + ), + dmp.patch_apply(&patches, "I am the very model of a modern major general.")? + ); + + // Big delete, small change + let patches = dmp.patch_make(PatchInput::Texts::<Efficient>( + "x1234567890123456789012345678901234567890123456789012345678901234567890y", + "xabcy", + ))?; + assert_eq!( + ("xabcy".to_string(), vec![true, true]), + dmp.patch_apply( + &patches, + "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y" + )? + ); + + // Big delete, large change + let patches = dmp.patch_make(PatchInput::Texts::<Efficient>( + "x1234567890123456789012345678901234567890123456789012345678901234567890y", + "xabcy", + ))?; + assert_eq!( + ( + "xabc12345678901234567890---------------++++++++++---------------12345678901234567890y" + .to_string(), + vec![false, true] + ), + dmp.patch_apply( + &patches, + "x12345678901234567890---------------++++++++++---------------12345678901234567890y" + )? + ); + + dmp.set_delete_threshold(0.6); + let patches = dmp.patch_make(PatchInput::Texts::<Efficient>( + "x1234567890123456789012345678901234567890123456789012345678901234567890y", + "xabcy", + ))?; + assert_eq!( + ("xabcy".to_string(), vec![true, true]), + dmp.patch_apply( + &patches, + "x12345678901234567890---------------++++++++++---------------12345678901234567890y" + )? + ); + dmp.set_delete_threshold(0.5); + + // Compesate for failed patch + dmp.set_match_threshold(0.); + dmp.set_match_distance(0); + let patches = dmp.patch_make(PatchInput::Texts::<Efficient>( + "abcdefghijklmnopqrstuvwxyz--------------------1234567890", + "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890", + ))?; + assert_eq!( + ( + "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890".to_string(), + vec![false, true] + ), + dmp.patch_apply( + &patches, + "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890" + )? + ); + + dmp.set_match_threshold(0.5); + dmp.set_match_distance(1000); + + // No side-effects - kinds useless cos patches is not mutable in rust + let patches = dmp.patch_make(PatchInput::Texts::<Efficient>("", "test"))?; + let srcstr = dmp.patch_to_text(&patches); + dmp.patch_apply(&patches, "")?; + assert_eq!(srcstr, dmp.patch_to_text(&patches)); + + let patches = dmp.patch_make(PatchInput::Texts::<Efficient>( + "The quick brown fox jumps over the lazy dog.", + "Woof", + ))?; + let srcstr = dmp.patch_to_text(&patches); + dmp.patch_apply(&patches, "The quick brown fox jumps over the lazy dog.")?; + assert_eq!(srcstr, dmp.patch_to_text(&patches)); + + // Edge exact match + let patches = dmp.patch_make(PatchInput::Texts::<Efficient>("", "test"))?; + assert_eq!( + ("test".to_string(), vec![true]), + dmp.patch_apply(&patches, "")? + ); + + // Near edge exact match + let patches = dmp.patch_make(PatchInput::Texts::<Efficient>("XY", "XtestY"))?; + assert_eq!( + ("XtestY".to_string(), vec![true]), + dmp.patch_apply(&patches, "XY")? + ); + + // Edge partial match + let patches = dmp.patch_make(PatchInput::Texts::<Efficient>("y", "y123"))?; + assert_eq!( + ("x123".to_string(), vec![true]), + dmp.patch_apply(&patches, "x")? + ); + + let patches = dmp.patch_make(PatchInput::Texts::<Compat>("", ""))?; let (txt, results) = dmp.patch_apply(&patches, "Hello world.")?; assert_eq!(format!("{}\t{}", txt, results.len()), "Hello world.\t0"); - let patches = dmp.patch_make(PatchInput::Texts( + let patches = dmp.patch_make(PatchInput::Texts::<Compat>( "The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog.", ))?; @@ -605,7 +1040,7 @@ fn test_patch_apply() -> Result<(), Error> { ); // Big delete, small change - let patches = dmp.patch_make(PatchInput::Texts( + let patches = dmp.patch_make(PatchInput::Texts::<Compat>( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy", ))?; @@ -618,7 +1053,7 @@ fn test_patch_apply() -> Result<(), Error> { ); // Big delete, large change - let patches = dmp.patch_make(PatchInput::Texts( + let patches = dmp.patch_make(PatchInput::Texts::<Compat>( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy", ))?; @@ -635,7 +1070,7 @@ fn test_patch_apply() -> Result<(), Error> { ); dmp.set_delete_threshold(0.6); - let patches = dmp.patch_make(PatchInput::Texts( + let patches = dmp.patch_make(PatchInput::Texts::<Compat>( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy", ))?; @@ -651,7 +1086,7 @@ fn test_patch_apply() -> Result<(), Error> { // Compesate for failed patch dmp.set_match_threshold(0.); dmp.set_match_distance(0); - let patches = dmp.patch_make(PatchInput::Texts( + let patches = dmp.patch_make(PatchInput::Texts::<Compat>( "abcdefghijklmnopqrstuvwxyz--------------------1234567890", "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890", ))?; @@ -670,12 +1105,12 @@ fn test_patch_apply() -> Result<(), Error> { dmp.set_match_distance(1000); // No side-effects - kinds useless cos patches is not mutable in rust - let patches = dmp.patch_make(PatchInput::Texts("", "test"))?; + let patches = dmp.patch_make(PatchInput::Texts::<Compat>("", "test"))?; let srcstr = dmp.patch_to_text(&patches); dmp.patch_apply(&patches, "")?; assert_eq!(srcstr, dmp.patch_to_text(&patches)); - let patches = dmp.patch_make(PatchInput::Texts( + let patches = dmp.patch_make(PatchInput::Texts::<Compat>( "The quick brown fox jumps over the lazy dog.", "Woof", ))?; @@ -684,21 +1119,21 @@ fn test_patch_apply() -> Result<(), Error> { assert_eq!(srcstr, dmp.patch_to_text(&patches)); // Edge exact match - let patches = dmp.patch_make(PatchInput::Texts("", "test"))?; + let patches = dmp.patch_make(PatchInput::Texts::<Compat>("", "test"))?; assert_eq!( ("test".to_string(), vec![true]), dmp.patch_apply(&patches, "")? ); // Near edge exact match - let patches = dmp.patch_make(PatchInput::Texts("XY", "XtestY"))?; + let patches = dmp.patch_make(PatchInput::Texts::<Compat>("XY", "XtestY"))?; assert_eq!( ("XtestY".to_string(), vec![true]), dmp.patch_apply(&patches, "XY")? ); // Edge partial match - let patches = dmp.patch_make(PatchInput::Texts("y", "y123"))?; + let patches = dmp.patch_make(PatchInput::Texts::<Compat>("y", "y123"))?; assert_eq!( ("x123".to_string(), vec![true]), dmp.patch_apply(&patches, "x")? |