use std::{
char, collections::HashMap, fmt::Display, time::{Duration, Instant}
};
use percent_encoding::percent_decode;
use serde::{Deserialize, Serialize};
use serde_repr::{Deserialize_repr, Serialize_repr};
/// Enum representing the different ops of diff
#[derive(Debug, PartialEq, Eq, Clone, Copy, Serialize_repr, Deserialize_repr)]
#[repr(i8)]
pub enum Ops {
Delete = -1,
Insert,
Equal,
}
/// A structure representing a diff
/// (Ops::Delete, String::new("Hello")) means delete `Hello`
/// (Ops::Insert, String::new("Goodbye")) means add `Goodbye`
/// (Ops::Equal, String::new("World")) means keep world
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Diff(
Ops,
Vec<u8>
);
impl Diff {
/// Create a new diff object
pub fn new(op: Ops, text: &[u8]) -> Self {
Self(op, text.to_vec())
}
/// helper functions to create ops
pub fn delete(text: &[u8]) -> Self {
Self::new(Ops::Delete, text)
}
pub fn insert(text: &[u8]) -> Self {
Self::new(Ops::Insert, text)
}
pub fn equal(text: &[u8]) -> Self {
Self::new(Ops::Equal, text)
}
}
pub struct DiffMatchPatch {
/// a speedup flag, If present and false, then don't run
/// a line-level diff first to identify the changed areas.
/// Defaults to true, which does a faster, slightly less optimal diff.
checklines: Option<bool>,
/// A default timeout in num milliseconda, defaults to 1000 (1 second)
timeout: Option<u64>,
// How far to search for a match (0 = exact location, 1000+ = broad match).
// A match this many characters away from the expected location will add
// 1.0 to the score (0.0 is a perfect match).
// int Match_Distance;
// When deleting a large block of text (over ~64 characters), how close does
// the contents have to match the expected contents. (0.0 = perfection,
// 1.0 = very loose). Note that Match_Threshold controls how closely the
// end points of a delete need to match.
// float Patch_DeleteThreshold;
// Chunk size for context length.
patch_margin: u16,
// The number of bits in an int.
match_max_bits: usize,
}
impl Default for DiffMatchPatch {
fn default() -> Self {
Self {
checklines: Some(true),
timeout: Some(1000),
patch_margin: 4,
match_max_bits: 32
}
}
}
#[derive(Debug, PartialEq, Eq)]
struct HalfMatch<'a> {
prefix_long: &'a [u8],
suffix_long: &'a [u8],
prefix_short: &'a [u8],
suffix_short: &'a [u8],
common: &'a [u8],
}
impl DiffMatchPatch {
fn checklines(&self) -> bool {
self.checklines.map_or(true, |c| c)
}
// returns the configured timeout, defaults to `1`, None or `0` would mean infinite timeout
fn timeout(&self) -> u64 {
self.timeout
.map_or(31536000_u64, |tout| if tout > 0 { tout } else { u64::MAX })
}
// returns the current patch margin
fn patch_margin(&self) -> u16 {
self.patch_margin
}
// returns the configured max_bits
fn match_max_bits(&self) -> usize {
self.match_max_bits
}
fn diff_internal<'a>(
&self,
old_bytes: &'a [u8],
new_bytes: &'a [u8],
linemode: bool,
deadline: Instant,
) -> Vec<Diff> {
// First, check if lhs and rhs are equal
if old_bytes == new_bytes {
if old_bytes.is_empty() {
return Vec::new();
}
return vec![Diff::equal(old_bytes)];
}
if old_bytes.is_empty() {
return vec![Diff::insert(new_bytes)];
}
if new_bytes.is_empty() {
return vec![Diff::delete(old_bytes)];
}
// Trim common prefix
let common_prefix = Self::common_prefix(old_bytes, new_bytes, false);
let common_suffix = Self::common_prefix(
&old_bytes[common_prefix..],
&new_bytes[common_prefix..],
true,
);
let mut diffs = self.compute(
&old_bytes[common_prefix..old_bytes.len() - common_suffix],
&new_bytes[common_prefix..new_bytes.len() - common_suffix],
linemode,
deadline,
);
// Restore the prefix and suffix.
if common_prefix > 0 {
let mut d = vec![Diff::equal(&old_bytes[..common_prefix])];
d.append(&mut diffs);
diffs = d;
}
if common_suffix > 0 {
diffs.push(Diff::new(
Ops::Equal,
&new_bytes[new_bytes.len() - common_suffix..],
));
}
Self::cleanup_merge(&mut diffs);
diffs
}
fn compute<'a>(&self, old: &'a [u8], new: &'a [u8], linemode: bool, deadline: Instant) -> Vec<Diff> {
// returning all of the new part
if old.is_empty() {
return vec![Diff::insert(new)];
}
// return everything deleted
if new.is_empty() {
return vec![Diff::delete(old)];
}
let (long, short, old_gt_new) = if old.len() > new.len() {
(old, new, true)
} else {
(new, old, false)
};
// found a subsequence which contains the short text
if let Some(idx) = long
.windows(short.len())
.step_by(1)
.position(|k| k == short)
{
// Shorter text is inside the longer text (speedup).
let op = if old_gt_new { Ops::Delete } else { Ops::Insert };
let diffs = vec![
Diff::new(op, &long[0..idx]),
Diff::equal(short),
Diff::new(op, &long[idx + short.len()..]),
];
return diffs;
}
if short.len() == 1 {
// After previous case, this can't be an equality
return vec![Diff::delete(old), Diff::insert(new)];
}
// Check if the problem can be split in two
if let Some(half_match) = self.half_match(old, new) {
let old_a = half_match.prefix_long;
let old_b = half_match.suffix_long;
let new_a = half_match.prefix_short;
let new_b = half_match.suffix_short;
let mid_common = half_match.common;
// Send both pairs off for separate processing.
let mut diffs_a = self.diff_internal(old_a, new_a, linemode, deadline);
let mut diffs_b = self.diff_internal(old_b, new_b, linemode, deadline);
// Merge the results
diffs_a.push(Diff::equal(mid_common));
diffs_a.append(&mut diffs_b);
return diffs_a;
}
if linemode && old.len() > 100 && new.len() > 100 {
return self.line_mode(old, new, deadline);
}
self.bisect(old, new, deadline)
}
fn half_match<'a>(&self, old: &'a [u8], new: &'a [u8]) -> Option<HalfMatch<'a>> {
// Don't risk returning a suboptimal diff when we have unlimited time
if self.timeout() == u64::MAX {
return None;
}
let (long, short) = if old.len() > new.len() {
(old, new)
} else {
(new, old)
};
// pointless - two small for this algo
if long.len() < 4 || short.len() * 2 < long.len() {
return None;
}
// First check if the second quarter is the seed for a half-match.
// let hm1 = Self::diff_half_match_i(long, short, (long.len() as f32 / 4.).ceil() as usize);
let hm1 = Self::half_match_i(long, short, long.len() / 4);
// Check again based on the third quarter.
// let hm2 = Self::diff_half_match_i(long, short, (long.len() as f32 / 2.).ceil() as usize);
let hm2 = Self::half_match_i(long, short, long.len() / 2);
if hm1.is_none() && hm2.is_none() {
return None;
}
let hm = if let (Some(hm1), None) = (&hm1, &hm2) {
hm1
} else if let (None, Some(hm2)) = (&hm1, &hm2) {
hm2
} else if let (Some(hm1), Some(hm2)) = (&hm1, &hm2) {
// both match, select the longest
if hm1.common.len() > hm2.common.len() {
hm1
} else {
hm2
}
} else {
return None;
};
// A half-match was found, sort out the return data.
let half_match = if old.len() > new.len() {
HalfMatch {
prefix_long: hm.prefix_long,
suffix_long: hm.suffix_long,
prefix_short: hm.prefix_short,
suffix_short: hm.suffix_short,
common: hm.common,
}
} else {
HalfMatch {
prefix_long: hm.prefix_short,
suffix_long: hm.suffix_short,
prefix_short: hm.prefix_long,
suffix_short: hm.suffix_long,
common: hm.common,
}
};
Some(half_match)
}
// Quick line-level diff on both strings, then rediff the parts for greater accuracy
// This speedup can produce non-minimal diffs
fn line_mode<'a>(&self, old: &'a [u8], new: &'a [u8], deadline: Instant) -> Vec<Diff> {
let to_chars = Self::lines_to_chars(old, new);
let mut diffs = self.diff_internal(&to_chars.chars_old, &to_chars.chars_new, false, deadline);
// Convert diffs back to text
Self::chars_to_lines(&mut diffs[..], &to_chars.lines[..]);
// Eliminate freak matches
Self::cleanup_semantic(&mut diffs);
// Rediff any replacement blocks, this time character-by-character.
// Add a dummy entry at the end.
diffs.push(Diff::equal(b""));
let mut difflen = diffs.len();
let mut pointer = 0_usize;
// count of bytes inserted
let mut insert_n = 0_usize;
// count of bytes to delete
let mut delete_n = 0_usize;
// a temp holder for consequetive data being inserted
let mut insert_data = Vec::new();
// same for delete
let mut delete_data = Vec::new();
while pointer < difflen {
match diffs[pointer].0 {
Ops::Insert => {
insert_n += 1;
let mut data = diffs[pointer].1.to_vec();
insert_data.append(&mut data);
}
Ops::Delete => {
delete_n += 1;
let mut data = diffs[pointer].1.to_vec();
delete_data.append(&mut data);
}
Ops::Equal => {
// Upon reaching an equality, check for prior redundancies.
if delete_n >= 1 && insert_n >= 1 {
// Delete the offending records and add the merged ones.
let idxstart = pointer - delete_n - insert_n;
let idxend = idxstart + delete_n + insert_n;
diffs.drain(idxstart .. idxend);
pointer = idxstart;
let mut subdiffs = self.diff_internal(&delete_data, &insert_data, false, deadline);
let subdifflen = subdiffs.len();
subdiffs.drain(..).rev().for_each(|d| {
diffs.insert(pointer, d);
});
pointer += subdifflen;
difflen = diffs.len();
}
// resetting counters
insert_n = 0;
delete_n = 0;
delete_data = Vec::new();
insert_data = Vec::new();
}
}
pointer += 1;
}
diffs.pop();
diffs
}
// Find the 'middle snake' of a diff, split the problem in two
// and return the recursively constructed diff.
// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
fn bisect<'a>(&self, old: &'a [u8], new: &'a [u8], deadline: Instant) -> Vec<Diff> {
let old_len = old.len() as i32;
let new_len = new.len() as i32;
let max_d = (old_len + new_len + 1) / 2;
let v_offset = max_d as usize;
let v_len = 2 * max_d;
let mut v1 = vec![-1_i32; v_len as usize];
let mut v2 = vec![-1_i32; v_len as usize];
v1[v_offset + 1] = 0;
v2[v_offset + 1] = 0;
// println!("{v1:?}");
// println!("{v2:?}");
let delta = old_len - new_len;
// If the total number of characters is odd, then the front path will collide
// with the reverse path.
let front = delta % 2 != 0;
// println!("v_offset[{v_offset}] v_length[{v_len}] delta[{delta}] front[{front}]");
// Offsets for start and end of k loop.
// Prevents mapping of space beyond the grid.
let mut k1start = 0;
let mut k1end = 0;
let mut k2start = 0;
let mut k2end = 0;
for d in 0..max_d {
// println!("For d: {d} --------------------");
// Bail out if deadline is reached.
if Instant::now() > deadline {
break;
}
// Walk the front path one step
for k1 in (k1start - d..d - k1end + 1).step_by(2) {
let k1_offset = (v_offset as i32 + k1) as usize;
// println!("Loop 1 k1[{k1}] k1_offset[{k1_offset}]");
let mut x1 = if k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1]) {
// println!("Loop 1 Check offset[if] {}", k1_offset + 1);
v1[k1_offset + 1]
} else {
// println!("Loop 1 Check offset[else] {}", k1_offset - 1);
v1[k1_offset - 1] + 1
};
let mut y1 = x1 - k1;
while x1 < old_len && y1 < new_len && old[x1 as usize] == new[y1 as usize] {
x1 += 1;
y1 += 1;
}
v1[k1_offset] = x1;
if x1 > old_len {
// ran off the right of the graph
k1end += 2;
} else if y1 > new_len {
// Ran of the bottom of the graph
k1start += 2;
} else if front {
let k2_offset = v_offset as i32 + delta - k1;
if k2_offset >= 0 && k2_offset < v_len && v2[k2_offset as usize] != -1 {
// Mirror x2 onto top-left coodinate system
let x2 = old_len - v2[k2_offset as usize];
if x1 >= x2 {
// Overlap detected
// println!("Loop 1 Overlap detected! {x1} {y1}");
return self.bisect_split(old, new, x1 as usize, y1 as usize, deadline);
}
}
}
}
// println!("After loop 1 k1start[{k1start}] k1end[{k1end}]");
// Walk the reverse path one step
for k2 in (k2start - d..d - k2end + 1).step_by(2) {
let k2_offset = (v_offset as i32 + k2) as usize;
// println!("Loop 2 k2: {k2} k2_offset: {k2_offset}");
let mut x2 = if k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1]) {
// println!("Loop 2 Check offset[if] {}", k2_offset + 1);
v2[k2_offset + 1]
} else {
// println!("Loop 2 Check offset[else] {}", k2_offset - 1);
v2[k2_offset - 1] + 1
};
let mut y2 = x2 - k2;
while x2 < old_len
&& y2 < new_len
&& old[(old_len - x2) as usize - 1] == new[(new_len - y2) as usize - 1]
{
x2 += 1;
y2 += 1;
}
v2[k2_offset] = x2;
if x2 > old_len {
// Ran off the left of the graph
k2end += 2;
} else if y2 > new_len {
// Ran off the top of the graph
k2start += 2;
} else if !front {
let k1_offset = v_offset as i32 + delta - k2;
// println!("Loop 2 not front! k1_offset[{k1_offset}] v_length[{v_len}]");
if k1_offset >= 0 && k1_offset < v_len && v1[k1_offset as usize] != -1 {
let x1 = v1[k1_offset as usize];
let y1 = v_offset as i32 + x1 - k1_offset;
// Mirror x2 onto top-left coordinate system
x2 = old_len - x2;
// println!("Loop 2 not front inner: x1[{x1}] y1[{y1}] x2[{x2}]");
if x1 >= x2 {
// Overlap
// println!("Loop 2 Overlap detected! {x1} {y1}");
return self.bisect_split(old, new, x1 as usize, y1 as usize, deadline);
}
}
}
}
}
vec![Diff::delete(old), Diff::insert(new)]
}
fn bisect_split(
&self,
old: &[u8],
new: &[u8],
x: usize,
y: usize,
deadline: Instant,
) -> Vec<Diff> {
let old_a = &old[..x];
let new_a = &new[..y];
let old_b = &old[x..];
let new_b = &new[y..];
// Compute both diffs serially.
let mut diffs_a = self.diff_internal(old_a, new_a, false, deadline);
let mut diffs_b = self.diff_internal(old_b, new_b, false, deadline);
diffs_a.append(&mut diffs_b);
diffs_a
}
// Does a substring of shorttext exist within longtext such that the substring
// is at least half the length of longtext?
//idx Start index of quarter length substring within longtext.
fn half_match_i<'a>(long: &'a [u8], short: &'a [u8], idx: usize) -> Option<HalfMatch<'a>> {
// Start with a 1/4 length substring at position i as a seed.
let seed = &long[idx..idx + long.len() / 4];
let mut j = 0;
let mut best_common: &[u8] = &[];
let mut best_long_a: &[u8] = &[];
let mut best_long_b: &[u8] = &[];
let mut best_short_a: &[u8] = &[];
let mut best_short_b: &[u8] = &[];
while let Some(pos) = &short[j..]
.windows(seed.len())
.step_by(1)
.position(|p| p == seed)
{
j += *pos;
let prefix_len = Self::common_prefix(&long[idx..], &short[j..], false);
let suffix_len = Self::common_prefix(&long[..idx], &short[..j], true);
if best_common.len() < suffix_len + prefix_len {
best_common = &short[j - suffix_len..j + prefix_len];
best_long_a = &long[..idx - suffix_len];
best_long_b = &long[idx + prefix_len..];
best_short_a = &short[..j - suffix_len];
best_short_b = &short[j + prefix_len..];
}
j += 1;
}
if best_common.len() * 2 >= long.len() {
Some(HalfMatch {
prefix_long: best_long_a,
suffix_long: best_long_b,
prefix_short: best_short_a,
suffix_short: best_short_b,
common: best_common,
})
} else {
None
}
}
// returns the number of bytes common in both the str - this is the position in bytes not chars, [0 .. n] is your prefix
// We are doing a binary search here, and I've observed similar performance as noted by https://neil.fraser.name/news/2007/10/09/
// Some benchmark code can be found in benches/prefix.rs
// Reverse prefix is suffix
// TODO: investigate this further
fn common_prefix(lhs: &[u8], rhs: &[u8], reverse: bool) -> usize {
if lhs.is_empty()
|| rhs.is_empty()
|| (!reverse && (lhs.first() != rhs.first()))
|| (reverse && (lhs.last() != rhs.last()))
{
return 0;
}
let mut pointmin = 0;
let mut pointmax = lhs.len().min(rhs.len());
let mut pointmid = pointmax;
let mut pointstart = 0;
while pointmin < pointmid {
let (lhsrange, rhsrange) = if !reverse {
(pointstart..pointmid, pointstart..pointmid)
} else {
(
lhs.len() - pointmid..lhs.len() - pointstart,
rhs.len() - pointmid..rhs.len() - pointstart,
)
};
if lhs[lhsrange] == rhs[rhsrange] {
pointmin = pointmid;
pointstart = pointmin;
} else {
pointmax = pointmid;
}
pointmid = (pointmax - pointmin) / 2 + pointmin;
}
pointmid
}
fn common_overlap(lhs: &[u8], rhs: &[u8]) -> usize {
if lhs.is_empty() || rhs.is_empty() {
return 0;
}
let minlen = lhs.len().min(rhs.len());
// A working set with longer string truncated
let l = if lhs.len() > rhs.len() {
&lhs[lhs.len() - rhs.len()..]
} else {
lhs
};
let r = if lhs.len() < rhs.len() {
&rhs[..lhs.len()]
} else {
rhs
};
// Quick check for the worst case.
if l == r {
return minlen;
}
// Start by looking for a single character match
// and increase length until no match is found.
// Performance analysis: https://neil.fraser.name/news/2010/11/04/
let mut len = 1;
let mut best = 0;
loop {
let pattern = &l[minlen - len..];
let found = if let Some(found) = r
.windows(pattern.len())
.step_by(1)
.position(|p| p == pattern)
{
found
} else {
return best;
};
len += found;
if found == 0 || l[minlen - len..] == r[..len] {
best = len;
len += 1;
}
}
}
// Reduce the number of edits by eliminating semantically trivial equalities
fn cleanup_semantic(diffs: &mut Vec<Diff>) {
let mut changes = false;
let mut pointer = 0_usize;
// reducing runtime allocation by giving this vec max capacity
let mut equalities = Vec::with_capacity(diffs.len());
let mut last_equality = None;
// Number of bytes changed pre equality
let mut insert_len_pre = 0_usize;
let mut delete_len_pre = 0_usize;
// Number of bytes changed post equality
let mut insert_len_post = 0_usize;
let mut delete_len_post = 0_usize;
let mut difflen = diffs.len();
while pointer < difflen {
let mut diff_mod = false;
if diffs[pointer].0 == Ops::Equal {
equalities.push(pointer);
// Updating pre equality changes
insert_len_pre = insert_len_post;
delete_len_pre = delete_len_post;
// Resetting post insertion changes to 0
insert_len_post = 0;
delete_len_post = 0;
last_equality = Some(diffs[pointer].1.clone());
} else {
// Ops::Insert || Ops::Delete
// Increasing changes of post_equality metrics
if diffs[pointer].0 == Ops::Insert {
insert_len_post += diffs[pointer].1.len();
} else {
delete_len_post += diffs[pointer].1.len();
}
// Eliminate an equality that is smaller or equal to the edits on both
// sides of it.
if let Some(last_eq) = &last_equality {
if last_eq.len() <= insert_len_pre.max(delete_len_pre)
&& last_eq.len() <= insert_len_post.max(delete_len_post)
{
if let Some(&last) = equalities.last() {
// Duplicate record
diffs.insert(last, Diff::delete(last_eq));
// Change the other copy to insert
diffs[last + 1].0 = Ops::Insert;
// change diff length
difflen = diffs.len();
// Throw away the equality we just deleted.
equalities.pop();
// Throw away the previous equality (it needs to be reevaluated).
equalities.pop();
diff_mod = true;
changes = true;
if let Some(&e) = equalities.last() {
pointer = e;
} else {
pointer = 0;
}
// reset all counters
insert_len_pre = 0;
delete_len_pre = 0;
insert_len_post = 0;
delete_len_post = 0;
last_equality = None;
}
}
}
}
pointer += if diff_mod && pointer == 0 { 0 } else { 1 };
}
// Normalize the diff
if changes {
Self::cleanup_merge(diffs);
}
Self::cleanup_semantic_lossless(diffs);
difflen = diffs.len();
// Find any overlaps between deletions and insertions.
// e.g: <del>abcxxx</del><ins>xxxdef</ins>
// -> <del>abc</del>xxx<ins>def</ins>
// e.g: <del>xxxabc</del><ins>defxxx</ins>
// -> <ins>def</ins>xxx<del>abc</del>
// Only extract an overlap if it is as big as the edit ahead or behind it.
pointer = 1;
while difflen > 0 && pointer < difflen {
if diffs[pointer - 1].0 == Ops::Delete && diffs[pointer].0 == Ops::Insert {
let delete = diffs[pointer - 1].1.to_vec();
let insert = diffs[pointer].1.to_vec();
let delete_thres = delete.len() / 2 + if delete.len() % 2 > 0 { 1 } else { 0 };
let insert_thres = insert.len() / 2 + if insert.len() % 2 > 0 { 1 } else { 0 };
let overlap_1 = Self::common_overlap(&delete[..], &insert[..]);
let overlap_2 = Self::common_overlap(&insert[..], &delete[..]);
if overlap_1 >= overlap_2
&& (overlap_1 >= delete_thres || overlap_1 >= insert_thres)
{
// Overlap found. Insert an equality and trim the surrounding edits.
diffs.insert(pointer, Diff::equal(&insert[..overlap_1]));
diffs[pointer - 1].1 = delete[..delete.len() - overlap_1].to_vec();
diffs[pointer + 1].1 = insert[overlap_1..].to_vec();
difflen = diffs.len();
pointer += 1;
} else if overlap_2 >= delete_thres || overlap_2 >= insert_thres {
// Reverse overlap
// Insert equality and swap and trim surrounding edits
diffs.insert(pointer, Diff::equal(&delete[..overlap_2]));
diffs[pointer - 1] = Diff::insert(&insert[..insert.len() - overlap_2]);
diffs[pointer + 1] = Diff::delete(&delete[overlap_2..]);
difflen = diffs.len();
pointer += 1;
}
pointer += 1;
}
pointer += 1;
}
}
// Look for single edits surrounded on both sides by equalities
// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
fn cleanup_semantic_lossless(diffs: &mut Vec<Diff>) {
let mut pointer = 1_usize;
let mut difflen = diffs.len();
// Intentionally ignore the first and last element (don't need checking).
while difflen > 0 && pointer < difflen - 1 {
// an edit surrounded by equalities
if diffs[pointer - 1].0 == Ops::Equal && diffs[pointer + 1].0 == Ops::Equal {
let mut equality_prev = diffs[pointer - 1].1.clone();
let mut edit = diffs[pointer].1.clone();
let mut equality_next = diffs[pointer + 1].1.clone();
// Shift the edit as far left as possible
let commonlen = Self::common_prefix(&equality_prev[..], &edit[..], true);
if commonlen > 0 {
let mut common_prev = edit[edit.len() - commonlen..].to_vec();
let mut common_next = common_prev.clone();
equality_prev = equality_prev[..equality_prev.len() - commonlen].to_vec();
common_prev.append(&mut edit[..edit.len() - commonlen].to_vec());
edit = common_prev;
common_next.append(&mut equality_next.to_vec());
equality_next = common_next;
}
// Step byte by byte right looking for the best fit
let mut best_equality_prev = equality_prev.clone();
let mut best_edit = edit.clone();
let mut best_equality_next = equality_next.clone();
let mut best_score = Self::cleanup_semantic_score(&equality_prev[..], &edit[..])
+ Self::cleanup_semantic_score(&edit[..], &equality_next[..]);
while !edit.is_empty() && !equality_next.is_empty() && edit[0] == equality_next[0] {
equality_prev.push(edit[0]);
edit.remove(0);
edit.push(equality_next[0]);
equality_next.remove(0);
let score = Self::cleanup_semantic_score(&equality_prev[..], &edit[..])
+ Self::cleanup_semantic_score(&edit[..], &equality_next[..]);
// The >= encourages trailing rather than leading whitespace on edits.
if score >= best_score {
best_score = score;
best_equality_prev.clone_from(&equality_prev);
best_edit.clone_from(&edit);
best_equality_next.clone_from(&equality_next);
}
}
// We have an improvement, save it back to the diff.
if diffs[pointer - 1].1 != best_equality_prev {
if !best_equality_prev.is_empty() {
diffs[pointer - 1].1 = best_equality_prev.to_vec();
} else {
diffs.remove(pointer - 1);
pointer -= 1;
difflen = diffs.len();
}
diffs[pointer].1 = best_edit.to_vec();
if !best_equality_next.is_empty() {
diffs[pointer + 1].1 = best_equality_next.to_vec();
} else {
diffs.remove(pointer + 1);
pointer -= 1;
difflen = diffs.len();
}
}
}
pointer += 1;
}
}
// Given two strings, compute a score representing whether the internal
// boundary falls on logical boundaries
// Scores range from 6 (best) to 0 (worst)
fn cleanup_semantic_score(one: &[u8], two: &[u8]) -> u8 {
let (char1, char2) = if let (Some(&char1), Some(&char2)) = (one.last(), two.first()) {
(char1 as char, char2 as char)
} else {
return 6;
};
// Each port of this function behaves slightly differently due to
// subtle differences in each language's definition of things like
// 'whitespace'. Since this function's purpose is largely cosmetic,
// the choice has been made to use each language's native features
// rather than force total conformity.
let whitespace_1 = char1.is_whitespace();
let whitespace_2 = char2.is_whitespace();
let linebreak_1 = whitespace_1 && (char1 == '\n' || char1 == '\r');
let linebreak_2 = whitespace_2 && (char2 == '\n' || char2 == '\r');
let blankline_1 = linebreak_1 && (one.ends_with(b"\n\n") || (one.ends_with(b"\n\r\n")));
let blankline_2 = linebreak_2
&& (two.starts_with(b"\r\n\n")
|| two.starts_with(b"\r\n\r\n")
|| two.starts_with(b"\n\r\n")
|| two.starts_with(b"\n\n"));
// println!("Non-alphanum: [{} {}] Whitespace: [{whitespace_1} {whitespace_2}] Linebreak: [{linebreak_1} {linebreak_2}] Blankline: [{blankline_1} {blankline_2}] Blanklinematch: [{} {}]", !char1.is_alphanumeric(), !char2.is_alphanumeric(), one.ends_with(b"\n"), (two.starts_with(b"\n") || two.starts_with(b"\r")));
if blankline_1 || blankline_2 {
// 5 for blank lines
5
} else if linebreak_1 || linebreak_2 {
// Four points for line breaks.
4
} else if !char1.is_alphanumeric() && !whitespace_1 && whitespace_2 {
// Three points for end of sentences.
3
} else if whitespace_1 || whitespace_2 {
// 2 for whitespace
2
} else if !char1.is_alphanumeric() || !char2.is_alphanumeric() {
// 1 for not alphanumeric
1
} else {
0
}
}
// Reorder and merge like edit sections. Merge equalities.
// Any edit section can move as long as it doesn't cross an equality.
fn cleanup_merge(diffs: &mut Vec<Diff>) {
// Push a dummy diff ... this triggers the equality as a last step
diffs.push(Diff::equal(b""));
let mut difflen = diffs.len();
let mut pointer = 0_usize;
let mut insert_n = 0;
let mut delete_n = 0;
let mut insert_data = vec![];
let mut delete_data = vec![];
// let mut commonlen = 0;
while pointer < difflen {
match diffs[pointer].0 {
Ops::Insert => {
insert_n += 1;
let mut data = diffs[pointer].1.to_vec();
insert_data.append(&mut data);
pointer += 1;
}
Ops::Delete => {
delete_n += 1;
let mut data = diffs[pointer].1.to_vec();
delete_data.append(&mut data);
pointer += 1;
}
Ops::Equal => {
// Upon reaching an equality, check for prior redundancies.
if delete_n + insert_n > 1 {
if delete_n != 0 && insert_n != 0 {
// Factor out any common prefixies.
let commonlen =
Self::common_prefix(&insert_data[..], &delete_data[..], false);
if commonlen != 0 {
let tmpidx = pointer - delete_n - insert_n;
if tmpidx > 0 && diffs[tmpidx - 1].0 == Ops::Equal {
let mut appenddata = insert_data[..commonlen].to_vec();
diffs[tmpidx - 1].1.append(&mut appenddata);
} else {
diffs.insert(0, Diff::equal(&insert_data[..commonlen]));
pointer += 1;
}
insert_data = insert_data[commonlen..].to_vec();
delete_data = delete_data[commonlen..].to_vec();
}
// Factor out any common suffixies.
let commonlen =
Self::common_prefix(&insert_data[..], &delete_data[..], true);
if commonlen > 0 {
diffs[pointer].1 = [
insert_data[insert_data.len() - commonlen..].to_vec(),
diffs[pointer].1.to_vec(),
]
.concat();
insert_data = insert_data[..insert_data.len() - commonlen].to_vec();
delete_data = delete_data[..delete_data.len() - commonlen].to_vec();
}
}
// Delete the offending records and add the merged ones.
pointer -= delete_n + insert_n;
// Reversing because index will not change
(pointer..pointer + delete_n + insert_n)
.rev()
.for_each(|i| {
diffs.remove(i);
});
difflen = diffs.len();
if !delete_data.is_empty() {
diffs.insert(pointer, Diff::delete(&delete_data));
pointer += 1;
difflen = diffs.len();
}
if !insert_data.is_empty() {
diffs.insert(pointer, Diff::insert(&insert_data));
pointer += 1;
difflen = diffs.len();
}
pointer += 1;
} else if pointer != 0 && diffs[pointer - 1].0 == Ops::Equal {
// Merge this equality with the previous one.
let mut to_merge = diffs[pointer].1.to_vec();
diffs[pointer - 1].1.append(&mut to_merge);
diffs.remove(pointer);
difflen = diffs.len();
} else {
pointer += 1;
}
insert_n = 0;
delete_n = 0;
insert_data = Vec::new();
delete_data = Vec::new();
}
}
}
if let Some(dl) = diffs.last() {
if dl.1.is_empty() {
diffs.pop();
}
}
difflen = diffs.len();
// Second pass: look for single edits surrounded on both sides by equalities
// which can be shifted sideways to eliminate an equality.
// e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
pointer = 1;
let mut changes = false;
while difflen > 0 && pointer < difflen - 1 {
if let (Some(diff_prev), Some(diff), Some(diff_next)) = (
diffs.get(pointer - 1),
diffs.get(pointer),
diffs.get(pointer + 1),
) {
// This is a single edit surrounded by equalities.
if diff_prev.0 == Ops::Equal && diff_next.0 == Ops::Equal {
let substr_idx = if diff.1.len() >= diff_prev.1.len() {
diff.1.len() - diff_prev.1.len()
} else {
0
};
if diff.1[substr_idx..] == diff_prev.1 {
// Shift the edit over the previous equality.
let new_current_data =
[&diff_prev.1, &diff.1[..diff.1.len() - diff_prev.1.len()]].concat();
let new_next_data = [&diff_prev.1[..], &diff_next.1[..]].concat();
diffs[pointer].1 = new_current_data;
diffs[pointer + 1].1 = new_next_data;
diffs.remove(pointer - 1);
difflen = diffs.len();
changes = true;
} else if diff.1[..if diff_next.1.len() <= diff.1.len() {
diff_next.1.len()
} else {
diff.1.len()
}] == diff_next.1
{
// Shift the edit over the next equality.
let mut next_data = diffs[pointer + 1].1.to_vec();
diffs[pointer - 1].1.append(&mut next_data);
diffs[pointer].1 = [
&diffs[pointer].1[diffs[pointer + 1].1.len()..],
&diffs[pointer + 1].1[..],
]
.concat();
diffs.remove(pointer + 1);
difflen = diffs.len();
changes = true;
}
}
}
pointer += 1;
}
if changes {
Self::cleanup_merge(diffs);
}
}
fn cleanup_efficiency(diffs: &mut Vec<Diff>) {
todo!()
}
fn diff_text_old(diffs: &[Diff]) -> Vec<u8> {
diffs.iter()
.filter_map(|diff| {
if diff.0 != Ops::Insert {
Some(diff.1.clone())
} else {
None
}
})
.collect::<Vec<_>>()
.concat()
}
fn diff_text_new(diffs: &[Diff]) -> Vec<u8> {
diffs.iter()
.filter_map(|diff| {
if diff.0 != Ops::Delete {
Some(diff.1.clone())
} else {
None
}
})
.collect::<Vec<_>>()
.concat()
}
}
#[derive(Debug, Eq, PartialEq)]
struct LineToChars<'a> {
chars_old: Vec<u8>,
chars_new: Vec<u8>,
lines: Vec<&'a [u8]>,
}
impl DiffMatchPatch {
fn lines_to_chars<'a>(old: &'a [u8], new: &'a [u8]) -> LineToChars<'a> {
let mut lines: Vec<&'a [u8]> = vec![];
let mut linehash: HashMap<&'a [u8], usize> = HashMap::new();
// Allocate 2/3rds of the space for text1, the rest for text2.
// let mut maxlines = 5;
let mut maxlines = 40000;
let chars_old = Self::lines_to_chars_internal(old, &mut lines, &mut linehash, maxlines);
// This basically represents the U16::MAX value
maxlines = 65535;
// maxlines = 7;
let chars_new = Self::lines_to_chars_internal(new, &mut lines, &mut linehash, maxlines);
LineToChars {
chars_old,
chars_new,
lines,
}
}
fn lines_to_chars_internal<'a>(
text: &'a [u8],
array: &mut Vec<&'a [u8]>,
hash: &mut HashMap<&'a [u8], usize>,
maxlines: usize,
) -> Vec<u8> {
let take = maxlines - array.len();
let mut lines = text.split_inclusive(|u| *u == b'\n').enumerate();
let mut charlist = Vec::with_capacity(take + 1);
let mut broke = None;
let mut cursor = 0;
for (idx, line) in lines.by_ref() {
cursor += line.len();
let entry = hash.entry(line).or_insert(array.len());
// fresh insert
if entry == &array.len() {
array.push(line);
}
// We know the `maxlines = 65535`, this will never fail
charlist.push(char::from_u32(*entry as u32).unwrap());
if idx == take - 1 {
broke = Some(idx);
break;
}
}
if broke.is_some() {
let line = &text[cursor..];
let e = hash.entry(line).or_insert(array.len());
array.push(line);
charlist.push(char::from_u32(*e as u32).unwrap());
}
let chars: String = charlist.iter().collect::<String>();
chars.as_bytes().to_vec()
}
fn chars_to_lines(diffs: &mut [Diff], lines: &[&[u8]]) {
diffs.iter_mut().for_each(|d| {
let chars = String::from_utf8(d.1.to_vec()).unwrap();
// let mut txt = &[];
let t = chars
.chars()
.map(|c| {
let idx: u32 = c.into();
*lines.get(idx as usize).unwrap()
})
.collect::<Vec<_>>()
.concat();
d.1 = t;
});
}
}
// Patch Methods
#[derive(Debug, Default, Clone)]
pub struct Patch {
diffs: Vec<Diff>,
start1: usize,
start2: usize,
length1: usize,
length2: usize
}
impl Display for Patch {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let coord1 = if self.length1 == 0 {
format!("{},0", self.start1)
} else {
format!(
"{}{}",
self.start1 + 1,
if self.length1 != 1 {
format!(",{}", self.length1)
} else {
String::new()
}
)
};
let coord2 = if self.length2 == 0 {
format!("{},0", self.start2)
} else {
format!(
"{}{}",
self.start2 + 1,
if self.length2 != 1 {
format!(",{}", self.length2)
} else {
String::new()
}
)
};
let mut segments = vec!["@@ -".to_string(), coord1, " +".to_string(), coord2 , " @@\n".to_string()];
self.diffs.iter()
.for_each(|diff| {
let sign = match diff.0 {
Ops::Insert => '+',
Ops::Delete => '-',
Ops::Equal => ' '
};
segments.push(
String::from_utf8([&[sign as u8], &diff.1[..], &[b'\n']].concat()).unwrap()
)
});
write!(f, "{}", segments.join(""))
}
}
pub enum PatchInput<'a> {
Texts(&'a str, &'a str),
Diffs(&'a [Diff]),
TextDiffs(&'a str, &'a [Diff])
}
pub type Patches = Vec<Patch>;
impl DiffMatchPatch {
fn parse_patch_header(s: &[u8]) -> Option<(usize, Option<usize>, usize, Option<usize>)> {
let mut section = Vec::with_capacity(64);
let mut current_sect = 0;
let mut old_line = 0;
let mut old_cols = None;
let mut new_line = 0;
let mut new_cols = None;
for &c in s.iter() {
if c == b' ' {
match current_sect {
0 => {
if §ion != b"@@" {
return None;
}
}
1 => {
if section.is_empty() {
return None;
}
let splits = section[1..].split(|&p| p == b',').collect::<Vec<_>>();
let ol = splits.first()?;
old_line = std::str::from_utf8(ol).ok()?.parse::<usize>().ok()?;
if let Some(&oc) = splits.get(1) {
old_cols = Some(std::str::from_utf8(oc).ok()?.parse::<usize>().ok()?);
}
}
2 => {
let splits = section[
if *section.first()? == b'+' { 1 } else { 0 }
..
].split(|&p| p == b',').collect::<Vec<_>>();
let nl = splits.first()?;
new_line = std::str::from_utf8(nl).ok()?.parse::<usize>().ok()?;
if let Some(&nc) = splits.get(1) {
new_cols = Some(std::str::from_utf8(nc).ok()?.parse::<usize>().ok()?);
}
}
_ => {
// invalid pattern
return None;
}
}
section = Vec::with_capacity(64);
current_sect += 1;
continue;
}
if current_sect == 1 && section.is_empty() && c != b'-' {
return None;
}
section.push(c);
}
if §ion != b"@@" {
return None;
}
Some(( old_line, old_cols, new_line, new_cols ))
}
fn patch_make_internal(&self, txt: &[u8], diffs: &[Diff]) -> Result<Patches, ()> {
// The null cases
if txt.is_empty() {
return Ok(vec![])
}
// No diffs -> no patches
if diffs.is_empty() {
return Ok(Vec::new());
}
let patch_margin = self.patch_margin() as usize;
let mut patches = vec![];
let mut patch = Patch::default();
let mut char_n1 = 0;
let mut char_n2 = 0;
let mut prepatch: Vec<u8> = txt.to_vec();
let mut postpatch: Vec<u8> = prepatch.clone();
diffs.iter().enumerate().for_each(|(idx, diff)| {
// a new patch starts here
if patch.diffs.is_empty() && diff.0 != Ops::Equal {
patch.start1 = char_n1;
patch.start2 = char_n2;
}
println!("Diff: {:?} {}", diff.0, std::str::from_utf8(&diff.1).unwrap());
match diff.0 {
Ops::Insert => {
patch.length2 += diff.1.len();
println!("patch make: INSERT char_n2[{char_n2}] textlen[{}]", postpatch.len());
postpatch = [&postpatch[..char_n2], &diff.1, &postpatch[char_n2..]].concat();
println!("patch make: INSERT after {} Indexes [0 .. {char_n2}] {} [{char_n2} ..]", postpatch.len(), diff.1.len());
patch.diffs.push(diff.clone());
}
Ops::Delete => {
patch.length1 += diff.1.len();
println!("patch make: DELETE char_n2[{char_n2}] textlen[{}]", postpatch.len());
postpatch = [&postpatch[..char_n2], &postpatch[char_n2 + diff.1.len()..]].concat();
println!("patch make: DELETE after {} Indexes [0 .. {char_n2}] [{} ..]", postpatch.len(), char_n2 + diff.1.len());
patch.diffs.push(diff.clone());
}
Ops::Equal => {
if diff.1.len() <= 2 * patch_margin && !patch.diffs.is_empty() && diffs.len() != idx + 1 {
// Small equality inside a patch.
patch.length1 += diff.1.len();
patch.length2 += diff.1.len();
patch.diffs.push(diff.clone());
} else if diff.1.len() >= 2 * patch_margin && !patch.diffs.is_empty() {
// Time for a new patch.
self.patch_add_context(&mut patch, &prepatch);
patches.push(patch.clone());
patch = Patch::default();
// Unlike Unidiff, our patch lists have a rolling context.
// http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
// Update prepatch text & pos to reflect the application of the
// just completed patch.
prepatch.clone_from(&postpatch);
char_n1 = char_n2;
}
}
}
if diff.0 != Ops::Insert {
char_n1 += diff.1.len();
}
if diff.0 != Ops::Delete {
char_n2 += diff.1.len();
}
println!("{patch} {} {} {} {} {}", patch.diffs.len(), patch.length1, patch.length2, patch.start1, patch.start2);
});
// Pick up the leftover patch if not empty.
if !patch.diffs.is_empty() {
self.patch_add_context(&mut patch, &prepatch);
patches.push(patch);
}
Ok(patches)
}
fn patch_add_context(&self, patch: &mut Patch, text: &[u8]) {
if text.is_empty() {
return;
}
let patch_margin = self.patch_margin() as usize;
let mut pattern = &text[patch.start2 .. patch.start2 + patch.length1];
let mut padding = 0;
// Look for the first and last matches of pattern in text. If two different
// matches are found, increase the pattern length.
println!("Add context: patch[{patch}] patch.start1[{}] patch.length1[{}] pattern[{}]", patch.start2, patch.length1, pattern.len());
while pattern.is_empty() || (
text.windows(pattern.len()).position(|p| p == pattern) !=
text.windows(pattern.len()).rev().position(|p| p == pattern).map(|p| text.len() - p - pattern.len()) &&
pattern.len() < self.match_max_bits() - patch_margin * 2
) {
padding += patch_margin;
println!("Inner Padding: {padding}");
let begin = if patch.start2 > padding { patch.start2 - padding } else { 0 };
let end = patch.start2 + patch.length1 + padding;
pattern = &text[begin .. if end > text.len() { text.len() } else { end }];
println!("Inner pattern: {}", std::str::from_utf8(pattern).unwrap());
}
// Add one chunk for good luck.
padding += patch_margin;
// Add prefix
let begin = if patch.start2 > padding { patch.start2 - padding } else { 0 };
let prefix = &text[begin .. patch.start2];
if !prefix.is_empty() {
patch.diffs.insert(0, Diff::equal(prefix));
}
// Add the suffix
let begin = patch.start2 + patch.length1;
let end = patch.start2 + patch.length1 + padding;
let suffix = &text[if begin < text.len() { begin } else { text.len() } .. if end < text.len() { end } else { text.len() }];
if !suffix.is_empty() {
patch.diffs.push(Diff::equal(suffix));
}
// Roll back the start points.
patch.start1 -= prefix.len();
patch.start2 -= prefix.len();
// extend the lengths
patch.length1 += prefix.len() + suffix.len();
patch.length2 += prefix.len() + suffix.len();
}
fn patch_apply_internal(&self, patches: &Patches, source: &[u8]) -> (Vec<u8>, Vec<bool>) {
if patches.is_empty() {
return (source.to_vec(), vec![])
}
// To avoid changes to original patches!!
let patches = patches.clone();
todo!()
}
fn patch_add_padding(&self, patches: &Patches) {
let pad_len = self.patch_margin();
let null_pad = (1 .. pad_len + 1).filter_map(|c| {
char::from_u32(c as u32)
}).collect::<Vec<_>>();
println!("Null pad! {null_pad:?}");
// let null_pad = ()
// short paddingLength = Patch_Margin;
// QString nullPadding = "";
// for (short x = 1; x <= paddingLength; x++) {
// nullPadding += QChar((ushort)x);
// }
// // Bump all the patches forward.
// QMutableListIterator<Patch> pointer(patches);
// while (pointer.hasNext()) {
// Patch &aPatch = pointer.next();
// aPatch.start1 += paddingLength;
// aPatch.start2 += paddingLength;
// }
// // Add some padding on start of first diff.
// Patch &firstPatch = patches.first();
// QList<Diff> &firstPatchDiffs = firstPatch.diffs;
// if (firstPatchDiffs.empty() || firstPatchDiffs.first().operation != EQUAL) {
// // Add nullPadding equality.
// firstPatchDiffs.prepend(Diff(EQUAL, nullPadding));
// firstPatch.start1 -= paddingLength; // Should be 0.
// firstPatch.start2 -= paddingLength; // Should be 0.
// firstPatch.length1 += paddingLength;
// firstPatch.length2 += paddingLength;
// } else if (paddingLength > firstPatchDiffs.first().text.length()) {
// // Grow first equality.
// Diff &firstDiff = firstPatchDiffs.first();
// int extraLength = paddingLength - firstDiff.text.length();
// firstDiff.text = safeMid(nullPadding, firstDiff.text.length(),
// paddingLength - firstDiff.text.length()) + firstDiff.text;
// firstPatch.start1 -= extraLength;
// firstPatch.start2 -= extraLength;
// firstPatch.length1 += extraLength;
// firstPatch.length2 += extraLength;
// }
// // Add some padding on end of last diff.
// Patch &lastPatch = patches.first();
// QList<Diff> &lastPatchDiffs = lastPatch.diffs;
// if (lastPatchDiffs.empty() || lastPatchDiffs.last().operation != EQUAL) {
// // Add nullPadding equality.
// lastPatchDiffs.append(Diff(EQUAL, nullPadding));
// lastPatch.length1 += paddingLength;
// lastPatch.length2 += paddingLength;
// } else if (paddingLength > lastPatchDiffs.last().text.length()) {
// // Grow last equality.
// Diff &lastDiff = lastPatchDiffs.last();
// int extraLength = paddingLength - lastDiff.text.length();
// lastDiff.text += nullPadding.left(extraLength);
// lastPatch.length1 += extraLength;
// lastPatch.length2 += extraLength;
// }
// return nullPadding;
}
}
// Exposed APIs
impl DiffMatchPatch {
/// Find the differences between two texts. Simplifies the problem by stripping any common prefix or suffix off the texts before diffing.
/// Args:
/// old: Old string to be diffed.
/// new: New string to be diffed.
/// deadline: Optional time when the diff should be complete by. Used
/// internally for recursive calls. Users should set DiffTimeout instead.
///
/// Returns:
/// Vec of changes (Diff).
pub fn diff_main(&self, old: &str, new: &str) -> Vec<Diff> {
let deadline = if let Some(i) = Instant::now()
.checked_add(Duration::from_millis(self.timeout())) {
i
} else {
todo!()
};
self.diff_internal(old.as_bytes(), new.as_bytes(), self.checklines(), deadline)
}
/// A diff of two unrelated texts can be filled with coincidental matches.
/// For example, the diff of "mouse" and "sofas" is [(-1, "m"), (1, "s"), (0, "o"), (-1, "u"), (1, "fa"), (0, "s"), (-1, "e")].
/// While this is the optimum diff, it is difficult for humans to understand. Semantic cleanup rewrites the diff, expanding it into a more intelligible format.
/// The above example would become: [(-1, "mouse"), (1, "sofas")]. If a diff is to be human-readable, it should be passed to diff_cleanupSemantic.
pub fn diff_cleanup_semantic(diffs: &mut Vec<Diff>) {
Self::cleanup_semantic(diffs)
}
pub fn diff_cleanup_efficiency(diffs: &mut [Diff]) {
todo!()
}
pub fn diff_levenshtein(diffs: &[Diff]) -> u32 {
todo!()
}
/// Takes a diff array and returns a pretty HTML sequence. This function is mainly intended as an example from which to write ones own display functions.
pub fn diff_pretty_html(diffs: &[Diff]) -> String {
todo!()
}
pub fn match_main(text: &str, pattern: &str, loc: ()) -> () {
todo!()
}
pub fn patch_make(&self, input: PatchInput) -> Patches {
let mut diff_input;
let txt_old;
let (txt, diffs) = match input {
// No diffs provided, lets make our own
PatchInput::Texts(txt1, txt2 ) => {
let dmp = DiffMatchPatch::default();
diff_input = dmp.diff_main(txt1, txt2);
Self::cleanup_semantic(&mut diff_input);
(txt1.as_bytes(), &diff_input[..])
}
PatchInput::Diffs(diffs) => {
// No origin string provided, compute our own.
txt_old = Self::diff_text_old(diffs);
(&txt_old[..], diffs)
}
PatchInput::TextDiffs(txt, diffs) => (txt.as_bytes(), diffs)
};
self.patch_make_internal(txt, diffs).unwrap()
}
pub fn patch_to_text(patches: &Patches) -> String {
patches
.iter()
.map(|p| p.to_string())
.collect::<String>()
}
pub fn patch_from_text(text: &str) -> Patches {
if text.is_empty() {
return vec![];
}
let mut text = text.as_bytes().split(|&p| p == b'\n').collect::<Vec<_>>();
let mut patches = vec![];
while !text.is_empty() {
let (old_line, old_cols, new_line, new_cols) = if let Some(p) = Self::parse_patch_header(text.first().unwrap()) {
p
} else {
todo!("return error")
};
let mut patch = Patch { start1: old_line, start2: new_line, ..Default::default() };
if let Some(old_cols) = old_cols {
if old_cols != 0 {
patch.start1 -= 1;
patch.length1 = old_cols;
}
} else {
patch.start1 -= 1;
patch.length1 = 1;
}
if let Some(new_cols) = new_cols {
if new_cols != 0 {
patch.start2 -= 1;
patch.length2 = new_cols;
}
} else {
patch.start2 -= 1;
patch.length2 = 1;
}
text.remove(0);
while !text.is_empty() {
let txt = if let Some(&s) = text.first() {
if !s.is_empty() {
s
} else {
text.remove(0);
continue;
}
} else {
text.remove(0);
continue;
};
let sign = if let Some(&sign) = txt.first() {
sign
} else {
unreachable!("Already checked for emptyness!");
};
let line = percent_decode(&txt[1..]).collect::<Vec<_>>();
match sign {
b'-' => {
patch.diffs.push(Diff::delete(&line));
}
b'+' => {
patch.diffs.push(Diff::insert(&line));
}
b' ' => {
patch.diffs.push(Diff::equal(&line));
}
b'@' => {
// next patch, break
break;
}
_ => {
todo!("throw error, invalid encoding")
}
}
text.remove(0);
}
patches.push(patch);
};
patches
}
pub fn patch_apply(patches: &[Patch], source_txt: &str) -> (String, ()) {
todo!()
}
}
#[cfg(test)]
mod tests {
use std::time::{Duration, Instant};
use crate::dmp::{Diff, HalfMatch, LineToChars};
use super::{DiffMatchPatch, Ops, PatchInput};
// const tests = [
// 'testDiffIsDestructurable', // TODO
// 'testDiffCleanupEfficiency',
// 'testDiffPrettyHtml',
// 'testDiffDelta',
// 'testDiffXIndex',
// 'testDiffLevenshtein',
// 'testMatchAlphabet',
// 'testMatchBitap',
// 'testMatchMain',
// 'testPatchObj',
// 'testPatchSplitMax',
// 'testPatchApply'
// ];
#[test]
fn test_prefix() {
// Detect any common prefix.
// Null case.
assert_eq!(
0,
DiffMatchPatch::common_prefix("abc".as_bytes(), "xyz".as_bytes(), false)
);
// Non-null case.
assert_eq!(
4,
DiffMatchPatch::common_prefix("1234abcdef".as_bytes(), "1234xyz".as_bytes(), false)
);
// Whole case.
assert_eq!(
4,
DiffMatchPatch::common_prefix("1234".as_bytes(), "1234xyz".as_bytes(), false)
);
}
#[test]
fn test_suffix() {
// Detect any common suffix.
// Null case.
assert_eq!(
0,
DiffMatchPatch::common_prefix("abc".as_bytes(), "xyz".as_bytes(), true)
);
// Non-null case.
assert_eq!(
4,
DiffMatchPatch::common_prefix("abcdef1234".as_bytes(), "xyz1234".as_bytes(), true)
);
// Whole case.
assert_eq!(
4,
DiffMatchPatch::common_prefix("1234".as_bytes(), "xyz1234".as_bytes(), true)
);
}
#[test]
fn test_diff_half_match() {
let mut dmp = DiffMatchPatch::default();
// No match
assert!(dmp
.half_match("1234567890".as_bytes(), "abcdef".as_bytes())
.is_none());
assert!(dmp
.half_match("12345".as_bytes(), "23".as_bytes())
.is_none());
// Single Match.
assert_eq!(
Some(HalfMatch {
prefix_long: "12".as_bytes(),
suffix_long: "90".as_bytes(),
prefix_short: "a".as_bytes(),
suffix_short: "z".as_bytes(),
common: "345678".as_bytes()
}),
dmp.half_match("1234567890".as_bytes(), "a345678z".as_bytes())
);
assert_eq!(
Some(HalfMatch {
prefix_long: "a".as_bytes(),
suffix_long: "z".as_bytes(),
prefix_short: "12".as_bytes(),
suffix_short: "90".as_bytes(),
common: "345678".as_bytes()
}),
dmp.half_match("a345678z".as_bytes(), "1234567890".as_bytes())
);
assert_eq!(
Some(HalfMatch {
prefix_long: "abc".as_bytes(),
suffix_long: "z".as_bytes(),
prefix_short: "1234".as_bytes(),
suffix_short: "0".as_bytes(),
common: "56789".as_bytes()
}),
dmp.half_match("abc56789z".as_bytes(), "1234567890".as_bytes())
);
assert_eq!(
Some(HalfMatch {
prefix_long: "a".as_bytes(),
suffix_long: "xyz".as_bytes(),
prefix_short: "1".as_bytes(),
suffix_short: "7890".as_bytes(),
common: "23456".as_bytes()
}),
dmp.half_match("a23456xyz".as_bytes(), "1234567890".as_bytes())
);
// Multiple Matches.
assert_eq!(
Some(HalfMatch {
prefix_long: "12123".as_bytes(),
suffix_long: "123121".as_bytes(),
prefix_short: "a".as_bytes(),
suffix_short: "z".as_bytes(),
common: "1234123451234".as_bytes()
}),
dmp.half_match(
"121231234123451234123121".as_bytes(),
"a1234123451234z".as_bytes()
)
);
assert_eq!(
Some(HalfMatch {
prefix_long: "".as_bytes(),
suffix_long: "-=-=-=-=-=".as_bytes(),
prefix_short: "x".as_bytes(),
suffix_short: "".as_bytes(),
common: "x-=-=-=-=-=-=-=".as_bytes()
}),
dmp.half_match(
"x-=-=-=-=-=-=-=-=-=-=-=-=".as_bytes(),
"xx-=-=-=-=-=-=-=".as_bytes()
)
);
assert_eq!(
Some(HalfMatch {
prefix_long: "-=-=-=-=-=".as_bytes(),
suffix_long: "".as_bytes(),
prefix_short: "".as_bytes(),
suffix_short: "y".as_bytes(),
common: "-=-=-=-=-=-=-=y".as_bytes()
}),
dmp.half_match(
"-=-=-=-=-=-=-=-=-=-=-=-=y".as_bytes(),
"-=-=-=-=-=-=-=yy".as_bytes()
)
);
// Non-optimal halfmatch.
// Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy
assert_eq!(
Some(HalfMatch {
prefix_long: "qHillo".as_bytes(),
suffix_long: "w".as_bytes(),
prefix_short: "x".as_bytes(),
suffix_short: "Hulloy".as_bytes(),
common: "HelloHe".as_bytes()
}),
dmp.half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes())
);
// Optimal no halfmatch.
dmp.timeout = Some(u64::MAX);
assert!(dmp
.half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes())
.is_none());
}
#[test]
fn test_diff_lines_to_chars() {
// Convert lines down to characters.
assert_eq!(
LineToChars {
chars_old: [0_usize, 1, 0]
.iter()
.map(|i| char::from_u32(*i as u32).unwrap())
.collect::<String>()
.as_bytes()
.to_vec(),
chars_new: [1_usize, 0, 1]
.iter()
.map(|i| char::from_u32(*i as u32).unwrap())
.collect::<String>()
.as_bytes()
.to_vec(),
lines: vec![b"alpha\n", b"beta\n"]
},
DiffMatchPatch::lines_to_chars(b"alpha\nbeta\nalpha\n", b"beta\nalpha\nbeta\n")
);
assert_eq!(
LineToChars {
chars_old: "".as_bytes().to_vec(),
chars_new: [0_usize, 1, 2, 2]
.iter()
.map(|i| char::from_u32(*i as u32).unwrap())
.collect::<String>()
.as_bytes()
.to_vec(),
lines: vec![b"alpha\r\n", b"beta\r\n", b"\r\n"]
},
DiffMatchPatch::lines_to_chars(b"", b"alpha\r\nbeta\r\n\r\n\r\n")
);
assert_eq!(
LineToChars {
chars_old: [0_usize]
.iter()
.map(|i| char::from_u32(*i as u32).unwrap())
.collect::<String>()
.as_bytes()
.to_vec(),
chars_new: [1_usize]
.iter()
.map(|i| char::from_u32(*i as u32).unwrap())
.collect::<String>()
.as_bytes()
.to_vec(),
lines: vec![b"a", b"b"]
},
DiffMatchPatch::lines_to_chars(b"a", b"b")
);
// More than 256 to reveal any 8-bit limitations.
const TLIMIT: usize = 300;
let linestr = (0..TLIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();
let linelist: Vec<&[u8]> = (0..TLIMIT).map(|i| linestr[i].as_bytes()).collect();
let charlist = (0..TLIMIT)
.map(|i| char::from_u32(i as u32).unwrap())
.collect::<String>();
assert_eq!(
LineToChars {
chars_old: charlist.as_bytes().to_vec(),
chars_new: String::new().as_bytes().to_vec(),
lines: linelist
},
DiffMatchPatch::lines_to_chars(linestr.join("").as_bytes(), b"")
);
}
#[test]
fn test_diff_chars_to_lines() {
// Convert chars up to lines.
let d1 = [0_usize, 1, 0]
.iter()
.map(|i| char::from_u32(*i as u32).unwrap())
.collect::<String>();
let d2 = [1_usize, 0, 1]
.iter()
.map(|i| char::from_u32(*i as u32).unwrap())
.collect::<String>();
let mut diffs = [Diff::equal(d1.as_bytes()), Diff::insert(d2.as_bytes())];
DiffMatchPatch::chars_to_lines(&mut diffs, &[b"alpha\n", b"beta\n"]);
assert_eq!(
[
Diff::equal(b"alpha\nbeta\nalpha\n"),
Diff::insert(b"beta\nalpha\nbeta\n")
],
diffs
);
// More than 256 to reveal any 8-bit limitations.
const TLIMIT: usize = 300;
let charlist = (0..TLIMIT)
.map(|i| char::from_u32(i as u32).unwrap())
.collect::<String>();
let linestr = (0..TLIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();
let linelist: Vec<&[u8]> = (0..TLIMIT).map(|i| linestr[i].as_bytes()).collect();
let mut diffs = [Diff::delete(charlist.as_bytes())];
DiffMatchPatch::chars_to_lines(&mut diffs, &linelist[..]);
assert_eq!([Diff::delete(linestr.join("").as_bytes())], diffs);
// More than 65536 to verify any 16-bit limitation.
const ULIMIT: usize = 10;
let linestr = (0..ULIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();
let lines = linestr.join("");
let l2c = DiffMatchPatch::lines_to_chars(lines.as_bytes(), b"");
let mut diffs = [Diff::insert(&l2c.chars_old)];
DiffMatchPatch::chars_to_lines(&mut diffs, &l2c.lines);
assert_eq!(lines.as_bytes(), diffs[0].1);
}
#[test]
fn test_diff_cleanup_merge() {
// Cleanup a messy diff.
// Null case.
let mut diffs = vec![];
let test: Vec<Diff> = vec![];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
// No change case
let mut diffs = vec![Diff::equal(b"a"), Diff::delete(b"b"), Diff::insert(b"c")];
let test = vec![Diff::equal(b"a"), Diff::delete(b"b"), Diff::insert(b"c")];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
// Merge equalities.
let mut diffs = vec![Diff::equal(b"a"), Diff::equal(b"b"), Diff::equal(b"c")];
let test = vec![Diff::equal(b"abc")];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
// Merge deletions.
let mut diffs = vec![Diff::delete(b"a"), Diff::delete(b"b"), Diff::delete(b"c")];
let test = vec![Diff::delete(b"abc")];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
// Merge insertions.
let mut diffs = vec![Diff::insert(b"a"), Diff::insert(b"b"), Diff::insert(b"c")];
let test = vec![Diff::insert(b"abc")];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
// Merge interweave.
let mut diffs = vec![
Diff::delete(b"a"),
Diff::insert(b"b"),
Diff::delete(b"c"),
Diff::insert(b"d"),
Diff::equal(b"e"),
Diff::equal(b"f"),
];
let test = vec![Diff::delete(b"ac"), Diff::insert(b"bd"), Diff::equal(b"ef")];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
// Prefix and suffix detection.
let mut diffs = vec![
Diff::delete(b"a"),
Diff::insert(b"abc"),
Diff::delete(b"dc"),
];
let test = vec![
Diff::equal(b"a"),
Diff::delete(b"d"),
Diff::insert(b"b"),
Diff::equal(b"c"),
];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
// Prefix and suffix detection with equalities.
let mut diffs = vec![
Diff::equal(b"x"),
Diff::delete(b"a"),
Diff::insert(b"abc"),
Diff::delete(b"dc"),
Diff::equal(b"y"),
];
let test = vec![
Diff::equal(b"xa"),
Diff::delete(b"d"),
Diff::insert(b"b"),
Diff::equal(b"cy"),
];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
// Slide edit left.
let mut diffs = vec![Diff::equal(b"a"), Diff::insert(b"ba"), Diff::equal(b"c")];
let test = vec![Diff::insert(b"ab"), Diff::equal(b"ac")];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
// Slide edit right
let mut diffs = vec![Diff::equal(b"c"), Diff::insert(b"ab"), Diff::equal(b"a")];
let test = vec![Diff::equal(b"ca"), Diff::insert(b"ba")];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
// Slide edit left recursive.
let mut diffs = vec![
Diff::equal(b"a"),
Diff::delete(b"b"),
Diff::equal(b"c"),
Diff::delete(b"ac"),
Diff::equal(b"x"),
];
let test = vec![Diff::delete(b"abc"), Diff::equal(b"acx")];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
// Slide edit right recursive.
let mut diffs = vec![
Diff::equal(b"x"),
Diff::delete(b"ca"),
Diff::equal(b"c"),
Diff::delete(b"b"),
Diff::equal(b"a"),
];
let test = vec![Diff::equal(b"xca"), Diff::delete(b"cba")];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
// Empty merge.
let mut diffs = vec![Diff::delete(b"b"), Diff::insert(b"ab"), Diff::equal(b"c")];
let test = vec![Diff::insert(b"a"), Diff::equal(b"bc")];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
// Empty equality.
let mut diffs = vec![Diff::equal(b""), Diff::insert(b"a"), Diff::equal(b"b")];
let test = vec![Diff::insert(b"a"), Diff::equal(b"b")];
DiffMatchPatch::cleanup_merge(&mut diffs);
assert_eq!(test, diffs);
}
#[test]
fn test_diff_cleanup_semantic_overall() {
// Cleanup semantically trivial equalities.
// Null case.
let mut diffs = vec![];
let test: Vec<Diff> = vec![];
DiffMatchPatch::cleanup_semantic(&mut diffs);
assert_eq!(test, diffs);
// No elimination #1.
let mut diffs = vec![
Diff::delete(b"ab"),
Diff::insert(b"cd"),
Diff::equal(b"12"),
Diff::delete(b"e"),
];
let test: Vec<Diff> = vec![
Diff::delete(b"ab"),
Diff::insert(b"cd"),
Diff::equal(b"12"),
Diff::delete(b"e"),
];
DiffMatchPatch::cleanup_semantic(&mut diffs);
assert_eq!(test, diffs);
// No elimination #2.
let mut diffs = vec![
Diff::delete(b"abc"),
Diff::insert(b"ABC"),
Diff::equal(b"1234"),
Diff::delete(b"wxyz"),
];
let test: Vec<Diff> = vec![
Diff::delete(b"abc"),
Diff::insert(b"ABC"),
Diff::equal(b"1234"),
Diff::delete(b"wxyz"),
];
DiffMatchPatch::cleanup_semantic(&mut diffs);
assert_eq!(test, diffs);
// Simple elimination.
let mut diffs = vec![Diff::delete(b"a"), Diff::equal(b"b"), Diff::delete(b"c")];
let test: Vec<Diff> = vec![Diff::delete(b"abc"), Diff::insert(b"b")];
DiffMatchPatch::cleanup_semantic(&mut diffs);
assert_eq!(test, diffs);
// Backpass elimination.
let mut diffs = vec![
Diff::delete(b"ab"),
Diff::equal(b"cd"),
Diff::delete(b"e"),
Diff::equal(b"f"),
Diff::insert(b"g"),
];
let test: Vec<Diff> = vec![Diff::delete(b"abcdef"), Diff::insert(b"cdfg")];
DiffMatchPatch::cleanup_semantic(&mut diffs);
assert_eq!(test, diffs);
// Multiple eliminations.
let mut diffs = vec![
Diff::insert(b"1"),
Diff::equal(b"A"),
Diff::delete(b"B"),
Diff::insert(b"2"),
Diff::equal(b"_"),
Diff::insert(b"1"),
Diff::equal(b"A"),
Diff::delete(b"B"),
Diff::insert(b"2"),
];
let test: Vec<Diff> = vec![Diff::delete(b"AB_AB"), Diff::insert(b"1A2_1A2")];
DiffMatchPatch::cleanup_semantic(&mut diffs);
assert_eq!(test, diffs);
// Word boundaries.
let mut diffs = vec![
Diff::equal(b"The c"),
Diff::delete(b"ow and the c"),
Diff::equal(b"at."),
];
let test: Vec<Diff> = vec![
Diff::equal(b"The "),
Diff::delete(b"cow and the "),
Diff::equal(b"cat."),
];
DiffMatchPatch::cleanup_semantic(&mut diffs);
assert_eq!(test, diffs);
// No overlap elimination.
let mut diffs = vec![Diff::delete(b"abcxx"), Diff::insert(b"xxdef")];
let test: Vec<Diff> = vec![Diff::delete(b"abcxx"), Diff::insert(b"xxdef")];
DiffMatchPatch::cleanup_semantic(&mut diffs);
assert_eq!(test, diffs);
// Overlap elimination.
let mut diffs = vec![Diff::delete(b"abcxxx"), Diff::insert(b"xxxdef")];
let test: Vec<Diff> = vec![
Diff::delete(b"abc"),
Diff::equal(b"xxx"),
Diff::insert(b"def"),
];
DiffMatchPatch::cleanup_semantic(&mut diffs);
assert_eq!(test, diffs);
// Reverse overlap elimination.
let mut diffs = vec![Diff::delete(b"xxxabc"), Diff::insert(b"defxxx")];
let test: Vec<Diff> = vec![
Diff::insert(b"def"),
Diff::equal(b"xxx"),
Diff::delete(b"abc"),
];
DiffMatchPatch::cleanup_semantic(&mut diffs);
assert_eq!(test, diffs);
// Two overlap eliminations.
let mut diffs = vec![
Diff::delete(b"abcd1212"),
Diff::insert(b"1212efghi"),
Diff::equal(b"----"),
Diff::delete(b"A3"),
Diff::insert(b"3BC"),
];
let test: Vec<Diff> = vec![
Diff::delete(b"abcd"),
Diff::equal(b"1212"),
Diff::insert(b"efghi"),
Diff::equal(b"----"),
Diff::delete(b"A"),
Diff::equal(b"3"),
Diff::insert(b"BC"),
];
DiffMatchPatch::cleanup_semantic(&mut diffs);
assert_eq!(test, diffs);
}
#[test]
fn test_diff_cleanup_semantic_lossless() {
// Slide diffs to match logical boundaries.
// Null case.
let mut diffs: Vec<Diff> = vec![];
let test: Vec<Diff> = vec![];
DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
assert_eq!(test, diffs);
// Blank lines.
let mut diffs: Vec<Diff> = vec![
Diff::equal(b"AAA\r\n\r\nBBB"),
Diff::insert(b"\r\nDDD\r\n\r\nBBB"),
Diff::equal(b"\r\nEEE"),
];
let test: Vec<Diff> = vec![
Diff::equal(b"AAA\r\n\r\n"),
Diff::insert(b"BBB\r\nDDD\r\n\r\n"),
Diff::equal(b"BBB\r\nEEE"),
];
DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
assert_eq!(test, diffs);
// Line boundaries.
let mut diffs: Vec<Diff> = vec![
Diff::equal(b"AAA\r\nBBB"),
Diff::insert(b" DDD\r\nBBB"),
Diff::equal(b" EEE"),
];
let test: Vec<Diff> = vec![
Diff::equal(b"AAA\r\n"),
Diff::insert(b"BBB DDD\r\n"),
Diff::equal(b"BBB EEE"),
];
DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
assert_eq!(test, diffs);
// Word boundaries.
let mut diffs: Vec<Diff> = vec![
Diff::equal(b"The c"),
Diff::insert(b"ow and the c"),
Diff::equal(b"at."),
];
let test: Vec<Diff> = vec![
Diff::equal(b"The "),
Diff::insert(b"cow and the "),
Diff::equal(b"cat."),
];
DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
assert_eq!(test, diffs);
// Alphanumeric boundaries.
let mut diffs: Vec<Diff> = vec![
Diff::equal(b"The-c"),
Diff::insert(b"ow-and-the-c"),
Diff::equal(b"at."),
];
let test: Vec<Diff> = vec![
Diff::equal(b"The-"),
Diff::insert(b"cow-and-the-"),
Diff::equal(b"cat."),
];
DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
assert_eq!(test, diffs);
// Hitting the start.
let mut diffs: Vec<Diff> = vec![Diff::equal(b"a"), Diff::delete(b"a"), Diff::equal(b"ax")];
let test: Vec<Diff> = vec![Diff::delete(b"a"), Diff::equal(b"aax")];
DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
assert_eq!(test, diffs);
// Hitting the end.
let mut diffs: Vec<Diff> = vec![Diff::equal(b"xa"), Diff::delete(b"a"), Diff::equal(b"a")];
let test: Vec<Diff> = vec![Diff::equal(b"xaa"), Diff::delete(b"a")];
DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
assert_eq!(test, diffs);
// Sentence boundaries.
let mut diffs: Vec<Diff> = vec![
Diff::equal(b"The xxx. The "),
Diff::insert(b"zzz. The "),
Diff::equal(b"yyy."),
];
let test: Vec<Diff> = vec![
Diff::equal(b"The xxx."),
Diff::insert(b" The zzz."),
Diff::equal(b" The yyy."),
];
DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
assert_eq!(test, diffs);
}
#[test]
fn test_diff_common_overlap() {
// Detect any suffix/prefix overlap.
// Null case
assert_eq!(0, DiffMatchPatch::common_overlap(b"", b"abcd"));
// Whole case.
assert_eq!(3, DiffMatchPatch::common_overlap(b"abc", b"abcd"));
// No overlap.
assert_eq!(0, DiffMatchPatch::common_overlap(b"123456", b"abcd"));
// Overlap.
assert_eq!(3, DiffMatchPatch::common_overlap(b"123456xxx", b"xxxabcd"));
// Unicode.
// Some overly clever languages (C#) may treat ligatures as equal to their
// component letters. E.g. U+FB01 == 'fi'
assert_eq!(
0,
DiffMatchPatch::common_overlap(b"fi", "\u{7FFF}".as_bytes())
);
// Complete overlap
assert_eq!(
6,
DiffMatchPatch::common_overlap("❤️".as_bytes(), "❤️".as_bytes())
);
// Partial unicode overlap
assert_eq!(
0,
DiffMatchPatch::common_overlap("ஆ".as_bytes(), "இ".as_bytes())
);
assert_eq!(
3,
DiffMatchPatch::common_overlap("ஆஇ".as_bytes(), "இ".as_bytes())
);
}
#[test]
fn test_diff_bisect() {
let dmp = DiffMatchPatch::default();
// Normal.
// Since the resulting diff hasn't been normalized, it would be ok if
// the insertion and deletion pairs are swapped.
// If the order changes, tweak this test as required.
assert_eq!(
vec![
Diff::delete(b"c"),
Diff::insert(b"m"),
Diff::equal(b"a"),
Diff::delete(b"t"),
Diff::insert(b"p")
],
dmp.bisect(
b"cat",
b"map",
Instant::now()
.checked_add(Duration::from_secs(600))
.unwrap()
)
);
// Timeout.
assert_eq!(
vec![Diff::delete(b"cat"), Diff::insert(b"map"),],
dmp.bisect(
b"cat",
b"map",
Instant::now().checked_add(Duration::from_secs(0)).unwrap()
)
);
}
#[test]
fn test_diff_main() {
let mut dmp = DiffMatchPatch::default();
// Perform a trivial diff.
// Null case.
assert!(dmp.diff_main("", "").is_empty());
// Equality
assert_eq!(vec![Diff::equal(b"abc")], dmp.diff_main("abc", "abc"));
// Simple insert
assert_eq!(
vec![Diff::equal(b"ab"), Diff::insert(b"123"), Diff::equal(b"c")],
dmp.diff_main("abc", "ab123c")
);
// Simple delete
assert_eq!(
vec![Diff::equal(b"a"), Diff::delete(b"123"), Diff::equal(b"bc")],
dmp.diff_main("a123bc", "abc")
);
// Two insertions
assert_eq!(
vec![
Diff::equal(b"a"),
Diff::insert(b"123"),
Diff::equal(b"b"),
Diff::insert(b"456"),
Diff::equal(b"c"),
],
dmp.diff_main("abc", "a123b456c")
);
// Two deletions.
assert_eq!(
vec![
Diff::equal(b"a"),
Diff::delete(b"123"),
Diff::equal(b"b"),
Diff::delete(b"456"),
Diff::equal(b"c"),
],
dmp.diff_main("a123b456c", "abc")
);
// Perform a real diff.
// Switch off the timeout.
dmp.timeout = None;
// Simple cases.
assert_eq!(
vec![Diff::delete(b"a"), Diff::insert(b"b"),],
dmp.diff_main("a", "b")
);
assert_eq!(
vec![
Diff::delete(b"Apple"),
Diff::insert(b"Banana"),
Diff::equal(b"s are a"),
Diff::insert(b"lso"),
Diff::equal(b" fruit.")
],
dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.")
);
assert_eq!(
vec![
Diff::delete(b"a"),
Diff::insert("\u{0680}".as_bytes()),
Diff::equal(b"x"),
Diff::delete(b"\t"),
Diff::insert(b"\0")
],
dmp.diff_main("ax\t", "\u{0680}x\0")
);
// Overlaps.
assert_eq!(
vec![
Diff::delete(b"1"),
Diff::equal(b"a"),
Diff::delete(b"y"),
Diff::equal(b"b"),
Diff::delete(b"2"),
Diff::insert(b"xab"),
],
dmp.diff_main("1ayb2", "abxab")
);
assert_eq!(
vec![
Diff::insert(b"xaxcx"),
Diff::equal(b"abc"),
Diff::delete(b"y"),
],
dmp.diff_main("abcy", "xaxcxabc")
);
assert_eq!(
vec![
Diff::delete(b"ABCD"),
Diff::equal(b"a"),
Diff::delete(b"="),
Diff::insert(b"-"),
Diff::equal(b"bcd"),
Diff::delete(b"="),
Diff::insert(b"-"),
Diff::equal(b"efghijklmnopqrs"),
Diff::delete(b"EFGHIJKLMNOefg"),
],
dmp.diff_main("ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs")
);
// Large equality.
assert_eq!(
vec![
Diff::insert(b" "),
Diff::equal(b"a"),
Diff::insert(b"nd"),
Diff::equal(b" [[Hepatopancreatic]]"),
Diff::delete(b" and [[New"),
],
dmp.diff_main("a [[Hepatopancreatic]] and [[New", " and [[Hepatopancreatic]]")
);
// Timeout.
const LOW_TIMEOUT: u64 = 100;
dmp.timeout = Some(LOW_TIMEOUT);
let a = vec!["`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; 2048].join("");
let b = vec!["I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; 2048].join("");
let start = Instant::now();
dmp.diff_main(&a, &b);
let end = Instant::now();
// Test that we took at least the timeout period (+ 5ms being generous).
assert!((end - start).as_millis() <= LOW_TIMEOUT as u128 + 5);
// Test the linemode speedup.
// Must be long to pass the 100 char cutoff.
// Simple line-mode.
let a = "12345678901234567890123456789 0123456 78901234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
let b = "abcdefghij abcdefghij abcdefghij abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n";
dmp.checklines = Some(false);
// let start = Instant::now();
let res_no_lm = dmp.diff_main(a, b);
// let no_lm = Instant::now() - start;
dmp.checklines = Some(true);
// let start = Instant::now();
let res_yes_lm = dmp.diff_main(a, b);
// let yes_lm = Instant::now() - start;
// Now, we'll run 2 checks - one for result equality, two for speedup
assert_eq!(res_no_lm, res_yes_lm);
// Fails, no-linemode takes less time than with linemode optimizations
// Off by a few 30μs
// assert!(no_lm > yes_lm);
// Single line-mode.
let a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890";
let b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij";
dmp.checklines = Some(true);
let yes_lm = dmp.diff_main(a, b);
dmp.checklines = Some(false);
let no_lm = dmp.diff_main(a, b);
assert_eq!(no_lm, yes_lm);
// Overlap line-mode.
let a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
let b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n";
dmp.checklines = Some(true);
let yes_lm = dmp.diff_main(a, b);
dmp.checklines = Some(false);
let no_lm = dmp.diff_main(a, b);
assert_eq!(rebuild_text(&yes_lm[..]), rebuild_text(&no_lm[..]));
// TODO
// // Test null inputs.
// try {
// dmp.diff_main(null, null);
// assertEquals(Error, null);
// } catch (e) {
// // Exception expected.
// }
}
// Helper to construct the two texts which made up the diff originally.
fn rebuild_text(diffs: &[Diff]) -> (String, String) {
let mut txt1 = vec![];
let mut txt2 = vec![];
diffs.iter()
.for_each(|d| {
let mut txt = d.1.clone();
if d.0 != Ops::Insert {
txt1.append(&mut txt);
}
let mut txt = d.1.clone();
if d.0 != Ops::Delete {
txt2.append(&mut txt);
}
});
(String::from_utf8(txt1).unwrap(), String::from_utf8(txt2).unwrap())
}
#[test]
fn test_serde() {
// let diffs = vec![
// Diff::delete(b"a"),
// Diff::insert("\u{0680}".as_bytes()),
// Diff::equal(b"x"),
// Diff::delete(b"\t"),
// Diff::insert(b"\0")
// ];
// let serialized = serde_json::to_string(&diffs).unwrap();
// println!("{serialized}");
}
#[test]
fn test_patch_add_context() {
let dmp = DiffMatchPatch::default();
let mut ps = DiffMatchPatch::patch_from_text("@@ -21,4 +21,10 @@\n-jump\n+somersault\n");
let p = ps.first_mut().unwrap();
dmp.patch_add_context(p, b"The quick brown fox jumps over the lazy dog.");
assert_eq!("@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.to_string());
// Same, but not enough trailing context.
let mut ps = DiffMatchPatch::patch_from_text("@@ -21,4 +21,10 @@\n-jump\n+somersault\n");
let p = ps.first_mut().unwrap();
dmp.patch_add_context(p, b"The quick brown fox jumps.");
assert_eq!("@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.to_string());
// Same, but not enough leading context.
let mut ps = DiffMatchPatch::patch_from_text("@@ -3 +3,2 @@\n-e\n+at\n");
let p = ps.first_mut().unwrap();
dmp.patch_add_context(p, b"The quick brown fox jumps.");
assert_eq!("@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.to_string());
// Same, but with ambiguity.
let mut ps = DiffMatchPatch::patch_from_text("@@ -3 +3,2 @@\n-e\n+at\n");
let p = ps.first_mut().unwrap();
dmp.patch_add_context(p, b"The quick brown fox jumps. The quick brown fox crashes.");
assert_eq!("@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. \n", p.to_string());
}
#[test]
fn test_patch_from_text() {
assert!(DiffMatchPatch::patch_from_text("").is_empty());
assert_eq!("@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n \nlaz\n", DiffMatchPatch::patch_from_text("@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n")[0].to_string());
assert_eq!("@@ -1 +1 @@\n-a\n+b\n", DiffMatchPatch::patch_from_text("@@ -1 +1 @@\n-a\n+b\n")[0].to_string());
assert_eq!("@@ -1,3 +0,0 @@\n-abc\n", DiffMatchPatch::patch_from_text("@@ -1,3 +0,0 @@\n-abc\n")[0].to_string());
assert_eq!("@@ -0,0 +1,3 @@\n+abc\n", DiffMatchPatch::patch_from_text("@@ -0,0 +1,3 @@\n+abc\n")[0].to_string());
// TODO
// // Generates error.
// try {
// dmp.patch_fromText('Bad\nPatch\n');
// assertEquals(Error, null);
// } catch (e) {
// // Exception expected.
// }
}
#[test]
fn patch_to_text() {
let strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n";
let patches = DiffMatchPatch::patch_from_text(strp);
assert_eq!(strp, DiffMatchPatch::patch_to_text(&patches));
let strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n";
let patches = DiffMatchPatch::patch_from_text(strp);
assert_eq!(strp, DiffMatchPatch::patch_to_text(&patches));
}
#[test]
fn test_patch_make() {
let dmp = DiffMatchPatch::default();
let patches = dmp.patch_make(super::PatchInput::Texts("", ""));
assert!(patches.is_empty());
let txt1 = "The quick brown fox jumps over the lazy dog.";
let txt2 = "That quick brown fox jumped over a lazy dog.";
// The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context.
let patches = dmp.patch_make(crate::dmp::PatchInput::Texts(txt2, txt1));
assert_eq!("@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n", DiffMatchPatch::patch_to_text(&patches));
// Text1+Text2 inputs.
let patches = dmp.patch_make(crate::dmp::PatchInput::Texts(txt1, txt2));
assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", DiffMatchPatch::patch_to_text(&patches));
// Diff input.
// var diffs = dmp.diff_main(text1, text2, false);
let diffs = dmp.diff_main(txt1, txt2);
let patches = dmp.patch_make(crate::dmp::PatchInput::Diffs(&diffs[..]));
assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", DiffMatchPatch::patch_to_text(&patches));
// Text1+Diff inputs.
let patches = dmp.patch_make(crate::dmp::PatchInput::TextDiffs(txt1, &diffs[..]));
assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", DiffMatchPatch::patch_to_text(&patches));
// Character encoding.
let patches = dmp.patch_make(crate::dmp::PatchInput::Texts("`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?"));
assert_eq!(
percent_encoding::percent_decode(b"@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n").decode_utf8().unwrap(),
DiffMatchPatch::patch_to_text(&patches)
);
// Character decoding.
let diffs = vec![
Diff::delete(b"`1234567890-=[]\\;',./"),
Diff::insert(b"~!@#$%^&*()_+{}|:\"<>?")
];
assert_eq!(
diffs,
DiffMatchPatch::patch_from_text("@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n")[0].diffs
);
// Long string with repeats.
let txt1 = vec!["abcdef"; 100].join("");
let txt2 = [&txt1, "123"].join("");
let patches = dmp.patch_make(crate::dmp::PatchInput::Texts(&txt1, &txt2));
assert_eq!("@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n", DiffMatchPatch::patch_to_text(&patches));
// // Test null inputs.
// try {
// dmp.patch_make(null);
// assertEquals(Error, null);
// } catch (e) {
// // Exception expected.
// }
}
#[test]
fn test_parse_patch_header() {
assert_eq!(Some((21, Some(4), 21, Some(10))), DiffMatchPatch::parse_patch_header("@@ -21,4 +21,10 @@".as_bytes()));
assert_eq!(Some((3, None, 3, Some(2))), DiffMatchPatch::parse_patch_header("@@ -3 +3,2 @@".as_bytes()));
// Bad cases
assert!(DiffMatchPatch::parse_patch_header("@@ +3,2 @@".as_bytes()).is_none());
assert!(DiffMatchPatch::parse_patch_header("@@ 2046 +3,2 @@".as_bytes()).is_none());
}
#[test]
fn test_diff_text() {
let diffs = vec![
Diff::equal(b"jump"),
Diff::delete(b"s"),
Diff::insert(b"ed"),
Diff::equal(b" over "),
Diff::delete(b"the"),
Diff::insert(b"a"),
Diff::equal(b" lazy")
];
assert_eq!(b"jumps over the lazy", &DiffMatchPatch::diff_text_old(&diffs[..])[..]);
assert_eq!(b"jumped over a lazy", &DiffMatchPatch::diff_text_new(&diffs[..])[..]);
}
#[test]
fn test_patch_add_padding() {
let dmp = DiffMatchPatch::default();
// Both edges full.
let patches = dmp.patch_make(PatchInput::Texts("", "test"));
assert_eq!("@@ -0,0 +1,4 @@\n+test\n", DiffMatchPatch::patch_to_text(&patches));
dmp.patch_add_padding(&patches);
// var patches = dmp.patch_make('', 'test');
// assertEquals('@@ -0,0 +1,4 @@\n+test\n', dmp.patch_toText(patches));
// dmp.patch_addPadding(patches);
// assertEquals('@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n', dmp.patch_toText(patches));
// // Both edges partial.
// // patches = dmp.patch_make('XY', 'XtestY');
// // assertEquals('@@ -1,2 +1,6 @@\n X\n+test\n Y\n', dmp.patch_toText(patches));
// // dmp.patch_addPadding(patches);
// // assertEquals('@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n', dmp.patch_toText(patches));
// // Both edges none.
// // patches = dmp.patch_make('XXXXYYYY', 'XXXXtestYYYY');
// // assertEquals('@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n', dmp.patch_toText(patches));
// // dmp.patch_addPadding(patches);
// // assertEquals('@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n', dmp.patch_toText(patches));
}
}