my fork of dmp
-rw-r--r-- benches/diff.rs | 4
-rw-r--r-- src/dmp.rs | 423
-rw-r--r-- src/lib.rs | 4
-rw-r--r-- src/traits.rs | 200
-rw-r--r-- tests/test.rs | 547
5 files changed, 912 insertions, 266 deletions
diff --git a/benches/diff.rs b/benches/diff.rs
index dcd9330..f426c3c 100644
--- a/benches/diff.rs
+++ b/benches/diff.rs
@@ -1,7 +1,7 @@
use std::path::Path;
use criterion::{criterion_group, criterion_main, Criterion};
-use diff_match_patch_rs::dmp::DiffMatchPatch;
+use diff_match_patch_rs::{dmp::DiffMatchPatch, Efficient};
fn diff_main(c: &mut Criterion) {
let basedir = Path::new("testdata");
@@ -11,7 +11,7 @@ fn diff_main(c: &mut Criterion) {
let dmp = DiffMatchPatch::default();
c.bench_function("diff-match-patch", |bencher| {
- bencher.iter(|| dmp.diff_main(&old, &new).unwrap());
+ bencher.iter(|| dmp.diff_main::<Efficient>(&old, &new).unwrap());
});
}
diff --git a/src/dmp.rs b/src/dmp.rs
index e72b14b..959f1e6 100644
--- a/src/dmp.rs
+++ b/src/dmp.rs
@@ -2,24 +2,8 @@ use core::str;
use std::{char, collections::HashMap, fmt::Display};
use chrono::{NaiveTime, TimeDelta, Utc};
-use percent_encoding::{percent_decode, percent_encode, AsciiSet, CONTROLS};
-
-use crate::{errors::Error, traits::BisectSplit};
-
-// Appending controls to ensure exact same encoding as cpp variant
-pub const ENCODE_SET: &AsciiSet = &CONTROLS
- .add(b'"')
- .add(b'<')
- .add(b'>')
- .add(b'`')
- .add(b'{')
- .add(b'}')
- .add(b'%')
- .add(b'[')
- .add(b'\\')
- .add(b']')
- .add(b'^')
- .add(b'|');
+
+use crate::{errors::Error, DType};
/// Enum representing the different ops of diff
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
@@ -34,7 +18,7 @@ pub enum Ops {
/// (Ops::Insert, String::new("Goodbye")) means add `Goodbye`
/// (Ops::Equal, String::new("World")) means keep world
#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct Diff<T: Copy + Ord + Eq>(Ops, Vec<T>);
+pub struct Diff<T: DType>(Ops, Vec<T>);
impl Display for Diff<u8> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@@ -47,7 +31,18 @@ impl Display for Diff<u8> {
}
}
-impl<T: Copy + Ord + Eq> Diff<T> {
+impl Display for Diff<char> {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(
+ f,
+ "({:?}, {})",
+ self.op(),
+ self.data().iter().collect::<String>()
+ )
+ }
+}
+
+impl<T: DType> Diff<T> {
/// Create a new diff object
pub fn new(op: Ops, data: &[T]) -> Self {
Self(op, data.to_vec())
@@ -96,15 +91,15 @@ pub struct DiffMatchPatch {
/// 1.0 to the score (0.0 is a perfect match).
/// int Match_Distance;
match_distance: usize,
+ /// The number of bits in an int.
+ match_max_bits: usize,
/// When deleting a large block of text (over ~64 characters), how close does
/// the contents have to match the expected contents. (0.0 = perfection,
/// 1.0 = very loose). Note that `match_threshold` controls how closely the
/// end points of a delete need to match.
delete_threshold: f32,
/// Chunk size for context length.
- patch_margin: u16,
- /// The number of bits in an int.
- match_max_bits: usize,
+ patch_margin: u8,
}
impl Default for DiffMatchPatch {
@@ -122,7 +117,7 @@ impl Default for DiffMatchPatch {
}
#[derive(Debug, PartialEq, Eq)]
-struct HalfMatch<'a, T: Copy + Ord + Eq> {
+struct HalfMatch<'a, T: DType> {
prefix_long: &'a [T],
suffix_long: &'a [T],
prefix_short: &'a [T],
@@ -180,7 +175,7 @@ impl DiffMatchPatch {
}
// returns the current patch margin
- fn patch_margin(&self) -> u16 {
+ fn patch_margin(&self) -> u8 {
self.patch_margin
}
@@ -215,13 +210,13 @@ impl DiffMatchPatch {
self.match_distance = distance
}
- pub(crate) fn diff_internal<'a>(
+ pub(crate) fn diff_internal<'a, T: DType>(
&self,
- old_bytes: &'a [u8],
- new_bytes: &'a [u8],
+ old_bytes: &'a [T],
+ new_bytes: &'a [T],
linemode: bool,
deadline: Option<NaiveTime>,
- ) -> Result<Vec<Diff<u8>>, crate::errors::Error> {
+ ) -> Result<Vec<Diff<T>>, crate::errors::Error> {
// First, check if lhs and rhs are equal
if old_bytes == new_bytes {
if old_bytes.is_empty() {
@@ -273,13 +268,13 @@ impl DiffMatchPatch {
Ok(diffs)
}
- fn compute<'a>(
+ fn compute<'a, T: DType>(
&self,
- old: &'a [u8],
- new: &'a [u8],
+ old: &'a [T],
+ new: &'a [T],
linemode: bool,
deadline: Option<NaiveTime>,
- ) -> Result<Vec<Diff<u8>>, crate::errors::Error> {
+ ) -> Result<Vec<Diff<T>>, crate::errors::Error> {
// returning all of the new part
if old.is_empty() {
return Ok(vec![Diff::insert(new)]);
@@ -355,11 +350,7 @@ impl DiffMatchPatch {
}
}
- fn half_match<'a, T: Copy + Ord + Eq>(
- &self,
- old: &'a [T],
- new: &'a [T],
- ) -> Option<HalfMatch<'a, T>> {
+ fn half_match<'a, T: DType>(&self, old: &'a [T], new: &'a [T]) -> Option<HalfMatch<'a, T>> {
// Don't risk returning a suboptimal diff when we have unlimited time
self.timeout()?;
@@ -424,12 +415,12 @@ impl DiffMatchPatch {
// Quick line-level diff on both strings, then rediff the parts for greater accuracy
// This speedup can produce non-minimal diffs
- fn line_mode<'a>(
+ fn line_mode<'a, T: DType>(
&self,
- old: &'a [u8],
- new: &'a [u8],
+ old: &'a [T],
+ new: &'a [T],
deadline: Option<NaiveTime>,
- ) -> Result<Vec<Diff<u8>>, crate::errors::Error> {
+ ) -> Result<Vec<Diff<T>>, crate::errors::Error> {
let mut diffs = {
let to_chars = Self::lines_to_chars(old, new);
let diffs =
@@ -629,7 +620,7 @@ impl DiffMatchPatch {
// Find the 'middle snake' of a diff, split the problem in two
// and return the recursively constructed diff.
// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
- pub fn bisect<'a, T: BisectSplit>(
+ pub fn bisect<'a, T: DType>(
&self,
old: &'a [T],
new: &'a [T],
@@ -799,7 +790,7 @@ impl DiffMatchPatch {
// is at least half the length of longtext?
//idx Start index of quarter length substring within longtext.
#[inline]
- fn half_match_i<'a, T: Copy + Ord + Eq>(
+ fn half_match_i<'a, T: DType>(
long: &'a [T],
short: &'a [T],
idx: usize,
@@ -858,7 +849,7 @@ impl DiffMatchPatch {
// Reverse prefix is suffix
// TODO: investigate this further
#[inline]
- fn common_prefix<T: Copy + Ord + Eq>(lhs: &[T], rhs: &[T], reverse: bool) -> usize {
+ fn common_prefix<T: DType>(lhs: &[T], rhs: &[T], reverse: bool) -> usize {
if lhs.is_empty()
|| rhs.is_empty()
|| (!reverse && (lhs.first() != rhs.first()))
@@ -897,7 +888,7 @@ impl DiffMatchPatch {
}
#[inline]
- fn common_overlap(lhs: &[u8], rhs: &[u8]) -> usize {
+ fn common_overlap<T: DType>(lhs: &[T], rhs: &[T]) -> usize {
if lhs.is_empty() || rhs.is_empty() {
return 0;
}
@@ -949,7 +940,7 @@ impl DiffMatchPatch {
// Reduce the number of edits by eliminating semantically trivial equalities
#[inline]
- fn cleanup_semantic(diffs: &mut Vec<Diff<u8>>) {
+ fn cleanup_semantic<T: DType>(diffs: &mut Vec<Diff<T>>) {
let mut changes = false;
let mut pointer = 0_usize;
@@ -1087,7 +1078,7 @@ impl DiffMatchPatch {
// Look for single edits surrounded on both sides by equalities
// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
#[inline]
- fn cleanup_semantic_lossless(diffs: &mut Vec<Diff<u8>>) {
+ fn cleanup_semantic_lossless<T: DType>(diffs: &mut Vec<Diff<T>>) {
let mut pointer = 1_usize;
let mut difflen = diffs.len();
@@ -1182,9 +1173,11 @@ impl DiffMatchPatch {
// boundary falls on logical boundaries
// Scores range from 6 (best) to 0 (worst)
#[inline]
- fn cleanup_semantic_score(one: &[u8], two: &[u8]) -> u8 {
- let (char1, char2) = if let (Some(&char1), Some(&char2)) = (one.last(), two.first()) {
- (char1 as char, char2 as char)
+ fn cleanup_semantic_score<T: DType>(one: &[T], two: &[T]) -> u8 {
+ let (char1, char2) = if let (Some(&char1), Some(&char2)) = (one.last(), two.first())
+ && let (Some(c1), Some(c2)) = (char1.as_char(), char2.as_char())
+ {
+ (c1, c2)
} else {
return 6;
};
@@ -1201,12 +1194,8 @@ impl DiffMatchPatch {
let linebreak_1 = whitespace_1 && (char1 == '\n' || char1 == '\r');
let linebreak_2 = whitespace_2 && (char2 == '\n' || char2 == '\r');
- let blankline_1 = linebreak_1 && (one.ends_with(b"\n\n") || (one.ends_with(b"\n\r\n")));
- let blankline_2 = linebreak_2
- && (two.starts_with(b"\r\n\n")
- || two.starts_with(b"\r\n\r\n")
- || two.starts_with(b"\n\r\n")
- || two.starts_with(b"\n\n"));
+ let blankline_1 = linebreak_1 && T::is_linebreak_end(one);
+ let blankline_2 = linebreak_2 && T::is_linebreak_start(two);
if blankline_1 || blankline_2 {
// 5 for blank lines
@@ -1231,7 +1220,7 @@ impl DiffMatchPatch {
// Reorder and merge like edit sections. Merge equalities.
// Any edit section can move as long as it doesn't cross an equality.
#[inline]
- fn cleanup_merge<T: BisectSplit>(diffs: &mut Vec<Diff<T>>) {
+ fn cleanup_merge<T: DType>(diffs: &mut Vec<Diff<T>>) {
// Push a dummy diff ... this triggers the equality as a last step
diffs.push(Diff::equal(&[]));
@@ -1407,26 +1396,28 @@ impl DiffMatchPatch {
}
}
- pub fn to_delta(diffs: &[Diff<u8>]) -> Vec<u8> {
+ pub fn to_delta<T: DType>(diffs: &[Diff<T>]) -> Vec<T> {
let mut data = diffs
.iter()
.map(|diff| {
match diff.op() {
Ops::Insert => {
- let encoded = percent_encode(diff.data(), ENCODE_SET)
- .map(|v| v.as_bytes())
- .collect::<Vec<_>>()
- .concat();
+ let encoded = T::percent_encode(diff.data());
// format!("+{encoded}")
- ["+".as_bytes(), &encoded, "\t".as_bytes()].concat()
- }
- Ops::Delete => {
- [b"-", diff.size().to_string().as_bytes(), "\t".as_bytes()].concat()
- }
- Ops::Equal => {
- // format!("={}", diff.size())
- [b"=", diff.size().to_string().as_bytes(), "\t".as_bytes()].concat()
+ [&[T::from_char('+')], &encoded[..], &[T::from_char('\t')]].concat()
}
+ Ops::Delete => [
+ &[T::from_char('-')],
+ &T::from_str(diff.size().to_string().as_str())[..],
+ &[T::from_char('\t')],
+ ]
+ .concat(),
+ Ops::Equal => [
+ &[T::from_char('=')],
+ &T::from_str(diff.size().to_string().as_str())[..],
+ &[T::from_char('\t')],
+ ]
+ .concat(),
}
})
.collect::<Vec<_>>()
@@ -1437,11 +1428,14 @@ impl DiffMatchPatch {
data
}
- pub fn from_delta(old: &[u8], delta: &[u8]) -> Result<Vec<Diff<u8>>, crate::errors::Error> {
+ pub fn from_delta<T: DType>(
+ old: &[T],
+ delta: &[T],
+ ) -> Result<Vec<Diff<T>>, crate::errors::Error> {
let mut pointer = 0; // cursor to text
let mut diffs = vec![];
- for token in delta.split(|&k| k == b'\t') {
+ for token in delta.split(|&k| k == T::from_char('\t')) {
if token.is_empty() {
continue;
}
@@ -1451,22 +1445,13 @@ impl DiffMatchPatch {
let opcode = token.first();
let param = &token[1..];
- if opcode == Some(&b'+') {
- let param = percent_decode(param).collect::<Vec<_>>();
+ if opcode == Some(&T::from_char('+')) {
+ let param = T::percent_decode(param);
diffs.push(Diff::insert(&param));
- } else if opcode == Some(&b'-') || opcode == Some(&b'=') {
- let n = match std::str::from_utf8(param)
- .map_err(|_| crate::errors::Error::Utf8Error)
- .and_then(|t| {
- t.parse::<isize>()
- .map_err(|_| crate::errors::Error::InvalidInput)
- }) {
- Ok(n) => n,
- Err(_) => {
- return Err(crate::errors::Error::InvalidInput);
- }
- };
-
+ } else if opcode == Some(&T::from_char('-')) || opcode == Some(&T::from_char('=')) {
+ // A malformed count (undecodable text or not a number) is invalid delta input;
+ // report it as `InvalidInput` for both failure modes, matching the code this replaces.
+ let n = T::to_string(param)
+ .ok()
+ .and_then(|s| s.parse::<isize>().ok())
+ .ok_or(Error::InvalidInput)?;
if n < 0 {
return Err(crate::errors::Error::InvalidInput);
}
@@ -1480,7 +1465,7 @@ impl DiffMatchPatch {
let txt = &old[pointer..new_pointer];
pointer = new_pointer;
- if opcode == Some(&b'=') {
+ if opcode == Some(&T::from_char('=')) {
diffs.push(Diff::equal(txt))
} else {
diffs.push(Diff::delete(txt))
@@ -1624,7 +1609,7 @@ impl DiffMatchPatch {
}
#[inline]
- fn x_index<T: Copy + Eq + Ord>(diffs: &[Diff<T>], loc: usize) -> usize {
+ fn x_index<T: DType>(diffs: &[Diff<T>], loc: usize) -> usize {
let mut char1 = 0;
let mut char2 = 0;
@@ -1666,7 +1651,7 @@ impl DiffMatchPatch {
}
#[inline]
- pub fn diff_text_old(diffs: &[Diff<u8>]) -> Vec<u8> {
+ pub fn diff_text_old<T: DType>(diffs: &[Diff<T>]) -> Vec<T> {
diffs
.iter()
.filter_map(|diff| {
@@ -1680,8 +1665,7 @@ impl DiffMatchPatch {
.concat()
}
-
- pub fn diff_text_new(diffs: &[Diff<u8>]) -> Vec<u8> {
+ pub fn diff_text_new<T: DType>(diffs: &[Diff<T>]) -> Vec<T> {
diffs
.iter()
.filter_map(|diff| {
@@ -1699,7 +1683,7 @@ impl DiffMatchPatch {
// limit of the match algorithm.
// Intended to be called only from within patch_apply.
#[inline]
- fn split_max(&self, patches: &mut Patches) {
+ fn split_max<T: DType>(&self, patches: &mut Patches<T>) {
let max_bit = self.match_max_bits();
let patch_margin = self.patch_margin() as usize;
@@ -1834,17 +1818,17 @@ impl DiffMatchPatch {
}
#[derive(Debug, Eq, PartialEq)]
-struct LineToChars<'a> {
+struct LineToChars<'a, T: DType> {
chars_old: Vec<usize>,
chars_new: Vec<usize>,
- lines: Vec<&'a [u8]>,
+ lines: Vec<&'a [T]>,
}
impl DiffMatchPatch {
#[inline]
- fn lines_to_chars<'a>(old: &'a [u8], new: &'a [u8]) -> LineToChars<'a> {
- let mut lines: Vec<&'a [u8]> = vec![];
- let mut linehash: HashMap<&'a [u8], usize> = HashMap::new();
+ fn lines_to_chars<'a, T: DType>(old: &'a [T], new: &'a [T]) -> LineToChars<'a, T> {
+ let mut lines: Vec<&'a [T]> = vec![];
+ let mut linehash: HashMap<&'a [T], usize> = HashMap::new();
// Allocate 2/3rds of the UTF16::MAX (65535) value space for text1, the rest for text2.
// let mut maxlines = 5;
@@ -1864,10 +1848,10 @@ impl DiffMatchPatch {
}
#[inline]
- fn lines_to_chars_internal<'a>(
- text: &'a [u8],
- array: &mut Vec<&'a [u8]>,
- hash: &mut HashMap<&'a [u8], usize>,
+ fn lines_to_chars_internal<'a, T: DType>(
+ text: &'a [T],
+ array: &mut Vec<&'a [T]>,
+ hash: &mut HashMap<&'a [T], usize>,
maxlines: usize,
) -> Vec<usize> {
let take = maxlines - array.len();
@@ -1878,7 +1862,7 @@ impl DiffMatchPatch {
let mut broke = false;
let mut cursor = 0;
- text.split_inclusive(|u| *u == b'\n')
+ text.split_inclusive(|u| *u == T::from_char('\n'))
.enumerate()
.take(take)
.for_each(|(idx, line)| {
@@ -1913,7 +1897,7 @@ impl DiffMatchPatch {
}
#[inline]
- fn chars_to_lines(diffs: &[Diff<usize>], lines: &[&[u8]]) -> Vec<Diff<u8>> {
+ fn chars_to_lines<T: DType>(diffs: &[Diff<usize>], lines: &[&[T]]) -> Vec<Diff<T>> {
diffs
.iter()
.map(|d| {
@@ -1936,7 +1920,7 @@ impl DiffMatchPatch {
// Match methods
impl DiffMatchPatch {
- fn match_internal(&self, text: &[u8], pattern: &[u8], loc: usize) -> Option<usize> {
+ fn match_internal<T: DType>(&self, text: &[T], pattern: &[T], loc: usize) -> Option<usize> {
// Check for null inputs.
// Nothing to match.
if text.is_empty() {
@@ -1959,7 +1943,7 @@ impl DiffMatchPatch {
}
}
- fn match_bitap(&self, text: &[u8], pattern: &[u8], loc: usize) -> Option<usize> {
+ fn match_bitap<T: DType>(&self, text: &[T], pattern: &[T], loc: usize) -> Option<usize> {
if pattern.len() > self.match_max_bits() {
todo!("Throw error");
}
@@ -2085,7 +2069,7 @@ impl DiffMatchPatch {
best_loc
}
- fn match_alphabet(pattern: &[u8]) -> HashMap<u8, usize> {
+ fn match_alphabet<T: DType>(pattern: &[T]) -> HashMap<T, usize> {
let mut map = HashMap::with_capacity(pattern.len());
pattern.iter().enumerate().for_each(|(i, &p)| {
@@ -2115,16 +2099,28 @@ impl DiffMatchPatch {
}
// Patch Methods
-#[derive(Debug, Default, Clone)]
-pub struct Patch {
- diffs: Vec<Diff<u8>>,
+#[derive(Debug, Clone)]
+pub struct Patch<T: DType> {
+ diffs: Vec<Diff<T>>,
start1: usize,
start2: usize,
length1: usize,
length2: usize,
}
-impl Display for Patch {
+impl<T: DType> Default for Patch<T> {
+ fn default() -> Self {
+ Self {
+ diffs: Vec::new(),
+ start1: 0,
+ start2: 0,
+ length1: 0,
+ length2: 0,
+ }
+ }
+}
+
+impl<T: DType> Display for Patch<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let coord1 = if self.length1 == 0 {
format!("{},0", self.start1)
@@ -2161,37 +2157,43 @@ impl Display for Patch {
coord2,
" @@\n".to_string(),
];
- self.diffs.iter().for_each(|diff| {
+ for diff in self.diffs.iter() {
let sign = match diff.op() {
Ops::Insert => '+',
Ops::Delete => '-',
Ops::Equal => ' ',
};
- let segment = format!("{sign}{}\n", percent_encode(diff.data(), ENCODE_SET));
+ let enc = T::percent_encode(diff.data());
+ let segment = format!(
+ "{sign}{}\n",
+ T::to_string(&enc).map_err(|_| std::fmt::Error)?
+ );
segments.push(segment)
- });
+ }
write!(f, "{}", segments.join(""))
}
}
-impl Patch {
- pub fn diffs(&self) -> &[Diff<u8>] {
+impl<T: DType> Patch<T> {
+ pub fn diffs(&self) -> &[Diff<T>] {
&self.diffs[..]
}
}
-pub enum PatchInput<'a> {
+pub enum PatchInput<'a, T: DType> {
Texts(&'a str, &'a str),
- Diffs(&'a [Diff<u8>]),
- TextDiffs(&'a str, &'a [Diff<u8>]),
+ Diffs(&'a [Diff<T>]),
+ TextDiffs(&'a str, &'a [Diff<T>]),
}
-pub type Patches = Vec<Patch>;
+pub type Patches<T> = Vec<Patch<T>>;
impl DiffMatchPatch {
- fn parse_patch_header(s: &[u8]) -> Option<(usize, Option<usize>, usize, Option<usize>)> {
+ fn parse_patch_header<T: DType>(
+ s: &[T],
+ ) -> Option<(usize, Option<usize>, usize, Option<usize>)> {
let mut section = Vec::with_capacity(64);
let mut current_sect = 0;
@@ -2201,10 +2203,10 @@ impl DiffMatchPatch {
let mut new_cols = None;
for &c in s.iter() {
- if c == b' ' {
+ if c == T::from_char(' ') {
match current_sect {
0 => {
- if &section != b"@@" {
+ if section != T::from_str("@@") {
return None;
}
}
@@ -2213,23 +2215,29 @@ impl DiffMatchPatch {
return None;
}
- let splits = section[1..].split(|&p| p == b',').collect::<Vec<_>>();
+ let splits = section[1..]
+ .split(|&p| p == T::from_char(','))
+ .collect::<Vec<_>>();
let ol = splits.first()?;
- old_line = std::str::from_utf8(ol).ok()?.parse::<usize>().ok()?;
+ old_line = T::to_string(ol).ok()?.parse::<usize>().ok()?;
if let Some(&oc) = splits.get(1) {
- old_cols = Some(std::str::from_utf8(oc).ok()?.parse::<usize>().ok()?);
+ old_cols = Some(T::to_string(oc).ok()?.parse::<usize>().ok()?);
}
}
2 => {
- let splits = section[if *section.first()? == b'+' { 1 } else { 0 }..]
- .split(|&p| p == b',')
+ let splits = section[if *section.first()? == T::from_char('+') {
+ 1
+ } else {
+ 0
+ }..]
+ .split(|&p| p == T::from_char(','))
.collect::<Vec<_>>();
let nl = splits.first()?;
- new_line = std::str::from_utf8(nl).ok()?.parse::<usize>().ok()?;
+ new_line = T::to_string(nl).ok()?.parse::<usize>().ok()?;
if let Some(&nc) = splits.get(1) {
- new_cols = Some(std::str::from_utf8(nc).ok()?.parse::<usize>().ok()?);
+ new_cols = Some(T::to_string(nc).ok()?.parse::<usize>().ok()?);
}
}
_ => {
@@ -2243,25 +2251,25 @@ impl DiffMatchPatch {
continue;
}
- if current_sect == 1 && section.is_empty() && c != b'-' {
+ if current_sect == 1 && section.is_empty() && c != T::from_char('-') {
return None;
}
section.push(c);
}
- if &section != b"@@" {
+ if section != T::from_str("@@") {
return None;
}
Some((old_line, old_cols, new_line, new_cols))
}
- fn patch_make_internal(
+ fn patch_make_internal<T: DType>(
&self,
- txt: &[u8],
- diffs: &[Diff<u8>],
- ) -> Result<Patches, crate::errors::Error> {
+ txt: &[T],
+ diffs: &[Diff<T>],
+ ) -> Result<Patches<T>, crate::errors::Error> {
// No diffs -> no patches
if diffs.is_empty() {
return Ok(Vec::new());
@@ -2275,8 +2283,8 @@ impl DiffMatchPatch {
let mut char_n1 = 0;
let mut char_n2 = 0;
- let mut prepatch: Vec<u8> = txt.to_vec();
- let mut postpatch: Vec<u8> = prepatch.clone();
+ let mut prepatch: Vec<T> = txt.to_vec();
+ let mut postpatch: Vec<T> = prepatch.clone();
diffs.iter().enumerate().for_each(|(idx, diff)| {
// a new patch starts here
@@ -2342,7 +2350,7 @@ impl DiffMatchPatch {
Ok(patches)
}
- fn patch_add_context(&self, patch: &mut Patch, text: &[u8]) {
+ fn patch_add_context<T: DType>(&self, patch: &mut Patch<T>, text: &[T]) {
if text.is_empty() {
return;
}
@@ -2409,11 +2417,11 @@ impl DiffMatchPatch {
patch.length2 += prefix.len() + suffix.len();
}
- fn patch_apply_internal(
+ fn patch_apply_internal<T: DType>(
&self,
- patches: &Patches,
- source: &[u8],
- ) -> Result<(Vec<u8>, Vec<bool>), crate::errors::Error> {
+ patches: &Patches<T>,
+ source: &[T],
+ ) -> Result<(Vec<T>, Vec<bool>), crate::errors::Error> {
if patches.is_empty() {
return Ok((source.to_vec(), vec![]));
}
@@ -2533,13 +2541,13 @@ impl DiffMatchPatch {
Ok((source, results))
}
- fn patch_add_padding(&self, patches: &mut Patches) -> Vec<u8> {
- let pad_len = self.patch_margin() as usize;
-
- let null_pad = (1..pad_len + 1)
- .filter_map(|c| char::from_u32(c as u32).map(|c_| c_ as u8))
+ fn patch_add_padding<T: DType>(&self, patches: &mut Patches<T>) -> Vec<T> {
+ // Padding is the code points 1..=patch_margin. The inclusive range avoids computing
+ // `patch_margin() + 1`, which would overflow `u8` when the margin is 255.
+ let null_pad = (1..=self.patch_margin())
+ .filter_map(|c| c.as_char().map(T::from_char))
+ .collect::<Vec<_>>();
+ let pad_len = self.patch_margin() as usize;
+
// Bump all the patches forward.
patches.iter_mut().for_each(|p| {
p.start1 += pad_len;
@@ -2607,14 +2615,14 @@ impl DiffMatchPatch {
/// Create a new instance of the struct with default settings
/// # Example
/// ```
- /// use diff_match_patch_rs::{DiffMatchPatch, Error};
+ /// use diff_match_patch_rs::{DiffMatchPatch, Error, Efficient};
///
/// # fn main() -> Result<(), Error> {
/// let mut dmp = DiffMatchPatch::new();
/// // change some settings, e.g. set `line mode` optimization to `false` because you know you have a small text and not many lines
/// dmp.set_checklines(false);
/// // do the diffing
- /// let diffs = dmp.diff_main("Fast enough", "Blazing fast")?;
+ /// let diffs = dmp.diff_main::<Efficient>("Fast enough", "Blazing fast")?;
/// # Ok(())
/// # }
/// ```
@@ -2627,14 +2635,14 @@ impl DiffMatchPatch {
/// Vec of changes (Diff).
/// # Example
/// ```
- /// use diff_match_patch_rs::{DiffMatchPatch, Error};
+ /// use diff_match_patch_rs::{DiffMatchPatch, Error, Efficient};
///
/// # fn main() -> Result<(), Error> {
/// let mut dmp = DiffMatchPatch::new();
/// // change some settings, e.g. set `line mode` optimization to `false` because you know you have a small text and not many lines
/// dmp.set_checklines(false);
/// // do the diffing
- /// let diffs = dmp.diff_main("Fast enough", "Blazing fast")?;
+ /// let diffs = dmp.diff_main::<Efficient>("Fast enough", "Blazing fast")?;
/// println!("{}", diffs.iter().map(|d| format!("{d}")).collect::<Vec<_>>().join("\n"));
/// // You should see the following output
/// // (Delete, F)
@@ -2644,13 +2652,15 @@ impl DiffMatchPatch {
/// # Ok(())
/// # }
/// ```
- pub fn diff_main(&self, old: &str, new: &str) -> Result<Vec<Diff<u8>>, crate::errors::Error> {
- self.diff_internal(
- old.as_bytes(),
- new.as_bytes(),
- self.checklines(),
- self.deadline(),
- )
+ pub fn diff_main<T: DType>(
+ &self,
+ old: &str,
+ new: &str,
+ ) -> Result<Vec<Diff<T>>, crate::errors::Error> {
+ let old = T::from_str(old);
+ let new = T::from_str(new);
+
+ self.diff_internal(&old, &new, self.checklines(), self.deadline())
}
/// A diff of two unrelated texts can be filled with coincidental matches.
@@ -2674,7 +2684,7 @@ impl DiffMatchPatch {
/// Given a diff, measure its Levenshtein distance in terms of the number of inserted, deleted or substituted characters.
/// The minimum distance is 0 which means equality, the maximum distance is the length of the longer string.
- pub fn diff_levenshtein(&self, diffs: &[Diff<u8>]) -> usize {
+ pub fn diff_levenshtein<T: DType>(&self, diffs: &[Diff<T>]) -> usize {
let mut levenshtein = 0;
let mut insert = 0;
let mut delete = 0;
@@ -2853,46 +2863,53 @@ impl DiffMatchPatch {
/// Given two texts, or an already computed list of differences, return an array of patch objects.
/// The third form PatchInput::TextDiffs(...) is preferred, use it if you happen to have that data available, otherwise this function will compute the missing pieces.
/// TODO: add example
- pub fn patch_make(&self, input: PatchInput) -> Result<Patches, crate::errors::Error> {
+ pub fn patch_make<T: DType>(
+ &self,
+ input: PatchInput<T>,
+ ) -> Result<Patches<T>, crate::errors::Error> {
let mut diff_input;
let txt_old;
let (txt, diffs) = match input {
// No diffs provided, lets make our own
PatchInput::Texts(txt1, txt2) => {
- let dmp = DiffMatchPatch::default();
- diff_input = dmp.diff_main(txt1, txt2)?;
+ diff_input = self.diff_main(txt1, txt2)?;
if diff_input.len() > 2 {
Self::cleanup_semantic(&mut diff_input);
}
- (txt1.as_bytes(), &diff_input[..])
+ (T::from_str(txt1), &diff_input[..])
}
PatchInput::Diffs(diffs) => {
// No origin string provided, compute our own.
- txt_old = Self::diff_text_old(diffs);
- (&txt_old[..], diffs)
+ (Self::diff_text_old(diffs), diffs)
+ }
+ PatchInput::TextDiffs(txt, diffs) => {
+ txt_old = T::from_str(txt);
+ (txt_old, diffs)
}
- PatchInput::TextDiffs(txt, diffs) => (txt.as_bytes(), diffs),
};
- self.patch_make_internal(txt, diffs)
+ self.patch_make_internal(&txt, diffs)
}
/// Reduces an array of patch objects to a block of text which looks extremely similar to the standard GNU diff/patch format. This text may be stored or transmitted.
/// TODO: add example
- pub fn patch_to_text(&self, patches: &Patches) -> String {
+ pub fn patch_to_text<T: DType>(&self, patches: &Patches<T>) -> String {
patches.iter().map(|p| p.to_string()).collect::<String>()
}
/// Parses a block of text (which was presumably created by the patch_toText function) and returns an array of patch objects.
/// TODO: add example
- pub fn patch_from_text(&self, text: &str) -> Result<Patches, Error> {
+ pub fn patch_from_text<T: DType>(&self, text: &str) -> Result<Patches<T>, Error> {
if text.is_empty() {
return Ok(vec![]);
}
- let mut text = text.as_bytes().split(|&p| p == b'\n').collect::<Vec<_>>();
+ let txt_t = T::from_str(text);
+ let mut text = txt_t
+ .split(|&p| p == T::from_char('\n'))
+ .collect::<Vec<_>>();
let mut patches = vec![];
@@ -2946,27 +2963,21 @@ impl DiffMatchPatch {
};
// Should never panic, already checked for `empty`
- let sign = txt.first().unwrap();
-
- let line = percent_decode(&txt[1..]).collect::<Vec<_>>();
-
- match sign {
- b'-' => {
- patch.diffs.push(Diff::delete(&line));
- }
- b'+' => {
- patch.diffs.push(Diff::insert(&line));
- }
- b' ' => {
- patch.diffs.push(Diff::equal(&line));
- }
- b'@' => {
- // next patch, break
- break;
- }
- _ => {
- return Err(Error::InvalidInput);
- }
+ let &sign = txt.first().unwrap();
+
+ let line = T::percent_decode(&txt[1..]);
+
+ if sign == T::from_char('-') {
+ patch.diffs.push(Diff::delete(&line));
+ } else if sign == T::from_char('+') {
+ patch.diffs.push(Diff::insert(&line));
+ } else if sign == T::from_char(' ') {
+ patch.diffs.push(Diff::equal(&line));
+ } else if sign == T::from_char('@') {
+ // next patch, break
+ break;
+ } else {
+ return Err(Error::InvalidInput);
}
text.remove(0);
@@ -2989,15 +3000,15 @@ impl DiffMatchPatch {
/// If patch_delete_threshold is closer to 1, then the deleted text may contain anything.
/// In most use cases Patch_DeleteThreshold should just be set to the same value as match_threshold.
/// TODO: add example
- pub fn patch_apply(
+ pub fn patch_apply<T: DType>(
&self,
- patches: &Patches,
+ patches: &Patches<T>,
source_txt: &str,
) -> Result<(String, Vec<bool>), crate::errors::Error> {
- let (str_bytes, results) = self.patch_apply_internal(patches, source_txt.as_bytes())?;
+ let (str_data, results) = self.patch_apply_internal(patches, &T::from_str(source_txt))?;
Ok((
- String::from_utf8(str_bytes).map_err(|_| crate::errors::Error::Utf8Error)?,
+ T::to_string(&str_data).map_err(|_| crate::errors::Error::Utf8Error)?,
results,
))
}
@@ -3009,7 +3020,7 @@ mod tests {
use crate::{
dmp::{Diff, HalfMatch, LineToChars},
- DiffMatchPatch, Error, Patch, PatchInput,
+ DiffMatchPatch, Efficient, Error, Patch, PatchInput,
};
#[test]
@@ -3757,8 +3768,9 @@ mod tests {
fn test_patch_add_padding() -> Result<(), Error> {
let dmp = DiffMatchPatch::default();
// Both edges full.
- let mut patches = dmp.patch_make(PatchInput::Texts("", "test"))?;
+ let mut patches = dmp.patch_make(PatchInput::Texts::<Efficient>("", "test"))?;
assert_eq!("@@ -0,0 +1,4 @@\n+test\n", dmp.patch_to_text(&patches));
+
dmp.patch_add_padding(&mut patches);
assert_eq!(
"@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n",
@@ -3766,7 +3778,7 @@ mod tests {
);
// Both edges partial.
- let mut patches = dmp.patch_make(PatchInput::Texts("XY", "XtestY"))?;
+ let mut patches = dmp.patch_make(PatchInput::Texts::<Efficient>("XY", "XtestY"))?;
assert_eq!(
"@@ -1,2 +1,6 @@\n X\n+test\n Y\n",
dmp.patch_to_text(&patches)
@@ -3778,7 +3790,8 @@ mod tests {
);
// Both edges none.
- let mut patches = dmp.patch_make(PatchInput::Texts("XXXXYYYY", "XXXXtestYYYY"))?;
+ let mut patches =
+ dmp.patch_make(PatchInput::Texts::<Efficient>("XXXXYYYY", "XXXXtestYYYY"))?;
assert_eq!(
"@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n",
dmp.patch_to_text(&patches)
@@ -3799,7 +3812,7 @@ mod tests {
let dmp = DiffMatchPatch::default();
// Assumes that dmp.Match_MaxBits is 32.
- let mut patches = dmp.patch_make(PatchInput::Texts(
+ let mut patches = dmp.patch_make(PatchInput::Texts::<char>(
"abcdefghijklmnopqrstuvwxyz01234567890",
"XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0",
))?;
@@ -3809,7 +3822,7 @@ mod tests {
dmp.patch_to_text(&patches)
);
- let mut patches = dmp.patch_make(PatchInput::Texts(
+ let mut patches = dmp.patch_make(PatchInput::Texts::<char>(
"abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz",
"abcdefuvwxyz",
))?;
@@ -3817,7 +3830,7 @@ mod tests {
dmp.split_max(&mut patches);
assert_eq!(p2t, dmp.patch_to_text(&patches));
- let mut patches = dmp.patch_make(PatchInput::Texts(
+ let mut patches = dmp.patch_make(PatchInput::Texts::<u8>(
"1234567890123456789012345678901234567890123456789012345678901234567890",
"abc",
))?;
@@ -3827,7 +3840,7 @@ mod tests {
dmp.patch_to_text(&patches)
);
- let mut patches = dmp.patch_make(PatchInput::Texts(
+ let mut patches = dmp.patch_make(PatchInput::Texts::<char>(
"abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1",
"abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1",
))?;
diff --git a/src/lib.rs b/src/lib.rs
index 684cf21..b8394c9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,6 +1,10 @@
+#![feature(trait_alias, let_chains)]
+
pub mod dmp;
pub mod errors;
pub mod traits;
pub use dmp::{DiffMatchPatch, Ops, Patch, PatchInput, Patches};
pub use errors::Error;
+pub(crate) use traits::DType;
+pub use traits::{Compat, Efficient};
diff --git a/src/traits.rs b/src/traits.rs
index 8a5c9bb..3818837 100644
--- a/src/traits.rs
+++ b/src/traits.rs
@@ -1,8 +1,30 @@
+use std::hash::Hash;
+
use chrono::NaiveTime;
+use percent_encoding::{percent_decode, AsciiSet, CONTROLS};
use crate::dmp::{Diff, DiffMatchPatch};
-pub trait BisectSplit: Copy + Ord + Eq {
+pub type Efficient = u8;
+pub type Compat = char;
+
+// Appending controls to ensure exact same encoding as cpp variant
+const ENCODE_SET: &AsciiSet = &CONTROLS
+ .add(b'"')
+ .add(b'<')
+ .add(b'>')
+ .add(b'`')
+ .add(b'{')
+ .add(b'}')
+ .add(b'%')
+ .add(b'[')
+ .add(b'\\')
+ .add(b']')
+ .add(b'^')
+ .add(b'|');
+
+pub trait DType: Copy + Ord + Eq + Hash {
+ // fn differ(dmp: &DiffMatchPatch, txt_old: &str, txt_new: &str) -> Result<Vec<Diff<Self>>, crate::errors::Error>;
fn bisect_split(
dmp: &DiffMatchPatch,
old: &[Self],
@@ -11,9 +33,20 @@ pub trait BisectSplit: Copy + Ord + Eq {
y: usize,
deadline: Option<NaiveTime>,
) -> Result<Vec<Diff<Self>>, crate::errors::Error>;
+
+ fn from_char(c: char) -> Self;
+ fn as_char(&self) -> Option<char>;
+ fn from_str(str: &str) -> Vec<Self>;
+ fn to_string(data: &[Self]) -> Result<String, crate::Error>;
+
+ fn is_linebreak_end(input: &[Self]) -> bool;
+ fn is_linebreak_start(input: &[Self]) -> bool;
+
+ fn percent_encode(input: &[Self]) -> Vec<Self>;
+ fn percent_decode(input: &[Self]) -> Vec<Self>;
}
-impl BisectSplit for u8 {
+impl DType for u8 {
fn bisect_split(
dmp: &DiffMatchPatch,
old: &[u8],
@@ -34,9 +67,135 @@ impl BisectSplit for u8 {
Ok(diffs_a)
}
+
+ /// Casts `c` to a byte with `as`: only the low 8 bits of the code point are
+ /// kept, so characters above U+00FF do not survive the conversion.
+ fn from_char(c: char) -> Self {
+ c as u8
+ }
+
+    /// Maps the byte to the Unicode scalar value with the same number
+    /// (U+0000..=U+00FF). The conversion cannot fail, so the result is
+    /// always `Some`.
+    fn as_char(&self) -> Option<char> {
+        let byte = *self;
+        Some(char::from(byte))
+    }
+
+    /// Returns the UTF-8 bytes of `str` as an owned vector.
+    fn from_str(str: &str) -> Vec<Self> {
+        str.bytes().collect()
+    }
+
+    /// Validates `data` as UTF-8 and returns it as an owned `String`.
+    ///
+    /// # Errors
+    /// Returns `crate::Error::Utf8Error` when `data` is not valid UTF-8.
+    #[inline]
+    fn to_string(data: &[Self]) -> Result<String, crate::Error> {
+        match std::str::from_utf8(data) {
+            Ok(text) => Ok(text.to_string()),
+            Err(_) => Err(crate::Error::Utf8Error),
+        }
+    }
+
+    /// Returns `true` when `input` ends with `\n\n` or `\n\r\n`.
+    #[inline]
+    fn is_linebreak_end(input: &[Self]) -> bool {
+        const BLANK_LINE_ENDS: [&[u8]; 2] = [b"\n\n", b"\n\r\n"];
+        BLANK_LINE_ENDS
+            .iter()
+            .any(|&suffix| input.ends_with(suffix))
+    }
+
+    /// Returns `true` when `input` starts with `\r\n\n`, `\r\n\r\n`, `\n\r\n`
+    /// or `\n\n`.
+    #[inline]
+    fn is_linebreak_start(input: &[Self]) -> bool {
+        const BLANK_LINE_STARTS: [&[u8]; 4] = [b"\r\n\n", b"\r\n\r\n", b"\n\r\n", b"\n\n"];
+        BLANK_LINE_STARTS
+            .iter()
+            .any(|&prefix| input.starts_with(prefix))
+    }
+
+    /// Percent-encodes `input` with `ENCODE_SET` and returns the encoded text
+    /// as bytes.
+    #[inline]
+    fn percent_encode(input: &[Self]) -> Vec<Self> {
+        // `into_bytes` hands over the `String`'s own buffer; the previous
+        // `as_bytes().to_vec()` copied the encoded text a second time.
+        percent_encoding::percent_encode(input, ENCODE_SET)
+            .collect::<String>()
+            .into_bytes()
+    }
+
+ /// Runs `percent_decode` over `input` and collects the decoded bytes.
+ /// No UTF-8 validation happens here; the raw bytes are returned as-is.
+ #[inline]
+ fn percent_decode(input: &[Self]) -> Vec<Self> {
+ percent_decode(input).collect()
+ }
+}
+
+/// `DType` implementation for `char`: each element is one Unicode scalar value.
+impl DType for char {
+    /// Splits `old` at `x` and `new` at `y`, diffs the two left halves and the
+    /// two right halves independently, and returns the concatenated result.
+    ///
+    /// # Errors
+    /// Propagates any error returned by `DiffMatchPatch::diff_internal`.
+    ///
+    /// # Panics
+    /// Panics if `x > old.len()` or `y > new.len()`.
+    fn bisect_split(
+        dmp: &DiffMatchPatch,
+        old: &[char],
+        new: &[char],
+        x: usize,
+        y: usize,
+        deadline: Option<NaiveTime>,
+    ) -> Result<Vec<Diff<char>>, crate::errors::Error> {
+        let (old_a, old_b) = old.split_at(x);
+        let (new_a, new_b) = new.split_at(y);
+
+        // Compute both diffs serially.
+        let mut diffs = dmp.diff_internal(old_a, new_a, false, deadline)?;
+        diffs.append(&mut dmp.diff_internal(old_b, new_b, false, deadline)?);
+
+        Ok(diffs)
+    }
+
+    /// Identity conversion: a `char` is already an element.
+    fn from_char(c: char) -> Self {
+        c
+    }
+
+    /// Always `Some`, since the element is itself a `char`.
+    fn as_char(&self) -> Option<char> {
+        Some(*self)
+    }
+
+    /// Returns the chars of `str` as an owned vector.
+    fn from_str(str: &str) -> Vec<Self> {
+        str.chars().collect()
+    }
+
+    /// Concatenates `data` into a `String`; this implementation never fails.
+    #[inline]
+    fn to_string(data: &[Self]) -> Result<String, crate::Error> {
+        Ok(data.iter().collect())
+    }
+
+    /// Returns `true` when `input` ends with `\n\n` or `\n\r\n`.
+    #[inline]
+    fn is_linebreak_end(input: &[Self]) -> bool {
+        input.ends_with(&['\n', '\n']) || input.ends_with(&['\n', '\r', '\n'])
+    }
+
+    /// Returns `true` when `input` starts with `\r\n\n`, `\r\n\r\n`, `\n\r\n`
+    /// or `\n\n`.
+    #[inline]
+    fn is_linebreak_start(input: &[Self]) -> bool {
+        input.starts_with(&['\r', '\n', '\n'])
+            || input.starts_with(&['\r', '\n', '\r', '\n'])
+            || input.starts_with(&['\n', '\r', '\n'])
+            || input.starts_with(&['\n', '\n'])
+    }
+
+    /// Percent-encodes `input` with `ENCODE_SET` and returns the encoded text
+    /// as chars.
+    #[inline]
+    fn percent_encode(input: &[Self]) -> Vec<Self> {
+        // Percent-encoding works on bytes. Collecting into a `String` yields
+        // the same UTF-8 bytes as encoding every char into its own buffer,
+        // without one heap allocation per char.
+        let utf8 = input.iter().collect::<String>();
+        let encoded =
+            percent_encoding::percent_encode(utf8.as_bytes(), ENCODE_SET).collect::<String>();
+
+        Self::from_str(&encoded)
+    }
+
+    /// Reverses percent-encoding in `input` and returns the decoded chars.
+    ///
+    /// Escapes that decode to invalid UTF-8 are replaced with U+FFFD rather
+    /// than panicking, because the input may come from externally supplied
+    /// patch text.
+    #[inline]
+    fn percent_decode(input: &[Self]) -> Vec<Self> {
+        let ip = input.iter().collect::<String>();
+        let decoded = percent_decode(ip.as_bytes()).decode_utf8_lossy();
+
+        decoded.chars().collect()
+    }
+}
-impl BisectSplit for usize {
+impl DType for usize {
fn bisect_split(
dmp: &DiffMatchPatch,
old: &[usize],
@@ -57,4 +216,39 @@ impl BisectSplit for usize {
Ok(diffs_a)
}
+
+    /// Returns the code point of `c` as a `usize`.
+    ///
+    /// The cast is a lossless widening. The previous detour through `u8`
+    /// kept only the low 8 bits, so distinct non-ASCII characters could map
+    /// to the same value. ASCII input is unaffected.
+    fn from_char(c: char) -> Self {
+        c as usize
+    }
+
+    /// Returns the decimal digit character for values `0..=9`, and `None`
+    /// for every other value.
+    fn as_char(&self) -> Option<char> {
+        // Check the range on the full-width value first: `*self as u32`
+        // wraps on 64-bit targets, so e.g. `(1 << 32) + 5` would otherwise
+        // be reported as '5'.
+        if *self < 10 {
+            char::from_digit(*self as u32, 10)
+        } else {
+            None
+        }
+    }
+
+ /// Not supported for `usize` elements.
+ ///
+ /// # Panics
+ /// Always panics via `unimplemented!()`.
+ fn from_str(_: &str) -> Vec<Self> {
+ unimplemented!()
+ }
+
+ /// Not supported for `usize` elements.
+ ///
+ /// # Panics
+ /// Always panics via `unimplemented!()`.
+ fn to_string(_: &[Self]) -> Result<String, crate::Error> {
+ unimplemented!()
+ }
+
+ /// Not supported for `usize` elements.
+ ///
+ /// # Panics
+ /// Always panics via `unimplemented!()`.
+ fn is_linebreak_end(_: &[Self]) -> bool {
+ unimplemented!()
+ }
+
+ /// Not supported for `usize` elements.
+ ///
+ /// # Panics
+ /// Always panics via `unimplemented!()`.
+ #[inline]
+ fn is_linebreak_start(_: &[Self]) -> bool {
+ unimplemented!()
+ }
+
+ /// Not supported for `usize` elements.
+ ///
+ /// # Panics
+ /// Always panics via `unimplemented!()`.
+ #[inline]
+ fn percent_encode(_: &[Self]) -> Vec<Self> {
+ unimplemented!()
+ }
+
+ /// Not supported for `usize` elements.
+ ///
+ /// # Panics
+ /// Always panics via `unimplemented!()`.
+ #[inline]
+ fn percent_decode(_: &[Self]) -> Vec<Self> {
+ unimplemented!()
+ }
}
diff --git a/tests/test.rs b/tests/test.rs
index fd00a4e..bc8262b 100644
--- a/tests/test.rs
+++ b/tests/test.rs
@@ -4,7 +4,7 @@ use chrono::Utc;
use diff_match_patch_rs::dmp::Diff;
-use diff_match_patch_rs::{DiffMatchPatch, Error, Ops, PatchInput};
+use diff_match_patch_rs::{Compat, DiffMatchPatch, Efficient, Error, Ops, PatchInput};
// const tests = [
// 'testDiffIsDestructurable',
@@ -85,7 +85,7 @@ fn test_diff_pretty_html() -> Result<(), Error> {
// Which means the diff should be an equality block of 3 bytes followed by an insert and a delete
let old = "πŸ€ͺ"; // [240, 159, 164, 170]
let new = "πŸ€”"; // [240, 159, 164, 148]
- let diffs = dmp.diff_main(old, new)?;
+ let diffs = dmp.diff_main::<Efficient>(old, new)?;
assert_eq!(
"<span></span><del style=\"background:#ffe6e6;\">πŸ€ͺ</del><ins style=\"background:#e6ffe6;\">πŸ€”</ins>",
dmp.diff_pretty_html(&diffs)?
@@ -94,7 +94,7 @@ fn test_diff_pretty_html() -> Result<(), Error> {
// Now Case 1. but with some text before and after
let old = "I'm puzzledπŸ€ͺ or am I?";
let new = "I'm puzzledπŸ€” or thinking I guess!";
- let diffs = dmp.diff_main(old, new)?;
+ let diffs = dmp.diff_main::<Efficient>(old, new)?;
assert_eq!(
"<span>I'm puzzled</span><del style=\"background:#ffe6e6;\">πŸ€ͺ</del><ins style=\"background:#e6ffe6;\">πŸ€”</ins><span> or </span><del style=\"background:#ffe6e6;\">am I?</del><ins style=\"background:#e6ffe6;\">thinking I guess!</ins>",
dmp.diff_pretty_html(&diffs)?
@@ -103,7 +103,7 @@ fn test_diff_pretty_html() -> Result<(), Error> {
// Case 2. Emoticons with the third position different
let old = "🍊"; // [240, 159, 141, 138]
let new = "🌊"; // [240, 159, 140, 138]
- let diffs = dmp.diff_main(old, new)?;
+ let diffs = dmp.diff_main::<Efficient>(old, new)?;
assert_eq!(
"<span></span><del style=\"background:#ffe6e6;\">🍊</del><ins style=\"background:#e6ffe6;\">🌊</ins>",
dmp.diff_pretty_html(&diffs)?
@@ -112,7 +112,7 @@ fn test_diff_pretty_html() -> Result<(), Error> {
// Now Case 2. but with some text, lets complicate this
let old = "🍊, aah orange is the new black!"; // [240, 159, 141, 138]
let new = "Aah orange!🌊is the new 🌊"; // [240, 159, 140, 138]
- let diffs = dmp.diff_main(old, new)?;
+ let diffs = dmp.diff_main::<Efficient>(old, new)?;
assert_eq!(
"<del style=\"background:#ffe6e6;\">🍊, a</del><ins style=\"background:#e6ffe6;\">A</ins><span>ah orange</span><del style=\"background:#ffe6e6;\"> </del><ins style=\"background:#e6ffe6;\">!🌊</ins><span>is the new </span><del style=\"background:#ffe6e6;\">black!</del><ins style=\"background:#e6ffe6;\">🌊</ins>",
dmp.diff_pretty_html(&diffs)?
@@ -121,7 +121,7 @@ fn test_diff_pretty_html() -> Result<(), Error> {
// Case 3. with second and third different, but lets complicate this with an equality
let old = "𠌊"; // [240, 160, 140, 138]
let new = "π– Š"; // [240, 150, 160, 138]
- let diffs = dmp.diff_main(old, new)?;
+ let diffs = dmp.diff_main::<Efficient>(old, new)?;
assert_eq!(
"<span></span><ins style=\"background:#e6ffe6;\">π– Š</ins><del style=\"background:#ffe6e6;\">𠌊</del>",
dmp.diff_pretty_html(&diffs)?
@@ -130,7 +130,7 @@ fn test_diff_pretty_html() -> Result<(), Error> {
// Case 3. but let there be a swap
let old = "πž „"; // [240, 158, 160, 132]
let new = std::str::from_utf8(&[240, 160, 158, 132]).unwrap(); // basically an undefined element `π ž„`. Should still work
- let diffs = dmp.diff_main(old, new)?;
+ let diffs = dmp.diff_main::<Efficient>(old, new)?;
assert_eq!(
"<span></span><del style=\"background:#ffe6e6;\">πž „</del><ins style=\"background:#e6ffe6;\">π ž„</ins>",
dmp.diff_pretty_html(&diffs)?
@@ -139,7 +139,7 @@ fn test_diff_pretty_html() -> Result<(), Error> {
// Case 4. swap at the last 2 positions
let old = "🍌"; // [240, 159, 141, 140] -- FINALLY A BANANA
let new = "🌍"; // [240, 159, 140, 141] -- interesting revelation - last 2 bytes swapped and 🍌 becomes 🌍. Guess the world is going `Bananas!!`
- let diffs = dmp.diff_main(old, new)?;
+ let diffs = dmp.diff_main::<Efficient>(old, new)?;
assert_eq!(
"<span></span><del style=\"background:#ffe6e6;\">🍌</del><ins style=\"background:#e6ffe6;\">🌍</ins>",
dmp.diff_pretty_html(&diffs)?
@@ -148,7 +148,7 @@ fn test_diff_pretty_html() -> Result<(), Error> {
// Let's do this with a slightly longish string
let old = "Now, let's explore some emotional extremes 🌊.\nWe've got your ecstatic face 🀩, your devastated face 😭, and your utterly confused face 🀯. But that's not all! πŸ€” We've also got some subtle emotions like 😐, πŸ™ƒ, and πŸ‘€.";
let new = "Let's start with some basics 😊.\nWe've got your standard smiley face πŸ™‚, your sad face ☹️, and your angry face 😠. But wait, there's more! 🀩 We've also got some more complex emotions like 😍, 🀀, and πŸš€. And let's not forget about the classics: πŸ˜‰, πŸ‘, and πŸ‘.";
- let diffs = dmp.diff_main(old, new)?;
+ let diffs = dmp.diff_main::<Efficient>(old, new)?;
assert_eq!(
"<del style=\"background:#ffe6e6;\">Now, let's explore some emotional extreme</del><ins style=\"background:#e6ffe6;\">Let's start with some basic</ins><span>s </span><del style=\"background:#ffe6e6;\">🌊</del><ins style=\"background:#e6ffe6;\">😊</ins><span>.&para;<br>We've got your </span><del style=\"background:#ffe6e6;\">ec</del><span>sta</span><del style=\"background:#ffe6e6;\">tic</del><ins style=\"background:#e6ffe6;\">ndard smiley</ins><span> face </span><del style=\"background:#ffe6e6;\">🀩</del><ins style=\"background:#e6ffe6;\">πŸ™‚</ins><span>, your </span><del style=\"background:#ffe6e6;\">devastate</del><ins style=\"background:#e6ffe6;\">sa</ins><span>d face </span><del style=\"background:#ffe6e6;\">😭</del><ins style=\"background:#e6ffe6;\">☹️</ins><span>, and your </span><del style=\"background:#ffe6e6;\">utterly confused</del><ins style=\"background:#e6ffe6;\">angry</ins><span> face </span><del style=\"background:#ffe6e6;\">🀯</del><ins style=\"background:#e6ffe6;\">😠</ins><span>. But </span><del style=\"background:#ffe6e6;\">that's not all</del><ins style=\"background:#e6ffe6;\">wait, there's more</ins><span>! </span><del style=\"background:#ffe6e6;\">πŸ€”</del><ins style=\"background:#e6ffe6;\">🀩</ins><span> We've also got some </span><del style=\"background:#ffe6e6;\">subt</del><ins style=\"background:#e6ffe6;\">more comp</ins><span>le</span><ins style=\"background:#e6ffe6;\">x</ins><span> emotions like </span><del style=\"background:#ffe6e6;\">😐</del><ins style=\"background:#e6ffe6;\">😍, 🀀, and πŸš€. And let's not forget about the classics: πŸ˜‰</ins><span>, </span><del style=\"background:#ffe6e6;\">πŸ™ƒ</del><ins style=\"background:#e6ffe6;\">πŸ‘</ins><span>, and </span><del style=\"background:#ffe6e6;\">πŸ‘€</del><ins style=\"background:#e6ffe6;\">πŸ‘</ins><span>.</span>",
@@ -164,21 +164,24 @@ fn test_diff_main() -> Result<(), Error> {
// Perform a trivial diff.
// Null case.
- assert!(dmp.diff_main("", "")?.is_empty());
+ assert!(dmp.diff_main::<Efficient>("", "")?.is_empty());
// Equality
- assert_eq!(vec![Diff::equal(b"abc")], dmp.diff_main("abc", "abc")?);
+ assert_eq!(
+ vec![Diff::equal(b"abc")],
+ dmp.diff_main::<Efficient>("abc", "abc")?
+ );
// Simple insert
assert_eq!(
vec![Diff::equal(b"ab"), Diff::insert(b"123"), Diff::equal(b"c")],
- dmp.diff_main("abc", "ab123c")?
+ dmp.diff_main::<Efficient>("abc", "ab123c")?
);
// Simple delete
assert_eq!(
vec![Diff::equal(b"a"), Diff::delete(b"123"), Diff::equal(b"bc")],
- dmp.diff_main("a123bc", "abc")?
+ dmp.diff_main::<Efficient>("a123bc", "abc")?
);
// Two insertions
@@ -190,7 +193,7 @@ fn test_diff_main() -> Result<(), Error> {
Diff::insert(b"456"),
Diff::equal(b"c"),
],
- dmp.diff_main("abc", "a123b456c")?
+ dmp.diff_main::<Efficient>("abc", "a123b456c")?
);
// Two deletions.
@@ -202,7 +205,7 @@ fn test_diff_main() -> Result<(), Error> {
Diff::delete(b"456"),
Diff::equal(b"c"),
],
- dmp.diff_main("a123b456c", "abc")?
+ dmp.diff_main::<Efficient>("a123b456c", "abc")?
);
// Perform a real diff.
@@ -211,7 +214,7 @@ fn test_diff_main() -> Result<(), Error> {
// Simple cases.
assert_eq!(
vec![Diff::delete(b"a"), Diff::insert(b"b"),],
- dmp.diff_main("a", "b")?
+ dmp.diff_main::<Efficient>("a", "b")?
);
assert_eq!(
@@ -222,7 +225,7 @@ fn test_diff_main() -> Result<(), Error> {
Diff::insert(b"lso"),
Diff::equal(b" fruit.")
],
- dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.")?
+ dmp.diff_main::<Efficient>("Apples are a fruit.", "Bananas are also fruit.")?
);
assert_eq!(
@@ -233,7 +236,7 @@ fn test_diff_main() -> Result<(), Error> {
Diff::delete(b"\t"),
Diff::insert(b"\0")
],
- dmp.diff_main("ax\t", "\u{0680}x\0")?
+ dmp.diff_main::<Efficient>("ax\t", "\u{0680}x\0")?
);
// Overlaps.
@@ -246,7 +249,7 @@ fn test_diff_main() -> Result<(), Error> {
Diff::delete(b"2"),
Diff::insert(b"xab"),
],
- dmp.diff_main("1ayb2", "abxab")?
+ dmp.diff_main::<Efficient>("1ayb2", "abxab")?
);
assert_eq!(
@@ -255,7 +258,7 @@ fn test_diff_main() -> Result<(), Error> {
Diff::equal(b"abc"),
Diff::delete(b"y"),
],
- dmp.diff_main("abcy", "xaxcxabc")?
+ dmp.diff_main::<Efficient>("abcy", "xaxcxabc")?
);
assert_eq!(
@@ -270,7 +273,7 @@ fn test_diff_main() -> Result<(), Error> {
Diff::equal(b"efghijklmnopqrs"),
Diff::delete(b"EFGHIJKLMNOefg"),
],
- dmp.diff_main(
+ dmp.diff_main::<Efficient>(
"ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg",
"a-bcd-efghijklmnopqrs"
)?
@@ -285,7 +288,219 @@ fn test_diff_main() -> Result<(), Error> {
Diff::equal(b" [[Hepatopancreatic]]"),
Diff::delete(b" and [[New"),
],
- dmp.diff_main(
+ dmp.diff_main::<Efficient>(
+ "a [[Hepatopancreatic]] and [[New",
+ " and [[Hepatopancreatic]]"
+ )?
+ );
+
+ // Timeout.
+ const LOW_TIMEOUT: u32 = 100;
+ dmp.set_timeout(Some(LOW_TIMEOUT));
+ let a = vec!["`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; 2048].join("");
+ let b = vec!["I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; 2048].join("");
+
+ let start = Utc::now().time();
+ dmp.diff_main::<Efficient>(&a, &b)?;
+ let end = Utc::now().time();
+ // Test that we took at most the timeout period (+ 5ms being generous).
+ assert!((end - start).num_milliseconds() <= LOW_TIMEOUT as i64 + 5);
+
+ // Test the linemode speedup.
+ // Must be long to pass the 100 char cutoff.
+ // Simple line-mode.
+ dmp.set_timeout(Some(1000));
+ let a = "12345678901234567890123456789 0123456 78901234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
+ let b = "abcdefghij abcdefghij abcdefghij abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n";
+ dmp.set_checklines(false);
+ let res_no_lm = dmp.diff_main::<Efficient>(a, b)?;
+ dmp.set_checklines(true);
+ let res_yes_lm = dmp.diff_main::<Efficient>(a, b)?;
+
+ // Now, we'll run 2 checks - one for result equality
+ assert_eq!(res_no_lm, res_yes_lm);
+
+ // Single line-mode.
+ let a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890";
+ let b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij";
+ dmp.set_checklines(false);
+ let yes_lm = dmp.diff_main::<Efficient>(a, b)?;
+ dmp.set_checklines(true);
+ let no_lm = dmp.diff_main::<Efficient>(a, b)?;
+ assert_eq!(no_lm, yes_lm);
+
+ // Overlap line-mode.
+ let a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
+ let b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n";
+ dmp.set_checklines(false);
+ let no_lm = dmp.diff_main::<Efficient>(a, b)?;
+ dmp.set_checklines(true);
+ let yes_lm = dmp.diff_main::<Efficient>(a, b)?;
+ assert_eq!(rebuild_text(&yes_lm[..])?, rebuild_text(&no_lm[..])?);
+
+ // Benefits of checklines can only be realized in text with many lines
+ let mut dmp = DiffMatchPatch::default();
+ let old = std::fs::read_to_string("testdata/txt_old.txt").unwrap();
+ let new = std::fs::read_to_string("testdata/txt_new.txt").unwrap();
+
+ let start = Instant::now();
+ let diff_yes_lm = dmp.diff_main::<Efficient>(&old, &new);
+ let yes_lm_dur = Instant::now() - start;
+ assert!(diff_yes_lm.is_ok());
+
+ dmp.set_checklines(false);
+ let start = Instant::now();
+ let diff_no_lm = dmp.diff_main::<Efficient>(&old, &new);
+ let no_lm_dur = Instant::now() - start;
+ assert!(diff_no_lm.is_ok());
+
+ assert!(no_lm_dur > yes_lm_dur);
+
+ Ok(())
+}
+
+#[test]
+fn test_diff_main_compat() -> Result<(), Error> {
+ let mut dmp = DiffMatchPatch::default();
+
+ // Perform a trivial diff.
+ // Null case.
+ assert!(dmp.diff_main::<Compat>("", "")?.is_empty());
+
+ // Equality
+ assert_eq!(
+ vec![Diff::equal(&"abc".chars().collect::<Vec<_>>()[..])],
+ dmp.diff_main::<Compat>("abc", "abc")?
+ );
+
+ // Simple insert
+ assert_eq!(
+ vec![
+ Diff::equal(&"ab".chars().collect::<Vec<_>>()[..]),
+ Diff::insert(&"123".chars().collect::<Vec<_>>()[..]),
+ Diff::equal(&['c'])
+ ],
+ dmp.diff_main::<Compat>("abc", "ab123c")?
+ );
+
+ // Simple delete
+ assert_eq!(
+ vec![
+ Diff::equal(&['a']),
+ Diff::delete(&"123".chars().collect::<Vec<_>>()[..]),
+ Diff::equal(&['b', 'c'])
+ ],
+ dmp.diff_main::<Compat>("a123bc", "abc")?
+ );
+
+ // Two insertions
+ assert_eq!(
+ vec![
+ Diff::equal(&['a']),
+ Diff::insert(&"123".chars().collect::<Vec<_>>()[..]),
+ Diff::equal(&['b']),
+ Diff::insert(&['4', '5', '6']),
+ Diff::equal(&['c']),
+ ],
+ dmp.diff_main::<Compat>("abc", "a123b456c")?
+ );
+
+ // Two deletions.
+ assert_eq!(
+ vec![
+ Diff::equal(&['a']),
+ Diff::delete(&"123".chars().collect::<Vec<_>>()[..]),
+ Diff::equal(&['b']),
+ Diff::delete(&"456".chars().collect::<Vec<_>>()[..]),
+ Diff::equal(&['c']),
+ ],
+ dmp.diff_main::<Compat>("a123b456c", "abc")?
+ );
+
+ // Perform a real diff.
+ // Switch off the timeout.
+ dmp.set_timeout(None);
+ // Simple cases.
+ assert_eq!(
+ vec![Diff::delete(&['a']), Diff::insert(&['b']),],
+ dmp.diff_main::<Compat>("a", "b")?
+ );
+
+ assert_eq!(
+ vec![
+ Diff::delete(&"Apple".chars().collect::<Vec<_>>()[..]),
+ Diff::insert(&"Banana".chars().collect::<Vec<_>>()[..]),
+ Diff::equal(&"s are a".chars().collect::<Vec<_>>()[..]),
+ Diff::insert(&"lso".chars().collect::<Vec<_>>()[..]),
+ Diff::equal(&" fruit.".chars().collect::<Vec<_>>()[..])
+ ],
+ dmp.diff_main::<Compat>("Apples are a fruit.", "Bananas are also fruit.")?
+ );
+
+ assert_eq!(
+ vec![
+ Diff::delete(&['a']),
+ Diff::insert(&"\u{0680}".chars().collect::<Vec<_>>()[..]),
+ Diff::equal(&['x']),
+ Diff::delete(&['\t']),
+ Diff::insert(&['\0'])
+ ],
+ dmp.diff_main::<Compat>("ax\t", "\u{0680}x\0")?
+ );
+
+ // Overlaps.
+ assert_eq!(
+ vec![
+ Diff::delete(&['1']),
+ Diff::equal(&['a']),
+ Diff::delete(&['y']),
+ Diff::equal(&['b']),
+ Diff::delete(&['2']),
+ Diff::insert(&"xab".chars().collect::<Vec<_>>()[..]),
+ ],
+ dmp.diff_main::<Compat>("1ayb2", "abxab")?
+ );
+
+ assert_eq!(
+ vec![
+ Diff::insert(&"xaxcx".chars().collect::<Vec<_>>()[..]),
+ Diff::equal(&"abc".chars().collect::<Vec<_>>()[..]),
+ Diff::delete(&['y']),
+ ],
+ dmp.diff_main::<Compat>("abcy", "xaxcxabc")?
+ );
+
+ assert_eq!(
+ vec![
+ Diff::delete(&"ABCD".chars().collect::<Vec<_>>()[..]),
+ Diff::equal(&"a".chars().collect::<Vec<_>>()[..]),
+ Diff::delete(&"=".chars().collect::<Vec<_>>()[..]),
+ Diff::insert(&"-".chars().collect::<Vec<_>>()[..]),
+ Diff::equal(&"bcd".chars().collect::<Vec<_>>()[..]),
+ Diff::delete(&"=".chars().collect::<Vec<_>>()[..]),
+ Diff::insert(&"-".chars().collect::<Vec<_>>()[..]),
+ Diff::equal(&"efghijklmnopqrs".chars().collect::<Vec<_>>()[..]),
+ Diff::delete(&"EFGHIJKLMNOefg".chars().collect::<Vec<_>>()[..]),
+ ],
+ dmp.diff_main::<Compat>(
+ "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg",
+ "a-bcd-efghijklmnopqrs"
+ )?
+ );
+
+ // Large equality.
+ assert_eq!(
+ vec![
+ Diff::insert(&[' ']),
+ Diff::equal(&['a']),
+ Diff::insert(&['n', 'd']),
+ Diff::equal(&[
+ ' ', '[', '[', 'H', 'e', 'p', 'a', 't', 'o', 'p', 'a', 'n', 'c', 'r', 'e', 'a',
+ 't', 'i', 'c', ']', ']'
+ ]),
+ Diff::delete(&" and [[New".chars().collect::<Vec<_>>()[..]),
+ ],
+ dmp.diff_main::<Compat>(
"a [[Hepatopancreatic]] and [[New",
" and [[Hepatopancreatic]]"
)?
@@ -298,7 +513,7 @@ fn test_diff_main() -> Result<(), Error> {
let b = vec!["I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; 2048].join("");
let start = Utc::now().time();
- dmp.diff_main(&a, &b)?;
+ dmp.diff_main::<Efficient>(&a, &b)?;
let end = Utc::now().time();
// Test that we took at most the timeout period (+ 5ms being generous).
assert!((end - start).num_milliseconds() <= LOW_TIMEOUT as i64 + 5);
@@ -310,9 +525,9 @@ fn test_diff_main() -> Result<(), Error> {
let a = "12345678901234567890123456789 0123456 78901234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
let b = "abcdefghij abcdefghij abcdefghij abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n";
dmp.set_checklines(false);
- let res_no_lm = dmp.diff_main(a, b)?;
+ let res_no_lm = dmp.diff_main::<Efficient>(a, b)?;
dmp.set_checklines(true);
- let res_yes_lm = dmp.diff_main(a, b)?;
+ let res_yes_lm = dmp.diff_main::<Efficient>(a, b)?;
// Now, we'll run 2 checks - one for result equality
assert_eq!(res_no_lm, res_yes_lm);
@@ -321,18 +536,18 @@ fn test_diff_main() -> Result<(), Error> {
let a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890";
let b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij";
dmp.set_checklines(false);
- let yes_lm = dmp.diff_main(a, b)?;
+ let yes_lm = dmp.diff_main::<Efficient>(a, b)?;
dmp.set_checklines(true);
- let no_lm = dmp.diff_main(a, b)?;
+ let no_lm = dmp.diff_main::<Efficient>(a, b)?;
assert_eq!(no_lm, yes_lm);
// Overlap line-mode.
let a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
let b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n";
dmp.set_checklines(false);
- let no_lm = dmp.diff_main(a, b)?;
+ let no_lm = dmp.diff_main::<Efficient>(a, b)?;
dmp.set_checklines(true);
- let yes_lm = dmp.diff_main(a, b)?;
+ let yes_lm = dmp.diff_main::<Efficient>(a, b)?;
assert_eq!(rebuild_text(&yes_lm[..])?, rebuild_text(&no_lm[..])?);
// Benefits of checklines can only be realized in text with many lines
@@ -341,13 +556,13 @@ fn test_diff_main() -> Result<(), Error> {
let new = std::fs::read_to_string("testdata/txt_new.txt").unwrap();
let start = Instant::now();
- let diff_yes_lm = dmp.diff_main(&old, &new);
+ let diff_yes_lm = dmp.diff_main::<Efficient>(&old, &new);
let yes_lm_dur = Instant::now() - start;
assert!(diff_yes_lm.is_ok());
dmp.set_checklines(false);
let start = Instant::now();
- let diff_no_lm = dmp.diff_main(&old, &new);
+ let diff_no_lm = dmp.diff_main::<Efficient>(&old, &new);
let no_lm_dur = Instant::now() - start;
assert!(diff_no_lm.is_ok());
@@ -442,28 +657,51 @@ fn rebuild_text(diffs: &[Diff<u8>]) -> Result<(String, String), Error> {
fn test_patch_from_text() -> Result<(), Error> {
let dmp = DiffMatchPatch::new();
- assert!(dmp.patch_from_text("")?.is_empty());
+ assert!(dmp.patch_from_text::<Efficient>("")?.is_empty());
let strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n";
- assert_eq!(strp, dmp.patch_from_text(strp)?[0].to_string());
+ assert_eq!(strp, dmp.patch_from_text::<Efficient>(strp)?[0].to_string());
assert_eq!(
"@@ -1 +1 @@\n-a\n+b\n",
- dmp.patch_from_text("@@ -1 +1 @@\n-a\n+b\n")?[0].to_string()
+ dmp.patch_from_text::<Efficient>("@@ -1 +1 @@\n-a\n+b\n")?[0].to_string()
);
assert_eq!(
"@@ -1,3 +0,0 @@\n-abc\n",
- dmp.patch_from_text("@@ -1,3 +0,0 @@\n-abc\n")?[0].to_string()
+ dmp.patch_from_text::<Efficient>("@@ -1,3 +0,0 @@\n-abc\n")?[0].to_string()
);
assert_eq!(
"@@ -0,0 +1,3 @@\n+abc\n",
- dmp.patch_from_text("@@ -0,0 +1,3 @@\n+abc\n")?[0].to_string()
+ dmp.patch_from_text::<Efficient>("@@ -0,0 +1,3 @@\n+abc\n")?[0].to_string()
);
// Generates error.
- assert!(dmp.patch_from_text("Bad\nPatch\n").is_err());
+ assert!(dmp.patch_from_text::<Efficient>("Bad\nPatch\n").is_err());
+
+ assert!(dmp.patch_from_text::<Compat>("")?.is_empty());
+
+ let strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n";
+ assert_eq!(strp, dmp.patch_from_text::<Compat>(strp)?[0].to_string());
+
+ assert_eq!(
+ "@@ -1 +1 @@\n-a\n+b\n",
+ dmp.patch_from_text::<Compat>("@@ -1 +1 @@\n-a\n+b\n")?[0].to_string()
+ );
+
+ assert_eq!(
+ "@@ -1,3 +0,0 @@\n-abc\n",
+ dmp.patch_from_text::<Compat>("@@ -1,3 +0,0 @@\n-abc\n")?[0].to_string()
+ );
+
+ assert_eq!(
+ "@@ -0,0 +1,3 @@\n+abc\n",
+ dmp.patch_from_text::<Compat>("@@ -0,0 +1,3 @@\n+abc\n")?[0].to_string()
+ );
+
+ // Generates error.
+ assert!(dmp.patch_from_text::<Compat>("Bad\nPatch\n").is_err());
Ok(())
}
@@ -473,11 +711,19 @@ fn test_patch_to_text() -> Result<(), Error> {
let dmp = DiffMatchPatch::new();
let strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n";
- let patches = dmp.patch_from_text(strp)?;
+ let patches = dmp.patch_from_text::<Efficient>(strp)?;
+ assert_eq!(strp, dmp.patch_to_text(&patches));
+
+ let strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n";
+ let patches = dmp.patch_from_text::<Efficient>(strp)?;
+ assert_eq!(strp, dmp.patch_to_text(&patches));
+
+ let strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n";
+ let patches = dmp.patch_from_text::<Compat>(strp)?;
assert_eq!(strp, dmp.patch_to_text(&patches));
let strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n";
- let patches = dmp.patch_from_text(strp)?;
+ let patches = dmp.patch_from_text::<Compat>(strp)?;
assert_eq!(strp, dmp.patch_to_text(&patches));
Ok(())
@@ -486,22 +732,75 @@ fn test_patch_to_text() -> Result<(), Error> {
#[test]
fn test_patch_make() -> Result<(), Error> {
let dmp = DiffMatchPatch::default();
- let patches = dmp.patch_make(PatchInput::Texts("", ""))?;
+ let patches = dmp.patch_make(PatchInput::Texts::<Efficient>("", ""))?;
+ assert!(patches.is_empty());
+
+ let txt1 = "The quick brown fox jumps over the lazy dog.";
+ let txt2 = "That quick brown fox jumped over a lazy dog.";
+
+ // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context.
+ let patches = dmp.patch_make(PatchInput::Texts::<Efficient>(txt2, txt1))?;
+ assert_eq!("@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n", dmp.patch_to_text(&patches));
+
+ // Text1+Text2 inputs.
+ let patches = dmp.patch_make(PatchInput::Texts::<Efficient>(txt1, txt2))?;
+ assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches));
+
+ // Diff input.
+ let diffs = dmp.diff_main::<Efficient>(txt1, txt2)?;
+ let patches = dmp.patch_make(PatchInput::Diffs(&diffs[..]))?;
+ assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches));
+
+ // Text1+Diff inputs.
+ let patches = dmp.patch_make(PatchInput::TextDiffs(txt1, &diffs[..]))?;
+ assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches));
+
+ // Character encoding.
+ let patches = dmp.patch_make(PatchInput::Texts::<Efficient>(
+ "`1234567890-=[]\\;',./",
+ "~!@#$%^&*()_+{}|:\"<>?",
+ ))?;
+
+ assert_eq!(
+ "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n",
+ dmp.patch_to_text(&patches)
+ );
+
+ // Character decoding.
+ let diffs = vec![
+ Diff::delete(b"`1234567890-=[]\\;',./"),
+ Diff::insert(b"~!@#$%^&*()_+{}|:\"<>?"),
+ ];
+ assert_eq!(
+ diffs,
+ dmp.patch_from_text("@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n")?[0].diffs()
+ );
+
+ // Long string with repeats.
+ let txt1 = vec!["abcdef"; 100].join("");
+ let txt2 = [&txt1, "123"].join("");
+ let patches = dmp.patch_make(PatchInput::Texts::<Efficient>(&txt1, &txt2))?;
+ assert_eq!(
+ "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n",
+ dmp.patch_to_text(&patches)
+ );
+
+ let patches = dmp.patch_make(PatchInput::Texts::<Compat>("", ""))?;
assert!(patches.is_empty());
let txt1 = "The quick brown fox jumps over the lazy dog.";
let txt2 = "That quick brown fox jumped over a lazy dog.";
// The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context.
- let patches = dmp.patch_make(PatchInput::Texts(txt2, txt1))?;
+ let patches = dmp.patch_make(PatchInput::Texts::<Compat>(txt2, txt1))?;
assert_eq!("@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n", dmp.patch_to_text(&patches));
// Text1+Text2 inputs.
- let patches = dmp.patch_make(PatchInput::Texts(txt1, txt2))?;
+ let patches = dmp.patch_make(PatchInput::Texts::<Compat>(txt1, txt2))?;
assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches));
// Diff input.
- let diffs = dmp.diff_main(txt1, txt2)?;
+ let diffs = dmp.diff_main::<Efficient>(txt1, txt2)?;
let patches = dmp.patch_make(PatchInput::Diffs(&diffs[..]))?;
assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches));
@@ -510,7 +809,7 @@ fn test_patch_make() -> Result<(), Error> {
assert_eq!("@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", dmp.patch_to_text(&patches));
// Character encoding.
- let patches = dmp.patch_make(PatchInput::Texts(
+ let patches = dmp.patch_make(PatchInput::Texts::<Compat>(
"`1234567890-=[]\\;',./",
"~!@#$%^&*()_+{}|:\"<>?",
))?;
@@ -533,7 +832,7 @@ fn test_patch_make() -> Result<(), Error> {
// Long string with repeats.
let txt1 = vec!["abcdef"; 100].join("");
let txt2 = [&txt1, "123"].join("");
- let patches = dmp.patch_make(PatchInput::Texts(&txt1, &txt2))?;
+ let patches = dmp.patch_make(PatchInput::Texts::<Compat>(&txt1, &txt2))?;
assert_eq!(
"@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n",
dmp.patch_to_text(&patches)
@@ -568,11 +867,147 @@ fn test_diff_text() {
fn test_patch_apply() -> Result<(), Error> {
let mut dmp = DiffMatchPatch::default();
- let patches = dmp.patch_make(PatchInput::Texts("", ""))?;
+ let patches = dmp.patch_make(PatchInput::Texts::<Efficient>("", ""))?;
+ let (txt, results) = dmp.patch_apply(&patches, "Hello world.")?;
+ assert_eq!(format!("{}\t{}", txt, results.len()), "Hello world.\t0");
+
+ let patches = dmp.patch_make(PatchInput::Texts::<Efficient>(
+ "The quick brown fox jumps over the lazy dog.",
+ "That quick brown fox jumped over a lazy dog.",
+ ))?;
+
+ // Exact match
+ assert_eq!(
+ (
+ "That quick brown fox jumped over a lazy dog.".to_string(),
+ vec![true, true]
+ ),
+ dmp.patch_apply(&patches, "The quick brown fox jumps over the lazy dog.")?
+ );
+
+ // Partial match
+ assert_eq!(
+ (
+ "That quick red rabbit jumped over a tired tiger.".to_string(),
+ vec![true, true]
+ ),
+ dmp.patch_apply(&patches, "The quick red rabbit jumps over the tired tiger.")?
+ );
+
+ // Failed match
+ assert_eq!(
+ (
+ "I am the very model of a modern major general.".to_string(),
+ vec![false, false]
+ ),
+ dmp.patch_apply(&patches, "I am the very model of a modern major general.")?
+ );
+
+ // Big delete, small change
+ let patches = dmp.patch_make(PatchInput::Texts::<Efficient>(
+ "x1234567890123456789012345678901234567890123456789012345678901234567890y",
+ "xabcy",
+ ))?;
+ assert_eq!(
+ ("xabcy".to_string(), vec![true, true]),
+ dmp.patch_apply(
+ &patches,
+ "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y"
+ )?
+ );
+
+ // Big delete, large change
+ let patches = dmp.patch_make(PatchInput::Texts::<Efficient>(
+ "x1234567890123456789012345678901234567890123456789012345678901234567890y",
+ "xabcy",
+ ))?;
+ assert_eq!(
+ (
+ "xabc12345678901234567890---------------++++++++++---------------12345678901234567890y"
+ .to_string(),
+ vec![false, true]
+ ),
+ dmp.patch_apply(
+ &patches,
+ "x12345678901234567890---------------++++++++++---------------12345678901234567890y"
+ )?
+ );
+
+ dmp.set_delete_threshold(0.6);
+ let patches = dmp.patch_make(PatchInput::Texts::<Efficient>(
+ "x1234567890123456789012345678901234567890123456789012345678901234567890y",
+ "xabcy",
+ ))?;
+ assert_eq!(
+ ("xabcy".to_string(), vec![true, true]),
+ dmp.patch_apply(
+ &patches,
+ "x12345678901234567890---------------++++++++++---------------12345678901234567890y"
+ )?
+ );
+ dmp.set_delete_threshold(0.5);
+
+ // Compensate for failed patch
+ dmp.set_match_threshold(0.);
+ dmp.set_match_distance(0);
+ let patches = dmp.patch_make(PatchInput::Texts::<Efficient>(
+ "abcdefghijklmnopqrstuvwxyz--------------------1234567890",
+ "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890",
+ ))?;
+ assert_eq!(
+ (
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890".to_string(),
+ vec![false, true]
+ ),
+ dmp.patch_apply(
+ &patches,
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890"
+ )?
+ );
+
+ dmp.set_match_threshold(0.5);
+ dmp.set_match_distance(1000);
+
+ // No side-effects - kind of useless because patches is not mutable in Rust
+ let patches = dmp.patch_make(PatchInput::Texts::<Efficient>("", "test"))?;
+ let srcstr = dmp.patch_to_text(&patches);
+ dmp.patch_apply(&patches, "")?;
+ assert_eq!(srcstr, dmp.patch_to_text(&patches));
+
+ let patches = dmp.patch_make(PatchInput::Texts::<Efficient>(
+ "The quick brown fox jumps over the lazy dog.",
+ "Woof",
+ ))?;
+ let srcstr = dmp.patch_to_text(&patches);
+ dmp.patch_apply(&patches, "The quick brown fox jumps over the lazy dog.")?;
+ assert_eq!(srcstr, dmp.patch_to_text(&patches));
+
+ // Edge exact match
+ let patches = dmp.patch_make(PatchInput::Texts::<Efficient>("", "test"))?;
+ assert_eq!(
+ ("test".to_string(), vec![true]),
+ dmp.patch_apply(&patches, "")?
+ );
+
+ // Near edge exact match
+ let patches = dmp.patch_make(PatchInput::Texts::<Efficient>("XY", "XtestY"))?;
+ assert_eq!(
+ ("XtestY".to_string(), vec![true]),
+ dmp.patch_apply(&patches, "XY")?
+ );
+
+ // Edge partial match
+ let patches = dmp.patch_make(PatchInput::Texts::<Efficient>("y", "y123"))?;
+ assert_eq!(
+ ("x123".to_string(), vec![true]),
+ dmp.patch_apply(&patches, "x")?
+ );
+
+ let patches = dmp.patch_make(PatchInput::Texts::<Compat>("", ""))?;
let (txt, results) = dmp.patch_apply(&patches, "Hello world.")?;
assert_eq!(format!("{}\t{}", txt, results.len()), "Hello world.\t0");
- let patches = dmp.patch_make(PatchInput::Texts(
+ let patches = dmp.patch_make(PatchInput::Texts::<Compat>(
"The quick brown fox jumps over the lazy dog.",
"That quick brown fox jumped over a lazy dog.",
))?;
@@ -605,7 +1040,7 @@ fn test_patch_apply() -> Result<(), Error> {
);
// Big delete, small change
- let patches = dmp.patch_make(PatchInput::Texts(
+ let patches = dmp.patch_make(PatchInput::Texts::<Compat>(
"x1234567890123456789012345678901234567890123456789012345678901234567890y",
"xabcy",
))?;
@@ -618,7 +1053,7 @@ fn test_patch_apply() -> Result<(), Error> {
);
// Big delete, large change
- let patches = dmp.patch_make(PatchInput::Texts(
+ let patches = dmp.patch_make(PatchInput::Texts::<Compat>(
"x1234567890123456789012345678901234567890123456789012345678901234567890y",
"xabcy",
))?;
@@ -635,7 +1070,7 @@ fn test_patch_apply() -> Result<(), Error> {
);
dmp.set_delete_threshold(0.6);
- let patches = dmp.patch_make(PatchInput::Texts(
+ let patches = dmp.patch_make(PatchInput::Texts::<Compat>(
"x1234567890123456789012345678901234567890123456789012345678901234567890y",
"xabcy",
))?;
@@ -651,7 +1086,7 @@ fn test_patch_apply() -> Result<(), Error> {
// Compesate for failed patch
dmp.set_match_threshold(0.);
dmp.set_match_distance(0);
- let patches = dmp.patch_make(PatchInput::Texts(
+ let patches = dmp.patch_make(PatchInput::Texts::<Compat>(
"abcdefghijklmnopqrstuvwxyz--------------------1234567890",
"abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890",
))?;
@@ -670,12 +1105,12 @@ fn test_patch_apply() -> Result<(), Error> {
dmp.set_match_distance(1000);
// No side-effects - kinds useless cos patches is not mutable in rust
- let patches = dmp.patch_make(PatchInput::Texts("", "test"))?;
+ let patches = dmp.patch_make(PatchInput::Texts::<Compat>("", "test"))?;
let srcstr = dmp.patch_to_text(&patches);
dmp.patch_apply(&patches, "")?;
assert_eq!(srcstr, dmp.patch_to_text(&patches));
- let patches = dmp.patch_make(PatchInput::Texts(
+ let patches = dmp.patch_make(PatchInput::Texts::<Compat>(
"The quick brown fox jumps over the lazy dog.",
"Woof",
))?;
@@ -684,21 +1119,21 @@ fn test_patch_apply() -> Result<(), Error> {
assert_eq!(srcstr, dmp.patch_to_text(&patches));
// Edge exact match
- let patches = dmp.patch_make(PatchInput::Texts("", "test"))?;
+ let patches = dmp.patch_make(PatchInput::Texts::<Compat>("", "test"))?;
assert_eq!(
("test".to_string(), vec![true]),
dmp.patch_apply(&patches, "")?
);
// Near edge exact match
- let patches = dmp.patch_make(PatchInput::Texts("XY", "XtestY"))?;
+ let patches = dmp.patch_make(PatchInput::Texts::<Compat>("XY", "XtestY"))?;
assert_eq!(
("XtestY".to_string(), vec![true]),
dmp.patch_apply(&patches, "XY")?
);
// Edge partial match
- let patches = dmp.patch_make(PatchInput::Texts("y", "y123"))?;
+ let patches = dmp.patch_make(PatchInput::Texts::<Compat>("y", "y123"))?;
assert_eq!(
("x123".to_string(), vec![true]),
dmp.patch_apply(&patches, "x")?