my fork of dmp
Optimizing existing
Anubhab Bandyopadhyay 2024-12-29
parent 05a688a · commit 6d30525
-rw-r--r--examples/compat.rs2
-rw-r--r--examples/delta.rs2
-rw-r--r--examples/efficiency.rs3
-rw-r--r--src/dmp.rs136
4 files changed, 98 insertions, 45 deletions
diff --git a/examples/compat.rs b/examples/compat.rs
index 691e9f9..547ca21 100644
--- a/examples/compat.rs
+++ b/examples/compat.rs
@@ -2,7 +2,7 @@ use diff_match_patch_rs::{DiffMatchPatch, Efficient, Error, PatchInput};
/// An example flow of the effitient mode
/// This demo will cover creating a diff of two texts and then `patching` it back to get the original text
-
+///
// This is the source text
const TXT_OLD: &str = "I am the very model of a modern Major-General,
I've information vegetable, animal, and mineral,
diff --git a/examples/delta.rs b/examples/delta.rs
index 4368625..62192ab 100644
--- a/examples/delta.rs
+++ b/examples/delta.rs
@@ -2,7 +2,7 @@ use diff_match_patch_rs::{DiffMatchPatch, Efficient, Error, PatchInput};
/// An example flow of the effitient mode
/// This demo will cover creating a diff of two texts and then `patching` it back to get the original text
-
+///
// This is the source text
const TXT_OLD: &str = "I am the very model of a modern Major-General,
I've information vegetable, animal, and mineral,
diff --git a/examples/efficiency.rs b/examples/efficiency.rs
index dd26948..f88c844 100644
--- a/examples/efficiency.rs
+++ b/examples/efficiency.rs
@@ -9,7 +9,8 @@ use diff_match_patch_rs::{DiffMatchPatch, Efficient, Error, PatchInput};
/// `Efficiency mode` is not compatible with other libraries or implementations of `diff-match-patch`.
/// Use `efficiency` mode ONLY if you are using this `crate` across your stack
/// If you want a standardized implementation (working across libraries), use `Compat` mode istead
-
+///
+///
// This is the source text
const TXT_OLD: &str = "I am the very model of a modern Major-General,
I've information vegetable, animal, and mineral,
diff --git a/src/dmp.rs b/src/dmp.rs
index c4be11a..be3b2cf 100644
--- a/src/dmp.rs
+++ b/src/dmp.rs
@@ -21,6 +21,8 @@ pub enum Ops {
Insert,
}
+// const CHUNK_SIZE: usize = 32;
+
/// A structure representing a diff
/// (Ops::Delete, String::new("Hello")) means delete `Hello`
/// (Ops::Insert, String::new("Goodbye")) means add `Goodbye`
@@ -648,17 +650,18 @@ impl DiffMatchPatch {
new: &'a [T],
deadline: Option<Time>,
) -> Result<Vec<Diff<T>>, crate::errors::Error> {
- // let text1_length = old.len() as isize;
- // let text2_length = new.len() as isize;
let old_len = old.len() as isize;
let new_len = new.len() as isize;
- let max_d = (old_len + new_len + 1) / 2;
- let v_offset = max_d;
- let v_len = 2 * max_d;
+ let v_offset = (old_len + new_len + 1) >> 1;
+ let v_len = v_offset << 1;
- let mut v1 = vec![-1_isize; v_len as usize];
- let mut v2 = vec![-1_isize; v_len as usize];
+ // Document: write about this optimization
+ let mut v = vec![-1_isize; (v_len << 1) as usize];
+ let (v1, v2) = v.split_at_mut(v_len as usize);
+ // let mut v1 = &mut v[..v_len as usize];
+ // let mut v2 = &mut v[v_len as usize ..];
+ // let mut v2 = v1.clone();
v1[v_offset as usize + 1] = 0;
v2[v_offset as usize + 1] = 0;
@@ -666,7 +669,7 @@ impl DiffMatchPatch {
let delta = old_len - new_len;
// If the total number of characters is odd, then the front path will collide
// with the reverse path.
- let front = delta % 2 != 0;
+ let front = (delta & 1) != 0;
// Offsets for start and end of k loop.
// Prevents mapping of space beyond the grid.
@@ -675,7 +678,7 @@ impl DiffMatchPatch {
let mut k2start: isize = 0;
let mut k2end: isize = 0;
- for d in 0..max_d {
+ for d in 0..v_offset {
// Bail out if deadline is reached.
if let Some(tout) = deadline {
#[cfg(target_arch = "wasm32")]
@@ -689,7 +692,7 @@ impl DiffMatchPatch {
}
// Walk the front path one step.
- let mut k1 = -d + k1start;
+ let mut k1 = k1start - d;
while k1 < d + 1 - k1end {
let (x1, y1) = {
let k1_offset = v_offset + k1;
@@ -702,15 +705,20 @@ impl DiffMatchPatch {
};
let mut y1 = x1 - k1;
- while x1 < old_len && y1 < new_len {
- let i1 = if x1 < 0 { old_len + x1 } else { x1 };
- let i2 = if y1 < 0 { new_len + y1 } else { y1 };
- if old[i1 as usize] != new[i2 as usize] {
- break;
- }
- x1 += 1;
- y1 += 1;
- }
+ // while x1 < old_len && y1 < new_len {
+ // if old[x1 as usize] != new[y1 as usize] {
+ // break;
+ // }
+ // x1 += 1;
+ // y1 += 1;
+ // }
+ // Document: write about this optimization - failure
+
+ let (xi, yi) = Self::front_path_i(old, new, x1 as usize, y1 as usize);
+
+ y1 = yi as isize;
+ x1 = xi as isize;
+
v1[k1_offset as usize] = x1;
(x1, y1)
@@ -813,6 +821,39 @@ impl DiffMatchPatch {
Ok(vec![Diff::delete(old), Diff::insert(new)])
}
+ fn front_path_i<'a, T: DType>(
+ old: &'a [T],
+ new: &'a [T],
+ x1: usize,
+ y1: usize,
+ ) -> (usize, usize) {
+ let mut x1 = x1;
+ let mut y1 = y1;
+ // Try and vectorize this
+ // if x1 < old.len() && y1 < new.len() {
+ // let t = old[x1 ..].chunks_exact(CHUNK_SIZE).zip(new[y1 ..].chunks_exact(CHUNK_SIZE))
+ // .take_while(|(a, b)| a == b)
+ // .count() * CHUNK_SIZE;
+
+ // // if t > 0 {
+ // // println!("{t}");
+ // // }
+ // x1 += t;
+ // y1 += t;
+ // }
+
+ // Handle remaining
+ while x1 < old.len() && y1 < new.len() {
+ if old[x1] != new[y1] {
+ break;
+ }
+ x1 += 1;
+ y1 += 1;
+ }
+
+ (x1, y1)
+ }
+
// Does a substring of shorttext exist within longtext such that the substring
// is at least half the length of longtext?
//idx Start index of quarter length substring within longtext.
@@ -920,23 +961,18 @@ impl DiffMatchPatch {
return 0;
}
- let minlen = lhs.len().min(rhs.len());
+ // let minlen = lhs.len().min(rhs.len());
// A working set with longer string truncated
- let l = if lhs.len() > rhs.len() {
- &lhs[lhs.len() - rhs.len()..]
- } else {
- lhs
- };
- let r = if lhs.len() < rhs.len() {
- &rhs[..lhs.len()]
+ let (l, r) = if lhs.len() > rhs.len() {
+ (&lhs[lhs.len() - rhs.len()..], rhs)
} else {
- rhs
+ (lhs, &rhs[..lhs.len()])
};
// Quick check for the worst case.
if l == r {
- return minlen;
+ return l.len();
}
// Start by looking for a single character match
@@ -945,24 +981,40 @@ impl DiffMatchPatch {
let mut len = 1;
let mut best = 0;
- loop {
- let pattern = &l[minlen - len..];
- let found = if let Some(found) = r
- .windows(pattern.len())
- .step_by(1)
- .position(|p| p == pattern)
- {
- found
- } else {
- return best;
- };
-
+ // let minlen = l.len();
+
+ // loop {
+ // let pattern = &l[minlen - len..];
+ // let found = if let Some(found) = r
+ // .windows(pattern.len())
+ // .step_by(1)
+ // .position(|p| p == pattern)
+ // {
+ // found
+ // } else {
+ // return best;
+ // };
+
+ // len += found;
+ // if found == 0 || l[minlen - len..] == r[..len] {
+ // best = len;
+ // len += 1;
+ // }
+ // }
+ // let mut pattern = &l[minlen - len..];
+ while let Some(found) = r
+ .windows(len)
+ .step_by(1)
+ .position(|p| p == &l[l.len() - len..])
+ {
len += found;
- if found == 0 || l[minlen - len..] == r[..len] {
+ if found == 0 || l[l.len() - len..] == r[..len] {
best = len;
len += 1;
}
}
+
+ best
}
// Reduce the number of edits by eliminating semantically trivial equalities