my fork of dmp
Minor performance improvements and cleanups
| -rw-r--r-- | CHANGELOG.md | 12 | ||||
| -rw-r--r-- | README.md | 8 | ||||
| -rw-r--r-- | src/dmp.rs | 379 | ||||
| -rw-r--r-- | src/lib.rs | 202 |
4 files changed, 241 insertions, 360 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 341f0f9..e96c82d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,19 +1,22 @@ # CHANGELOG.md -## 0.4.0 +## 0.4.1 +Minor performance & cleanup to Diff: + + - the techniques to attain performance gains in `v0.4.0` now applied across the entire diff flow. + - some cleanups and simplification +## 0.4.0 Performance: - - Non-algorithmic improvements to `Diff` implementation resulting in `~41%` improvements over previous benchmarks. Experiments: https://blog.anubhab.me/tech/optimizing-diff-match-patch/. + - Non-algorithmic improvements to `Diff` implementation resulting in `~41%` improvements over previous benchmarks. Experiments: https://blog.anubhab.me/tech/optimizing-diff-match-patch/ ## 0.3.2 - Fix: - Minor fix for [Issue](https://github.com/AnubhabB/diff-match-patch-rs/issues/7) ## 0.3.1 - Fix: - Fixing order of Ops definition [Issue](https://github.com/AnubhabB/diff-match-patch-rs/issues/5) @@ -29,7 +32,6 @@ Fix: - fixed bug in optional dependency `chrono` based on target `wasm32-unknown-unknown` ## 0.2.0 - Features: - stabilizing APIs & coming out of beta @@ -1,4 +1,4 @@ -# diff-match-patch-rs: Efficient port of Google's diff-match-patch implemented in Rust +# Efficient port of Google's diff-match-patch implemented in Rust [<img alt="github" src="https://img.shields.io/badge/github-Anubhab/diff_match_patch_rs-8da0cb?style=for-the-badge&labelColor=555555&logo=github" height="20">](https://github.com/AnubhabB/diff-match-patch-rs) [<img alt="crates.io" src="https://img.shields.io/crates/v/diff-match-patch-rs" height="20">](https://crates.io/crates/diff-match-patch-rs) @@ -24,8 +24,8 @@ Benchmarks are maintained [diff-match-patch-bench repository](https://github.com |:-------:|:----------------------------------------------------------------------------------------:|:---------:|:----------:|:----------:|:-----------:|:-------:| | `rust` | [diff_match_patch v0.1.1](https://crates.io/crates/diff_match_patch)[^1] | 56.618 ms | 9.00 ms | Criterion | - | ✅ | | `rust` | [dmp v0.2.0](https://crates.io/crates/dmp) | 56.609 ms | 12.25 ms | Criterion | - | ✅ | -| `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 32.795 ms | 533.17 µs | Criterion | `Efficient`| ✅ | -| `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 33.222 ms | 993.90 µs | Criterion | `Compat` | ✅ | +| `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 32.699 ms | 533.17 µs | Criterion | `Efficient`| ✅ | +| `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 33.171 ms | 993.90 µs | Criterion | `Compat` | ✅ | | `go` | [go-diff](https://github.com/sergi/go-diff)[^2] | 30.31 ms | 118.2 ms | go test | - | ❗ | | `node` | [diff-match-patch](https://www.npmjs.com/package/diff-match-patch)[^3] | 246.90 ms | 1.07 ms | tinybench | - | ❌ | | `python`| [diff-match-patch](https://pypi.org/project/diff-match-patch/) | 1.01 s | 0.25 ms | timeit | - | ✅ | @@ -39,7 +39,7 @@ Benchmarks are maintained [diff-match-patch-bench repository](https://github.com ```toml [dependencies] -diff-match-patch-rs = "0.4.0" +diff-match-patch-rs = "0.4.1" ``` ### `Effitient` mode @@ -703,6 +703,8 @@ impl DiffMatchPatch { new: &[T], deadline: Option<Time>, ) -> Result<Vec<Diff<T>>, crate::errors::Error> { + type Int = isize; + // Micro optimization: // Do all setup before casting to isize let mut v; @@ -710,7 +712,7 @@ impl DiffMatchPatch { let v_offset = (old.len() + new.len() + 1) / 2; let v_len = v_offset * 2; - v = vec![-1_isize; v_len * 2]; + v = vec![-1 as Int; v_len * 2]; let (v1, v2) = v.split_at_mut(v_len); let v_trg = v_offset + 1; if v_trg < v1.len() { @@ -721,12 +723,12 @@ impl DiffMatchPatch { } ( - v_offset as isize, - v_len as isize, + v_offset as Int, + v_len as Int, v1, v2, - old.len() as isize, - new.len() as isize, + old.len() as Int, + new.len() as Int, ) }; @@ -755,144 +757,122 @@ impl DiffMatchPatch { } } - // Walk the front path one step. - let mut k1 = k1start - edit_dist; - while k1 < edit_dist + 1 - k1end { - let (x1, y1) = { - let k1_offset = v_offset + k1; - - let v1_prev = { - let prev_idx = (k1_offset - 1) as usize; - if prev_idx < v1.len() { - v1[prev_idx] - } else { - -1 - } - }; - let v1_next = { - let next_idx = (k1_offset + 1) as usize; - if next_idx < v1.len() { - v1[next_idx] - } else { - -1 - } - }; - - let mut x1 = if k1 == -edit_dist || (k1 != edit_dist && v1_prev < v1_next) { - v1_next - } else { - v1_prev + 1 - }; - - let mut y1 = x1 - k1; - let (xi, yi) = Self::bisect_fwd_path_i(old, new, x1 as usize, y1 as usize); - - y1 = yi as isize; - x1 = xi as isize; - - if (0..v1.len() as isize).contains(&k1_offset) { - v1[k1_offset as usize] = x1; - } + (k1start, k1end) = { + let (k1s, k1e, overlap) = Self::bisect_fwd( + old, + new, + (old_len, new_len, delta, front), + (v_offset, v_len), + v1, + v2, + (k1start, k1end), + edit_dist, + ); + if let Some((x1, x2)) = overlap { + return T::bisect_split(self, old, new, x1, x2, deadline); + } - (x1, y1) - }; + (k1s, k1e) + }; - if x1 > old_len { - // Ran off the right of the graph. - k1end += 2; - } else if y1 > new_len { - // Ran off the bottom of the graph. - k1start += 2; - } else if front { - let k2_offset = (v_offset + delta - k1) as usize; - if k2_offset < v_len as usize && k2_offset < v2.len() && v2[k2_offset] != -1 { - // Mirror x2 onto top-left coordinate system. - let x2 = old_len - v2[k2_offset]; - if x1 >= x2 { - // Overlap detected. - return T::bisect_split( - self, - old, - new, - x1 as usize, - y1 as usize, - deadline, - ); - } - } + (k2start, k2end) = { + let (k1s, k1e, overlap) = Self::bisect_rev( + old, + new, + (old_len, new_len, delta, front), + (v_offset, v_len), + v1, + v2, + (k2start, k2end), + edit_dist, + ); + if let Some((x1, x2)) = overlap { + return T::bisect_split(self, old, new, x1, x2, deadline); } - k1 += 2; - } - // Walk the reverse path one step. - let mut k2 = k2start - edit_dist; - while k2 < edit_dist + 1 - k2end { - let (mut x2, y2) = { - let k2_offset = (v_offset + k2) as usize; - let prev_idx = k2_offset - 1; - let next_idx = k2_offset + 1; + (k1s, k1e) + }; + } - let v2_prev = if prev_idx < v2.len() { - v2[prev_idx] + Ok(vec![Diff::delete(old), Diff::insert(new)]) + } + + // Walk the front path one step. + #[allow(clippy::too_many_arguments)] + #[inline] + fn bisect_fwd<T: DType>( + old: &[T], + new: &[T], + (old_len, new_len, delta, front): (isize, isize, isize, bool), // length related: old_len, new_len, delta, front + (v_offset, v_len): (isize, isize), // v_offset, v_len + v1: &mut [isize], + v2: &mut [isize], + (mut k1start, mut k1end): (isize, isize), // k related: k1start, k1end + edit_dist: isize, + ) -> (isize, isize, Option<(usize, usize)>) { + let mut k1 = k1start - edit_dist; + + while k1 < edit_dist + 1 - k1end { + let (x1, y1) = { + let k1_offset = v_offset + k1; + + let v1_prev = { + let prev_idx = (k1_offset - 1) as usize; + if prev_idx < v1.len() { + v1[prev_idx] } else { -1 - }; - let v2_next = if next_idx < v2.len() { - v2[next_idx] + } + }; + let v1_next = { + let next_idx = (k1_offset + 1) as usize; + if next_idx < v1.len() { + v1[next_idx] } else { -1 - }; + } + }; - let mut x2 = if k2 == -edit_dist || (k2 != edit_dist && v2_prev < v2_next) { - v2_next - } else { - v2_prev + 1 - }; + let mut x1 = if k1 == -edit_dist || (k1 != edit_dist && v1_prev < v1_next) { + v1_next + } else { + v1_prev + 1 + }; - let mut y2 = x2 - k2; - let (xi, yi) = Self::bisect_rev_path_i(old, new, x2 as usize, y2 as usize); - y2 = yi as isize; - x2 = xi as isize; + let mut y1 = x1 - k1; + let (xi, yi) = Self::bisect_fwd_path_i(old, new, x1 as usize, y1 as usize); - if k2_offset < v2.len() { - v2[k2_offset] = x2; - } + y1 = yi as isize; + x1 = xi as isize; - (x2, y2) - }; + if (0..v1.len() as isize).contains(&k1_offset) { + v1[k1_offset as usize] = x1; + } - if x2 > old_len { - // Ran off the left of the graph. - k2end += 2; - } else if y2 > new_len { - // Ran off the top of the graph. - k2start += 2; - } else if !front { - let k1_offset = (v_offset + delta - k2) as usize; - if k1_offset < v_len as usize && k1_offset < v1.len() && v1[k1_offset] != -1 { - // if (0..v_len).contains(&k1_offset) && k1_offset < v1.len() as isize && v1[k1_offset as usize] != -1 { - let x1 = v1[k1_offset]; - let y1 = v_offset + x1 - k1_offset as isize; - // Mirror x2 onto top-left coordinate system. - x2 = old_len - x2; - if x1 >= x2 { - // Overlap detected. - return T::bisect_split( - self, - old, - new, - x1 as usize, - y1 as usize, - deadline, - ); - } + (x1, y1) + }; + + if x1 > old_len { + // Ran off the right of the graph. + k1end += 2; + } else if y1 > new_len { + // Ran off the bottom of the graph. + k1start += 2; + } else if front { + let k2_offset = (v_offset + delta - k1) as usize; + if k2_offset < v_len as usize && k2_offset < v2.len() && v2[k2_offset] != -1 { + // Mirror x2 onto top-left coordinate system. + let x2 = old_len - v2[k2_offset]; + if x1 >= x2 { + // Overlap detected. + return (k1start, k1end, Some((x1 as usize, y1 as usize))); } } - k2 += 2; } + k1 += 2; } - Ok(vec![Diff::delete(old), Diff::insert(new)]) + (k1start, k1end, None) } fn bisect_fwd_path_i<T: DType>(old: &[T], new: &[T], x1: usize, y1: usize) -> (usize, usize) { @@ -911,6 +891,89 @@ impl DiffMatchPatch { (x1, y1) } + // Walk the reverse path one step. + #[allow(clippy::too_many_arguments)] + #[inline] + fn bisect_rev<T: DType>( + old: &[T], + new: &[T], + (old_len, new_len, delta, front): (isize, isize, isize, bool), // length related: old_len, new_len, delta, front + (v_offset, v_len): (isize, isize), // v_offset, v_len + v1: &mut [isize], + v2: &mut [isize], + (mut k2start, mut k2end): (isize, isize), // k related: k1start, k1end + edit_dist: isize, + ) -> (isize, isize, Option<(usize, usize)>) { + let mut k2 = k2start - edit_dist; + while k2 < edit_dist + 1 - k2end { + let (mut x2, y2) = { + let k2_offset = (v_offset + k2) as usize; + let prev_idx = k2_offset - 1; + let next_idx = k2_offset + 1; + + let v2_prev = if prev_idx < v2.len() { + v2[prev_idx] + } else { + -1 + }; + let v2_next = if next_idx < v2.len() { + v2[next_idx] + } else { + -1 + }; + + let mut x2 = if k2 == -edit_dist || (k2 != edit_dist && v2_prev < v2_next) { + v2_next + } else { + v2_prev + 1 + }; + + let mut y2 = x2 - k2; + let (xi, yi) = Self::bisect_rev_path_i(old, new, x2 as usize, y2 as usize); + y2 = yi as isize; + x2 = xi as isize; + + if k2_offset < v2.len() { + v2[k2_offset] = x2; + } + + (x2, y2) + }; + + if x2 > old_len { + // Ran off the left of the graph. + k2end += 2; + } else if y2 > new_len { + // Ran off the top of the graph. + k2start += 2; + } else if !front { + let k1_offset = (v_offset + delta - k2) as usize; + if k1_offset < v_len as usize && k1_offset < v1.len() && v1[k1_offset] != -1 { + // if (0..v_len).contains(&k1_offset) && k1_offset < v1.len() as isize && v1[k1_offset as usize] != -1 { + let x1 = v1[k1_offset]; + let y1 = v_offset + x1 - k1_offset as isize; + // Mirror x2 onto top-left coordinate system. + x2 = old_len - x2; + if x1 >= x2 { + // Overlap detected. + return (k2start, k2end, Some((x1 as usize, y1 as usize))); + // return T::bisect_split( + // self, + // old, + // new, + // x1 as usize, + // y1 as usize, + // deadline, + // ); + } + } + } + k2 += 2; + } + + (k2start, k2end, None) + } + fn bisect_rev_path_i<T: DType>(old: &[T], new: &[T], x2: usize, y2: usize) -> (usize, usize) { let mut x2 = x2; let mut y2 = y2; @@ -1495,9 +1558,15 @@ impl DiffMatchPatch { let mut changes = false; let mut pointer = 1; - while !diffs.is_empty() && (1..diffs.len() - 1).contains(&pointer) { - let (diff_prev, diff, diff_next) = - (&diffs[pointer - 1], &diffs[pointer], &diffs[pointer + 1]); + while !diffs.is_empty() { + let p_prev = pointer - 1; + let p_next = pointer + 1; + + if p_next >= diffs.len() { + break; + } + + let (diff_prev, diff, diff_next) = (&diffs[p_prev], &diffs[pointer], &diffs[p_next]); // This is a single edit surrounded by equalities. if diff_prev.op() == Ops::Equal && diff_next.op() == Ops::Equal { @@ -1506,35 +1575,45 @@ impl DiffMatchPatch { } else { 0 }; - if &diff.data()[substr_idx..] == diff_prev.data() { - // Shift the edit over the previous equality. - let new_current_data = [ - diff_prev.data(), - &diff.data()[..diff.size() - diff_prev.size()], - ] - .concat(); + + let diff_d = if substr_idx < diff.size() { + &diff.data()[substr_idx..] + } else { + &[] + }; + + if diff_d == diff_prev.data() { + let diff_d = { + let end = diff.size() - diff_prev.size(); + if end <= diff.data().len() { + &diff.data()[..end] + } else { + &[] + } + }; + + // Shift the edit over the previous equality + let new_current_data = [diff_prev.data(), diff_d].concat(); + let new_next_data = [diff_prev.data(), diff_next.data()].concat(); diffs[pointer].1 = new_current_data; - diffs[pointer + 1].1 = new_next_data; - diffs.remove(pointer - 1); + diffs[p_next].1 = new_next_data; + diffs.remove(p_prev); changes = true; - } else if &diff.data()[..if diff_next.size() <= diff.size() { - diff_next.size() - } else { - diff.size() - }] == diff_next.data() - { + } else if &diff.data()[..diff_next.size().min(diff.size())] == diff_next.data() { // Shift the edit over the next equality. - diffs[pointer - 1].1 = - [&diffs[pointer - 1].1[..], diffs[pointer + 1].data()].concat(); - diffs[pointer].1 = [ - &diffs[pointer].data()[diffs[pointer + 1].size()..], - diffs[pointer + 1].data(), - ] - .concat(); - diffs.remove(pointer + 1); + diffs[p_prev].1 = [diffs[p_prev].data(), diffs[p_next].data()].concat(); + + let cur_data = if diffs[p_next].size() < diffs[pointer].size() { + &diffs[pointer].data()[diffs[p_next].size()..] + } else { + &[] + }; + + diffs[pointer].1 = [cur_data, diffs[p_next].data()].concat(); + diffs.remove(p_next); changes = true; } @@ -1,204 +1,4 @@ -//! # Efficient port of Google's diff-match-patch implemented in Rust -//! -//! [<img alt="github" src="https://img.shields.io/badge/github-Anubhab/diff_match_patch_rs-8da0cb?style=for-the-badge&labelColor=555555&logo=github" height="20">](https://github.com/AnubhabB/diff-match-patch-rs) -//! [<img alt="crates.io" src="https://img.shields.io/crates/v/diff-match-patch-rs" height="20">](https://crates.io/crates/diff-match-patch-rs) -//! [<img alt="docs.rs" src="https://img.shields.io/badge/docs.rs-diff_match_patch_rs?style=for-the-badge&logo=docs.rs&labelColor=%23555555" height="20">](https://docs.rs/diff-match-patch-rs) -//! -//! -//! **Fastest**, **accurate** and **wasm ready** implementation of [Diff Match Patch](https://github.com/dmsnell/diff-match-patch) in Rust based on [Myers' diff algorithm](https://neil.fraser.name/writing/diff/myers.pdf). -//! -//! ## Highlights of this crate -//! - Exposes two modes of operating with `diff-match-patch`, a `Efficient` mode and `Compat` mode. While the `Efficient` mode squeezes out the max performance the `Compat` mode ensures compatibility across other libraries or implementations (rust or otherwise). According to [Benchmarks](#benchmarks), our slower `Compat` mode is still faster than other implementations in rust. -//! - **`Efficient`** mode works on `&[u8]` and the generated diffs break compatibility with other implementation. Use the **`Efficient`** mode ONLY if you are using this [crate](https://crates.io/crates/diff-match-patch-rs) at the source of diff generation and the destination. -//! - **`Compat`** mode on the other hand works on `&[char]` and the generated `diffs` and `patches` are compatible across other implementations of `diff-match-patch`. Checkout `tests/compat.rs` for some test cases around this. -//! - `wasm` ready, you can check out a [demo here](https://github.com/AnubhabB/wasm-diff.git) -//! - **Accurate**, while working on this crate I've realized that there are a bunch of implementations that have major issues (wrong diffs, inaccurate flows, silent errors etc.). -//! - Helper method **pretty_html** provided by this crate allows some configurations to control the generated visuals elements. -//! - Well tested -//! - Added a `fuzzer` for sanity -//! - Exposes the same APIs as [Diff Match Patch](https://github.com/dmsnell/diff-match-patch) with minor changes to make it more idiomatic in Rust. -//! -//! ## Benchmarks -//! Benchmarks are maintained [diff-match-patch-bench repository](https://github.com/AnubhabB/diff-match-patch-rs-bench) -//! -//! | Lang. | Library | Diff Avg. | Patch Avg. | Bencher | Mode | Correct | -//! |:-------:|:----------------------------------------------------------------------------------------:|:---------:|:----------:|:----------:|:-----------:|:-------:| -//! | `rust` | [diff_match_patch v0.1.1](https://crates.io/crates/diff_match_patch)[^1] | 56.618 ms | 9.00 ms | Criterion | - | ✅ | -//! | `rust` | [dmp v0.2.0](https://crates.io/crates/dmp) | 56.609 ms | 12.25 ms | Criterion | - | ✅ | -//! | `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 32.795 ms | 533.17 µs | Criterion | `Efficient`| ✅ | -//! | `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 33.222 ms | 993.90 µs | Criterion | `Compat` | ✅ | -//! | `go` | [go-diff](https://github.com/sergi/go-diff)[^2] | 30.31 ms | 118.2 ms | go test | - | ❗ | -//! | `node` | [diff-match-patch](https://www.npmjs.com/package/diff-match-patch)[^3] | 246.90 ms | 1.07 ms | tinybench | - | ❌ | -//! | `python`| [diff-match-patch](https://pypi.org/project/diff-match-patch/) | 1.01 s | 0.25 ms | timeit | - | ✅ | -//! -//! [^1]: [diff_match_patch](https://crates.io/crates/diff_match_patch) Adds an extra clone to the iterator because the `patch_apply` method takes mutable refc. to `diffs`. -//! [^2]: [go-diff](https://github.com/sergi/go-diff) is technically a correct implementation but it generates far more diffs than any other implementation I've tested. E.g. In our test data [here](https://github.com/AnubhabB/diff-match-patch-rs-bench/testdata) the go lib ends up generating `~3000` diffs while other implementations are generating `~2300` diffs. My guess is one of the cleanup passes are skipped by `go-diff`.That's probably the reason behind unreasonably high `patch` timings. -//! [^3]: [diff-match-patch](https://www.npmjs.com/package/diff-match-patch) generated `patch text` and `delta` breaks on `unicode surrogates`. -//! -//! ## Usage Examples -//! -//! ```toml -//! [dependencies] -//! diff-match-patch-rs = "0.4.0" -//! ``` -//! -//! ### `Effitient` mode -//! -//! ```rust -//! use diff_match_patch_rs::{DiffMatchPatch, Efficient, Error, PatchInput}; -//! -//! // This is the source text -//! const TXT_OLD: &str = "I am the very model of a modern Major-General, I've information on vegetable, animal, and mineral, 🚀👏👀"; -//! -//! // Let's assume this to be the text that was editted from the source text -//! const TXT_NEW: &str = "I am the very model of a cartoon individual, My animation's comical, unusual, and whimsical.😊👀"; -//! -//! // An example of a function that creates a diff and returns a set of patches serialized -//! fn at_source() -> Result<String, Error> { -//! // initializing the module -//! let dmp = DiffMatchPatch::new(); -//! // create a list of diffs -//! let diffs = dmp.diff_main::<Efficient>(TXT_OLD, TXT_NEW)?; -//! // Now, we are going to create a list of `patches` to be applied to the old text to get the new text -//! let patches = dmp.patch_make(PatchInput::new_diffs(&diffs))?; -//! // in the real world you are going to transmit or store this diff serialized to undiff format to be consumed or used somewhere elese -//! let patch_txt = dmp.patch_to_text(&patches); -//! -//! Ok(patch_txt) -//! } -//! -//! fn at_destination(patches: &str) -> Result<(), Error> { -//! // initializing the module -//! let dmp = DiffMatchPatch::new(); -//! // lets recreate the diffs from patches -//! let patches = dmp.patch_from_text::<Efficient>(patches)?; -//! // Now, lets apply these patches to the `old_txt` which is the original to get the new text -//! let (new_txt, ops) = dmp.patch_apply(&patches, TXT_OLD)?; -//! // Lets print out if the ops succeeded or not -//! ops.iter() -//! .for_each(|&o| println!("{}", if o { "OK" } else { "FAIL" })); -//! -//! // If everything goes as per plan you should see -//! // OK -//! // OK -//! // ... and so on -//! -//! // lets check out if our `NEW_TXT` (presumably the edited one) -//! if new_txt != TXT_NEW { -//! return Err(Error::InvalidInput); -//! } -//! -//! println!("Wallah! Patch applied successfully!"); -//! -//! Ok(()) -//! } -//! -//! fn main() -> Result<(), Error> { -//! // At the source of diff where the old text is being edited we'll create a set of patches -//! let patches = at_source()?; -//! // We'll send this diff to some destination e.g. db or the client where these changes are going to be applied -//! // The destination will receive the patch string and will apply the patches to recreate the edits -//! at_destination(&patches) -//! } -//! -//! ``` -//! -//! ### `Compat` mode -//! -//! ```rust -//! use diff_match_patch_rs::{DiffMatchPatch, Compat, Error, PatchInput}; -//! -//! // This is the source text -//! const TXT_OLD: &str = "I am the very model of a modern Major-General, I've information on vegetable, animal, and mineral, 🚀👏👀"; -//! -//! // Let's assume this to be the text that was editted from the source text -//! const TXT_NEW: &str = "I am the very model of a cartoon individual, My animation's comical, unusual, and whimsical.😊👀"; -//! -//! // An example of a function that creates a diff and returns a set of patches serialized -//! fn at_source() -> Result<String, Error> { -//! // initializing the module -//! let dmp = DiffMatchPatch::new(); -//! // create a list of diffs -//! let diffs = dmp.diff_main::<Compat>(TXT_OLD, TXT_NEW)?; -//! // Now, we are going to create a list of `patches` to be applied to the old text to get the new text -//! let patches = dmp.patch_make(PatchInput::new_diffs(&diffs))?; -//! // in the real world you are going to transmit or store this diff serialized to undiff format to be consumed or used somewhere elese -//! let patch_txt = dmp.patch_to_text(&patches); - -//! Ok(patch_txt) -//! } -//! -//! fn at_destination(patches: &str) -> Result<(), Error> { -//! // initializing the module -//! let dmp = DiffMatchPatch::new(); -//! // lets recreate the diffs from patches -//! let patches = dmp.patch_from_text::<Compat>(patches)?; -//! // Now, lets apply these patches to the `old_txt` which is the original to get the new text -//! let (new_txt, ops) = dmp.patch_apply(&patches, TXT_OLD)?; -//! // Lets print out if the ops succeeded or not -//! ops.iter() -//! .for_each(|&o| println!("{}", if o { "OK" } else { "FAIL" })); -//! -//! // If everything goes as per plan you should see -//! // OK -//! // OK -//! // ... and so on -//! -//! // lets check out if our `NEW_TXT` (presumably the edited one) -//! if new_txt != TXT_NEW { -//! return Err(Error::InvalidInput); -//! } -//! -//! println!("Wallah! Patch applied successfully!"); -//! -//! Ok(()) -//! } -//! -//! fn main() -> Result<(), Error> { -//! // At the source of diff where the old text is being edited we'll create a set of patches -//! let patches = at_source()?; -//! // We'll send this diff to some destination e.g. db or the client where these changes are going to be applied -//! // The destination will receive the patch string and will apply the patches to recreate the edits -//! at_destination(&patches) -//! } -//! ``` -//! ### `Match` - fuzzy match of pattern in Text -//! -//! ```rust -//! use diff_match_patch_rs::{DiffMatchPatch, Efficient, Error, PatchInput}; -//! // This is the source text -//! const TXT: &str = "I am the very model of a modern Major-General, I've information on vegetable, animal, and mineral, 🚀👏👀"; -//! -//! // The patter we are trying to fing -//! const PATTERN: &str = " that berry "; -//! -//! // Returns `location` of match if found, `None` if not found -//! fn main() { -//! let dmp = DiffMatchPatch::new(); -//! -//! // works with both `Efficient` and `Compat` modes -//! // `5` here is an approx location to find `nearby` matches -//! let res = dmp.match_main::<Efficient>(TXT, PATTERN, 5); -//! println!("{:?}", res); // Should print `Some(4)` -//! } -//! ``` -//! -//! #### Note -//! The `Efficient` and `Compat` mode APIs are identical with the only chage being the `generic` parameter declared during the calls. -//! -//! E.g. we initiated a `diff` in the `Efficient` mode with `dmp.diff_main::<Efficient>( ... )` while for `Compat` mode we did `dmp.diff_main::<Compat>( ... )`. -//! -//! Please checkout the `examples` directory of the [source repo](https://github.com/AnubhabB/diff-match-patch-rs/tree/main/examples) for a few common use-cases. -//! -//! <div class="warning">The `Effitient` and `Compat` modes are mutually exclusive and will not generate correct output if used interchangibly at source and destination</div> -//! -//! ## Gotchas -//! **Diff incompatibility with `JavaScript` libs**: -//! -//! There are 2 kinds of implementations - one which use a `postprocessing` function for merging `unicode surrogates` which break compatibility with every other popular `diff-match-patch` implementations and the other kind (packages based on the original implementation) break while `urlEncode()` of unicode surrogates. -//! As of now, this crate brakes compatibility while working with `JS` generated diffs with the surrogate patch. -//! If you are interfacing with `JavaScript` in browser, using this crate through `wasm` would be ideal. -//! +#![doc = include_str!("../README.md")] pub mod dmp; pub mod errors; |