my fork of dmp
-rw-r--r--Cargo.toml7
-rw-r--r--README.md81
-rw-r--r--benches/Bench.xlsxbin14975 -> 0 bytes
-rw-r--r--benches/diff.rs19
-rw-r--r--src/dmp.rs80
-rw-r--r--src/lib.rs171
6 files changed, 284 insertions, 74 deletions
diff --git a/Cargo.toml b/Cargo.toml
index e8b9e68..559f720 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,13 +18,6 @@ percent-encoding = "2"
targets = ["x86_64-unknown-linux-gnu", "wasm32-unknown-unknown"]
rustdoc-args = ["--generate-link-to-definition"]
-[dev-dependencies]
-criterion = "0"
-
-[[bench]]
-name = "diff"
-harness = false
-
[[example]]
name = "efficiency"
diff --git a/README.md b/README.md
index b292de0..5d77576 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@ diff implementation is based on [Myers' diff algorithm](https://neil.fraser.name
- Helper method **pretty_html** provided by this crate is a bit more advanced and allows some configurations to control the generated visuals elements.
- Well tested
- Added a `fuzzer` for sanity
+- Exposes the same APIs as [Diff Match Patch](https://github.com/dmsnell/diff-match-patch) with minor changes to make it more idiomatic in Rust.
## Usage Examples
@@ -22,17 +23,20 @@ diff implementation is based on [Myers' diff algorithm](https://neil.fraser.name
```rust
use diff_match_patch_rs::{DiffMatchPatch, Efficient, Error, PatchInput};
+// This is the source text
+const TXT_OLD: &str = "I am the very model of a modern Major-General, I've information on vegetable, animal, and mineral, ๐Ÿš€๐Ÿ‘๐Ÿ‘€"
+
+// Let's assume this to be the text that was editted from the source text
+const TXT_NEW: &str = "I am the very model of a cartoon individual, My animation's comical, unusual, and whimsical.๐Ÿ˜Š๐Ÿ‘€"
+
// An example of a function that creates a diff and returns a set of patches serialized
fn at_source() -> Result<String, Error> {
// initializing the module
let dmp = DiffMatchPatch::new();
-
// create a list of diffs
let diffs = dmp.diff_main::<Efficient>(TXT_OLD, TXT_NEW)?;
-
// Now, we are going to create a list of `patches` to be applied to the old text to get the new text
let patches = dmp.patch_make(PatchInput::new_diffs(&diffs))?;
-
// in the real world you are going to transmit or store this diff serialized to undiff format to be consumed or used somewhere elese
let patch_txt = dmp.patch_to_text(&patches);
@@ -42,13 +46,10 @@ fn at_source() -> Result<String, Error> {
fn at_destination(patches: &str) -> Result<(), Error> {
// initializing the module
let dmp = DiffMatchPatch::new();
-
// lets recreate the diffs from patches
let patches = dmp.patch_from_text::<Efficient>(patches)?;
-
// Now, lets apply these patches to the `old_txt` which is the original to get the new text
let (new_txt, ops) = dmp.patch_apply(&patches, TXT_OLD)?;
-
// Lets print out if the ops succeeded or not
ops.iter()
.for_each(|&o| println!("{}", if o { "OK" } else { "FAIL" }));
@@ -71,15 +72,79 @@ fn at_destination(patches: &str) -> Result<(), Error> {
fn main() -> Result<(), Error> {
// At the source of diff where the old text is being edited we'll create a set of patches
let patches = at_source()?;
-
// We'll send this diff to some destination e.g. db or the client where these changes are going to be applied
-
// The destination will receive the patch string and will apply the patches to recreate the edits
at_destination(&patches)
}
```
+### `Compat` mode
+
+```rust
+use diff_match_patch_rs::{DiffMatchPatch, Compat, Error, PatchInput};
+
+// This is the source text
+const TXT_OLD: &str = "I am the very model of a modern Major-General, I've information on vegetable, animal, and mineral, ๐Ÿš€๐Ÿ‘๐Ÿ‘€"
+
+// Let's assume this to be the text that was editted from the source text
+const TXT_NEW: &str = "I am the very model of a cartoon individual, My animation's comical, unusual, and whimsical.๐Ÿ˜Š๐Ÿ‘€"
+
+// An example of a function that creates a diff and returns a set of patches serialized
+fn at_source() -> Result<String, Error> {
+ // initializing the module
+ let dmp = DiffMatchPatch::new();
+ // create a list of diffs
+ let diffs = dmp.diff_main::<Compat>(TXT_OLD, TXT_NEW)?;
+ // Now, we are going to create a list of `patches` to be applied to the old text to get the new text
+ let patches = dmp.patch_make(PatchInput::new_diffs(&diffs))?;
+ // in the real world you are going to transmit or store this diff serialized to undiff format to be consumed or used somewhere elese
+ let patch_txt = dmp.patch_to_text(&patches);
+
+ Ok(patch_txt)
+}
+
+fn at_destination(patches: &str) -> Result<(), Error> {
+ // initializing the module
+ let dmp = DiffMatchPatch::new();
+ // lets recreate the diffs from patches
+ let patches = dmp.patch_from_text::<Compat>(patches)?;
+ // Now, lets apply these patches to the `old_txt` which is the original to get the new text
+ let (new_txt, ops) = dmp.patch_apply(&patches, TXT_OLD)?;
+ // Lets print out if the ops succeeded or not
+ ops.iter()
+ .for_each(|&o| println!("{}", if o { "OK" } else { "FAIL" }));
+
+ // If everything goes as per plan you should see
+ // OK
+ // OK
+ // ... and so on
+
+ // lets check out if our `NEW_TXT` (presumably the edited one)
+ if new_txt != TXT_NEW {
+ return Err(Error::InvalidInput);
+ }
+
+ println!("Wallah! Patch applied successfully!");
+
+ Ok(())
+}
+
+fn main() -> Result<(), Error> {
+ // At the source of diff where the old text is being edited we'll create a set of patches
+ let patches = at_source()?;
+ // We'll send this diff to some destination e.g. db or the client where these changes are going to be applied
+ // The destination will receive the patch string and will apply the patches to recreate the edits
+ at_destination(&patches)
+}
+```
+
+#### Note
+The `Efficient` and `Compat` mode APIs are identical with the only chage being the `generic` parameter declared during the calls.
+
+E.g. we initiated a `diff` in the `Efficient` mode with `dmp.diff_main::<Efficient>( ... )` while for `Compat` mode we did `dmp.diff_main::<Compat>( ... )`.
+
+<div class="warning">The `Effitient` and `Compat` modes are mutually exclusive and will not generate correct output if used interchangibly at source and destination</div>
## Benchmarks
Benchmarks are maintained [diff-match-patch-bench repository](https://github.com/AnubhabB/diff-match-patch-rs-bench)
diff --git a/benches/Bench.xlsx b/benches/Bench.xlsx
deleted file mode 100644
index e0ec0b6..0000000
--- a/benches/Bench.xlsx
+++ /dev/null
Binary files differ
diff --git a/benches/diff.rs b/benches/diff.rs
deleted file mode 100644
index f426c3c..0000000
--- a/benches/diff.rs
+++ /dev/null
@@ -1,19 +0,0 @@
-use std::path::Path;
-
-use criterion::{criterion_group, criterion_main, Criterion};
-use diff_match_patch_rs::{dmp::DiffMatchPatch, Efficient};
-
-fn diff_main(c: &mut Criterion) {
- let basedir = Path::new("testdata");
- let old = std::fs::read_to_string(basedir.join("txt_old.txt")).unwrap();
- let new = std::fs::read_to_string(basedir.join("txt_new.txt")).unwrap();
-
- let dmp = DiffMatchPatch::default();
-
- c.bench_function("diff-match-patch", |bencher| {
- bencher.iter(|| dmp.diff_main::<Efficient>(&old, &new).unwrap());
- });
-}
-
-criterion_group!(diff, diff_main);
-criterion_main!(diff);
diff --git a/src/dmp.rs b/src/dmp.rs
index c677a36..6ffda77 100644
--- a/src/dmp.rs
+++ b/src/dmp.rs
@@ -1187,10 +1187,12 @@ impl DiffMatchPatch {
// Scores range from 6 (best) to 0 (worst)
#[inline]
fn cleanup_semantic_score<T: DType>(one: &[T], two: &[T]) -> u8 {
- let (char1, char2) = if let (Some(&char1), Some(&char2)) = (one.last(), two.first())
- && let (Some(c1), Some(c2)) = (char1.as_char(), char2.as_char())
- {
- (c1, c2)
+ let (char1, char2) = if let (Some(&char1), Some(&char2)) = (one.last(), two.first()) {
+ if let (Some(c1), Some(c2)) = (char1.as_char(), char2.as_char()) {
+ (c1, c2)
+ } else {
+ return 6;
+ }
} else {
return 6;
};
@@ -1337,10 +1339,10 @@ impl DiffMatchPatch {
}
}
- if let Some(dl) = diffs.last()
- && dl.data().is_empty()
- {
- diffs.pop();
+ if let Some(dl) = diffs.last() {
+ if dl.data().is_empty() {
+ diffs.pop();
+ }
}
difflen = diffs.len();
@@ -1466,45 +1468,45 @@ impl DiffMatchPatch {
// <ins>A</del>X<ins>C</ins><del>D</del>
// <ins>A</ins><del>B</del>X<del>C</del>
- if let Some(le) = &mut last_eq
- && ((pre_ins && pre_del && post_ins && post_del)
+ if let Some(le) = &mut last_eq {
+ if (pre_ins && pre_del && post_ins && post_del)
|| (le.len() < edit_cost / 2
- && pre_ins as u8 + pre_del as u8 + post_ins as u8 + post_del as u8
- == 3))
- {
- // Duplicate record.
- let item = equalities.pop().unwrap();
- // change the second copy (after the insert in next line) to Insert
- diffs[item].0 = Ops::Insert;
- // add an item
- diffs.insert(item, Diff::delete(le));
-
- last_eq = None;
+ && pre_ins as u8 + pre_del as u8 + post_ins as u8 + post_del as u8 == 3)
+ {
+ // Duplicate record.
+ let item = equalities.pop().unwrap();
+ // change the second copy (after the insert in next line) to Insert
+ diffs[item].0 = Ops::Insert;
+ // add an item
+ diffs.insert(item, Diff::delete(le));
- if pre_ins && pre_del {
- // No changes made which could affect previous entry, keep going.
- post_ins = true;
- post_del = true;
+ last_eq = None;
- equalities = vec![];
- } else {
- equalities.pop();
+ if pre_ins && pre_del {
+ // No changes made which could affect previous entry, keep going.
+ post_ins = true;
+ post_del = true;
- if let Some(&l) = equalities.last() {
- pointer = l;
+ equalities = vec![];
} else {
- pointer = 0;
+ equalities.pop();
+
+ if let Some(&l) = equalities.last() {
+ pointer = l;
+ } else {
+ pointer = 0;
+ post_ins = false;
+ post_del = false;
+ changes = true;
+ continue;
+ };
+ // pointer = equalitiesLength > 0 ?
+ // equalities[equalitiesLength - 1] : -1;
post_ins = false;
post_del = false;
- changes = true;
- continue;
- };
- // pointer = equalitiesLength > 0 ?
- // equalities[equalitiesLength - 1] : -1;
- post_ins = false;
- post_del = false;
+ }
+ changes = true;
}
- changes = true;
}
}
pointer += 1;
diff --git a/src/lib.rs b/src/lib.rs
index e54bfa7..4ac630c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,173 @@
-#![feature(trait_alias, let_chains)]
+// #![feature(trait_alias, let_chains)]
+
+//! # diff-match-patch-rs: Efficient port of Google's diff-match-patch implemented in Rust
+
+//! A very **fast**, **accurate** and **wasm ready** port of [Diff Match Patch](https://github.com/dmsnell/diff-match-patch) in Rust. The
+//! diff implementation is based on [Myers' diff algorithm](https://neil.fraser.name/writing/diff/myers.pdf).
+
+//! ## Highlights of this crate
+//! - Exposes two modes of operating with `diff-match-patch`, a `Efficient` mode and `Compat` mode. While the `Efficient` mode squeezes out the max performance the `Compat` mode ensures compatibility across other libraries or implementations (rust or otherwise). According to [Benchmarks](#benchmarks), our slower `Compat` mode is still faster than other implementations in rust.
+//! - **`Efficient`** mode works on `&[u8]` and the generated diffs break compatibility with other implementation. Use the **`Efficient`** mode ONLY if you are using this [crate](https://crates.io/crates/diff-match-patch-rs) at the source of diff generation and the destination.
+//! - **`Compat`** mode on the other hand works on `&[char]` and the generated `diffs` and `patches` are compatible across other implementations of `diff-match-patch`. Checkout `tests/compat.rs` for some test cases around this.
+//! - `wasm` ready, you can check out a [demo here](https://github.com/AnubhabB/wasm-diff.git)
+//! - **Accurate**, while working on this crate I've realized that there are a bunch of implementations that have major issues (wrong diffs, inaccurate flows, silent errors etc.).
+//! - Helper method **pretty_html** provided by this crate allows some configurations to control the generated visuals elements.
+//! - Well tested
+//! - Added a `fuzzer` for sanity
+//! - Exposes the same APIs as [Diff Match Patch](https://github.com/dmsnell/diff-match-patch) with minor changes to make it more idiomatic in Rust.
+//!
+//! ## Usage Examples
+//!
+//! ### `Effitient` mode
+//!
+//! ```rust
+//! use diff_match_patch_rs::{DiffMatchPatch, Efficient, Error, PatchInput};
+//!
+//! // This is the source text
+//! const TXT_OLD: &str = "I am the very model of a modern Major-General, I've information on vegetable, animal, and mineral, ๐Ÿš€๐Ÿ‘๐Ÿ‘€";
+//!
+//! // Let's assume this to be the text that was editted from the source text
+//! const TXT_NEW: &str = "I am the very model of a cartoon individual, My animation's comical, unusual, and whimsical.๐Ÿ˜Š๐Ÿ‘€";
+//!
+//! // An example of a function that creates a diff and returns a set of patches serialized
+//! fn at_source() -> Result<String, Error> {
+//! // initializing the module
+//! let dmp = DiffMatchPatch::new();
+//! // create a list of diffs
+//! let diffs = dmp.diff_main::<Efficient>(TXT_OLD, TXT_NEW)?;
+//! // Now, we are going to create a list of `patches` to be applied to the old text to get the new text
+//! let patches = dmp.patch_make(PatchInput::new_diffs(&diffs))?;
+//! // in the real world you are going to transmit or store this diff serialized to undiff format to be consumed or used somewhere elese
+//! let patch_txt = dmp.patch_to_text(&patches);
+//!
+//! Ok(patch_txt)
+//! }
+//!
+//! fn at_destination(patches: &str) -> Result<(), Error> {
+//! // initializing the module
+//! let dmp = DiffMatchPatch::new();
+//! // lets recreate the diffs from patches
+//! let patches = dmp.patch_from_text::<Efficient>(patches)?;
+//! // Now, lets apply these patches to the `old_txt` which is the original to get the new text
+//! let (new_txt, ops) = dmp.patch_apply(&patches, TXT_OLD)?;
+//! // Lets print out if the ops succeeded or not
+//! ops.iter()
+//! .for_each(|&o| println!("{}", if o { "OK" } else { "FAIL" }));
+//!
+//! // If everything goes as per plan you should see
+//! // OK
+//! // OK
+//! // ... and so on
+//!
+//! // lets check out if our `NEW_TXT` (presumably the edited one)
+//! if new_txt != TXT_NEW {
+//! return Err(Error::InvalidInput);
+//! }
+//!
+//! println!("Wallah! Patch applied successfully!");
+//!
+//! Ok(())
+//! }
+//!
+//! fn main() -> Result<(), Error> {
+//! // At the source of diff where the old text is being edited we'll create a set of patches
+//! let patches = at_source()?;
+//! // We'll send this diff to some destination e.g. db or the client where these changes are going to be applied
+//! // The destination will receive the patch string and will apply the patches to recreate the edits
+//! at_destination(&patches)
+//! }
+//!
+//! ```
+//!
+//! ### `Compat` mode
+//!
+//! ```rust
+//! use diff_match_patch_rs::{DiffMatchPatch, Compat, Error, PatchInput};
+//!
+//! // This is the source text
+//! const TXT_OLD: &str = "I am the very model of a modern Major-General, I've information on vegetable, animal, and mineral, ๐Ÿš€๐Ÿ‘๐Ÿ‘€";
+//!
+//! // Let's assume this to be the text that was editted from the source text
+//! const TXT_NEW: &str = "I am the very model of a cartoon individual, My animation's comical, unusual, and whimsical.๐Ÿ˜Š๐Ÿ‘€";
+//!
+//! // An example of a function that creates a diff and returns a set of patches serialized
+//! fn at_source() -> Result<String, Error> {
+//! // initializing the module
+//! let dmp = DiffMatchPatch::new();
+//! // create a list of diffs
+//! let diffs = dmp.diff_main::<Compat>(TXT_OLD, TXT_NEW)?;
+//! // Now, we are going to create a list of `patches` to be applied to the old text to get the new text
+//! let patches = dmp.patch_make(PatchInput::new_diffs(&diffs))?;
+//! // in the real world you are going to transmit or store this diff serialized to undiff format to be consumed or used somewhere elese
+//! let patch_txt = dmp.patch_to_text(&patches);
+
+//! Ok(patch_txt)
+//! }
+//!
+//! fn at_destination(patches: &str) -> Result<(), Error> {
+//! // initializing the module
+//! let dmp = DiffMatchPatch::new();
+//! // lets recreate the diffs from patches
+//! let patches = dmp.patch_from_text::<Compat>(patches)?;
+//! // Now, lets apply these patches to the `old_txt` which is the original to get the new text
+//! let (new_txt, ops) = dmp.patch_apply(&patches, TXT_OLD)?;
+//! // Lets print out if the ops succeeded or not
+//! ops.iter()
+//! .for_each(|&o| println!("{}", if o { "OK" } else { "FAIL" }));
+//!
+//! // If everything goes as per plan you should see
+//! // OK
+//! // OK
+//! // ... and so on
+//!
+//! // lets check out if our `NEW_TXT` (presumably the edited one)
+//! if new_txt != TXT_NEW {
+//! return Err(Error::InvalidInput);
+//! }
+//!
+//! println!("Wallah! Patch applied successfully!");
+//!
+//! Ok(())
+//! }
+//!
+//! fn main() -> Result<(), Error> {
+//! // At the source of diff where the old text is being edited we'll create a set of patches
+//! let patches = at_source()?;
+//! // We'll send this diff to some destination e.g. db or the client where these changes are going to be applied
+//! // The destination will receive the patch string and will apply the patches to recreate the edits
+//! at_destination(&patches)
+//! }
+//! ```
+//!
+//! #### Note
+//! The `Efficient` and `Compat` mode APIs are identical with the only chage being the `generic` parameter declared during the calls.
+//!
+//! E.g. we initiated a `diff` in the `Efficient` mode with `dmp.diff_main::<Efficient>( ... )` while for `Compat` mode we did `dmp.diff_main::<Compat>( ... )`.
+//!
+//! Please checkout the `examples` directory of the [source repo](https://github.com/AnubhabB/diff-match-patch-rs/tree/main/examples) for a few common use-cases.
+//!
+//! <div class="warning">The `Effitient` and `Compat` modes are mutually exclusive and will not generate correct output if used interchangibly at source and destination</div>
+//!
+//! ## Benchmarks
+//! Benchmarks are maintained [diff-match-patch-bench repository](https://github.com/AnubhabB/diff-match-patch-rs-bench)
+//!
+//! | Lang. | Library | Diff Avg. | Patch Avg. | Bencher | Mode | Correct |
+//! |:-------:|:----------------------------------------------------------------------------------------:|:---------:|:----------:|:----------:|:-----------:|:-------:|
+//! | `rust` | [diff_match_patch v0.1.1](https://crates.io/crates/diff_match_patch)[^2] | 68.108 ms | 10.596 ms | Criterion | - | โœ… |
+//! | `rust` | [diffmatchpatch v0.0.4](https://crates.io/crates/diffmatchpatch)[^3] | 66.454 ms | - | Criterion | - | โŒ |
+//! | `rust` | [dmp v0.2.0](https://crates.io/crates/dmp) | 69.019 ms | 14.654 ms | Criterion | - | โœ… |
+//! | `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 65.487 ms | 631.13 ยตs | Criterion | `Efficient` | โœ… |
+//! | `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 65.642 ms | 1.1703 ms | Criterion | `Compat` | โœ… |
+//! | `go` | [go-diff](https://github.com/sergi/go-diff)[^1] | 50.31 ms | 135.2 ms | go test | - | โŒ |
+//! | `node` | [diff-match-patch](https://www.npmjs.com/package/diff-match-patch) | 246.90 ms | 1.07 ms | tinybench | - | โœ… |
+//! | `python`| [diff-match-patch](https://pypi.org/project/diff-match-patch/) | 1.01 s | 0.25 ms | timeit | - | โœ… |
+//!
+//!
+//! [^1]: [go-diff](https://github.com/sergi/go-diff) seems to generate wrong diffs for emoticons. This benchmark is on the text with the emoticons removed.
+//! [^2]: Adds an extra clone to the iterator because the `patch_apply` method takes mutable refc. to `diffs`.
+//! [^3]: The crate [diffmatchpatch v0.0.4](https://crates.io/crates/diffmatchpatch) is still a WIP, cound't find the `patch_apply` method.
+//!
+//!
pub mod dmp;
pub mod errors;