my fork of dmp
Merge branch 'main' of github.com:AnubhabB/diff-match-patch-rs
Anubhab Bandyopadhyay 2024-09-02
parent cb7e07c · parent ad8b234 · commit 79b698b
-rw-r--r--CHANGELOG.md17
-rw-r--r--Cargo.toml15
-rw-r--r--README.md16
-rw-r--r--src/dmp.rs51
-rw-r--r--src/html.rs2
-rw-r--r--src/lib.rs25
-rw-r--r--src/traits.rs11
-rw-r--r--tests/compat.rs2
-rw-r--r--tests/test.rs22
9 files changed, 109 insertions, 52 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..8e3723a
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,17 @@
+# CHANGELOG.md
+
+## 0.2.0
+
+Features:
+
+ - stabilizing APIs & coming out of beta
+ - removes dependency burden on `chrono` for non-wasm targets - minor performance improvements for non-wasm targets
+ - tested and added more targets
+
+Fix:
+
+ - Fixes a panic [Issue](https://github.com/AnubhabB/diff-match-patch-rs/issues/2)
+
+General:
+
+ - elaborate compatibility tests with python, go and js libs. [Here](https://github.com/AnubhabB/diff-match-patch-rs-bench) \ No newline at end of file
diff --git a/Cargo.toml b/Cargo.toml
index 5ab495c..5b035fe 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,8 +1,9 @@
[package]
name = "diff-match-patch-rs"
-version = "0.1.0-beta.4"
+version = "0.2.0"
edition = "2021"
authors = ["Anubhab Bandyopadhyay"]
+homepage = "https://docs.rs/diff-match-patch-rs"
repository = "https://github.com/AnubhabB/diff-match-patch-rs.git"
description = "A high-performance port of Myer's diff algorithm to perform the operations required for synchronizing plain text."
readme = "README.md"
@@ -11,11 +12,19 @@ keywords = ["diff", "match", "patch", "text-synchronization"]
categories = ["algorithms", "text-processing", "text-editors", "wasm"]
[dependencies]
-chrono = "0"
percent-encoding = "2"
+[target_arch.'cfg(wasm32-unknown-unknown)'.dependencies]
+chrono = "0"
+
[package.metadata.docs.rs]
-targets = ["x86_64-unknown-linux-gnu", "wasm32-unknown-unknown"]
+targets = [
+ "aarch64-unknown-linux-gnu",
+ "aarch64-apple-darwin",
+ "x86_64-unknown-linux-gnu",
+ "x86_64-apple-darwin",
+ "wasm32-unknown-unknown"
+]
rustdoc-args = ["--generate-link-to-definition"]
[[example]]
diff --git a/README.md b/README.md
index e544a4c..f5fb2b3 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ A very **fast**, **accurate** and **wasm ready** port of [Diff Match Patch](http
```toml
[dependencies]
-diff-match-patch-rs = "0.1.0-beta.4"
+diff-match-patch-rs = "0.2.0"
```
### `Effitient` mode
@@ -160,19 +160,23 @@ Benchmarks are maintained [diff-match-patch-bench repository](https://github.com
| Lang. | Library | Diff Avg. | Patch Avg. | Bencher | Mode | Correct |
|:-------:|:----------------------------------------------------------------------------------------:|:---------:|:----------:|:----------:|:-----------:|:-------:|
| `rust` | [diff_match_patch v0.1.1](https://crates.io/crates/diff_match_patch)[^2] | 68.108 ms | 10.596 ms | Criterion | - | ✅ |
-| `rust` | [diffmatchpatch v0.0.4](https://crates.io/crates/diffmatchpatch)[^3] | 66.454 ms | - | Criterion | - | ❌ |
| `rust` | [dmp v0.2.0](https://crates.io/crates/dmp) | 69.019 ms | 14.654 ms | Criterion | - | ✅ |
-| `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 65.487 ms | 631.13 µs | Criterion | `Efficient` | ✅ |
-| `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 65.642 ms | 1.1703 ms | Criterion | `Compat` | ✅ |
-| `go` | [go-diff](https://github.com/sergi/go-diff)[^1] | 50.31 ms | 135.2 ms | go test | - | ❌ |
+| `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 64.66 ms | 631.13 µs | Criterion | `Efficient` | ✅ |
+| `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 64.68 ms | 1.1703 ms | Criterion | `Compat` | ✅ |
+| `go` | [go-diff](https://github.com/sergi/go-diff)[^1] | 50.31 ms | 135.2 ms | go test | - | ❌ |
| `node` | [diff-match-patch](https://www.npmjs.com/package/diff-match-patch) | 246.90 ms | 1.07 ms | tinybench | - | ✅ |
| `python`| [diff-match-patch](https://pypi.org/project/diff-match-patch/) | 1.01 s | 0.25 ms | timeit | - | ✅ |
[^1]: [go-diff](https://github.com/sergi/go-diff) seems to generate wrong diffs for emoticons. This benchmark is on the text with the emoticons removed.
[^2]: Adds an extra clone to the iterator because the `patch_apply` method takes mutable refc. to `diffs`.
-[^3]: The crate [diffmatchpatch v0.0.4](https://crates.io/crates/diffmatchpatch) is still a WIP, cound't find the `patch_apply` method.
+## Gotchas
+**Diff incompatibility with `JavaScript` libs**:
+
+There are 2 kinds of implementations - one which use a `postprocessing` function for merging `unicode surrogates` which break compatibility with every other popular `diff-match-patch` implementations and the other kind (packages based on the original implementation) break while `urlEncode()` of unicode surrogates.
+As of now, this crate brakes compatibility while working with `JS` generated diffs with the surrogate patch.
+If you are interfacing with `JavaScript` in browser, using this crate through `wasm` would be ideal.
## Related projects
diff --git a/src/dmp.rs b/src/dmp.rs
index 6ffda77..fa07081 100644
--- a/src/dmp.rs
+++ b/src/dmp.rs
@@ -1,7 +1,15 @@
use core::str;
use std::{char, collections::HashMap, fmt::Display};
+#[cfg(target_arch = "wasm32")]
use chrono::{NaiveTime, TimeDelta, Utc};
+#[cfg(not(target_arch = "wasm32"))]
+use std::time::{Duration, Instant};
+
+#[cfg(target_arch = "wasm32")]
+pub(crate) type Time = NaiveTime;
+#[cfg(not(target_arch = "wasm32"))]
+pub(crate) type Time = Instant;
use crate::{errors::Error, html::HtmlConfig, DType, PatchInput};
@@ -166,12 +174,19 @@ impl DiffMatchPatch {
}
/// creates a deadline from the given timeout
- pub fn deadline(&self) -> Option<NaiveTime> {
+ #[cfg(target_arch = "wasm32")]
+ pub fn deadline(&self) -> Option<Time> {
self.timeout()
.and_then(|t| Utc::now().checked_add_signed(TimeDelta::milliseconds(t)))
.map(|t| t.time())
}
+ #[cfg(not(target_arch = "wasm32"))]
+ pub fn deadline(&self) -> Option<Time> {
+ self.timeout()
+ .and_then(|t| Instant::now().checked_add(Duration::from_millis(t as u64)))
+ }
+
// returns configured match_threshold
fn match_threshold(&self) -> f32 {
self.match_threshold
@@ -228,7 +243,7 @@ impl DiffMatchPatch {
old_bytes: &'a [T],
new_bytes: &'a [T],
linemode: bool,
- deadline: Option<NaiveTime>,
+ deadline: Option<Time>,
) -> Result<Vec<Diff<T>>, crate::errors::Error> {
// First, check if lhs and rhs are equal
if old_bytes == new_bytes {
@@ -286,7 +301,7 @@ impl DiffMatchPatch {
old: &'a [T],
new: &'a [T],
linemode: bool,
- deadline: Option<NaiveTime>,
+ deadline: Option<Time>,
) -> Result<Vec<Diff<T>>, crate::errors::Error> {
// returning all of the new part
if old.is_empty() {
@@ -432,7 +447,7 @@ impl DiffMatchPatch {
&self,
old: &'a [T],
new: &'a [T],
- deadline: Option<NaiveTime>,
+ deadline: Option<Time>,
) -> Result<Vec<Diff<T>>, crate::errors::Error> {
let mut diffs = {
let to_chars = Self::lines_to_chars(old, new);
@@ -512,7 +527,7 @@ impl DiffMatchPatch {
&self,
old: &'a [usize],
new: &'a [usize],
- deadline: Option<NaiveTime>,
+ deadline: Option<Time>,
) -> Result<Vec<Diff<usize>>, crate::errors::Error> {
if old == new {
if old.is_empty() {
@@ -557,7 +572,7 @@ impl DiffMatchPatch {
&self,
old: &'a [usize],
new: &'a [usize],
- deadline: Option<NaiveTime>,
+ deadline: Option<Time>,
) -> Result<Vec<Diff<usize>>, crate::errors::Error> {
// returning all of the new part
if old.is_empty() {
@@ -637,7 +652,7 @@ impl DiffMatchPatch {
&self,
old: &'a [T],
new: &'a [T],
- deadline: Option<NaiveTime>,
+ deadline: Option<Time>,
) -> Result<Vec<Diff<T>>, crate::errors::Error> {
// let text1_length = old.len() as isize;
// let text2_length = new.len() as isize;
@@ -669,9 +684,14 @@ impl DiffMatchPatch {
for d in 0..max_d {
// Bail out if deadline is reached.
if let Some(tout) = deadline {
+ #[cfg(target_arch = "wasm32")]
if Utc::now().time() > tout {
break;
}
+ #[cfg(not(target_arch = "wasm32"))]
+ if Instant::now() > tout {
+ break;
+ }
}
// Walk the front path one step.
@@ -2350,12 +2370,12 @@ impl DiffMatchPatch {
// of the previous patch. If there are patches expected at positions 10 and
// 20, but the first patch was found at 12, delta is 2 and the second patch
// has an effective expected position of 22.
- let mut delta = 0;
+ let mut delta = 0_isize;
let mut results = vec![false; patches.len()];
// patches.iter().enumerate().for_each(|(x, p)| {
for (x, p) in patches.iter().enumerate() {
- let expected_loc = p.start2 + delta;
+ let expected_loc = (p.start2 as isize + delta) as usize;
let txt_old = Self::diff_text_old(&p.diffs);
let (start_loc, end_loc) = if txt_old.len() > self.match_max_bits() {
// patch_splitMax will only provide an oversized pattern in the case of
@@ -2385,15 +2405,16 @@ impl DiffMatchPatch {
if let Some(sl) = start_loc {
// Found a match. :)
results[x] = true;
- delta = sl - expected_loc;
+ delta = sl as isize - expected_loc as isize;
- let txt_new = if let Some(el) = end_loc {
- // safeMid(text, start_loc, end_loc + Match_MaxBits - start_loc);
- &source[sl..el + self.match_max_bits()]
+ let trg_idx = if let Some(el) = end_loc {
+ el + self.match_max_bits()
} else {
- &source[sl..sl + txt_old.len()]
+ sl + txt_old.len()
};
+ let txt_new = &source[sl..trg_idx.min(source.len())];
+
if txt_old == txt_new {
// Perfect match, just shove the replacement text in.
source = [
@@ -2442,7 +2463,7 @@ impl DiffMatchPatch {
// No match found. :(
results[x] = false;
// Subtract the delta for this failed patch from subsequent patches.
- delta -= p.length2 - p.length1;
+ delta -= (p.length2 - p.length1) as isize;
}
}
diff --git a/src/html.rs b/src/html.rs
index 97b6c95..81d22a0 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -11,7 +11,7 @@
/// `insert_style`, `delete_style` and `equality_style` would add css style property to the output.
/// E.g. if `insert_style: Some("background: yellow; color: purple")` is set the
/// `insert` part of the pretty html would look like `<ins style="background: yellow; color: purple">insert text</ins>`
-///
+///
/// [`pretty_html`]: struct.DiffMatchPatch.html/method.diff_pretty_html
pub struct HtmlConfig<'a> {
insert_tag: &'a str,
diff --git a/src/lib.rs b/src/lib.rs
index 8d7e5af..727aea5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,13 +1,13 @@
//! # Efficient port of Google's diff-match-patch implemented in Rust
-//!
+//!
//! [<img alt="github" src="https://img.shields.io/badge/github-Anubhab/diff_match_patch_rs-8da0cb?style=for-the-badge&labelColor=555555&logo=github" height="20">](https://github.com/AnubhabB/diff-match-patch-rs)
//! [<img alt="crates.io" src="https://img.shields.io/crates/v/diff-match-patch-rs" height="20">](https://crates.io/crates/diff-match-patch-rs)
//! [<img alt="docs.rs" src="https://img.shields.io/badge/docs.rs-diff_match_patch_rs?style=for-the-badge&logo=docs.rs&labelColor=%23555555" height="20">](https://docs.rs/diff-match-patch-rs)
-//!
-//!
+//!
+//!
//! A very **fast**, **accurate** and **wasm ready** port of [Diff Match Patch](https://github.com/dmsnell/diff-match-patch) in Rust. The
//! diff implementation is based on [Myers' diff algorithm](https://neil.fraser.name/writing/diff/myers.pdf).
-//!
+//!
//! ## Highlights of this crate
//! - Exposes two modes of operating with `diff-match-patch`, a `Efficient` mode and `Compat` mode. While the `Efficient` mode squeezes out the max performance the `Compat` mode ensures compatibility across other libraries or implementations (rust or otherwise). According to [Benchmarks](#benchmarks), our slower `Compat` mode is still faster than other implementations in rust.
//! - **`Efficient`** mode works on `&[u8]` and the generated diffs break compatibility with other implementation. Use the **`Efficient`** mode ONLY if you are using this [crate](https://crates.io/crates/diff-match-patch-rs) at the source of diff generation and the destination.
@@ -23,7 +23,7 @@
//!
//! ```toml
//! [dependencies]
-//! diff-match-patch-rs = "0.1.0-beta.3"
+//! diff-match-patch-rs = "0.2.0"
//! ```
//!
//! ### `Effitient` mode
@@ -162,19 +162,20 @@
//! | Lang. | Library | Diff Avg. | Patch Avg. | Bencher | Mode | Correct |
//! |:-------:|:----------------------------------------------------------------------------------------:|:---------:|:----------:|:----------:|:-----------:|:-------:|
//! | `rust` | [diff_match_patch v0.1.1](https://crates.io/crates/diff_match_patch)[^2] | 68.108 ms | 10.596 ms | Criterion | - | ✅ |
-//! | `rust` | [diffmatchpatch v0.0.4](https://crates.io/crates/diffmatchpatch)[^3] | 66.454 ms | - | Criterion | - | ❌ |
//! | `rust` | [dmp v0.2.0](https://crates.io/crates/dmp) | 69.019 ms | 14.654 ms | Criterion | - | ✅ |
-//! | `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 65.487 ms | 631.13 µs | Criterion | `Efficient` | ✅ |
-//! | `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 65.642 ms | 1.1703 ms | Criterion | `Compat` | ✅ |
-//! | `go` | [go-diff](https://github.com/sergi/go-diff)[^1] | 50.31 ms | 135.2 ms | go test | - | ❌ |
+//! | `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 64.66 ms | 631.13 µs | Criterion | `Efficient` | ✅ |
+//! | `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 64.68 ms | 1.1703 ms | Criterion | `Compat` | ✅ |
+//! | `go` | [go-diff](https://github.com/sergi/go-diff)[^1] | 50.31 ms | 135.2 ms | go test | - | ❌ |
//! | `node` | [diff-match-patch](https://www.npmjs.com/package/diff-match-patch) | 246.90 ms | 1.07 ms | tinybench | - | ✅ |
//! | `python`| [diff-match-patch](https://pypi.org/project/diff-match-patch/) | 1.01 s | 0.25 ms | timeit | - | ✅ |
//!
//!
-//! [^1]: [go-diff](https://github.com/sergi/go-diff) seems to generate wrong diffs for emoticons. This benchmark is on the text with the emoticons removed.
-//! [^2]: Adds an extra clone to the iterator because the `patch_apply` method takes mutable refc. to `diffs`.
-//! [^3]: The crate [diffmatchpatch v0.0.4](https://crates.io/crates/diffmatchpatch) is still a WIP, cound't find the `patch_apply` method.
+//! ## Gotchas
+//! **Diff incompatibility with `JavaScript` libs**:
//!
+//! There are 2 kinds of implementations - one which use a `postprocessing` function for merging `unicode surrogates` which break compatibility with every other popular `diff-match-patch` implementations and the other kind (packages based on the original implementation) break while `urlEncode()` of unicode surrogates.
+//! As of now, this crate brakes compatibility while working with `JS` generated diffs with the surrogate patch.
+//! If you are interfacing with `JavaScript` in browser, using this crate through `wasm` would be ideal.
//!
pub mod dmp;
diff --git a/src/traits.rs b/src/traits.rs
index f02c63c..90f9078 100644
--- a/src/traits.rs
+++ b/src/traits.rs
@@ -1,10 +1,9 @@
use std::hash::Hash;
-use chrono::NaiveTime;
use percent_encoding::{percent_decode, AsciiSet, CONTROLS};
use crate::{
- dmp::{Diff, DiffMatchPatch},
+ dmp::{Diff, DiffMatchPatch, Time},
Ops,
};
@@ -34,7 +33,7 @@ pub trait DType: Copy + Ord + Eq + Hash {
new: &[Self],
x: usize,
y: usize,
- deadline: Option<NaiveTime>,
+ deadline: Option<Time>,
) -> Result<Vec<Diff<Self>>, crate::errors::Error>;
fn from_char(c: char) -> Self;
@@ -60,7 +59,7 @@ impl DType for u8 {
new: &[u8],
x: usize,
y: usize,
- deadline: Option<NaiveTime>,
+ deadline: Option<Time>,
) -> Result<Vec<Diff<u8>>, crate::errors::Error> {
let old_a = &old[..x];
let new_a = &new[..y];
@@ -221,7 +220,7 @@ impl DType for char {
new: &[char],
x: usize,
y: usize,
- deadline: Option<NaiveTime>,
+ deadline: Option<Time>,
) -> Result<Vec<Diff<char>>, crate::errors::Error> {
let old_a = &old[..x];
let new_a = &new[..y];
@@ -302,7 +301,7 @@ impl DType for usize {
new: &[usize],
x: usize,
y: usize,
- deadline: Option<NaiveTime>,
+ deadline: Option<Time>,
) -> Result<Vec<Diff<usize>>, crate::errors::Error> {
let old_a = &old[..x];
let new_a = &new[..y];
diff --git a/tests/compat.rs b/tests/compat.rs
index 29018c2..5423913 100644
--- a/tests/compat.rs
+++ b/tests/compat.rs
@@ -1 +1 @@
-// Compatibility tests have been moved to `https://anubhabb/diff-match-path-rs-bench` repo \ No newline at end of file
+// Compatibility tests have been moved to `https://anubhabb/diff-match-path-rs-bench` repo
diff --git a/tests/test.rs b/tests/test.rs
index 286200e..2bac015 100644
--- a/tests/test.rs
+++ b/tests/test.rs
@@ -1,7 +1,5 @@
use std::time::Instant;
-use chrono::Utc;
-
use diff_match_patch_rs::dmp::Diff;
use diff_match_patch_rs::html::HtmlConfig;
@@ -387,11 +385,11 @@ fn test_diff_main() -> Result<(), Error> {
let a = vec!["`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; 2048].join("");
let b = vec!["I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; 2048].join("");
- let start = Utc::now().time();
+ let start = Instant::now();
dmp.diff_main::<Efficient>(&a, &b)?;
- let end = Utc::now().time();
+ let end = Instant::now();
// Test that we took at least the timeout period (+ 5ms being generous).
- assert!((end - start).num_milliseconds() <= LOW_TIMEOUT as i64 + 5);
+ assert!((end - start).as_millis() <= LOW_TIMEOUT as u128 + 5);
// Test the linemode speedup.
// Must be long to pass the 100 char cutoff.
@@ -599,11 +597,11 @@ fn test_diff_main_compat() -> Result<(), Error> {
let a = vec!["`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; 2048].join("");
let b = vec!["I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; 2048].join("");
- let start = Utc::now().time();
+ let start = Instant::now();
dmp.diff_main::<Efficient>(&a, &b)?;
- let end = Utc::now().time();
+ let end = Instant::now();
// Test that we took at least the timeout period (+ 5ms being generous).
- assert!((end - start).num_milliseconds() <= LOW_TIMEOUT as i64 + 5);
+ assert!((end - start).as_millis() <= LOW_TIMEOUT as u128 + 5);
// Test the linemode speedup.
// Must be long to pass the 100 char cutoff.
@@ -1298,6 +1296,14 @@ fn test_patch_apply() -> Result<(), Error> {
dmp.patch_apply(&patches, "x")?
);
+ let dmp = DiffMatchPatch::new();
+ // Tests for issue https://github.com/AnubhabB/diff-match-patch-rs/issues/2
+ let strp = "@@ -1,11 +1,5 @@\n-%F0%9F%8D%8A, a\n+A\n ah o\n@@ -3,17 +3,21 @@\n h orange\n- \n+!%F0%9F%8C%8A\n is the n\n@@ -23,10 +23,8 @@\n new \n-black!\n+%F0%9F%8C%8A\n";
+ let patches = dmp.patch_from_text::<Compat>(strp)?;
+ assert_eq!(strp, dmp.patch_to_text(&patches));
+ let (patched, _) = dmp.patch_apply(&patches, "🍊, aah orange is the new black!")?;
+ assert_eq!(patched, "Aah orange!🌊is the new 🌊");
+
Ok(())
}