my fork of dmp
Merge pull request #3 from AnubhabB/debug
Fixes and documentation
| -rw-r--r-- | CHANGELOG.md | 17 | ||||
| -rw-r--r-- | Cargo.toml | 15 | ||||
| -rw-r--r-- | README.md | 16 | ||||
| -rw-r--r-- | src/dmp.rs | 51 | ||||
| -rw-r--r-- | src/html.rs | 2 | ||||
| -rw-r--r-- | src/lib.rs | 25 | ||||
| -rw-r--r-- | src/traits.rs | 11 | ||||
| -rw-r--r-- | tests/compat.rs | 2 | ||||
| -rw-r--r-- | tests/test.rs | 22 |
9 files changed, 109 insertions, 52 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..8e3723a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,17 @@ +# CHANGELOG.md + +## 0.2.0 + +Features: + + - stabilizing APIs & coming out of beta + - removes dependency burden on `chrono` for non-wasm targets - minor performance improvements for non-wasm targets + - tested and added more targets + +Fix: + + - Fixes a panic [Issue](https://github.com/AnubhabB/diff-match-patch-rs/issues/2) + +General: + + - elaborate compatibility tests with python, go and js libs. [Here](https://github.com/AnubhabB/diff-match-patch-rs-bench)
\ No newline at end of file @@ -1,8 +1,9 @@ [package] name = "diff-match-patch-rs" -version = "0.1.0-beta.4" +version = "0.2.0" edition = "2021" authors = ["Anubhab Bandyopadhyay"] +homepage = "https://docs.rs/diff-match-patch-rs" repository = "https://github.com/AnubhabB/diff-match-patch-rs.git" description = "A high-performance port of Myer's diff algorithm to perform the operations required for synchronizing plain text." readme = "README.md" @@ -11,11 +12,19 @@ keywords = ["diff", "match", "patch", "text-synchronization"] categories = ["algorithms", "text-processing", "text-editors", "wasm"] [dependencies] -chrono = "0" percent-encoding = "2" +[target_arch.'cfg(wasm32-unknown-unknown)'.dependencies] +chrono = "0" + [package.metadata.docs.rs] -targets = ["x86_64-unknown-linux-gnu", "wasm32-unknown-unknown"] +targets = [ + "aarch64-unknown-linux-gnu", + "aarch64-apple-darwin", + "x86_64-unknown-linux-gnu", + "x86_64-apple-darwin", + "wasm32-unknown-unknown" +] rustdoc-args = ["--generate-link-to-definition"] [[example]] @@ -21,7 +21,7 @@ A very **fast**, **accurate** and **wasm ready** port of [Diff Match Patch](http ```toml [dependencies] -diff-match-patch-rs = "0.1.0-beta.4" +diff-match-patch-rs = "0.2.0" ``` ### `Effitient` mode @@ -160,19 +160,23 @@ Benchmarks are maintained [diff-match-patch-bench repository](https://github.com | Lang. | Library | Diff Avg. | Patch Avg. | Bencher | Mode | Correct | |:-------:|:----------------------------------------------------------------------------------------:|:---------:|:----------:|:----------:|:-----------:|:-------:| | `rust` | [diff_match_patch v0.1.1](https://crates.io/crates/diff_match_patch)[^2] | 68.108 ms | 10.596 ms | Criterion | - | ✅ | -| `rust` | [diffmatchpatch v0.0.4](https://crates.io/crates/diffmatchpatch)[^3] | 66.454 ms | - | Criterion | - | ❌ | | `rust` | [dmp v0.2.0](https://crates.io/crates/dmp) | 69.019 ms | 14.654 ms | Criterion | - | ✅ | -| `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 65.487 ms | 631.13 µs | Criterion | `Efficient` | ✅ | -| `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 65.642 ms | 1.1703 ms | Criterion | `Compat` | ✅ | -| `go` | [go-diff](https://github.com/sergi/go-diff)[^1] | 50.31 ms | 135.2 ms | go test | - | ❌ | +| `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 64.66 ms | 631.13 µs | Criterion | `Efficient` | ✅ | +| `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 64.68 ms | 1.1703 ms | Criterion | `Compat` | ✅ | +| `go` | [go-diff](https://github.com/sergi/go-diff)[^1] | 50.31 ms | 135.2 ms | go test | - | ❌ | | `node` | [diff-match-patch](https://www.npmjs.com/package/diff-match-patch) | 246.90 ms | 1.07 ms | tinybench | - | ✅ | | `python`| [diff-match-patch](https://pypi.org/project/diff-match-patch/) | 1.01 s | 0.25 ms | timeit | - | ✅ | [^1]: [go-diff](https://github.com/sergi/go-diff) seems to generate wrong diffs for emoticons. This benchmark is on the text with the emoticons removed. [^2]: Adds an extra clone to the iterator because the `patch_apply` method takes mutable refc. to `diffs`. -[^3]: The crate [diffmatchpatch v0.0.4](https://crates.io/crates/diffmatchpatch) is still a WIP, cound't find the `patch_apply` method. +## Gotchas +**Diff incompatibility with `JavaScript` libs**: + +There are 2 kinds of implementations - one which use a `postprocessing` function for merging `unicode surrogates` which break compatibility with every other popular `diff-match-patch` implementations and the other kind (packages based on the original implementation) break while `urlEncode()` of unicode surrogates. +As of now, this crate brakes compatibility while working with `JS` generated diffs with the surrogate patch. +If you are interfacing with `JavaScript` in browser, using this crate through `wasm` would be ideal. ## Related projects @@ -1,7 +1,15 @@ use core::str; use std::{char, collections::HashMap, fmt::Display}; +#[cfg(target_arch = "wasm32")] use chrono::{NaiveTime, TimeDelta, Utc}; +#[cfg(not(target_arch = "wasm32"))] +use std::time::{Duration, Instant}; + +#[cfg(target_arch = "wasm32")] +pub(crate) type Time = NaiveTime; +#[cfg(not(target_arch = "wasm32"))] +pub(crate) type Time = Instant; use crate::{errors::Error, html::HtmlConfig, DType, PatchInput}; @@ -166,12 +174,19 @@ impl DiffMatchPatch { } /// creates a deadline from the given timeout - pub fn deadline(&self) -> Option<NaiveTime> { + #[cfg(target_arch = "wasm32")] + pub fn deadline(&self) -> Option<Time> { self.timeout() .and_then(|t| Utc::now().checked_add_signed(TimeDelta::milliseconds(t))) .map(|t| t.time()) } + #[cfg(not(target_arch = "wasm32"))] + pub fn deadline(&self) -> Option<Time> { + self.timeout() + .and_then(|t| Instant::now().checked_add(Duration::from_millis(t as u64))) + } + // returns configured match_threshold fn match_threshold(&self) -> f32 { self.match_threshold @@ -228,7 +243,7 @@ impl DiffMatchPatch { old_bytes: &'a [T], new_bytes: &'a [T], linemode: bool, - deadline: Option<NaiveTime>, + deadline: Option<Time>, ) -> Result<Vec<Diff<T>>, crate::errors::Error> { // First, check if lhs and rhs are equal if old_bytes == new_bytes { @@ -286,7 +301,7 @@ impl DiffMatchPatch { old: &'a [T], new: &'a [T], linemode: bool, - deadline: Option<NaiveTime>, + deadline: Option<Time>, ) -> Result<Vec<Diff<T>>, crate::errors::Error> { // returning all of the new part if old.is_empty() { @@ -432,7 +447,7 @@ impl DiffMatchPatch { &self, old: &'a [T], new: &'a [T], - deadline: Option<NaiveTime>, + deadline: Option<Time>, ) -> Result<Vec<Diff<T>>, crate::errors::Error> { let mut diffs = { let to_chars = Self::lines_to_chars(old, new); @@ -512,7 +527,7 @@ impl DiffMatchPatch { &self, old: &'a [usize], new: &'a [usize], - deadline: Option<NaiveTime>, + deadline: Option<Time>, ) -> Result<Vec<Diff<usize>>, crate::errors::Error> { if old == new { if old.is_empty() { @@ -557,7 +572,7 @@ impl DiffMatchPatch { &self, old: &'a [usize], new: &'a [usize], - deadline: Option<NaiveTime>, + deadline: Option<Time>, ) -> Result<Vec<Diff<usize>>, crate::errors::Error> { // returning all of the new part if old.is_empty() { @@ -637,7 +652,7 @@ impl DiffMatchPatch { &self, old: &'a [T], new: &'a [T], - deadline: Option<NaiveTime>, + deadline: Option<Time>, ) -> Result<Vec<Diff<T>>, crate::errors::Error> { // let text1_length = old.len() as isize; // let text2_length = new.len() as isize; @@ -669,9 +684,14 @@ impl DiffMatchPatch { for d in 0..max_d { // Bail out if deadline is reached. if let Some(tout) = deadline { + #[cfg(target_arch = "wasm32")] if Utc::now().time() > tout { break; } + #[cfg(not(target_arch = "wasm32"))] + if Instant::now() > tout { + break; + } } // Walk the front path one step. @@ -2350,12 +2370,12 @@ impl DiffMatchPatch { // of the previous patch. If there are patches expected at positions 10 and // 20, but the first patch was found at 12, delta is 2 and the second patch // has an effective expected position of 22. - let mut delta = 0; + let mut delta = 0_isize; let mut results = vec![false; patches.len()]; // patches.iter().enumerate().for_each(|(x, p)| { for (x, p) in patches.iter().enumerate() { - let expected_loc = p.start2 + delta; + let expected_loc = (p.start2 as isize + delta) as usize; let txt_old = Self::diff_text_old(&p.diffs); let (start_loc, end_loc) = if txt_old.len() > self.match_max_bits() { // patch_splitMax will only provide an oversized pattern in the case of @@ -2385,15 +2405,16 @@ impl DiffMatchPatch { if let Some(sl) = start_loc { // Found a match. :) results[x] = true; - delta = sl - expected_loc; + delta = sl as isize - expected_loc as isize; - let txt_new = if let Some(el) = end_loc { - // safeMid(text, start_loc, end_loc + Match_MaxBits - start_loc); - &source[sl..el + self.match_max_bits()] + let trg_idx = if let Some(el) = end_loc { + el + self.match_max_bits() } else { - &source[sl..sl + txt_old.len()] + sl + txt_old.len() }; + let txt_new = &source[sl..trg_idx.min(source.len())]; + if txt_old == txt_new { // Perfect match, just shove the replacement text in. source = [ @@ -2442,7 +2463,7 @@ impl DiffMatchPatch { // No match found. :( results[x] = false; // Subtract the delta for this failed patch from subsequent patches. - delta -= p.length2 - p.length1; + delta -= (p.length2 - p.length1) as isize; } } diff --git a/src/html.rs b/src/html.rs index 97b6c95..81d22a0 100644 --- a/src/html.rs +++ b/src/html.rs @@ -11,7 +11,7 @@ /// `insert_style`, `delete_style` and `equality_style` would add css style property to the output. /// E.g. if `insert_style: Some("background: yellow; color: purple")` is set the /// `insert` part of the pretty html would look like `<ins style="background: yellow; color: purple">insert text</ins>` -/// +/// /// [`pretty_html`]: struct.DiffMatchPatch.html/method.diff_pretty_html pub struct HtmlConfig<'a> { insert_tag: &'a str, @@ -1,13 +1,13 @@ //! # Efficient port of Google's diff-match-patch implemented in Rust -//! +//! //! [<img alt="github" src="https://img.shields.io/badge/github-Anubhab/diff_match_patch_rs-8da0cb?style=for-the-badge&labelColor=555555&logo=github" height="20">](https://github.com/AnubhabB/diff-match-patch-rs) //! [<img alt="crates.io" src="https://img.shields.io/crates/v/diff-match-patch-rs" height="20">](https://crates.io/crates/diff-match-patch-rs) //! [<img alt="docs.rs" src="https://img.shields.io/badge/docs.rs-diff_match_patch_rs?style=for-the-badge&logo=docs.rs&labelColor=%23555555" height="20">](https://docs.rs/diff-match-patch-rs) -//! -//! +//! +//! //! A very **fast**, **accurate** and **wasm ready** port of [Diff Match Patch](https://github.com/dmsnell/diff-match-patch) in Rust. The //! diff implementation is based on [Myers' diff algorithm](https://neil.fraser.name/writing/diff/myers.pdf). -//! +//! //! ## Highlights of this crate //! - Exposes two modes of operating with `diff-match-patch`, a `Efficient` mode and `Compat` mode. While the `Efficient` mode squeezes out the max performance the `Compat` mode ensures compatibility across other libraries or implementations (rust or otherwise). According to [Benchmarks](#benchmarks), our slower `Compat` mode is still faster than other implementations in rust. //! - **`Efficient`** mode works on `&[u8]` and the generated diffs break compatibility with other implementation. Use the **`Efficient`** mode ONLY if you are using this [crate](https://crates.io/crates/diff-match-patch-rs) at the source of diff generation and the destination. @@ -23,7 +23,7 @@ //! //! ```toml //! [dependencies] -//! diff-match-patch-rs = "0.1.0-beta.3" +//! diff-match-patch-rs = "0.2.0" //! ``` //! //! ### `Effitient` mode @@ -162,19 +162,20 @@ //! | Lang. | Library | Diff Avg. | Patch Avg. | Bencher | Mode | Correct | //! |:-------:|:----------------------------------------------------------------------------------------:|:---------:|:----------:|:----------:|:-----------:|:-------:| //! | `rust` | [diff_match_patch v0.1.1](https://crates.io/crates/diff_match_patch)[^2] | 68.108 ms | 10.596 ms | Criterion | - | ✅ | -//! | `rust` | [diffmatchpatch v0.0.4](https://crates.io/crates/diffmatchpatch)[^3] | 66.454 ms | - | Criterion | - | ❌ | //! | `rust` | [dmp v0.2.0](https://crates.io/crates/dmp) | 69.019 ms | 14.654 ms | Criterion | - | ✅ | -//! | `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 65.487 ms | 631.13 µs | Criterion | `Efficient` | ✅ | -//! | `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 65.642 ms | 1.1703 ms | Criterion | `Compat` | ✅ | -//! | `go` | [go-diff](https://github.com/sergi/go-diff)[^1] | 50.31 ms | 135.2 ms | go test | - | ❌ | +//! | `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 64.66 ms | 631.13 µs | Criterion | `Efficient` | ✅ | +//! | `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 64.68 ms | 1.1703 ms | Criterion | `Compat` | ✅ | +//! | `go` | [go-diff](https://github.com/sergi/go-diff)[^1] | 50.31 ms | 135.2 ms | go test | - | ❌ | //! | `node` | [diff-match-patch](https://www.npmjs.com/package/diff-match-patch) | 246.90 ms | 1.07 ms | tinybench | - | ✅ | //! | `python`| [diff-match-patch](https://pypi.org/project/diff-match-patch/) | 1.01 s | 0.25 ms | timeit | - | ✅ | //! //! -//! [^1]: [go-diff](https://github.com/sergi/go-diff) seems to generate wrong diffs for emoticons. This benchmark is on the text with the emoticons removed. -//! [^2]: Adds an extra clone to the iterator because the `patch_apply` method takes mutable refc. to `diffs`. -//! [^3]: The crate [diffmatchpatch v0.0.4](https://crates.io/crates/diffmatchpatch) is still a WIP, cound't find the `patch_apply` method. +//! ## Gotchas +//! **Diff incompatibility with `JavaScript` libs**: //! +//! There are 2 kinds of implementations - one which use a `postprocessing` function for merging `unicode surrogates` which break compatibility with every other popular `diff-match-patch` implementations and the other kind (packages based on the original implementation) break while `urlEncode()` of unicode surrogates. +//! As of now, this crate brakes compatibility while working with `JS` generated diffs with the surrogate patch. +//! If you are interfacing with `JavaScript` in browser, using this crate through `wasm` would be ideal. //! pub mod dmp; diff --git a/src/traits.rs b/src/traits.rs index f02c63c..90f9078 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -1,10 +1,9 @@ use std::hash::Hash; -use chrono::NaiveTime; use percent_encoding::{percent_decode, AsciiSet, CONTROLS}; use crate::{ - dmp::{Diff, DiffMatchPatch}, + dmp::{Diff, DiffMatchPatch, Time}, Ops, }; @@ -34,7 +33,7 @@ pub trait DType: Copy + Ord + Eq + Hash { new: &[Self], x: usize, y: usize, - deadline: Option<NaiveTime>, + deadline: Option<Time>, ) -> Result<Vec<Diff<Self>>, crate::errors::Error>; fn from_char(c: char) -> Self; @@ -60,7 +59,7 @@ impl DType for u8 { new: &[u8], x: usize, y: usize, - deadline: Option<NaiveTime>, + deadline: Option<Time>, ) -> Result<Vec<Diff<u8>>, crate::errors::Error> { let old_a = &old[..x]; let new_a = &new[..y]; @@ -221,7 +220,7 @@ impl DType for char { new: &[char], x: usize, y: usize, - deadline: Option<NaiveTime>, + deadline: Option<Time>, ) -> Result<Vec<Diff<char>>, crate::errors::Error> { let old_a = &old[..x]; let new_a = &new[..y]; @@ -302,7 +301,7 @@ impl DType for usize { new: &[usize], x: usize, y: usize, - deadline: Option<NaiveTime>, + deadline: Option<Time>, ) -> Result<Vec<Diff<usize>>, crate::errors::Error> { let old_a = &old[..x]; let new_a = &new[..y]; diff --git a/tests/compat.rs b/tests/compat.rs index 29018c2..5423913 100644 --- a/tests/compat.rs +++ b/tests/compat.rs @@ -1 +1 @@ -// Compatibility tests have been moved to `https://anubhabb/diff-match-path-rs-bench` repo
\ No newline at end of file +// Compatibility tests have been moved to `https://anubhabb/diff-match-path-rs-bench` repo diff --git a/tests/test.rs b/tests/test.rs index 286200e..2bac015 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -1,7 +1,5 @@ use std::time::Instant; -use chrono::Utc; - use diff_match_patch_rs::dmp::Diff; use diff_match_patch_rs::html::HtmlConfig; @@ -387,11 +385,11 @@ fn test_diff_main() -> Result<(), Error> { let a = vec!["`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; 2048].join(""); let b = vec!["I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; 2048].join(""); - let start = Utc::now().time(); + let start = Instant::now(); dmp.diff_main::<Efficient>(&a, &b)?; - let end = Utc::now().time(); + let end = Instant::now(); // Test that we took at least the timeout period (+ 5ms being generous). - assert!((end - start).num_milliseconds() <= LOW_TIMEOUT as i64 + 5); + assert!((end - start).as_millis() <= LOW_TIMEOUT as u128 + 5); // Test the linemode speedup. // Must be long to pass the 100 char cutoff. @@ -599,11 +597,11 @@ fn test_diff_main_compat() -> Result<(), Error> { let a = vec!["`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; 2048].join(""); let b = vec!["I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; 2048].join(""); - let start = Utc::now().time(); + let start = Instant::now(); dmp.diff_main::<Efficient>(&a, &b)?; - let end = Utc::now().time(); + let end = Instant::now(); // Test that we took at least the timeout period (+ 5ms being generous). - assert!((end - start).num_milliseconds() <= LOW_TIMEOUT as i64 + 5); + assert!((end - start).as_millis() <= LOW_TIMEOUT as u128 + 5); // Test the linemode speedup. // Must be long to pass the 100 char cutoff. @@ -1298,6 +1296,14 @@ fn test_patch_apply() -> Result<(), Error> { dmp.patch_apply(&patches, "x")? ); + let dmp = DiffMatchPatch::new(); + // Tests for issue https://github.com/AnubhabB/diff-match-patch-rs/issues/2 + let strp = "@@ -1,11 +1,5 @@\n-%F0%9F%8D%8A, a\n+A\n ah o\n@@ -3,17 +3,21 @@\n h orange\n- \n+!%F0%9F%8C%8A\n is the n\n@@ -23,10 +23,8 @@\n new \n-black!\n+%F0%9F%8C%8A\n"; + let patches = dmp.patch_from_text::<Compat>(strp)?; + assert_eq!(strp, dmp.patch_to_text(&patches)); + let (patched, _) = dmp.patch_apply(&patches, "🍊, aah orange is the new black!")?; + assert_eq!(patched, "Aah orange!🌊is the new 🌊"); + Ok(()) } |