my fork of dmp
Merge pull request #4 from AnubhabB/match-modes
Match modes with support for `Compat` and `Efficient` modes
Anubhab Bandyopadhyay 2024-09-17
parent 987177b · parent 77968e8 · commit 7952db7
-rw-r--r--CHANGELOG.md5
-rw-r--r--Cargo.toml2
-rw-r--r--README.md23
-rw-r--r--src/dmp.rs25
-rw-r--r--src/lib.rs35
-rw-r--r--tests/test.rs38
6 files changed, 108 insertions, 20 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index dfb308e..22e155e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
# CHANGELOG.md
+## 0.3.0
+Breaking Change:
+
+ - the `match_main` API now supports `Efficient` and `Compat` modes. The call to `match_main` is now `match_main::<Efficient>` or `match_main::<Compat>` depending on your use-case
+
## 0.2.1
Fix:
diff --git a/Cargo.toml b/Cargo.toml
index e7e0481..914a111 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "diff-match-patch-rs"
-version = "0.2.1"
+version = "0.3.0"
edition = "2021"
authors = ["Anubhab Bandyopadhyay"]
homepage = "https://docs.rs/diff-match-patch-rs"
diff --git a/README.md b/README.md
index f0ec8bb..b2762b3 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ A very **fast**, **accurate** and **wasm ready** port of [Diff Match Patch](http
```toml
[dependencies]
-diff-match-patch-rs = "0.2.1"
+diff-match-patch-rs = "0.3.0"
```
### `Efficient` mode
@@ -145,6 +145,27 @@ fn main() -> Result<(), Error> {
}
```
+### `Match` - fuzzy match of pattern in Text
+
+```rust
+use diff_match_patch_rs::{DiffMatchPatch, Compat, Error, PatchInput};
+
+// This is the source text
+const TXT: &str = "I am the very model of a modern Major-General, I've information on vegetable, animal, and mineral, 🚀👏👀";
+
+// The pattern we are trying to find
+const PATTERN: &str = " that berry ";
+
+// Returns `location` of match if found, `None` if not found
+fn main() -> Option<usize> {
+ let dmp = DiffMatchPatch::new();
+
+ // works with both `Efficient` and `Compat` modes
+ // `5` here is an approx location to find `nearby` matches
+ dmp.match_main::<Efficient>(TXT, PATTERN, 5) // this should return Some(4)
+}
+```
+
#### Note
The `Efficient` and `Compat` mode APIs are identical with the only change being the `generic` parameter declared during the calls.
diff --git a/src/dmp.rs b/src/dmp.rs
index fa07081..3e2210f 100644
--- a/src/dmp.rs
+++ b/src/dmp.rs
@@ -2773,9 +2773,28 @@ impl DiffMatchPatch {
/// If `match_threshold` is closer to 1 then it is more likely that a match will be found.
/// The larger `match_threshold` is, the slower match_main() may take to compute. `match_threshold` defaults to 0.5 and can be updated by `dmp.set_match_threshold()` method.
///
- /// If no match is found, the function returns -1.
- pub fn match_main(&self, text: &str, pattern: &str, loc: usize) -> Option<usize> {
- self.match_internal(text.as_bytes(), pattern.as_bytes(), loc)
+ /// If no match is found, the function returns `None`
+ ///
+ /// # Examples
+ /// ```
+ /// # use diff_match_patch_rs::{DiffMatchPatch, Error, Efficient};
+ ///
+ /// # fn main() {
+ /// let dmp = DiffMatchPatch::new();
+ /// // Works with both `Compat` and `Efficient` modes
+ /// let matched = dmp.match_main::<Efficient>(
+ /// "I am the very model of a modern major general.",
+ /// " that berry ",
+ /// 5
+ /// );
+ ///
+ /// # assert_eq!(matched, Some(4));
+ /// # }
+ /// ```
+ pub fn match_main<T: DType>(&self, text: &str, pattern: &str, loc: usize) -> Option<usize> {
+ let text = T::from_str(text);
+ let pattern = T::from_str(pattern);
+ self.match_internal(&text, &pattern, loc)
}
/// Given two texts, or an already computed list of differences (`diffs`), return an array of patch objects.
diff --git a/src/lib.rs b/src/lib.rs
index 4b0ba80..29f5a61 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -23,7 +23,7 @@
//!
//! ```toml
//! [dependencies]
-//! diff-match-patch-rs = "0.2.1"
+//! diff-match-patch-rs = "0.3.0"
//! ```
//!
//! ### `Efficient` mode
@@ -146,6 +146,25 @@
//! at_destination(&patches)
//! }
//! ```
+//! ### `Match` - fuzzy match of pattern in Text
+//!
+//! ```rust
+//! use diff_match_patch_rs::{DiffMatchPatch, Compat, Error, PatchInput};
+//! // This is the source text
+//! const TXT: &str = "I am the very model of a modern Major-General, I've information on vegetable, animal, and mineral, 🚀👏👀";
+//!
+//! // The pattern we are trying to find
+//! const PATTERN: &str = " that berry ";
+//!
+//! // Returns `location` of match if found, `None` if not found
+//! fn main() -> Option<usize> {
+//! let dmp = DiffMatchPatch::new();
+//!
+//! // works with both `Efficient` and `Compat` modes
+//! // `5` here is an approx location to find `nearby` matches
+//! dmp.match_main::<Efficient>(TXT, PATTERN, 5) // this should return Some(4)
+//! }
+//! ```
//!
//! #### Note
//! The `Efficient` and `Compat` mode APIs are identical with the only change being the `generic` parameter declared during the calls.
@@ -161,13 +180,13 @@
//!
//! | Lang. | Library | Diff Avg. | Patch Avg. | Bencher | Mode | Correct |
//! |:-------:|:----------------------------------------------------------------------------------------:|:---------:|:----------:|:----------:|:-----------:|:-------:|
-//! | `rust` | [diff_match_patch v0.1.1](https://crates.io/crates/diff_match_patch)[^2] | 68.108 ms | 10.596 ms | Criterion | - | ✅ |
-//! | `rust` | [dmp v0.2.0](https://crates.io/crates/dmp) | 69.019 ms | 14.654 ms | Criterion | - | ✅ |
-//! | `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 64.66 ms | 631.13 µs | Criterion | `Efficient` | ✅ |
-//! | `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 64.68 ms | 1.1703 ms | Criterion | `Compat` | ✅ |
-//! | `go` | [go-diff](https://github.com/sergi/go-diff) | 50.31 ms | 135.2 ms | go test | - | ✅ |
-//! | `node` | [diff-match-patch](https://www.npmjs.com/package/diff-match-patch)[^1] | 246.90 ms | 1.07 ms | tinybench | - | ❌ |
-//! | `python`| [diff-match-patch](https://pypi.org/project/diff-match-patch/) | 1.01 s | 0.25 ms | timeit | - | ✅ |
+//! | `rust` | [diff_match_patch v0.1.1](https://crates.io/crates/diff_match_patch)[^2] | 68.108 ms | 10.596 ms | Criterion | - | ✅ |
+//! | `rust` | [dmp v0.2.0](https://crates.io/crates/dmp) | 69.019 ms | 14.654 ms | Criterion | - | ✅ |
+//! | `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 64.66 ms | 631.13 µs | Criterion | `Efficient` | ✅ |
+//! | `rust` | [diff-match-patch-rs](https://github.com/AnubhabB/diff-match-patch-rs.git)<sup>our</sup> | 64.68 ms | 1.1703 ms | Criterion | `Compat` | ✅ |
+//! | `go` | [go-diff](https://github.com/sergi/go-diff) | 50.31 ms | 135.2 ms | go test | - | ✅ |
+//! | `node` | [diff-match-patch](https://www.npmjs.com/package/diff-match-patch)[^1] | 246.90 ms | 1.07 ms | tinybench | - | ❌ |
+//! | `python`| [diff-match-patch](https://pypi.org/project/diff-match-patch/) | 1.01 s | 0.25 ms | timeit | - | ✅ |
//!
//! [^1]: [diff-match-patch](https://www.npmjs.com/package/diff-match-patch) generated `patch text` and `delta` breaks on `unicode surrogates`.
//! [^2]: Adds an extra clone to the iterator because the `patch_apply` method takes mutable refc. to `diffs`.
diff --git a/tests/test.rs b/tests/test.rs
index 2bac015..5e96a5f 100644
--- a/tests/test.rs
+++ b/tests/test.rs
@@ -1312,21 +1312,45 @@ fn test_match_main() {
let dmp = DiffMatchPatch::default();
// Full match.
// Shortcut matches.
- assert_eq!(Some(0), dmp.match_main("abcdef", "abcdef", 1000));
- assert_eq!(None, dmp.match_main("", "abcdef", 1));
- assert_eq!(Some(3), dmp.match_main("abcdef", "", 3));
- assert_eq!(Some(3), dmp.match_main("abcdef", "de", 3));
+ assert_eq!(
+ Some(0),
+ dmp.match_main::<Efficient>("abcdef", "abcdef", 1000)
+ );
+ assert_eq!(None, dmp.match_main::<Efficient>("", "abcdef", 1));
+ assert_eq!(Some(3), dmp.match_main::<Efficient>("abcdef", "", 3));
+ assert_eq!(Some(3), dmp.match_main::<Efficient>("abcdef", "de", 3));
+
+ // Beyond end match.
+ assert_eq!(Some(3), dmp.match_main::<Efficient>("abcdef", "defy", 4));
+
+ // Oversized pattern.
+ assert_eq!(Some(0), dmp.match_main::<Efficient>("abcdef", "abcdefy", 0));
+
+ // Complex match.
+ assert_eq!(
+ Some(4),
+ dmp.match_main::<Efficient>(
+ "I am the very model of a modern major general.",
+ " that berry ",
+ 5
+ )
+ );
+
+ assert_eq!(Some(0), dmp.match_main::<Compat>("abcdef", "abcdef", 1000));
+ assert_eq!(None, dmp.match_main::<Compat>("", "abcdef", 1));
+ assert_eq!(Some(3), dmp.match_main::<Compat>("abcdef", "", 3));
+ assert_eq!(Some(3), dmp.match_main::<Compat>("abcdef", "de", 3));
// Beyond end match.
- assert_eq!(Some(3), dmp.match_main("abcdef", "defy", 4));
+ assert_eq!(Some(3), dmp.match_main::<Compat>("abcdef", "defy", 4));
// Oversized pattern.
- assert_eq!(Some(0), dmp.match_main("abcdef", "abcdefy", 0));
+ assert_eq!(Some(0), dmp.match_main::<Compat>("abcdef", "abcdefy", 0));
// Complex match.
assert_eq!(
Some(4),
- dmp.match_main(
+ dmp.match_main::<Compat>(
"I am the very model of a modern major general.",
" that berry ",
5