my fork of dmp
-rw-r--r--src/dmp.rs199
-rw-r--r--src/lib.rs4
-rw-r--r--tests/test.rs224
3 files changed, 233 insertions, 194 deletions
diff --git a/src/dmp.rs b/src/dmp.rs
index 28821e4..08e3322 100644
--- a/src/dmp.rs
+++ b/src/dmp.rs
@@ -45,7 +45,12 @@ pub struct Diff<T: Copy + Ord + Eq>(Ops, Vec<T>);
impl Display for Diff<u8> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- write!(f, "({:?}, {})", self.op(), std::str::from_utf8(self.data()).unwrap())
+ write!(
+ f,
+ "({:?}, {})",
+ self.op(),
+ std::str::from_utf8(self.data()).unwrap()
+ )
}
}
@@ -139,7 +144,7 @@ impl DiffMatchPatch {
/// Enables or disables `line mode` optimization.
/// When enabled, the diff algorithm tries to find the `lines` that have changes and apply diff on the same
- ///
+ ///
/// This optimization makes sense for text with many lines (~100s), defaults to `true`
pub fn set_checklines(&mut self, checklines: bool) {
self.checklines = checklines;
@@ -151,9 +156,9 @@ impl DiffMatchPatch {
}
/// Set a timeout in number of `milliseconds`. This creates a cutoff for internal `recursive` function calls
- ///
+ ///
/// Defaults to `1000ms` (1 second)
- ///
+ ///
/// None means `infinite time`
pub fn set_timeout(&mut self, tout: Option<u32>) {
self.timeout = tout;
@@ -172,10 +177,10 @@ impl DiffMatchPatch {
}
/// The `match_threshold` property determines the cut-off value for a valid match.
- /// If `match_threshold` is closer to 0, the requirements for accuracy increase.
+ /// If `match_threshold` is closer to 0, the requirements for accuracy increase.
/// If `match_threshold` is closer to 1 then it is more likely that a match will be found.
/// The `match_threshold` is, the slower `match_main()` may take to compute.
- ///
+ ///
/// defaults to 0.5
pub fn set_match_threshold(&mut self, threshold: f32) {
self.match_threshold = threshold
@@ -191,6 +196,16 @@ impl DiffMatchPatch {
self.delete_threshold
}
+ /// When deleting a large block of text (over ~64 characters), how close does
+ /// the contents have to match the expected contents. (0.0 = perfection,
+ /// 1.0 = very loose). Note that `match_threshold` controls how closely the
+ /// end points of a delete need to match.
+ ///
+ /// Defaults to `0.5`
+ pub fn set_delete_threshold(&mut self, threshold: f32) {
+ self.delete_threshold = threshold;
+ }
+
// returns the configured max_bits
fn match_max_bits(&self) -> usize {
self.match_max_bits
@@ -200,7 +215,12 @@ impl DiffMatchPatch {
self.match_distance
}
-
+ /// How far to search for a match (0 = exact location, 1000+ = broad match).
+ /// A match this many characters away from the expected location will add
+ /// 1.0 to the score (0.0 is a perfect match).
+ pub fn set_match_distance(&mut self, distance: usize) {
+ self.match_distance = distance
+ }
pub(crate) fn diff_internal<'a>(
&self,
@@ -1395,24 +1415,29 @@ impl DiffMatchPatch {
}
pub fn to_delta(diffs: &[Diff<u8>]) -> Vec<u8> {
- let mut data = diffs.iter()
+ let mut data = diffs
+ .iter()
.map(|diff| {
- match diff.op() {
- Ops::Insert => {
- let encoded = percent_encode(diff.data(), ENCODE_SET).map(|v| v.as_bytes()).collect::<Vec<_>>().concat();
- // format!("+{encoded}")
- ["+".as_bytes(), &encoded, "\t".as_bytes()].concat()
- }
- Ops::Delete => {
- [b"-", diff.size().to_string().as_bytes(), "\t".as_bytes()].concat()
- }
- Ops::Equal => {
- // format!("={}", diff.size())
- [b"=", diff.size().to_string().as_bytes(), "\t".as_bytes()].concat()
+ match diff.op() {
+ Ops::Insert => {
+ let encoded = percent_encode(diff.data(), ENCODE_SET)
+ .map(|v| v.as_bytes())
+ .collect::<Vec<_>>()
+ .concat();
+ // format!("+{encoded}")
+ ["+".as_bytes(), &encoded, "\t".as_bytes()].concat()
+ }
+ Ops::Delete => {
+ [b"-", diff.size().to_string().as_bytes(), "\t".as_bytes()].concat()
+ }
+ Ops::Equal => {
+ // format!("={}", diff.size())
+ [b"=", diff.size().to_string().as_bytes(), "\t".as_bytes()].concat()
+ }
}
- }
- })
- .collect::<Vec<_>>().concat();
+ })
+ .collect::<Vec<_>>()
+ .concat();
data.pop();
@@ -1434,22 +1459,20 @@ impl DiffMatchPatch {
let param = &token[1..];
if opcode == Some(&b'+') {
-
let param = percent_decode(param).collect::<Vec<_>>();
diffs.push(Diff::insert(&param));
-
} else if opcode == Some(&b'-') || opcode == Some(&b'=') {
- let n = match std::str::from_utf8(param)
+ let n = match std::str::from_utf8(param)
.map_err(|_| crate::errors::Error::Utf8Error)
- .and_then(|t|
- t.parse::<isize>().map_err(|_| crate::errors::Error::InvalidInput
- )
- ) {
- Ok(n) => n,
- Err(_) => {
- return Err(crate::errors::Error::InvalidInput);
- }
- };
+ .and_then(|t| {
+ t.parse::<isize>()
+ .map_err(|_| crate::errors::Error::InvalidInput)
+ }) {
+ Ok(n) => n,
+ Err(_) => {
+ return Err(crate::errors::Error::InvalidInput);
+ }
+ };
if n < 0 {
return Err(crate::errors::Error::InvalidInput);
@@ -1461,7 +1484,7 @@ impl DiffMatchPatch {
return Err(crate::errors::Error::InvalidInput);
}
- let txt = &old[pointer .. new_pointer];
+ let txt = &old[pointer..new_pointer];
pointer = new_pointer;
if opcode == Some(&b'=') {
@@ -1473,11 +1496,11 @@ impl DiffMatchPatch {
return Err(crate::errors::Error::InvalidInput);
}
}
-
+
if pointer != old.len() {
return Err(crate::errors::Error::InvalidInput);
}
-
+
Ok(diffs)
}
@@ -2151,7 +2174,7 @@ impl Display for Patch {
};
let segment = format!("{sign}{}\n", percent_encode(diff.data(), ENCODE_SET));
-
+
segments.push(segment)
});
@@ -2590,7 +2613,7 @@ impl DiffMatchPatch {
/// # Example
/// ```
/// use diff_match_patch_rs::{DiffMatchPatch, Error};
- ///
+ ///
/// # fn main() -> Result<(), Error> {
/// let mut dmp = DiffMatchPatch::new();
/// // change some settings, e.g. set `line mode` optimization to `false` because you know you have a small text and not many lines
@@ -2610,7 +2633,7 @@ impl DiffMatchPatch {
/// # Example
/// ```
/// use diff_match_patch_rs::{DiffMatchPatch, Error};
- ///
+ ///
/// # fn main() -> Result<(), Error> {
/// let mut dmp = DiffMatchPatch::new();
/// // change some settings, e.g. set `line mode` optimization to `false` because you know you have a small text and not many lines
@@ -2646,7 +2669,7 @@ impl DiffMatchPatch {
/// <div class="warning">Not Implemented</div>
/// This function is similar to diff_cleanupSemantic, except that instead of optimising a diff to be human-readable, it optimises the diff to be efficient for machine processing.
/// The results of both cleanup types are often the same.
- ///
+ ///
/// The efficiency cleanup is based on the observation that a diff made up of large numbers of small diffs edits may take longer to process (in downstream applications) or take more capacity to store or transmit than a smaller number of larger diffs.
/// The diff_match_patch.Diff_EditCost property sets what the cost of handling a new edit is in terms of handling extra characters in an existing edit.
/// The default value is 4, which means if expanding the length of a diff by three characters can eliminate one edit, then that optimisation will reduce the total costs.
@@ -2813,7 +2836,7 @@ impl DiffMatchPatch {
/// Given a text to search, a pattern to search for and an expected location in the text near which to find the pattern, return the location which matches closest.
/// The function will search for the best match based on both the number of character errors between the pattern and the potential match,
/// as well as the distance between the expected location and the potential match.
- ///
+ ///
/// The following example is a classic dilemma. There are two potential matches, one is close to the expected location but contains a one character error,
/// the other is far from the expected location but is exactly the pattern sought after: match_main("abc12345678901234567890abbc", "abc", 26)
/// Which result is returned (0 or 24) is determined by the diff_match_patch.match_distance property.
@@ -2821,18 +2844,17 @@ impl DiffMatchPatch {
/// For example, a distance of '0' requires the match be at the exact location specified, whereas a threshold of '1000' would require a perfect match to be within 800
/// characters of the expected location to be found using a 0.8 threshold (see below).
/// The larger match_distance is, the slower match_main() may take to compute. This variable defaults to 1000.
- ///
+ ///
/// Another property is `diff_match_patch.match_threshold` which determines the cut-off value for a valid match.
/// If `match_threshold` is closer to 0, the requirements for accuracy increase.
/// If `match_threshold` is closer to 1 then it is more likely that a match will be found.
/// The larger `match_threshold` is, the slower match_main() may take to compute. `match_threshold` defaults to 0.5 and can be updated by `dmp.set_match_threshold()` method.
- ///
+ ///
/// If no match is found, the function returns -1.
pub fn match_main(&self, text: &str, pattern: &str, loc: usize) -> Option<usize> {
self.match_internal(text.as_bytes(), pattern.as_bytes(), loc)
}
-
/// Given two texts, or an already computed list of differences, return an array of patch objects.
/// The third form PatchInput::TextDiffs(...) is preferred, use it if you happen to have that data available, otherwise this function will compute the missing pieces.
/// TODO: add example
@@ -2965,7 +2987,7 @@ impl DiffMatchPatch {
/// The second element is an array of true/false values indicating which of the patches were successfully applied.
/// [Note that this second element is not too useful since large patches may get broken up internally, resulting in a longer results list than the input with no way to figure out which patch succeeded or failed.
/// A more informative API is in development.]
- ///
+ ///
/// The `match_distance` and `match_threshold` properties are used to evaluate patch application on text which does not match exactly.
/// In addition, the diff_match_patch.patch_delete_threshold property determines how closely the text within a major (~64 character) delete needs to match the expected text.
/// If patch_delete_threshold is closer to 0, then the deleted text must match the expected text more closely.
@@ -2988,7 +3010,12 @@ impl DiffMatchPatch {
#[cfg(test)]
mod tests {
- use crate::{dmp::{Diff, HalfMatch, LineToChars}, DiffMatchPatch, Error, Patch, PatchInput};
+ use std::collections::HashMap;
+
+ use crate::{
+ dmp::{Diff, HalfMatch, LineToChars},
+ DiffMatchPatch, Error, Patch, PatchInput,
+ };
#[test]
fn test_prefix() {
@@ -3736,10 +3763,7 @@ mod tests {
let dmp = DiffMatchPatch::default();
// Both edges full.
let mut patches = dmp.patch_make(PatchInput::Texts("", "test"))?;
- assert_eq!(
- "@@ -0,0 +1,4 @@\n+test\n",
- dmp.patch_to_text(&patches)
- );
+ assert_eq!("@@ -0,0 +1,4 @@\n+test\n", dmp.patch_to_text(&patches));
dmp.patch_add_padding(&mut patches);
assert_eq!(
"@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n",
@@ -3820,4 +3844,75 @@ mod tests {
Ok(())
}
+
+ #[test]
+ fn test_match_alphabet() {
+ // Initialise the bitmasks for Bitap.
+ // Unique.
+ assert_eq!(
+ HashMap::from([(b'a', 4), (b'b', 2), (b'c', 1)]),
+ DiffMatchPatch::match_alphabet(b"abc")
+ );
+
+ // Duplicates.
+ assert_eq!(
+ HashMap::from([(b'a', 37), (b'b', 18), (b'c', 8)]),
+ DiffMatchPatch::match_alphabet(b"abcaba")
+ )
+ }
+
+ #[test]
+ fn test_match_bitap() {
+ // Bitap algorithm.
+ let mut dmp = DiffMatchPatch {
+ match_distance: 100,
+ ..Default::default()
+ };
+
+ // Exact matches.
+ assert_eq!(Some(5), dmp.match_bitap(b"abcdefghijk", b"fgh", 5));
+ assert_eq!(Some(5), dmp.match_bitap(b"abcdefghijk", b"fgh", 0));
+
+ // Fuzzy matches.
+ assert_eq!(Some(4), dmp.match_bitap(b"abcdefghijk", b"efxhi", 0));
+ assert_eq!(Some(2), dmp.match_bitap(b"abcdefghijk", b"cdefxyhijk", 5));
+ assert_eq!(None, dmp.match_bitap(b"abcdefghijk", b"bxy", 1));
+
+ // Overflow.
+ assert_eq!(Some(2), dmp.match_bitap(b"123456789xx0", b"3456789x0", 2));
+
+ // Threshold test.
+ dmp.match_threshold = 0.4;
+ assert_eq!(Some(4), dmp.match_bitap(b"abcdefghijk", b"efxyhi", 1));
+
+ // dmp.`match_threshold` = 0.3;
+ dmp.match_threshold = 0.3;
+ assert_eq!(None, dmp.match_bitap(b"abcdefghijk", b"efxyhi", 1));
+
+ dmp.match_threshold = 0.;
+ assert_eq!(Some(1), dmp.match_bitap(b"abcdefghijk", b"bcdef", 1));
+
+ dmp.match_threshold = 0.5;
+
+ // Multiple select.
+ assert_eq!(Some(0), dmp.match_bitap(b"abcdexyzabcde", b"abccde", 3));
+ assert_eq!(Some(8), dmp.match_bitap(b"abcdexyzabcde", b"abccde", 5));
+
+ // Distance test.
+ dmp.match_distance = 10;
+ assert_eq!(
+ None,
+ dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdefg", 24)
+ );
+ assert_eq!(
+ Some(0),
+ dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdxxefg", 1)
+ );
+
+ dmp.match_distance = 1000;
+ assert_eq!(
+ Some(0),
+ dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdefg", 24)
+ );
+ }
}
diff --git a/src/lib.rs b/src/lib.rs
index a41d6a3..684cf21 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,5 +2,5 @@ pub mod dmp;
pub mod errors;
pub mod traits;
-pub use dmp::{DiffMatchPatch, PatchInput, Ops, Patch, Patches};
-pub use errors::Error; \ No newline at end of file
+pub use dmp::{DiffMatchPatch, Ops, Patch, PatchInput, Patches};
+pub use errors::Error;
diff --git a/tests/test.rs b/tests/test.rs
index 2c043b5..fd00a4e 100644
--- a/tests/test.rs
+++ b/tests/test.rs
@@ -1,10 +1,10 @@
-use std::{collections::HashMap, time::Instant};
+use std::time::Instant;
use chrono::Utc;
use diff_match_patch_rs::dmp::Diff;
-use diff_match_patch_rs::{DiffMatchPatch, Error, Ops, Patch, PatchInput};
+use diff_match_patch_rs::{DiffMatchPatch, Error, Ops, PatchInput};
// const tests = [
// 'testDiffIsDestructurable',
@@ -339,7 +339,7 @@ fn test_diff_main() -> Result<(), Error> {
let mut dmp = DiffMatchPatch::default();
let old = std::fs::read_to_string("testdata/txt_old.txt").unwrap();
let new = std::fs::read_to_string("testdata/txt_new.txt").unwrap();
-
+
let start = Instant::now();
let diff_yes_lm = dmp.diff_main(&old, &new);
let yes_lm_dur = Instant::now() - start;
@@ -350,8 +350,9 @@ fn test_diff_main() -> Result<(), Error> {
let diff_no_lm = dmp.diff_main(&old, &new);
let no_lm_dur = Instant::now() - start;
assert!(diff_no_lm.is_ok());
-
+
assert!(no_lm_dur > yes_lm_dur);
+
Ok(())
}
@@ -368,27 +369,24 @@ fn test_diff_delta() -> Result<(), Error> {
Diff::insert(b"old dog"),
];
let txt_old = "jumps over the lazy".as_bytes();
- assert_eq!(
- txt_old,
- DiffMatchPatch::diff_text_old(&diffs)
- );
+ assert_eq!(txt_old, DiffMatchPatch::diff_text_old(&diffs));
let delta = DiffMatchPatch::to_delta(&diffs);
assert_eq!("=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog".as_bytes(), &delta);
// Convert delta string into a diff.
assert_eq!(diffs, DiffMatchPatch::from_delta(txt_old, &delta)?);
-
+
// Generates error (19 != 20).
assert!(DiffMatchPatch::from_delta(&[txt_old, "+".as_bytes()].concat()[..], &delta).is_err());
-
+
// Generates error (19 != 18).
assert!(DiffMatchPatch::from_delta(&txt_old[1..], &delta).is_err());
-
+
// Test deltas with special characters.
let diffs = vec![
Diff::equal("\u{0680} \x00 \t %".as_bytes()),
Diff::delete("\u{0681} \x01 \n ^".as_bytes()),
- Diff::insert("\u{0682} \x02 \\ |".as_bytes())
+ Diff::insert("\u{0682} \x02 \\ |".as_bytes()),
];
let txt_old = DiffMatchPatch::diff_text_old(&diffs);
assert_eq!("\u{0680} \x00 \t %\u{0681} \x01 \n ^".as_bytes(), txt_old);
@@ -399,14 +397,20 @@ fn test_diff_delta() -> Result<(), Error> {
assert_eq!(&diffs, &DiffMatchPatch::from_delta(&txt_old, &delta)?);
// Verify pool of unchanged characters.
- let diffs = vec![
- Diff::insert("A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ".as_bytes())
- ];
+ let diffs = vec![Diff::insert(
+ "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ".as_bytes(),
+ )];
let txt_new = DiffMatchPatch::diff_text_new(&diffs);
- assert_eq!("A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", std::str::from_utf8(&txt_new).unwrap());
+ assert_eq!(
+ "A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ",
+ std::str::from_utf8(&txt_new).unwrap()
+ );
let delta = DiffMatchPatch::to_delta(&diffs);
- assert_eq!("+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", std::str::from_utf8(&delta).unwrap());
+ assert_eq!(
+ "+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ",
+ std::str::from_utf8(&delta).unwrap()
+ );
// Convert delta string into a diff.
assert_eq!(diffs, DiffMatchPatch::from_delta("".as_bytes(), &delta)?);
@@ -441,12 +445,7 @@ fn test_patch_from_text() -> Result<(), Error> {
assert!(dmp.patch_from_text("")?.is_empty());
let strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n";
- assert_eq!(
- strp,
- dmp.patch_from_text(
- strp
- )?[0].to_string()
- );
+ assert_eq!(strp, dmp.patch_from_text(strp)?[0].to_string());
assert_eq!(
"@@ -1 +1 @@\n-a\n+b\n",
@@ -515,7 +514,7 @@ fn test_patch_make() -> Result<(), Error> {
"`1234567890-=[]\\;',./",
"~!@#$%^&*()_+{}|:\"<>?",
))?;
-
+
assert_eq!(
"@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n",
dmp.patch_to_text(&patches)
@@ -565,17 +564,13 @@ fn test_diff_text() {
);
}
-
#[test]
fn test_patch_apply() -> Result<(), Error> {
let mut dmp = DiffMatchPatch::default();
let patches = dmp.patch_make(PatchInput::Texts("", ""))?;
- let (txt, results) = dmp.patch_apply_internal(&patches, b"Hello world.")?;
- assert_eq!(
- format!("{}\t{}", std::str::from_utf8(&txt).unwrap(), results.len()),
- "Hello world.\t0"
- );
+ let (txt, results) = dmp.patch_apply(&patches, "Hello world.")?;
+ assert_eq!(format!("{}\t{}", txt, results.len()), "Hello world.\t0");
let patches = dmp.patch_make(PatchInput::Texts(
"The quick brown fox jumps over the lazy dog.",
@@ -585,31 +580,28 @@ fn test_patch_apply() -> Result<(), Error> {
// Exact match
assert_eq!(
(
- b"That quick brown fox jumped over a lazy dog.".to_vec(),
+ "That quick brown fox jumped over a lazy dog.".to_string(),
vec![true, true]
),
- dmp.patch_apply_internal(&patches, b"The quick brown fox jumps over the lazy dog.")?
+ dmp.patch_apply(&patches, "The quick brown fox jumps over the lazy dog.")?
);
// Partial match
assert_eq!(
(
- b"That quick red rabbit jumped over a tired tiger.".to_vec(),
+ "That quick red rabbit jumped over a tired tiger.".to_string(),
vec![true, true]
),
- dmp.patch_apply_internal(
- &patches,
- b"The quick red rabbit jumps over the tired tiger."
- )?
+ dmp.patch_apply(&patches, "The quick red rabbit jumps over the tired tiger.")?
);
// Failed match
assert_eq!(
(
- b"I am the very model of a modern major general.".to_vec(),
+ "I am the very model of a modern major general.".to_string(),
vec![false, false]
),
- dmp.patch_apply_internal(&patches, b"I am the very model of a modern major general.")?
+ dmp.patch_apply(&patches, "I am the very model of a modern major general.")?
);
// Big delete, small change
@@ -617,47 +609,70 @@ fn test_patch_apply() -> Result<(), Error> {
"x1234567890123456789012345678901234567890123456789012345678901234567890y",
"xabcy",
))?;
- assert_eq!((b"xabcy".to_vec(), vec![true, true]), dmp.patch_apply_internal(&patches, b"x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y")?);
+ assert_eq!(
+ ("xabcy".to_string(), vec![true, true]),
+ dmp.patch_apply(
+ &patches,
+ "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y"
+ )?
+ );
// Big delete, large change
let patches = dmp.patch_make(PatchInput::Texts(
"x1234567890123456789012345678901234567890123456789012345678901234567890y",
"xabcy",
))?;
- assert_eq!((b"xabc12345678901234567890---------------++++++++++---------------12345678901234567890y".to_vec(), vec![false, true]), dmp.patch_apply_internal(&patches, b"x12345678901234567890---------------++++++++++---------------12345678901234567890y")?);
+ assert_eq!(
+ (
+ "xabc12345678901234567890---------------++++++++++---------------12345678901234567890y"
+ .to_string(),
+ vec![false, true]
+ ),
+ dmp.patch_apply(
+ &patches,
+ "x12345678901234567890---------------++++++++++---------------12345678901234567890y"
+ )?
+ );
- dmp.delete_threshold = 0.6;
+ dmp.set_delete_threshold(0.6);
let patches = dmp.patch_make(PatchInput::Texts(
"x1234567890123456789012345678901234567890123456789012345678901234567890y",
"xabcy",
))?;
- assert_eq!((b"xabcy".to_vec(), vec![true, true]), dmp.patch_apply_internal(&patches, b"x12345678901234567890---------------++++++++++---------------12345678901234567890y")?);
- dmp.delete_threshold = 0.5;
+ assert_eq!(
+ ("xabcy".to_string(), vec![true, true]),
+ dmp.patch_apply(
+ &patches,
+ "x12345678901234567890---------------++++++++++---------------12345678901234567890y"
+ )?
+ );
+ dmp.set_delete_threshold(0.5);
// Compesate for failed patch
- dmp.match_threshold = 0.;
- dmp.match_distance = 0;
+ dmp.set_match_threshold(0.);
+ dmp.set_match_distance(0);
let patches = dmp.patch_make(PatchInput::Texts(
"abcdefghijklmnopqrstuvwxyz--------------------1234567890",
"abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890",
))?;
assert_eq!(
(
- b"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890".to_vec(),
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890".to_string(),
vec![false, true]
),
- dmp.patch_apply_internal(
+ dmp.patch_apply(
&patches,
- b"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890"
)?
);
- dmp.match_threshold = 0.5;
- dmp.match_distance = 1000;
+
+ dmp.set_match_threshold(0.5);
+ dmp.set_match_distance(1000);
// No side-effects - kinds useless cos patches is not mutable in rust
let patches = dmp.patch_make(PatchInput::Texts("", "test"))?;
let srcstr = dmp.patch_to_text(&patches);
- dmp.patch_apply_internal(&patches, b"")?;
+ dmp.patch_apply(&patches, "")?;
assert_eq!(srcstr, dmp.patch_to_text(&patches));
let patches = dmp.patch_make(PatchInput::Texts(
@@ -665,127 +680,56 @@ fn test_patch_apply() -> Result<(), Error> {
"Woof",
))?;
let srcstr = dmp.patch_to_text(&patches);
- dmp.patch_apply_internal(&patches, b"The quick brown fox jumps over the lazy dog.")?;
+ dmp.patch_apply(&patches, "The quick brown fox jumps over the lazy dog.")?;
assert_eq!(srcstr, dmp.patch_to_text(&patches));
// Edge exact match
let patches = dmp.patch_make(PatchInput::Texts("", "test"))?;
assert_eq!(
- (b"test".to_vec(), vec![true]),
- dmp.patch_apply_internal(&patches, b"")?
+ ("test".to_string(), vec![true]),
+ dmp.patch_apply(&patches, "")?
);
// Near edge exact match
let patches = dmp.patch_make(PatchInput::Texts("XY", "XtestY"))?;
assert_eq!(
- (b"XtestY".to_vec(), vec![true]),
- dmp.patch_apply_internal(&patches, b"XY")?
+ ("XtestY".to_string(), vec![true]),
+ dmp.patch_apply(&patches, "XY")?
);
// Edge partial match
let patches = dmp.patch_make(PatchInput::Texts("y", "y123"))?;
assert_eq!(
- (b"x123".to_vec(), vec![true]),
- dmp.patch_apply_internal(&patches, b"x")?
+ ("x123".to_string(), vec![true]),
+ dmp.patch_apply(&patches, "x")?
);
Ok(())
}
#[test]
-fn test_match_alphabet() {
- // Initialise the bitmasks for Bitap.
- // Unique.
- assert_eq!(
- HashMap::from([(b'a', 4), (b'b', 2), (b'c', 1)]),
- DiffMatchPatch::match_alphabet(b"abc")
- );
-
- // Duplicates.
- assert_eq!(
- HashMap::from([(b'a', 37), (b'b', 18), (b'c', 8)]),
- DiffMatchPatch::match_alphabet(b"abcaba")
- )
-}
-
-#[test]
-fn test_match_bitap() {
- // Bitap algorithm.
- let mut dmp = DiffMatchPatch {
- match_distance: 100,
- ..Default::default()
- };
-
- // Exact matches.
- assert_eq!(Some(5), dmp.match_bitap(b"abcdefghijk", b"fgh", 5));
- assert_eq!(Some(5), dmp.match_bitap(b"abcdefghijk", b"fgh", 0));
-
- // Fuzzy matches.
- assert_eq!(Some(4), dmp.match_bitap(b"abcdefghijk", b"efxhi", 0));
- assert_eq!(Some(2), dmp.match_bitap(b"abcdefghijk", b"cdefxyhijk", 5));
- assert_eq!(None, dmp.match_bitap(b"abcdefghijk", b"bxy", 1));
-
- // Overflow.
- assert_eq!(Some(2), dmp.match_bitap(b"123456789xx0", b"3456789x0", 2));
-
- // Threshold test.
- dmp.match_threshold = 0.4;
- assert_eq!(Some(4), dmp.match_bitap(b"abcdefghijk", b"efxyhi", 1));
-
- // dmp.`match_threshold` = 0.3;
- dmp.match_threshold = 0.3;
- assert_eq!(None, dmp.match_bitap(b"abcdefghijk", b"efxyhi", 1));
-
- dmp.match_threshold = 0.;
- assert_eq!(Some(1), dmp.match_bitap(b"abcdefghijk", b"bcdef", 1));
-
- dmp.match_threshold = 0.5;
-
- // Multiple select.
- assert_eq!(Some(0), dmp.match_bitap(b"abcdexyzabcde", b"abccde", 3));
- assert_eq!(Some(8), dmp.match_bitap(b"abcdexyzabcde", b"abccde", 5));
-
- // Distance test.
- dmp.match_distance = 10;
- assert_eq!(
- None,
- dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdefg", 24)
- );
- assert_eq!(
- Some(0),
- dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdxxefg", 1)
- );
-
- dmp.match_distance = 1000;
- assert_eq!(
- Some(0),
- dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdefg", 24)
- );
-}
-
-#[test]
fn test_match_main() {
let dmp = DiffMatchPatch::default();
// Full match.
// Shortcut matches.
- assert_eq!(Some(0), dmp.match_internal(b"abcdef", b"abcdef", 1000));
- assert_eq!(None, dmp.match_internal(b"", b"abcdef", 1));
- assert_eq!(Some(3), dmp.match_internal(b"abcdef", b"", 3));
- assert_eq!(Some(3), dmp.match_internal(b"abcdef", b"de", 3));
+ assert_eq!(Some(0), dmp.match_main("abcdef", "abcdef", 1000));
+ assert_eq!(None, dmp.match_main("", "abcdef", 1));
+ assert_eq!(Some(3), dmp.match_main("abcdef", "", 3));
+ assert_eq!(Some(3), dmp.match_main("abcdef", "de", 3));
// Beyond end match.
- assert_eq!(Some(3), dmp.match_internal(b"abcdef", b"defy", 4));
+ assert_eq!(Some(3), dmp.match_main("abcdef", "defy", 4));
// Oversized pattern.
- assert_eq!(Some(0), dmp.match_internal(b"abcdef", b"abcdefy", 0));
+ assert_eq!(Some(0), dmp.match_main("abcdef", "abcdefy", 0));
// Complex match.
assert_eq!(
Some(4),
- dmp.match_internal(
- b"I am the very model of a modern major general.",
- b" that berry ",
+ dmp.match_main(
+ "I am the very model of a modern major general.",
+ " that berry ",
5
)
);
-} \ No newline at end of file
+}