my fork of dmp
Separated tests
| -rw-r--r-- | src/dmp.rs | 199 | ||||
| -rw-r--r-- | src/lib.rs | 4 | ||||
| -rw-r--r-- | tests/test.rs | 224 |
3 files changed, 233 insertions, 194 deletions
@@ -45,7 +45,12 @@ pub struct Diff<T: Copy + Ord + Eq>(Ops, Vec<T>); impl Display for Diff<u8> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "({:?}, {})", self.op(), std::str::from_utf8(self.data()).unwrap()) + write!( + f, + "({:?}, {})", + self.op(), + std::str::from_utf8(self.data()).unwrap() + ) } } @@ -139,7 +144,7 @@ impl DiffMatchPatch { /// Enables or disables `line mode` optimization. /// When enabled, the diff algorithm tries to find the `lines` that have changes and apply diff on the same - /// + /// /// This optimization makes sense for text with many lines (~100s), defaults to `true` pub fn set_checklines(&mut self, checklines: bool) { self.checklines = checklines; @@ -151,9 +156,9 @@ impl DiffMatchPatch { } /// Set a timeout in number of `milliseconds`. This creates a cutoff for internal `recursive` function calls - /// + /// /// Defaults to `1000ms` (1 second) - /// + /// /// None means `infinite time` pub fn set_timeout(&mut self, tout: Option<u32>) { self.timeout = tout; @@ -172,10 +177,10 @@ impl DiffMatchPatch { } /// The `match_threshold` property determines the cut-off value for a valid match. - /// If `match_threshold` is closer to 0, the requirements for accuracy increase. + /// If `match_threshold` is closer to 0, the requirements for accuracy increase. /// If `match_threshold` is closer to 1 then it is more likely that a match will be found. /// The `match_threshold` is, the slower `match_main()` may take to compute. - /// + /// /// defaults to 0.5 pub fn set_match_threshold(&mut self, threshold: f32) { self.match_threshold = threshold @@ -191,6 +196,16 @@ impl DiffMatchPatch { self.delete_threshold } + /// When deleting a large block of text (over ~64 characters), how close does + /// the contents have to match the expected contents. (0.0 = perfection, + /// 1.0 = very loose). Note that `match_threshold` controls how closely the + /// end points of a delete need to match. + /// + /// Defaults to `0.5` + pub fn set_delete_threshold(&mut self, threshold: f32) { + self.delete_threshold = threshold; + } + // returns the configured max_bits fn match_max_bits(&self) -> usize { self.match_max_bits @@ -200,7 +215,12 @@ impl DiffMatchPatch { self.match_distance } - + /// How far to search for a match (0 = exact location, 1000+ = broad match). + /// A match this many characters away from the expected location will add + /// 1.0 to the score (0.0 is a perfect match). + pub fn set_match_distance(&mut self, distance: usize) { + self.match_distance = distance + } pub(crate) fn diff_internal<'a>( &self, @@ -1395,24 +1415,29 @@ impl DiffMatchPatch { } pub fn to_delta(diffs: &[Diff<u8>]) -> Vec<u8> { - let mut data = diffs.iter() + let mut data = diffs + .iter() .map(|diff| { - match diff.op() { - Ops::Insert => { - let encoded = percent_encode(diff.data(), ENCODE_SET).map(|v| v.as_bytes()).collect::<Vec<_>>().concat(); - // format!("+{encoded}") - ["+".as_bytes(), &encoded, "\t".as_bytes()].concat() - } - Ops::Delete => { - [b"-", diff.size().to_string().as_bytes(), "\t".as_bytes()].concat() - } - Ops::Equal => { - // format!("={}", diff.size()) - [b"=", diff.size().to_string().as_bytes(), "\t".as_bytes()].concat() + match diff.op() { + Ops::Insert => { + let encoded = percent_encode(diff.data(), ENCODE_SET) + .map(|v| v.as_bytes()) + .collect::<Vec<_>>() + .concat(); + // format!("+{encoded}") + ["+".as_bytes(), &encoded, "\t".as_bytes()].concat() + } + Ops::Delete => { + [b"-", diff.size().to_string().as_bytes(), "\t".as_bytes()].concat() + } + Ops::Equal => { + // format!("={}", diff.size()) + [b"=", diff.size().to_string().as_bytes(), "\t".as_bytes()].concat() + } } - } - }) - .collect::<Vec<_>>().concat(); + }) + .collect::<Vec<_>>() + .concat(); data.pop(); @@ -1434,22 +1459,20 @@ impl DiffMatchPatch { let param = &token[1..]; if opcode == Some(&b'+') { - let param = percent_decode(param).collect::<Vec<_>>(); diffs.push(Diff::insert(¶m)); - } else if opcode == Some(&b'-') || opcode == Some(&b'=') { - let n = match std::str::from_utf8(param) + let n = match std::str::from_utf8(param) .map_err(|_| crate::errors::Error::Utf8Error) - .and_then(|t| - t.parse::<isize>().map_err(|_| crate::errors::Error::InvalidInput - ) - ) { - Ok(n) => n, - Err(_) => { - return Err(crate::errors::Error::InvalidInput); - } - }; + .and_then(|t| { + t.parse::<isize>() + .map_err(|_| crate::errors::Error::InvalidInput) + }) { + Ok(n) => n, + Err(_) => { + return Err(crate::errors::Error::InvalidInput); + } + }; if n < 0 { return Err(crate::errors::Error::InvalidInput); @@ -1461,7 +1484,7 @@ impl DiffMatchPatch { return Err(crate::errors::Error::InvalidInput); } - let txt = &old[pointer .. new_pointer]; + let txt = &old[pointer..new_pointer]; pointer = new_pointer; if opcode == Some(&b'=') { @@ -1473,11 +1496,11 @@ impl DiffMatchPatch { return Err(crate::errors::Error::InvalidInput); } } - + if pointer != old.len() { return Err(crate::errors::Error::InvalidInput); } - + Ok(diffs) } @@ -2151,7 +2174,7 @@ impl Display for Patch { }; let segment = format!("{sign}{}\n", percent_encode(diff.data(), ENCODE_SET)); - + segments.push(segment) }); @@ -2590,7 +2613,7 @@ impl DiffMatchPatch { /// # Example /// ``` /// use diff_match_patch_rs::{DiffMatchPatch, Error}; - /// + /// /// # fn main() -> Result<(), Error> { /// let mut dmp = DiffMatchPatch::new(); /// // change some settings, e.g. set `line mode` optimization to `false` because you know you have a small text and not many lines @@ -2610,7 +2633,7 @@ impl DiffMatchPatch { /// # Example /// ``` /// use diff_match_patch_rs::{DiffMatchPatch, Error}; - /// + /// /// # fn main() -> Result<(), Error> { /// let mut dmp = DiffMatchPatch::new(); /// // change some settings, e.g. set `line mode` optimization to `false` because you know you have a small text and not many lines @@ -2646,7 +2669,7 @@ impl DiffMatchPatch { /// <div class="warning">Not Implemented</div> /// This function is similar to diff_cleanupSemantic, except that instead of optimising a diff to be human-readable, it optimises the diff to be efficient for machine processing. /// The results of both cleanup types are often the same. - /// + /// /// The efficiency cleanup is based on the observation that a diff made up of large numbers of small diffs edits may take longer to process (in downstream applications) or take more capacity to store or transmit than a smaller number of larger diffs. /// The diff_match_patch.Diff_EditCost property sets what the cost of handling a new edit is in terms of handling extra characters in an existing edit. /// The default value is 4, which means if expanding the length of a diff by three characters can eliminate one edit, then that optimisation will reduce the total costs. @@ -2813,7 +2836,7 @@ impl DiffMatchPatch { /// Given a text to search, a pattern to search for and an expected location in the text near which to find the pattern, return the location which matches closest. /// The function will search for the best match based on both the number of character errors between the pattern and the potential match, /// as well as the distance between the expected location and the potential match. - /// + /// /// The following example is a classic dilemma. There are two potential matches, one is close to the expected location but contains a one character error, /// the other is far from the expected location but is exactly the pattern sought after: match_main("abc12345678901234567890abbc", "abc", 26) /// Which result is returned (0 or 24) is determined by the diff_match_patch.match_distance property. @@ -2821,18 +2844,17 @@ impl DiffMatchPatch { /// For example, a distance of '0' requires the match be at the exact location specified, whereas a threshold of '1000' would require a perfect match to be within 800 /// characters of the expected location to be found using a 0.8 threshold (see below). /// The larger match_distance is, the slower match_main() may take to compute. This variable defaults to 1000. - /// + /// /// Another property is `diff_match_patch.match_threshold` which determines the cut-off value for a valid match. /// If `match_threshold` is closer to 0, the requirements for accuracy increase. /// If `match_threshold` is closer to 1 then it is more likely that a match will be found. /// The larger `match_threshold` is, the slower match_main() may take to compute. `match_threshold` defaults to 0.5 and can be updated by `dmp.set_match_threshold()` method. - /// + /// /// If no match is found, the function returns -1. pub fn match_main(&self, text: &str, pattern: &str, loc: usize) -> Option<usize> { self.match_internal(text.as_bytes(), pattern.as_bytes(), loc) } - /// Given two texts, or an already computed list of differences, return an array of patch objects. /// The third form PatchInput::TextDiffs(...) is preferred, use it if you happen to have that data available, otherwise this function will compute the missing pieces. /// TODO: add example @@ -2965,7 +2987,7 @@ impl DiffMatchPatch { /// The second element is an array of true/false values indicating which of the patches were successfully applied. /// [Note that this second element is not too useful since large patches may get broken up internally, resulting in a longer results list than the input with no way to figure out which patch succeeded or failed. /// A more informative API is in development.] - /// + /// /// The `match_distance` and `match_threshold` properties are used to evaluate patch application on text which does not match exactly. /// In addition, the diff_match_patch.patch_delete_threshold property determines how closely the text within a major (~64 character) delete needs to match the expected text. /// If patch_delete_threshold is closer to 0, then the deleted text must match the expected text more closely. @@ -2988,7 +3010,12 @@ impl DiffMatchPatch { #[cfg(test)] mod tests { - use crate::{dmp::{Diff, HalfMatch, LineToChars}, DiffMatchPatch, Error, Patch, PatchInput}; + use std::collections::HashMap; + + use crate::{ + dmp::{Diff, HalfMatch, LineToChars}, + DiffMatchPatch, Error, Patch, PatchInput, + }; #[test] fn test_prefix() { @@ -3736,10 +3763,7 @@ mod tests { let dmp = DiffMatchPatch::default(); // Both edges full. let mut patches = dmp.patch_make(PatchInput::Texts("", "test"))?; - assert_eq!( - "@@ -0,0 +1,4 @@\n+test\n", - dmp.patch_to_text(&patches) - ); + assert_eq!("@@ -0,0 +1,4 @@\n+test\n", dmp.patch_to_text(&patches)); dmp.patch_add_padding(&mut patches); assert_eq!( "@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", @@ -3820,4 +3844,75 @@ mod tests { Ok(()) } + + #[test] + fn test_match_alphabet() { + // Initialise the bitmasks for Bitap. + // Unique. + assert_eq!( + HashMap::from([(b'a', 4), (b'b', 2), (b'c', 1)]), + DiffMatchPatch::match_alphabet(b"abc") + ); + + // Duplicates. + assert_eq!( + HashMap::from([(b'a', 37), (b'b', 18), (b'c', 8)]), + DiffMatchPatch::match_alphabet(b"abcaba") + ) + } + + #[test] + fn test_match_bitap() { + // Bitap algorithm. + let mut dmp = DiffMatchPatch { + match_distance: 100, + ..Default::default() + }; + + // Exact matches. + assert_eq!(Some(5), dmp.match_bitap(b"abcdefghijk", b"fgh", 5)); + assert_eq!(Some(5), dmp.match_bitap(b"abcdefghijk", b"fgh", 0)); + + // Fuzzy matches. + assert_eq!(Some(4), dmp.match_bitap(b"abcdefghijk", b"efxhi", 0)); + assert_eq!(Some(2), dmp.match_bitap(b"abcdefghijk", b"cdefxyhijk", 5)); + assert_eq!(None, dmp.match_bitap(b"abcdefghijk", b"bxy", 1)); + + // Overflow. + assert_eq!(Some(2), dmp.match_bitap(b"123456789xx0", b"3456789x0", 2)); + + // Threshold test. + dmp.match_threshold = 0.4; + assert_eq!(Some(4), dmp.match_bitap(b"abcdefghijk", b"efxyhi", 1)); + + // dmp.`match_threshold` = 0.3; + dmp.match_threshold = 0.3; + assert_eq!(None, dmp.match_bitap(b"abcdefghijk", b"efxyhi", 1)); + + dmp.match_threshold = 0.; + assert_eq!(Some(1), dmp.match_bitap(b"abcdefghijk", b"bcdef", 1)); + + dmp.match_threshold = 0.5; + + // Multiple select. + assert_eq!(Some(0), dmp.match_bitap(b"abcdexyzabcde", b"abccde", 3)); + assert_eq!(Some(8), dmp.match_bitap(b"abcdexyzabcde", b"abccde", 5)); + + // Distance test. + dmp.match_distance = 10; + assert_eq!( + None, + dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdefg", 24) + ); + assert_eq!( + Some(0), + dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdxxefg", 1) + ); + + dmp.match_distance = 1000; + assert_eq!( + Some(0), + dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdefg", 24) + ); + } } @@ -2,5 +2,5 @@ pub mod dmp; pub mod errors; pub mod traits; -pub use dmp::{DiffMatchPatch, PatchInput, Ops, Patch, Patches}; -pub use errors::Error;
\ No newline at end of file +pub use dmp::{DiffMatchPatch, Ops, Patch, PatchInput, Patches}; +pub use errors::Error; diff --git a/tests/test.rs b/tests/test.rs index 2c043b5..fd00a4e 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -1,10 +1,10 @@ -use std::{collections::HashMap, time::Instant}; +use std::time::Instant; use chrono::Utc; use diff_match_patch_rs::dmp::Diff; -use diff_match_patch_rs::{DiffMatchPatch, Error, Ops, Patch, PatchInput}; +use diff_match_patch_rs::{DiffMatchPatch, Error, Ops, PatchInput}; // const tests = [ // 'testDiffIsDestructurable', @@ -339,7 +339,7 @@ fn test_diff_main() -> Result<(), Error> { let mut dmp = DiffMatchPatch::default(); let old = std::fs::read_to_string("testdata/txt_old.txt").unwrap(); let new = std::fs::read_to_string("testdata/txt_new.txt").unwrap(); - + let start = Instant::now(); let diff_yes_lm = dmp.diff_main(&old, &new); let yes_lm_dur = Instant::now() - start; @@ -350,8 +350,9 @@ fn test_diff_main() -> Result<(), Error> { let diff_no_lm = dmp.diff_main(&old, &new); let no_lm_dur = Instant::now() - start; assert!(diff_no_lm.is_ok()); - + assert!(no_lm_dur > yes_lm_dur); + Ok(()) } @@ -368,27 +369,24 @@ fn test_diff_delta() -> Result<(), Error> { Diff::insert(b"old dog"), ]; let txt_old = "jumps over the lazy".as_bytes(); - assert_eq!( - txt_old, - DiffMatchPatch::diff_text_old(&diffs) - ); + assert_eq!(txt_old, DiffMatchPatch::diff_text_old(&diffs)); let delta = DiffMatchPatch::to_delta(&diffs); assert_eq!("=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog".as_bytes(), &delta); // Convert delta string into a diff. assert_eq!(diffs, DiffMatchPatch::from_delta(txt_old, &delta)?); - + // Generates error (19 != 20). assert!(DiffMatchPatch::from_delta(&[txt_old, "+".as_bytes()].concat()[..], &delta).is_err()); - + // Generates error (19 != 18). assert!(DiffMatchPatch::from_delta(&txt_old[1..], &delta).is_err()); - + // Test deltas with special characters. let diffs = vec![ Diff::equal("\u{0680} \x00 \t %".as_bytes()), Diff::delete("\u{0681} \x01 \n ^".as_bytes()), - Diff::insert("\u{0682} \x02 \\ |".as_bytes()) + Diff::insert("\u{0682} \x02 \\ |".as_bytes()), ]; let txt_old = DiffMatchPatch::diff_text_old(&diffs); assert_eq!("\u{0680} \x00 \t %\u{0681} \x01 \n ^".as_bytes(), txt_old); @@ -399,14 +397,20 @@ fn test_diff_delta() -> Result<(), Error> { assert_eq!(&diffs, &DiffMatchPatch::from_delta(&txt_old, &delta)?); // Verify pool of unchanged characters. - let diffs = vec![ - Diff::insert("A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ".as_bytes()) - ]; + let diffs = vec![Diff::insert( + "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ".as_bytes(), + )]; let txt_new = DiffMatchPatch::diff_text_new(&diffs); - assert_eq!("A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", std::str::from_utf8(&txt_new).unwrap()); + assert_eq!( + "A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", + std::str::from_utf8(&txt_new).unwrap() + ); let delta = DiffMatchPatch::to_delta(&diffs); - assert_eq!("+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", std::str::from_utf8(&delta).unwrap()); + assert_eq!( + "+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", + std::str::from_utf8(&delta).unwrap() + ); // Convert delta string into a diff. assert_eq!(diffs, DiffMatchPatch::from_delta("".as_bytes(), &delta)?); @@ -441,12 +445,7 @@ fn test_patch_from_text() -> Result<(), Error> { assert!(dmp.patch_from_text("")?.is_empty()); let strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; - assert_eq!( - strp, - dmp.patch_from_text( - strp - )?[0].to_string() - ); + assert_eq!(strp, dmp.patch_from_text(strp)?[0].to_string()); assert_eq!( "@@ -1 +1 @@\n-a\n+b\n", @@ -515,7 +514,7 @@ fn test_patch_make() -> Result<(), Error> { "`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?", ))?; - + assert_eq!( "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", dmp.patch_to_text(&patches) @@ -565,17 +564,13 @@ fn test_diff_text() { ); } - #[test] fn test_patch_apply() -> Result<(), Error> { let mut dmp = DiffMatchPatch::default(); let patches = dmp.patch_make(PatchInput::Texts("", ""))?; - let (txt, results) = dmp.patch_apply_internal(&patches, b"Hello world.")?; - assert_eq!( - format!("{}\t{}", std::str::from_utf8(&txt).unwrap(), results.len()), - "Hello world.\t0" - ); + let (txt, results) = dmp.patch_apply(&patches, "Hello world.")?; + assert_eq!(format!("{}\t{}", txt, results.len()), "Hello world.\t0"); let patches = dmp.patch_make(PatchInput::Texts( "The quick brown fox jumps over the lazy dog.", @@ -585,31 +580,28 @@ fn test_patch_apply() -> Result<(), Error> { // Exact match assert_eq!( ( - b"That quick brown fox jumped over a lazy dog.".to_vec(), + "That quick brown fox jumped over a lazy dog.".to_string(), vec![true, true] ), - dmp.patch_apply_internal(&patches, b"The quick brown fox jumps over the lazy dog.")? + dmp.patch_apply(&patches, "The quick brown fox jumps over the lazy dog.")? ); // Partial match assert_eq!( ( - b"That quick red rabbit jumped over a tired tiger.".to_vec(), + "That quick red rabbit jumped over a tired tiger.".to_string(), vec![true, true] ), - dmp.patch_apply_internal( - &patches, - b"The quick red rabbit jumps over the tired tiger." - )? + dmp.patch_apply(&patches, "The quick red rabbit jumps over the tired tiger.")? ); // Failed match assert_eq!( ( - b"I am the very model of a modern major general.".to_vec(), + "I am the very model of a modern major general.".to_string(), vec![false, false] ), - dmp.patch_apply_internal(&patches, b"I am the very model of a modern major general.")? + dmp.patch_apply(&patches, "I am the very model of a modern major general.")? ); // Big delete, small change @@ -617,47 +609,70 @@ fn test_patch_apply() -> Result<(), Error> { "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy", ))?; - assert_eq!((b"xabcy".to_vec(), vec![true, true]), dmp.patch_apply_internal(&patches, b"x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y")?); + assert_eq!( + ("xabcy".to_string(), vec![true, true]), + dmp.patch_apply( + &patches, + "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y" + )? + ); // Big delete, large change let patches = dmp.patch_make(PatchInput::Texts( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy", ))?; - assert_eq!((b"xabc12345678901234567890---------------++++++++++---------------12345678901234567890y".to_vec(), vec![false, true]), dmp.patch_apply_internal(&patches, b"x12345678901234567890---------------++++++++++---------------12345678901234567890y")?); + assert_eq!( + ( + "xabc12345678901234567890---------------++++++++++---------------12345678901234567890y" + .to_string(), + vec![false, true] + ), + dmp.patch_apply( + &patches, + "x12345678901234567890---------------++++++++++---------------12345678901234567890y" + )? + ); - dmp.delete_threshold = 0.6; + dmp.set_delete_threshold(0.6); let patches = dmp.patch_make(PatchInput::Texts( "x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy", ))?; - assert_eq!((b"xabcy".to_vec(), vec![true, true]), dmp.patch_apply_internal(&patches, b"x12345678901234567890---------------++++++++++---------------12345678901234567890y")?); - dmp.delete_threshold = 0.5; + assert_eq!( + ("xabcy".to_string(), vec![true, true]), + dmp.patch_apply( + &patches, + "x12345678901234567890---------------++++++++++---------------12345678901234567890y" + )? + ); + dmp.set_delete_threshold(0.5); // Compesate for failed patch - dmp.match_threshold = 0.; - dmp.match_distance = 0; + dmp.set_match_threshold(0.); + dmp.set_match_distance(0); let patches = dmp.patch_make(PatchInput::Texts( "abcdefghijklmnopqrstuvwxyz--------------------1234567890", "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890", ))?; assert_eq!( ( - b"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890".to_vec(), + "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890".to_string(), vec![false, true] ), - dmp.patch_apply_internal( + dmp.patch_apply( &patches, - b"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890" )? ); - dmp.match_threshold = 0.5; - dmp.match_distance = 1000; + + dmp.set_match_threshold(0.5); + dmp.set_match_distance(1000); // No side-effects - kinds useless cos patches is not mutable in rust let patches = dmp.patch_make(PatchInput::Texts("", "test"))?; let srcstr = dmp.patch_to_text(&patches); - dmp.patch_apply_internal(&patches, b"")?; + dmp.patch_apply(&patches, "")?; assert_eq!(srcstr, dmp.patch_to_text(&patches)); let patches = dmp.patch_make(PatchInput::Texts( @@ -665,127 +680,56 @@ fn test_patch_apply() -> Result<(), Error> { "Woof", ))?; let srcstr = dmp.patch_to_text(&patches); - dmp.patch_apply_internal(&patches, b"The quick brown fox jumps over the lazy dog.")?; + dmp.patch_apply(&patches, "The quick brown fox jumps over the lazy dog.")?; assert_eq!(srcstr, dmp.patch_to_text(&patches)); // Edge exact match let patches = dmp.patch_make(PatchInput::Texts("", "test"))?; assert_eq!( - (b"test".to_vec(), vec![true]), - dmp.patch_apply_internal(&patches, b"")? + ("test".to_string(), vec![true]), + dmp.patch_apply(&patches, "")? ); // Near edge exact match let patches = dmp.patch_make(PatchInput::Texts("XY", "XtestY"))?; assert_eq!( - (b"XtestY".to_vec(), vec![true]), - dmp.patch_apply_internal(&patches, b"XY")? + ("XtestY".to_string(), vec![true]), + dmp.patch_apply(&patches, "XY")? ); // Edge partial match let patches = dmp.patch_make(PatchInput::Texts("y", "y123"))?; assert_eq!( - (b"x123".to_vec(), vec![true]), - dmp.patch_apply_internal(&patches, b"x")? + ("x123".to_string(), vec![true]), + dmp.patch_apply(&patches, "x")? ); Ok(()) } #[test] -fn test_match_alphabet() { - // Initialise the bitmasks for Bitap. - // Unique. - assert_eq!( - HashMap::from([(b'a', 4), (b'b', 2), (b'c', 1)]), - DiffMatchPatch::match_alphabet(b"abc") - ); - - // Duplicates. - assert_eq!( - HashMap::from([(b'a', 37), (b'b', 18), (b'c', 8)]), - DiffMatchPatch::match_alphabet(b"abcaba") - ) -} - -#[test] -fn test_match_bitap() { - // Bitap algorithm. - let mut dmp = DiffMatchPatch { - match_distance: 100, - ..Default::default() - }; - - // Exact matches. - assert_eq!(Some(5), dmp.match_bitap(b"abcdefghijk", b"fgh", 5)); - assert_eq!(Some(5), dmp.match_bitap(b"abcdefghijk", b"fgh", 0)); - - // Fuzzy matches. - assert_eq!(Some(4), dmp.match_bitap(b"abcdefghijk", b"efxhi", 0)); - assert_eq!(Some(2), dmp.match_bitap(b"abcdefghijk", b"cdefxyhijk", 5)); - assert_eq!(None, dmp.match_bitap(b"abcdefghijk", b"bxy", 1)); - - // Overflow. - assert_eq!(Some(2), dmp.match_bitap(b"123456789xx0", b"3456789x0", 2)); - - // Threshold test. - dmp.match_threshold = 0.4; - assert_eq!(Some(4), dmp.match_bitap(b"abcdefghijk", b"efxyhi", 1)); - - // dmp.`match_threshold` = 0.3; - dmp.match_threshold = 0.3; - assert_eq!(None, dmp.match_bitap(b"abcdefghijk", b"efxyhi", 1)); - - dmp.match_threshold = 0.; - assert_eq!(Some(1), dmp.match_bitap(b"abcdefghijk", b"bcdef", 1)); - - dmp.match_threshold = 0.5; - - // Multiple select. - assert_eq!(Some(0), dmp.match_bitap(b"abcdexyzabcde", b"abccde", 3)); - assert_eq!(Some(8), dmp.match_bitap(b"abcdexyzabcde", b"abccde", 5)); - - // Distance test. - dmp.match_distance = 10; - assert_eq!( - None, - dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdefg", 24) - ); - assert_eq!( - Some(0), - dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdxxefg", 1) - ); - - dmp.match_distance = 1000; - assert_eq!( - Some(0), - dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdefg", 24) - ); -} - -#[test] fn test_match_main() { let dmp = DiffMatchPatch::default(); // Full match. // Shortcut matches. - assert_eq!(Some(0), dmp.match_internal(b"abcdef", b"abcdef", 1000)); - assert_eq!(None, dmp.match_internal(b"", b"abcdef", 1)); - assert_eq!(Some(3), dmp.match_internal(b"abcdef", b"", 3)); - assert_eq!(Some(3), dmp.match_internal(b"abcdef", b"de", 3)); + assert_eq!(Some(0), dmp.match_main("abcdef", "abcdef", 1000)); + assert_eq!(None, dmp.match_main("", "abcdef", 1)); + assert_eq!(Some(3), dmp.match_main("abcdef", "", 3)); + assert_eq!(Some(3), dmp.match_main("abcdef", "de", 3)); // Beyond end match. - assert_eq!(Some(3), dmp.match_internal(b"abcdef", b"defy", 4)); + assert_eq!(Some(3), dmp.match_main("abcdef", "defy", 4)); // Oversized pattern. - assert_eq!(Some(0), dmp.match_internal(b"abcdef", b"abcdefy", 0)); + assert_eq!(Some(0), dmp.match_main("abcdef", "abcdefy", 0)); // Complex match. assert_eq!( Some(4), - dmp.match_internal( - b"I am the very model of a modern major general.", - b" that berry ", + dmp.match_main( + "I am the very model of a modern major general.", + " that berry ", 5 ) ); -}
\ No newline at end of file +} |