my fork of dmp
Added `delta` example
| -rw-r--r-- | examples/delta.rs | 30 | ||||
| -rw-r--r-- | src/dmp.rs | 174 | ||||
| -rw-r--r-- | tests/test.rs | 69 |
3 files changed, 142 insertions, 131 deletions
diff --git a/examples/delta.rs b/examples/delta.rs index 691e9f9..4368625 100644 --- a/examples/delta.rs +++ b/examples/delta.rs @@ -27,26 +27,26 @@ fn at_source() -> Result<String, Error> { // create a list of diffs let diffs = dmp.diff_main::<Efficient>(TXT_OLD, TXT_NEW)?; - // Now, we are going to create a list of `patches` to be applied to the old text to get the new text - let patches = dmp.patch_make(PatchInput::new_diffs(&diffs))?; + // When dealing with large text blocks and if you want to transmit this diff, `delta` will give you a minimal over the air representation of diffs + // We'll use this delta string to recreate `diffs` at the destination and then create patches to apply + let delta = dmp.diff_to_delta(&diffs)?; - // in the real world you are going to transmit or store this diff serialized to undiff format to be consumed or used somewhere elese - let patch_txt = dmp.patch_to_text(&patches); - - // lets see how our patches look - println!("{patch_txt:?}"); + // lets see how our delta looks + println!("{delta:?}"); // You should see something like this - // @@ -22,225 +22,250 @@\n f a \n-m\n+carto\n o\n-der\n n \n-Major-Ge\n+i\n n\n-er\n+dividu\n al,%0A\n-I've\n+My\n \n-i\n+a\n n\n-for\n+i\n mation\n+'s\n \n-veget\n+comic\n a\n-b\n l\n-e\n , \n-a\n+u\n n\n-im\n+usu\n al, and \n+whi\n m\n+s\n i\n-ner\n+c\n al,%0AI\n+'m\n \n-know \n+qui\n t\n-h\n e \n-kings of Engl\n a\n-n\n d\n-,\n+ept\n a\n+t fu\n n\n-d\n+ny\n \n-I\n+gags,\n \n-qu\n+c\n o\n-t\n+m\n e\n+dic\n the\n+ory\n \n-fights\n+I\n h\n-isto\n+ave \n r\n-ic\n+e\n a\n-l\n+d\n ,%0AFrom \n-M\n+wicked puns \n a\n-ra\n+nd s\n t\n-h\n+upid j\n o\n-n\n+kes\n to \n-W\n+anvils th\n at\n-e\n+ d\n r\n-l\n o\n+p \n o\n-, i\n n \n+y\n o\n-rde\n+u\n r \n-cat\n+h\n e\n-goric\n a\n-l\n+d\n .%0A%0A\n-L\n+Now, l\n et's \n-sta\n+explo\n r\n-t with\n+e\n some \n-bas\n+emot\n i\n-c\n+onal extreme\n s %F0%9F\n-%98\n+%8C\n %8A. 
W\n@@ -282,55 +282,53 @@\n our \n+ec\n sta\n-ndard sm\n+t\n i\n-ley\n+c\n face %F0%9F\n-%99%82\n+%A4%A9\n , your \n+deva\n s\n+t\n a\n+te\n d face \n-%E2\n+%F0%9F\n %98\n-%B9%EF%B8%8F\n+%AD\n , an\n@@ -338,53 +338,60 @@\n our \n-ang\n+utte\n r\n+l\n y \n+confused \n face %F0%9F\n-%98%A0\n+%A4%AF\n . But \n-w\n+th\n a\n-i\n t\n-, there\n 's \n-m\n+n\n o\n-re\n+t all\n ! %F0%9F%A4\n-%A9\n+%94\n We'\n@@ -411,20 +411,14 @@\n ome \n-more comp\n+subt\n le\n-x\n emo\n@@ -435,78 +435,15 @@\n %F0%9F%98\n-%8D, %F0%9F%A4%A4, and %F0%9F%9A%80. And let's not forget about the classics: %F0%9F%98%89\n+%90\n , %F0%9F\n-%91%8D\n+%99%83\n , an\n@@ -451,6 +451,6 @@\n %F0%9F%91\n-%8F\n+%80\n .\n + // =25\t-1\t+carto\t=1\t-3\t=2\t-8\t+i\t=1\t-2\t+dividu\t=4\t-4\t+My\t=1\t-1\t+a\t=1\t-3\t+i\t=6\t+'s\t=1\t-5\t+comic\t=1\t-1\t=1\t-1\t=2\t-1\t+u\t=1\t-2\t+usu\t=8\t+whi\t=1\t+s\t=1\t-3\t+c\t=5\t+'m\t=1\t-5\t+qui\t=1\t-1\t=2\t-13\t=1\t-1\t=1\t-1\t+ept\t=2\t+t fu\t=1\t-1\t+ny\t=1\t-1\t+gags,\t=1\t-2\t+c\t=1\t-1\t+m\t=1\t+dic\t=4\t+ory\t=1\t-6\t+I\t=2\t-4\t+ave \t=1\t-2\t+e\t=1\t-1\t+d\t=7\t-1\t+wicked puns \t=1\t-2\t+nd s\t=1\t-1\t+upid j\t=1\t-1\t+kes\t=4\t-1\t+anvils th\t=2\t-1\t+ d\t=1\t-1\t=1\t+p \t=1\t-3\t=2\t+y\t=1\t-3\t+u\t=2\t-3\t+h\t=1\t-5\t=1\t-1\t+d\t=3\t-1\t+Now, l\t=5\t-3\t+explo\t=1\t-6\t+e\t=6\t-3\t+emot\t=1\t-1\t+onal extreme\t=4\t-1\t+%8C\t=18\t+ec\t=3\t-8\t+t\t=1\t-3\t+c\t=8\t-2\t+%A4%A9\t=7\t+deva\t=1\t+t\t=1\t+te\t=7\t-1\t+%F0%9F\t=1\t-4\t+%AD\t=11\t-3\t+utte\t=1\t+l\t=2\t+confused \t=7\t-2\t+%A4%AF\t=6\t-1\t+th\t=1\t-1\t=1\t-7\t=3\t-1\t+n\t=1\t-2\t+t all\t=5\t-1\t+%94\t=21\t-9\t+subt\t=2\t-1\t=18\t-64\t+%90\t=4\t-2\t+%99%83\t=9\t-1\t+%80\t=1 - Ok(patch_txt) + Ok(delta) } -fn at_destination(patches: &str) -> Result<(), Error> { +fn at_destination(delta: &str) -> Result<(), Error> { // initializing the module let dmp = DiffMatchPatch::new(); - // lets recreate the diffs from patches - let patches = dmp.patch_from_text::<Efficient>(patches)?; + // lets recreate the diffs 
from the minimal `delta` string + let delta = dmp.diff_from_delta::<Efficient>(TXT_OLD, delta)?; + // Additional step of conveting `delta` -> `patches` + let patches = dmp.patch_make(PatchInput::new_diffs(&delta))?; // Now, lets apply these patches to the `old_txt` which is the original to get the new text let (new_txt, ops) = dmp.patch_apply(&patches, TXT_OLD)?; @@ -71,11 +71,11 @@ fn at_destination(patches: &str) -> Result<(), Error> { } fn main() -> Result<(), Error> { - // At the source of diff where the old text is being edited we'll create a set of patches - let patches = at_source()?; + // At the source of diff where the old text is being edited we'll create a `delta` - a `delta` is a minimal representation of `diffs` + let delta = at_source()?; // We'll send this diff to some destination e.g. db or the client where these changes are going to be applied // The destination will receive the patch string and will apply the patches to recreate the edits - at_destination(&patches) + at_destination(&delta) } @@ -1409,93 +1409,6 @@ impl DiffMatchPatch { } } - #[inline] - pub fn to_delta<T: DType>(diffs: &[Diff<T>]) -> Vec<T> { - let mut data = diffs - .iter() - .map(|diff| { - match diff.op() { - Ops::Insert => { - let encoded = T::percent_encode(diff.data()); - // format!("+{encoded}") - [&[T::from_char('+')], &encoded[..], &[T::from_char('\t')]].concat() - } - Ops::Delete => [ - &[T::from_char('-')], - &T::from_str(diff.size().to_string().as_str())[..], - &[T::from_char('\t')], - ] - .concat(), - Ops::Equal => [ - &[T::from_char('=')], - &T::from_str(diff.size().to_string().as_str())[..], - &[T::from_char('\t')], - ] - .concat(), - } - }) - .collect::<Vec<_>>() - .concat(); - - data.pop(); - - data - } - - pub fn from_delta<T: DType>( - old: &[T], - delta: &[T], - ) -> Result<Vec<Diff<T>>, crate::errors::Error> { - let mut pointer = 0; // cursor to text - let mut diffs = vec![]; - - for token in delta.split(|&k| k == T::from_char('\t')) { - if token.is_empty() { 
- continue; - } - - // Each token begins with a one character parameter which specifies the - // operation of this token (delete, insert, equality). - let opcode = token.first(); - let param = &token[1..]; - - if opcode == Some(&T::from_char('+')) { - let param = T::percent_decode(param); - diffs.push(Diff::insert(&param)); - } else if opcode == Some(&T::from_char('-')) || opcode == Some(&T::from_char('=')) { - let n = T::to_string(param)? - .parse::<isize>() - .map_err(|_| Error::Utf8Error)?; - if n < 0 { - return Err(crate::errors::Error::InvalidInput); - } - - let n = n as usize; - let new_pointer = pointer + n; - if new_pointer > old.len() { - return Err(crate::errors::Error::InvalidInput); - } - - let txt = &old[pointer..new_pointer]; - pointer = new_pointer; - - if opcode == Some(&T::from_char('=')) { - diffs.push(Diff::equal(txt)) - } else { - diffs.push(Diff::delete(txt)) - } - } else { - return Err(crate::errors::Error::InvalidInput); - } - } - - if pointer != old.len() { - return Err(crate::errors::Error::InvalidInput); - } - - Ok(diffs) - } - // Reduce the number of edits by eliminating operationally trivial equalities. 
#[inline] fn cleanup_efficiency<T: DType>(&self, diffs: &mut Vec<Diff<T>>) { @@ -2903,6 +2816,93 @@ impl DiffMatchPatch { Ok(patches) } + pub fn diff_to_delta<T: DType>(&self, diffs: &[Diff<T>]) -> Result<String, crate::Error> { + let mut data = diffs + .iter() + .map(|diff| match diff.op() { + Ops::Insert => { + let encoded = T::percent_encode(diff.data()); + [&[T::from_char('+')], &encoded[..], &[T::from_char('\t')]].concat() + } + Ops::Delete => [ + &[T::from_char('-')], + &T::from_str(diff.size().to_string().as_str())[..], + &[T::from_char('\t')], + ] + .concat(), + Ops::Equal => [ + &[T::from_char('=')], + &T::from_str(diff.size().to_string().as_str())[..], + &[T::from_char('\t')], + ] + .concat(), + }) + .collect::<Vec<_>>() + .concat(); + + data.pop(); + + T::to_string(&data) + } + + pub fn diff_from_delta<T: DType>( + &self, + old: &str, + delta: &str, + ) -> Result<Vec<Diff<T>>, crate::errors::Error> { + let mut pointer = 0; // cursor to text + let mut diffs = vec![]; + + let old = T::from_str(old); + let delta = T::from_str(delta); + + for token in delta.split(|&k| k == T::from_char('\t')) { + if token.is_empty() { + continue; + } + + // Each token begins with a one character parameter which specifies the + // operation of this token (delete, insert, equality). + let opcode = token.first(); + let param = &token[1..]; + + if opcode == Some(&T::from_char('+')) { + let param = T::percent_decode(param); + diffs.push(Diff::insert(&param)); + } else if opcode == Some(&T::from_char('-')) || opcode == Some(&T::from_char('=')) { + let n = T::to_string(param)? 
+ .parse::<isize>() + .map_err(|_| Error::Utf8Error)?; + if n < 0 { + return Err(crate::errors::Error::InvalidInput); + } + + let n = n as usize; + let new_pointer = pointer + n; + if new_pointer > old.len() { + return Err(crate::errors::Error::InvalidInput); + } + + let txt = &old[pointer..new_pointer]; + pointer = new_pointer; + + if opcode == Some(&T::from_char('=')) { + diffs.push(Diff::equal(txt)) + } else { + diffs.push(Diff::delete(txt)) + } + } else { + return Err(crate::errors::Error::InvalidInput); + } + } + + if pointer != old.len() { + return Err(crate::errors::Error::InvalidInput); + } + + Ok(diffs) + } + /// Applies a list of patches to text1. The first element of the return value is the newly patched text. /// The second element is an array of true/false values indicating which of the patches were successfully applied. /// [Note that this second element is not too useful since large patches may get broken up internally, resulting in a longer results list than the input with no way to figure out which patch succeeded or failed. diff --git a/tests/test.rs b/tests/test.rs index a05ee1c..4f86462 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -654,6 +654,8 @@ fn test_diff_main_compat() -> Result<(), Error> { #[test] fn test_diff_delta() -> Result<(), Error> { + let dmp = DiffMatchPatch::new(); + let diffs = vec![ Diff::equal(b"jump"), Diff::delete(b"s"), @@ -664,19 +666,23 @@ fn test_diff_delta() -> Result<(), Error> { Diff::equal(b" lazy"), Diff::insert(b"old dog"), ]; - let txt_old = "jumps over the lazy".as_bytes(); - assert_eq!(txt_old, DiffMatchPatch::diff_text_old(&diffs)); + let txt_old = "jumps over the lazy"; + assert_eq!(txt_old.as_bytes(), DiffMatchPatch::diff_text_old(&diffs)); - let delta = DiffMatchPatch::to_delta(&diffs); - assert_eq!("=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog".as_bytes(), &delta); + let delta = dmp.diff_to_delta(&diffs)?; + assert_eq!("=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", &delta); // Convert delta string into a diff. 
- assert_eq!(diffs, DiffMatchPatch::from_delta(txt_old, &delta)?); + assert_eq!(diffs, dmp.diff_from_delta(txt_old, &delta)?); // Generates error (19 != 20). - assert!(DiffMatchPatch::from_delta(&[txt_old, "+".as_bytes()].concat()[..], &delta).is_err()); + assert!(dmp + .diff_from_delta::<Efficient>(&[txt_old, "+"].concat()[..], &delta) + .is_err()); // Generates error (19 != 18). - assert!(DiffMatchPatch::from_delta(&txt_old[1..], &delta).is_err()); + assert!(dmp + .diff_from_delta::<Efficient>(&txt_old[1..], &delta) + .is_err()); let diffs = vec![ Diff::equal(&"jump".chars().collect::<Vec<_>>()[..]), @@ -688,24 +694,26 @@ fn test_diff_delta() -> Result<(), Error> { Diff::equal(&" lazy".chars().collect::<Vec<_>>()[..]), Diff::insert(&"old dog".chars().collect::<Vec<_>>()[..]), ]; - let txt_old = &"jumps over the lazy".chars().collect::<Vec<_>>()[..]; - assert_eq!(txt_old, DiffMatchPatch::diff_text_old(&diffs)); - - let delta = DiffMatchPatch::to_delta(&diffs); + // let txt_old = &"jumps over the lazy".chars().collect::<Vec<_>>()[..]; assert_eq!( - &"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog" - .chars() - .collect::<Vec<_>>()[..], - &delta + &txt_old.chars().collect::<Vec<_>>()[..], + DiffMatchPatch::diff_text_old(&diffs) ); + + let delta = dmp.diff_to_delta(&diffs)?; + assert_eq!("=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", &delta); // Convert delta string into a diff. - assert_eq!(diffs, DiffMatchPatch::from_delta(txt_old, &delta)?); + assert_eq!(diffs, dmp.diff_from_delta(txt_old, &delta)?); // Generates error (19 != 20). - assert!(DiffMatchPatch::from_delta(&[txt_old, &['+']].concat()[..], &delta).is_err()); + assert!(dmp + .diff_from_delta::<Compat>(&[txt_old, "+"].concat()[..], &delta) + .is_err()); // Generates error (19 != 18). - assert!(DiffMatchPatch::from_delta(&txt_old[1..], &delta).is_err()); + assert!(dmp + .diff_from_delta::<Compat>(&txt_old[1..], &delta) + .is_err()); // Test deltas with special characters. 
let diffs = vec![ @@ -715,11 +723,14 @@ fn test_diff_delta() -> Result<(), Error> { ]; let txt_old = DiffMatchPatch::diff_text_old(&diffs); assert_eq!("\u{0680} \x00 \t %\u{0681} \x01 \n ^".as_bytes(), txt_old); - let delta = DiffMatchPatch::to_delta(&diffs); + let delta = dmp.diff_to_delta(&diffs)?; - assert_eq!(b"=8\t-8\t+%DA%82 %02 %5C %7C", &delta[..]); + assert_eq!("=8\t-8\t+%DA%82 %02 %5C %7C", &delta[..]); // Convert delta string into a diff. - assert_eq!(&diffs, &DiffMatchPatch::from_delta(&txt_old, &delta)?); + assert_eq!( + &diffs, + &dmp.diff_from_delta(std::str::from_utf8(&txt_old).unwrap(), &delta)? + ); let diffs = vec![ Diff::equal(&"\u{0680} \x00 \t %".chars().collect::<Vec<_>>()[..]), @@ -733,14 +744,14 @@ fn test_diff_delta() -> Result<(), Error> { .collect::<Vec<_>>()[..], txt_old ); - let delta = DiffMatchPatch::to_delta(&diffs); + let delta = dmp.diff_to_delta(&diffs)?; + assert_eq!("=7\t-7\t+%DA%82 %02 %5C %7C", &delta); + // Convert delta string into a diff. assert_eq!( - &"=7\t-7\t+%DA%82 %02 %5C %7C".chars().collect::<Vec<_>>()[..], - &delta[..] + &diffs, + &dmp.diff_from_delta(&txt_old.iter().collect::<String>(), &delta)? ); - // Convert delta string into a diff. - assert_eq!(&diffs, &DiffMatchPatch::from_delta(&txt_old, &delta)?); // Verify pool of unchanged characters. let diffs = vec![Diff::insert( @@ -763,14 +774,14 @@ fn test_diff_delta() -> Result<(), Error> { &txt_new.iter().collect::<String>() ); - let delta = DiffMatchPatch::to_delta(&diffs); + let delta = dmp.diff_to_delta(&diffs)?; assert_eq!( "+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", - &delta.iter().collect::<String>() + &delta ); // Convert delta string into a diff. - assert_eq!(diffs, DiffMatchPatch::from_delta(&[], &delta)?); + assert_eq!(diffs, dmp.diff_from_delta("", &delta)?); Ok(()) } |