my fork of dmp
Added `delta` example
Anubhab Bandyopadhyay 2024-08-27
parent 6eeb88f · commit 8e8ae4a
-rw-r--r-- examples/delta.rs 30
-rw-r--r-- src/dmp.rs 174
-rw-r--r-- tests/test.rs 69
3 files changed, 142 insertions, 131 deletions
diff --git a/examples/delta.rs b/examples/delta.rs
index 691e9f9..4368625 100644
--- a/examples/delta.rs
+++ b/examples/delta.rs
@@ -27,26 +27,26 @@ fn at_source() -> Result<String, Error> {
// create a list of diffs
let diffs = dmp.diff_main::<Efficient>(TXT_OLD, TXT_NEW)?;
- // Now, we are going to create a list of `patches` to be applied to the old text to get the new text
- let patches = dmp.patch_make(PatchInput::new_diffs(&diffs))?;
+ // When dealing with large text blocks that you want to transmit, a `delta` gives you a minimal over-the-air representation of the diffs
+ // We'll use this delta string to recreate `diffs` at the destination and then create patches to apply
+ let delta = dmp.diff_to_delta(&diffs)?;
- // in the real world you are going to transmit or store this diff serialized to undiff format to be consumed or used somewhere elese
- let patch_txt = dmp.patch_to_text(&patches);
-
- // lets see how our patches look
- println!("{patch_txt:?}");
+ // let's see how our delta looks
+ println!("{delta:?}");
// You should see something like this
- // @@ -22,225 +22,250 @@\n f a \n-m\n+carto\n o\n-der\n n \n-Major-Ge\n+i\n n\n-er\n+dividu\n al,%0A\n-I've\n+My\n \n-i\n+a\n n\n-for\n+i\n mation\n+'s\n \n-veget\n+comic\n a\n-b\n l\n-e\n , \n-a\n+u\n n\n-im\n+usu\n al, and \n+whi\n m\n+s\n i\n-ner\n+c\n al,%0AI\n+'m\n \n-know \n+qui\n t\n-h\n e \n-kings of Engl\n a\n-n\n d\n-,\n+ept\n a\n+t fu\n n\n-d\n+ny\n \n-I\n+gags,\n \n-qu\n+c\n o\n-t\n+m\n e\n+dic\n the\n+ory\n \n-fights\n+I\n h\n-isto\n+ave \n r\n-ic\n+e\n a\n-l\n+d\n ,%0AFrom \n-M\n+wicked puns \n a\n-ra\n+nd s\n t\n-h\n+upid j\n o\n-n\n+kes\n to \n-W\n+anvils th\n at\n-e\n+ d\n r\n-l\n o\n+p \n o\n-, i\n n \n+y\n o\n-rde\n+u\n r \n-cat\n+h\n e\n-goric\n a\n-l\n+d\n .%0A%0A\n-L\n+Now, l\n et's \n-sta\n+explo\n r\n-t with\n+e\n some \n-bas\n+emot\n i\n-c\n+onal extreme\n s %F0%9F\n-%98\n+%8C\n %8A. W\n@@ -282,55 +282,53 @@\n our \n+ec\n sta\n-ndard sm\n+t\n i\n-ley\n+c\n face %F0%9F\n-%99%82\n+%A4%A9\n , your \n+deva\n s\n+t\n a\n+te\n d face \n-%E2\n+%F0%9F\n %98\n-%B9%EF%B8%8F\n+%AD\n , an\n@@ -338,53 +338,60 @@\n our \n-ang\n+utte\n r\n+l\n y \n+confused \n face %F0%9F\n-%98%A0\n+%A4%AF\n . But \n-w\n+th\n a\n-i\n t\n-, there\n 's \n-m\n+n\n o\n-re\n+t all\n ! %F0%9F%A4\n-%A9\n+%94\n We'\n@@ -411,20 +411,14 @@\n ome \n-more comp\n+subt\n le\n-x\n emo\n@@ -435,78 +435,15 @@\n %F0%9F%98\n-%8D, %F0%9F%A4%A4, and %F0%9F%9A%80. And let's not forget about the classics: %F0%9F%98%89\n+%90\n , %F0%9F\n-%91%8D\n+%99%83\n , an\n@@ -451,6 +451,6 @@\n %F0%9F%91\n-%8F\n+%80\n .\n
+ // =25\t-1\t+carto\t=1\t-3\t=2\t-8\t+i\t=1\t-2\t+dividu\t=4\t-4\t+My\t=1\t-1\t+a\t=1\t-3\t+i\t=6\t+'s\t=1\t-5\t+comic\t=1\t-1\t=1\t-1\t=2\t-1\t+u\t=1\t-2\t+usu\t=8\t+whi\t=1\t+s\t=1\t-3\t+c\t=5\t+'m\t=1\t-5\t+qui\t=1\t-1\t=2\t-13\t=1\t-1\t=1\t-1\t+ept\t=2\t+t fu\t=1\t-1\t+ny\t=1\t-1\t+gags,\t=1\t-2\t+c\t=1\t-1\t+m\t=1\t+dic\t=4\t+ory\t=1\t-6\t+I\t=2\t-4\t+ave \t=1\t-2\t+e\t=1\t-1\t+d\t=7\t-1\t+wicked puns \t=1\t-2\t+nd s\t=1\t-1\t+upid j\t=1\t-1\t+kes\t=4\t-1\t+anvils th\t=2\t-1\t+ d\t=1\t-1\t=1\t+p \t=1\t-3\t=2\t+y\t=1\t-3\t+u\t=2\t-3\t+h\t=1\t-5\t=1\t-1\t+d\t=3\t-1\t+Now, l\t=5\t-3\t+explo\t=1\t-6\t+e\t=6\t-3\t+emot\t=1\t-1\t+onal extreme\t=4\t-1\t+%8C\t=18\t+ec\t=3\t-8\t+t\t=1\t-3\t+c\t=8\t-2\t+%A4%A9\t=7\t+deva\t=1\t+t\t=1\t+te\t=7\t-1\t+%F0%9F\t=1\t-4\t+%AD\t=11\t-3\t+utte\t=1\t+l\t=2\t+confused \t=7\t-2\t+%A4%AF\t=6\t-1\t+th\t=1\t-1\t=1\t-7\t=3\t-1\t+n\t=1\t-2\t+t all\t=5\t-1\t+%94\t=21\t-9\t+subt\t=2\t-1\t=18\t-64\t+%90\t=4\t-2\t+%99%83\t=9\t-1\t+%80\t=1
- Ok(patch_txt)
+ Ok(delta)
}
-fn at_destination(patches: &str) -> Result<(), Error> {
+fn at_destination(delta: &str) -> Result<(), Error> {
// initializing the module
let dmp = DiffMatchPatch::new();
- // lets recreate the diffs from patches
- let patches = dmp.patch_from_text::<Efficient>(patches)?;
+ // let's recreate the diffs from the minimal `delta` string
+ let delta = dmp.diff_from_delta::<Efficient>(TXT_OLD, delta)?;
+ // Additional step of converting `delta` -> `patches`
+ let patches = dmp.patch_make(PatchInput::new_diffs(&delta))?;
// Now, lets apply these patches to the `old_txt` which is the original to get the new text
let (new_txt, ops) = dmp.patch_apply(&patches, TXT_OLD)?;
@@ -71,11 +71,11 @@ fn at_destination(patches: &str) -> Result<(), Error> {
}
fn main() -> Result<(), Error> {
- // At the source of diff where the old text is being edited we'll create a set of patches
- let patches = at_source()?;
+ // At the source, where the old text is being edited, we'll create a `delta` - a minimal representation of the `diffs`
+ let delta = at_source()?;
// We'll send this diff to some destination e.g. db or the client where these changes are going to be applied
// The destination will receive the patch string and will apply the patches to recreate the edits
- at_destination(&patches)
+ at_destination(&delta)
}
diff --git a/src/dmp.rs b/src/dmp.rs
index 9177707..d8a3f67 100644
--- a/src/dmp.rs
+++ b/src/dmp.rs
@@ -1409,93 +1409,6 @@ impl DiffMatchPatch {
}
}
- #[inline]
- pub fn to_delta<T: DType>(diffs: &[Diff<T>]) -> Vec<T> {
- let mut data = diffs
- .iter()
- .map(|diff| {
- match diff.op() {
- Ops::Insert => {
- let encoded = T::percent_encode(diff.data());
- // format!("+{encoded}")
- [&[T::from_char('+')], &encoded[..], &[T::from_char('\t')]].concat()
- }
- Ops::Delete => [
- &[T::from_char('-')],
- &T::from_str(diff.size().to_string().as_str())[..],
- &[T::from_char('\t')],
- ]
- .concat(),
- Ops::Equal => [
- &[T::from_char('=')],
- &T::from_str(diff.size().to_string().as_str())[..],
- &[T::from_char('\t')],
- ]
- .concat(),
- }
- })
- .collect::<Vec<_>>()
- .concat();
-
- data.pop();
-
- data
- }
-
- pub fn from_delta<T: DType>(
- old: &[T],
- delta: &[T],
- ) -> Result<Vec<Diff<T>>, crate::errors::Error> {
- let mut pointer = 0; // cursor to text
- let mut diffs = vec![];
-
- for token in delta.split(|&k| k == T::from_char('\t')) {
- if token.is_empty() {
- continue;
- }
-
- // Each token begins with a one character parameter which specifies the
- // operation of this token (delete, insert, equality).
- let opcode = token.first();
- let param = &token[1..];
-
- if opcode == Some(&T::from_char('+')) {
- let param = T::percent_decode(param);
- diffs.push(Diff::insert(&param));
- } else if opcode == Some(&T::from_char('-')) || opcode == Some(&T::from_char('=')) {
- let n = T::to_string(param)?
- .parse::<isize>()
- .map_err(|_| Error::Utf8Error)?;
- if n < 0 {
- return Err(crate::errors::Error::InvalidInput);
- }
-
- let n = n as usize;
- let new_pointer = pointer + n;
- if new_pointer > old.len() {
- return Err(crate::errors::Error::InvalidInput);
- }
-
- let txt = &old[pointer..new_pointer];
- pointer = new_pointer;
-
- if opcode == Some(&T::from_char('=')) {
- diffs.push(Diff::equal(txt))
- } else {
- diffs.push(Diff::delete(txt))
- }
- } else {
- return Err(crate::errors::Error::InvalidInput);
- }
- }
-
- if pointer != old.len() {
- return Err(crate::errors::Error::InvalidInput);
- }
-
- Ok(diffs)
- }
-
// Reduce the number of edits by eliminating operationally trivial equalities.
#[inline]
fn cleanup_efficiency<T: DType>(&self, diffs: &mut Vec<Diff<T>>) {
@@ -2903,6 +2816,93 @@ impl DiffMatchPatch {
Ok(patches)
}
+ pub fn diff_to_delta<T: DType>(&self, diffs: &[Diff<T>]) -> Result<String, crate::Error> {
+ let mut data = diffs
+ .iter()
+ .map(|diff| match diff.op() {
+ Ops::Insert => {
+ let encoded = T::percent_encode(diff.data());
+ [&[T::from_char('+')], &encoded[..], &[T::from_char('\t')]].concat()
+ }
+ Ops::Delete => [
+ &[T::from_char('-')],
+ &T::from_str(diff.size().to_string().as_str())[..],
+ &[T::from_char('\t')],
+ ]
+ .concat(),
+ Ops::Equal => [
+ &[T::from_char('=')],
+ &T::from_str(diff.size().to_string().as_str())[..],
+ &[T::from_char('\t')],
+ ]
+ .concat(),
+ })
+ .collect::<Vec<_>>()
+ .concat();
+
+ data.pop();
+
+ T::to_string(&data)
+ }
+
+ pub fn diff_from_delta<T: DType>(
+ &self,
+ old: &str,
+ delta: &str,
+ ) -> Result<Vec<Diff<T>>, crate::errors::Error> {
+ let mut pointer = 0; // cursor to text
+ let mut diffs = vec![];
+
+ let old = T::from_str(old);
+ let delta = T::from_str(delta);
+
+ for token in delta.split(|&k| k == T::from_char('\t')) {
+ if token.is_empty() {
+ continue;
+ }
+
+ // Each token begins with a one character parameter which specifies the
+ // operation of this token (delete, insert, equality).
+ let opcode = token.first();
+ let param = &token[1..];
+
+ if opcode == Some(&T::from_char('+')) {
+ let param = T::percent_decode(param);
+ diffs.push(Diff::insert(&param));
+ } else if opcode == Some(&T::from_char('-')) || opcode == Some(&T::from_char('=')) {
+ let n = T::to_string(param)?
+ .parse::<isize>()
+ .map_err(|_| Error::Utf8Error)?;
+ if n < 0 {
+ return Err(crate::errors::Error::InvalidInput);
+ }
+
+ let n = n as usize;
+ let new_pointer = pointer + n;
+ if new_pointer > old.len() {
+ return Err(crate::errors::Error::InvalidInput);
+ }
+
+ let txt = &old[pointer..new_pointer];
+ pointer = new_pointer;
+
+ if opcode == Some(&T::from_char('=')) {
+ diffs.push(Diff::equal(txt))
+ } else {
+ diffs.push(Diff::delete(txt))
+ }
+ } else {
+ return Err(crate::errors::Error::InvalidInput);
+ }
+ }
+
+ if pointer != old.len() {
+ return Err(crate::errors::Error::InvalidInput);
+ }
+
+ Ok(diffs)
+ }
+
/// Applies a list of patches to text1. The first element of the return value is the newly patched text.
/// The second element is an array of true/false values indicating which of the patches were successfully applied.
/// [Note that this second element is not too useful since large patches may get broken up internally, resulting in a longer results list than the input with no way to figure out which patch succeeded or failed.
diff --git a/tests/test.rs b/tests/test.rs
index a05ee1c..4f86462 100644
--- a/tests/test.rs
+++ b/tests/test.rs
@@ -654,6 +654,8 @@ fn test_diff_main_compat() -> Result<(), Error> {
#[test]
fn test_diff_delta() -> Result<(), Error> {
+ let dmp = DiffMatchPatch::new();
+
let diffs = vec![
Diff::equal(b"jump"),
Diff::delete(b"s"),
@@ -664,19 +666,23 @@ fn test_diff_delta() -> Result<(), Error> {
Diff::equal(b" lazy"),
Diff::insert(b"old dog"),
];
- let txt_old = "jumps over the lazy".as_bytes();
- assert_eq!(txt_old, DiffMatchPatch::diff_text_old(&diffs));
+ let txt_old = "jumps over the lazy";
+ assert_eq!(txt_old.as_bytes(), DiffMatchPatch::diff_text_old(&diffs));
- let delta = DiffMatchPatch::to_delta(&diffs);
- assert_eq!("=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog".as_bytes(), &delta);
+ let delta = dmp.diff_to_delta(&diffs)?;
+ assert_eq!("=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", &delta);
// Convert delta string into a diff.
- assert_eq!(diffs, DiffMatchPatch::from_delta(txt_old, &delta)?);
+ assert_eq!(diffs, dmp.diff_from_delta(txt_old, &delta)?);
// Generates error (19 != 20).
- assert!(DiffMatchPatch::from_delta(&[txt_old, "+".as_bytes()].concat()[..], &delta).is_err());
+ assert!(dmp
+ .diff_from_delta::<Efficient>(&[txt_old, "+"].concat()[..], &delta)
+ .is_err());
// Generates error (19 != 18).
- assert!(DiffMatchPatch::from_delta(&txt_old[1..], &delta).is_err());
+ assert!(dmp
+ .diff_from_delta::<Efficient>(&txt_old[1..], &delta)
+ .is_err());
let diffs = vec![
Diff::equal(&"jump".chars().collect::<Vec<_>>()[..]),
@@ -688,24 +694,26 @@ fn test_diff_delta() -> Result<(), Error> {
Diff::equal(&" lazy".chars().collect::<Vec<_>>()[..]),
Diff::insert(&"old dog".chars().collect::<Vec<_>>()[..]),
];
- let txt_old = &"jumps over the lazy".chars().collect::<Vec<_>>()[..];
- assert_eq!(txt_old, DiffMatchPatch::diff_text_old(&diffs));
-
- let delta = DiffMatchPatch::to_delta(&diffs);
+ // let txt_old = &"jumps over the lazy".chars().collect::<Vec<_>>()[..];
assert_eq!(
- &"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog"
- .chars()
- .collect::<Vec<_>>()[..],
- &delta
+ &txt_old.chars().collect::<Vec<_>>()[..],
+ DiffMatchPatch::diff_text_old(&diffs)
);
+
+ let delta = dmp.diff_to_delta(&diffs)?;
+ assert_eq!("=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", &delta);
// Convert delta string into a diff.
- assert_eq!(diffs, DiffMatchPatch::from_delta(txt_old, &delta)?);
+ assert_eq!(diffs, dmp.diff_from_delta(txt_old, &delta)?);
// Generates error (19 != 20).
- assert!(DiffMatchPatch::from_delta(&[txt_old, &['+']].concat()[..], &delta).is_err());
+ assert!(dmp
+ .diff_from_delta::<Compat>(&[txt_old, "+"].concat()[..], &delta)
+ .is_err());
// Generates error (19 != 18).
- assert!(DiffMatchPatch::from_delta(&txt_old[1..], &delta).is_err());
+ assert!(dmp
+ .diff_from_delta::<Compat>(&txt_old[1..], &delta)
+ .is_err());
// Test deltas with special characters.
let diffs = vec![
@@ -715,11 +723,14 @@ fn test_diff_delta() -> Result<(), Error> {
];
let txt_old = DiffMatchPatch::diff_text_old(&diffs);
assert_eq!("\u{0680} \x00 \t %\u{0681} \x01 \n ^".as_bytes(), txt_old);
- let delta = DiffMatchPatch::to_delta(&diffs);
+ let delta = dmp.diff_to_delta(&diffs)?;
- assert_eq!(b"=8\t-8\t+%DA%82 %02 %5C %7C", &delta[..]);
+ assert_eq!("=8\t-8\t+%DA%82 %02 %5C %7C", &delta[..]);
// Convert delta string into a diff.
- assert_eq!(&diffs, &DiffMatchPatch::from_delta(&txt_old, &delta)?);
+ assert_eq!(
+ &diffs,
+ &dmp.diff_from_delta(std::str::from_utf8(&txt_old).unwrap(), &delta)?
+ );
let diffs = vec![
Diff::equal(&"\u{0680} \x00 \t %".chars().collect::<Vec<_>>()[..]),
@@ -733,14 +744,14 @@ fn test_diff_delta() -> Result<(), Error> {
.collect::<Vec<_>>()[..],
txt_old
);
- let delta = DiffMatchPatch::to_delta(&diffs);
+ let delta = dmp.diff_to_delta(&diffs)?;
+ assert_eq!("=7\t-7\t+%DA%82 %02 %5C %7C", &delta);
+ // Convert delta string into a diff.
assert_eq!(
- &"=7\t-7\t+%DA%82 %02 %5C %7C".chars().collect::<Vec<_>>()[..],
- &delta[..]
+ &diffs,
+ &dmp.diff_from_delta(&txt_old.iter().collect::<String>(), &delta)?
);
- // Convert delta string into a diff.
- assert_eq!(&diffs, &DiffMatchPatch::from_delta(&txt_old, &delta)?);
// Verify pool of unchanged characters.
let diffs = vec![Diff::insert(
@@ -763,14 +774,14 @@ fn test_diff_delta() -> Result<(), Error> {
&txt_new.iter().collect::<String>()
);
- let delta = DiffMatchPatch::to_delta(&diffs);
+ let delta = dmp.diff_to_delta(&diffs)?;
assert_eq!(
"+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ",
- &delta.iter().collect::<String>()
+ &delta
);
// Convert delta string into a diff.
- assert_eq!(diffs, DiffMatchPatch::from_delta(&[], &delta)?);
+ assert_eq!(diffs, dmp.diff_from_delta("", &delta)?);
Ok(())
}