my fork of dmp
WIP: Patch
Anubhab Bandyopadhyay 2024-08-08
parent d4834fd · commit 07bd29e
-rw-r--r--Cargo.lock14
-rw-r--r--Cargo.toml3
-rw-r--r--src/dmp.rs93
3 files changed, 71 insertions, 39 deletions
diff --git a/Cargo.lock b/Cargo.lock
index bb2d6a4..24fe235 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -171,6 +171,9 @@ name = "diff-match-patch-rs"
version = "0.1.0"
dependencies = [
"criterion",
+ "serde",
+ "serde_json",
+ "serde_repr",
]
[[package]]
@@ -412,6 +415,17 @@ dependencies = [
]
[[package]]
+name = "serde_repr"
+version = "0.1.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
name = "syn"
version = "2.0.72"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index 4a60b82..c1c4fe0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,9 +4,12 @@ version = "0.1.0"
edition = "2021"
[dependencies]
+serde = { version = "1", features = ["derive"] }
+serde_repr = { version = "0" }
[dev-dependencies]
criterion = "0"
+serde_json = "1"
[[bench]]
name = "prefix"
diff --git a/src/dmp.rs b/src/dmp.rs
index 62d4154..647ae22 100644
--- a/src/dmp.rs
+++ b/src/dmp.rs
@@ -1,10 +1,12 @@
use std::{
- collections::HashMap,
- time::{Duration, Instant},
+ collections::HashMap, time::{Duration, Instant}
};
+use serde::{Deserialize, Serialize};
+use serde_repr::{Deserialize_repr, Serialize_repr};
+
/// Enum representing the different ops of diff
-#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+#[derive(Debug, PartialEq, Eq, Clone, Copy, Serialize_repr, Deserialize_repr)]
#[repr(i8)]
pub enum Ops {
Delete = -1,
@@ -16,8 +18,11 @@ pub enum Ops {
/// (Ops::Delete, String::new("Hello")) means delete `Hello`
/// (Ops::Insert, String::new("Goodbye")) means add `Goodbye`
/// (Ops::Equal, String::new("World")) means keep world
-#[derive(Debug, PartialEq, Eq)]
-pub struct Diff(Ops, Vec<u8>);
+#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
+pub struct Diff(
+ Ops,
+ Vec<u8>
+);
impl Diff {
/// Create a new diff object
@@ -39,8 +44,16 @@ impl Diff {
}
}
+
+
pub struct Patch {}
+pub enum PatchInput<'a> {
+ Texts(&'a [u8], &'a [u8]),
+ Diffs(&'a [Diff]),
+ TextDiffs(&'a [u8], &'a [Diff])
+}
+
pub type Patches = Vec<Patch>;
pub struct DiffMatchPatch {
@@ -81,7 +94,7 @@ impl DiffMatchPatch {
.map_or(31536000_u64, |tout| if tout > 0 { tout } else { u64::MAX })
}
- fn main_internal<'a>(
+ fn diff_internal<'a>(
&self,
old_bytes: &'a [u8],
new_bytes: &'a [u8],
@@ -189,8 +202,8 @@ impl DiffMatchPatch {
let mid_common = half_match.common;
// Send both pairs off for separate processing.
- let mut diffs_a = self.main_internal(old_a, new_a, linemode, deadline);
- let mut diffs_b = self.main_internal(old_b, new_b, linemode, deadline);
+ let mut diffs_a = self.diff_internal(old_a, new_a, linemode, deadline);
+ let mut diffs_b = self.diff_internal(old_b, new_b, linemode, deadline);
// Merge the results
diffs_a.push(Diff::equal(mid_common));
@@ -275,7 +288,7 @@ impl DiffMatchPatch {
// This speedup can produce non-minimal diffs
fn line_mode<'a>(&self, old: &'a [u8], new: &'a [u8], deadline: Instant) -> Vec<Diff> {
let to_chars = Self::lines_to_chars(old, new);
- let mut diffs = self.main_internal(&to_chars.chars_old, &to_chars.chars_new, false, deadline);
+ let mut diffs = self.diff_internal(&to_chars.chars_old, &to_chars.chars_new, false, deadline);
// Convert diffs back to text
Self::chars_to_lines(&mut diffs[..], &to_chars.lines[..]);
@@ -321,7 +334,7 @@ impl DiffMatchPatch {
pointer = idxstart;
- let mut subdiffs = self.main_internal(&delete_data, &insert_data, false, deadline);
+ let mut subdiffs = self.diff_internal(&delete_data, &insert_data, false, deadline);
let subdifflen = subdiffs.len();
subdiffs.drain(..).rev().for_each(|d| {
diffs.insert(pointer, d);
@@ -496,8 +509,8 @@ impl DiffMatchPatch {
let new_b = &new[y..];
// Compute both diffs serially.
- let mut diffs_a = self.main_internal(old_a, new_a, false, deadline);
- let mut diffs_b = self.main_internal(old_b, new_b, false, deadline);
+ let mut diffs_a = self.diff_internal(old_a, new_a, false, deadline);
+ let mut diffs_b = self.diff_internal(old_b, new_b, false, deadline);
diffs_a.append(&mut diffs_b);
@@ -963,7 +976,6 @@ impl DiffMatchPatch {
} else {
diffs.insert(0, Diff::equal(&insert_data[..commonlen]));
pointer += 1;
- difflen = diffs.len();
}
insert_data = insert_data[commonlen..].to_vec();
delete_data = delete_data[commonlen..].to_vec();
@@ -1201,16 +1213,22 @@ impl DiffMatchPatch {
/// Returns:
/// Vec of changes (Diff).
pub fn diff_main<'a>(&self, old: &'a str, new: &'a str) -> Vec<Diff> {
- let deadline = Instant::now()
- .checked_add(Duration::from_millis(self.timeout()))
- .unwrap();
+ let deadline = if let Some(i) = Instant::now()
+ .checked_add(Duration::from_millis(self.timeout())) {
+ i
+ } else {
+ todo!()
+ };
- self.main_internal(old.as_bytes(), new.as_bytes(), self.checklines(), deadline)
+ self.diff_internal(old.as_bytes(), new.as_bytes(), self.checklines(), deadline)
}
- // Reduce the number of edits by eliminating semantically trivial equalities
- pub fn diff_cleanup_semantic(diffs: &mut [Diff]) {
- todo!()
+ /// A diff of two unrelated texts can be filled with coincidental matches.
+ /// For example, the diff of "mouse" and "sofas" is [(-1, "m"), (1, "s"), (0, "o"), (-1, "u"), (1, "fa"), (0, "s"), (-1, "e")].
+ /// While this is the optimum diff, it is difficult for humans to understand. Semantic cleanup rewrites the diff, expanding it into a more intelligible format.
+ /// The above example would become: [(-1, "mouse"), (1, "sofas")]. If a diff is to be human-readable, it should be passed to diff_cleanupSemantic.
+ pub fn diff_cleanup_semantic(diffs: &mut Vec<Diff>) {
+ Self::cleanup_semantic(diffs)
}
pub fn diff_cleanup_efficiency(diffs: &mut [Diff]) {
@@ -1221,6 +1239,7 @@ impl DiffMatchPatch {
todo!()
}
+ /// Takes a diff array and returns a pretty HTML sequence. This function is mainly intended as an example from which to write ones own display functions.
pub fn diff_pretty_html(diffs: &[Diff]) -> String {
todo!()
}
@@ -1229,11 +1248,7 @@ impl DiffMatchPatch {
todo!()
}
- pub fn patch_make_text_text(text1: &str, text2: &str) -> Patches {
- todo!()
- }
-
- pub fn patch_make_diff(diffs: &[Diff]) -> Patches {
+ pub fn patch_make(input: PatchInput) -> Patches {
todo!()
}
@@ -2203,18 +2218,18 @@ mod tests {
(String::from_utf8(txt1).unwrap(), String::from_utf8(txt2).unwrap())
}
- // function diff_rebuildtexts(diffs) {
- //
- // var text1 = '';
- // var text2 = '';
- // for (var x = 0; x < diffs.length; x++) {
- // if (diffs[x][0] != DIFF_INSERT) {
- // text1 += diffs[x][1];
- // }
- // if (diffs[x][0] != DIFF_DELETE) {
- // text2 += diffs[x][1];
- // }
- // }
- // return [text1, text2];
- // }
+
+ #[test]
+ fn test_serde() {
+ let diffs = vec![
+ Diff::delete(b"a"),
+ Diff::insert("\u{0680}".as_bytes()),
+ Diff::equal(b"x"),
+ Diff::delete(b"\t"),
+ Diff::insert(b"\0")
+ ];
+
+ let serialized = serde_json::to_string(&diffs).unwrap();
+ println!("{serialized}");
+ }
}