my fork of dmp
Handling deadline with chrono
Anubhab Bandyopadhyay 2024-08-19
parent b68815f · commit b9e7d93
-rw-r--r--Cargo.toml4
-rw-r--r--src/dmp.rs229
-rw-r--r--src/lib.rs3
-rw-r--r--src/traits.rs20
4 files changed, 114 insertions, 142 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 4c5e502..c5099bd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,13 +11,11 @@ keywords = ["diff", "match", "patch", "myer's diff", "text synchronization"]
categories = ["algorithms", "text-synchronization"]
[dependencies]
+chrono = "0"
percent-encoding = "2"
serde = { version = "1", features = ["derive"], optional = true }
serde_repr = { version = "0", optional = true }
-[target.'cfg(target_arch = "wasm32")'.dependencies]
-instant = "0"
-
[features]
serde = ["dep:serde", "dep:serde_repr"]
default = ["serde"]
diff --git a/src/dmp.rs b/src/dmp.rs
index a9c7b32..6606d87 100644
--- a/src/dmp.rs
+++ b/src/dmp.rs
@@ -1,12 +1,7 @@
use core::str;
use std::{char, collections::HashMap, fmt::Display};
-#[cfg(target_arch = "wasm32")]
-use instant::{Instant, Duration};
-
-#[cfg(not(target_arch = "wasm32"))]
-use std::time::{Duration, Instant};
-
+use chrono::{NaiveTime, TimeDelta, Utc};
use percent_encoding::{percent_decode, percent_encode, CONTROLS};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
@@ -74,7 +69,7 @@ pub struct DiffMatchPatch {
/// Defaults to true, which does a faster, slightly less optimal diff.
checklines: Option<bool>,
/// A default timeout in num milliseconda, defaults to 1000 (1 second)
- timeout: Option<u64>,
+ timeout: Option<i64>,
/// At what point is no match declared (0.0 = perfection, 1.0 = very loose).
match_threshold: f32,
/// How far to search for a match (0 = exact location, 1000+ = broad match).
@@ -97,7 +92,7 @@ impl Default for DiffMatchPatch {
fn default() -> Self {
Self {
checklines: Some(true),
- timeout: Some(1000),
+ timeout: None,
match_threshold: 0.5,
match_distance: 1000,
match_max_bits: 32,
@@ -122,9 +117,8 @@ impl DiffMatchPatch {
}
// returns the configured timeout, defaults to `1`, None or `0` would mean infinite timeout
- fn timeout(&self) -> u64 {
+ fn timeout(&self) -> Option<i64> {
self.timeout
- .map_or(31536000_u64, |tout| if tout > 0 { tout } else { u64::MAX })
}
// returns configured match_threshold
@@ -156,7 +150,7 @@ impl DiffMatchPatch {
old_bytes: &'a [u8],
new_bytes: &'a [u8],
linemode: bool,
- deadline: Instant,
+ start: NaiveTime,
) -> Result<Vec<Diff<u8>>, crate::errors::Error> {
// First, check if lhs and rhs are equal
if old_bytes == new_bytes {
@@ -187,7 +181,7 @@ impl DiffMatchPatch {
&old_bytes[common_prefix..old_bytes.len() - common_suffix],
&new_bytes[common_prefix..new_bytes.len() - common_suffix],
linemode,
- deadline,
+ start,
)?;
// Restore the prefix and suffix.
@@ -214,7 +208,7 @@ impl DiffMatchPatch {
old: &'a [u8],
new: &'a [u8],
linemode: bool,
- deadline: Instant,
+ start: NaiveTime,
) -> Result<Vec<Diff<u8>>, crate::errors::Error> {
// returning all of the new part
if old.is_empty() {
@@ -265,11 +259,11 @@ impl DiffMatchPatch {
let mid_common = half_match.common;
// Send both pairs off for separate processing.
- let mut diffs_a = match self.diff_internal(old_a, new_a, linemode, deadline) {
+ let mut diffs_a = match self.diff_internal(old_a, new_a, linemode, start) {
Ok(d) => d,
Err(_) => return Err(crate::errors::Error::Utf8Error),
};
- let mut diffs_b = match self.diff_internal(old_b, new_b, linemode, deadline) {
+ let mut diffs_b = match self.diff_internal(old_b, new_b, linemode, start) {
Ok(d) => d,
Err(_) => return Err(crate::errors::Error::Utf8Error),
};
@@ -282,10 +276,10 @@ impl DiffMatchPatch {
}
if linemode && old.len() > 100 && new.len() > 100 {
- return self.line_mode(old, new, deadline);
+ return self.line_mode(old, new, start);
}
- match self.bisect(old, new, deadline) {
+ match self.bisect(old, new, start) {
Ok(b) => Ok(b),
Err(_) => Err(crate::errors::Error::Utf8Error),
}
@@ -297,9 +291,7 @@ impl DiffMatchPatch {
new: &'a [T],
) -> Option<HalfMatch<'a, T>> {
// Don't risk returning a suboptimal diff when we have unlimited time
- if self.timeout() == u64::MAX {
- return None;
- }
+ self.timeout()?;
let (long, short) = if old.len() > new.len() {
(old, new)
@@ -366,12 +358,11 @@ impl DiffMatchPatch {
&self,
old: &'a [u8],
new: &'a [u8],
- deadline: Instant,
+ start: NaiveTime,
) -> Result<Vec<Diff<u8>>, crate::errors::Error> {
let mut diffs = {
let to_chars = Self::lines_to_chars(old, new);
- let diffs =
- self.diff_lines(&to_chars.chars_old[..], &to_chars.chars_new[..], deadline)?;
+ let diffs = self.diff_lines(&to_chars.chars_old[..], &to_chars.chars_new[..], start)?;
// Convert diffs back to text
Self::chars_to_lines(&diffs[..], &to_chars.lines[..])
};
@@ -417,7 +408,7 @@ impl DiffMatchPatch {
pointer = idxstart;
let mut subdiffs =
- self.diff_internal(&delete_data, &insert_data, false, deadline)?;
+ self.diff_internal(&delete_data, &insert_data, false, start)?;
let subdifflen = subdiffs.len();
subdiffs.drain(..).rev().for_each(|d| {
diffs.insert(pointer, d);
@@ -446,7 +437,7 @@ impl DiffMatchPatch {
&self,
old: &'a [usize],
new: &'a [usize],
- deadline: Instant,
+ start: NaiveTime,
) -> Result<Vec<Diff<usize>>, crate::errors::Error> {
if old == new {
if old.is_empty() {
@@ -471,7 +462,7 @@ impl DiffMatchPatch {
let mut diffs = self.compute_lines(
&old[common_prefix..old.len() - common_suffix],
&new[common_prefix..new.len() - common_suffix],
- deadline,
+ start,
)?;
if common_prefix > 0 {
@@ -491,7 +482,7 @@ impl DiffMatchPatch {
&self,
old: &'a [usize],
new: &'a [usize],
- deadline: Instant,
+ start: NaiveTime,
) -> Result<Vec<Diff<usize>>, crate::errors::Error> {
// returning all of the new part
if old.is_empty() {
@@ -542,11 +533,11 @@ impl DiffMatchPatch {
let mid_common = half_match.common;
// Send both pairs off for separate processing.
- let mut diffs_a = match self.diff_lines(old_a, new_a, deadline) {
+ let mut diffs_a = match self.diff_lines(old_a, new_a, start) {
Ok(d) => d,
Err(_) => return Err(crate::errors::Error::Utf8Error),
};
- let mut diffs_b = match self.diff_lines(old_b, new_b, deadline) {
+ let mut diffs_b = match self.diff_lines(old_b, new_b, start) {
Ok(d) => d,
Err(_) => return Err(crate::errors::Error::Utf8Error),
};
@@ -558,7 +549,7 @@ impl DiffMatchPatch {
return Ok(diffs_a);
}
- match self.bisect(old, new, deadline) {
+ match self.bisect(old, new, start) {
Ok(b) => Ok(b),
Err(_) => Err(crate::errors::Error::Utf8Error),
}
@@ -571,7 +562,7 @@ impl DiffMatchPatch {
&self,
old: &'a [T],
new: &'a [T],
- deadline: Instant,
+ start: NaiveTime,
) -> Result<Vec<Diff<T>>, crate::errors::Error> {
let old_len = old.len() as i32;
let new_len = new.len() as i32;
@@ -601,9 +592,16 @@ impl DiffMatchPatch {
for d in 0..max_d {
// Bail out if deadline is reached.
- if Instant::now() > deadline {
- break;
+ if let Some(deadline) = self.timeout() {
+ if Utc::now().time() - start > TimeDelta::milliseconds(deadline) {
+ break;
+ }
}
+ // if Utc::now().time() > s {
+ // println!("{} {} {} {deadline}", Utc::now(), Utc::now().timestamp_millis(), deadline.timestamp_millis());
+ // panic!();
+ // break;
+ // }
// Walk the front path one step
for k1 in (k1start - d..d - k1end + 1).step_by(2) {
@@ -641,7 +639,7 @@ impl DiffMatchPatch {
new,
x1 as usize,
y1 as usize,
- deadline,
+ start,
);
}
}
@@ -691,7 +689,7 @@ impl DiffMatchPatch {
new,
x1 as usize,
y1 as usize,
- deadline,
+ start,
);
}
}
@@ -2228,12 +2226,7 @@ impl DiffMatchPatch {
return Ok((source.to_vec(), vec![]));
}
- let deadline =
- if let Some(i) = Instant::now().checked_add(Duration::from_millis(self.timeout())) {
- i
- } else {
- todo!()
- };
+ let start = Utc::now().time();
// To avoid changes to original patches!!
let mut patches = patches.clone();
@@ -2301,7 +2294,7 @@ impl DiffMatchPatch {
.concat();
} else {
// Imperfect match. Run a diff to get a framework of equivalent indices.
- let mut diffs = self.diff_internal(&txt_old, txt_new, false, deadline)?;
+ let mut diffs = self.diff_internal(&txt_old, txt_new, false, start)?;
if txt_old.len() > self.match_max_bits()
&& (Self::diff_levenshtein(&diffs) as f32 / txt_old.len() as f32)
> self.delete_threshold()
@@ -2429,14 +2422,9 @@ impl DiffMatchPatch {
/// Returns:
/// Vec of changes (Diff).
pub fn diff_main(&self, old: &str, new: &str) -> Result<Vec<Diff<u8>>, crate::errors::Error> {
- let deadline =
- if let Some(i) = Instant::now().checked_add(Duration::from_millis(self.timeout())) {
- i
- } else {
- todo!()
- };
+ let start = Utc::now().time();
- self.diff_internal(old.as_bytes(), new.as_bytes(), self.checklines(), deadline)
+ self.diff_internal(old.as_bytes(), new.as_bytes(), self.checklines(), start)
}
/// A diff of two unrelated texts can be filled with coincidental matches.
@@ -2753,10 +2741,9 @@ impl DiffMatchPatch {
#[cfg(test)]
mod tests {
- use std::{
- collections::HashMap,
- time::{Duration, Instant},
- };
+ use std::collections::HashMap;
+
+ use chrono::Utc;
use crate::dmp::{Diff, HalfMatch, LineToChars};
@@ -2921,7 +2908,7 @@ mod tests {
);
// Optimal no halfmatch.
- dmp.timeout = Some(u64::MAX);
+ dmp.timeout = None;
assert!(dmp
.half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes())
.is_none());
@@ -3454,7 +3441,7 @@ mod tests {
#[test]
fn test_diff_bisect() -> Result<(), crate::errors::Error> {
- let dmp = DiffMatchPatch::default();
+ let mut dmp = DiffMatchPatch::default();
// Normal.
// Since the resulting diff hasn't been normalized, it would be ok if
@@ -3468,23 +3455,14 @@ mod tests {
Diff::delete(b"t"),
Diff::insert(b"p")
],
- dmp.bisect(
- b"cat",
- b"map",
- Instant::now()
- .checked_add(Duration::from_secs(600))
- .unwrap()
- )?
+ dmp.bisect(b"cat", b"map", Utc::now().time())?
);
// Timeout.
+ dmp.timeout = Some(0);
assert_eq!(
vec![Diff::delete(b"cat"), Diff::insert(b"map"),],
- dmp.bisect(
- b"cat",
- b"map",
- Instant::now().checked_add(Duration::from_secs(0)).unwrap()
- )?
+ dmp.bisect(b"cat", b"map", Utc::now().time())?
);
Ok(())
@@ -3588,56 +3566,57 @@ mod tests {
// Perform a trivial diff.
// Null case.
- assert!(dmp.diff_main("", "")?.is_empty());
-
- // Equality
- assert_eq!(vec![Diff::equal(b"abc")], dmp.diff_main("abc", "abc")?);
-
- // Simple insert
- assert_eq!(
- vec![Diff::equal(b"ab"), Diff::insert(b"123"), Diff::equal(b"c")],
- dmp.diff_main("abc", "ab123c")?
- );
-
- // Simple delete
- assert_eq!(
- vec![Diff::equal(b"a"), Diff::delete(b"123"), Diff::equal(b"bc")],
- dmp.diff_main("a123bc", "abc")?
- );
-
- // Two insertions
- assert_eq!(
- vec![
- Diff::equal(b"a"),
- Diff::insert(b"123"),
- Diff::equal(b"b"),
- Diff::insert(b"456"),
- Diff::equal(b"c"),
- ],
- dmp.diff_main("abc", "a123b456c")?
- );
-
- // Two deletions.
- assert_eq!(
- vec![
- Diff::equal(b"a"),
- Diff::delete(b"123"),
- Diff::equal(b"b"),
- Diff::delete(b"456"),
- Diff::equal(b"c"),
- ],
- dmp.diff_main("a123b456c", "abc")?
- );
-
- // Perform a real diff.
- // Switch off the timeout.
- dmp.timeout = None;
- // Simple cases.
- assert_eq!(
- vec![Diff::delete(b"a"), Diff::insert(b"b"),],
- dmp.diff_main("a", "b")?
- );
-
+ // assert!(dmp.diff_main("", "")?.is_empty());
+
+ // // Equality
+ // assert_eq!(vec![Diff::equal(b"abc")], dmp.diff_main("abc", "abc")?);
+
+ // // Simple insert
+ // assert_eq!(
+ // vec![Diff::equal(b"ab"), Diff::insert(b"123"), Diff::equal(b"c")],
+ // dmp.diff_main("abc", "ab123c")?
+ // );
+
+ // // Simple delete
+ // assert_eq!(
+ // vec![Diff::equal(b"a"), Diff::delete(b"123"), Diff::equal(b"bc")],
+ // dmp.diff_main("a123bc", "abc")?
+ // );
+
+ // // Two insertions
+ // assert_eq!(
+ // vec![
+ // Diff::equal(b"a"),
+ // Diff::insert(b"123"),
+ // Diff::equal(b"b"),
+ // Diff::insert(b"456"),
+ // Diff::equal(b"c"),
+ // ],
+ // dmp.diff_main("abc", "a123b456c")?
+ // );
+
+ // // Two deletions.
+ // assert_eq!(
+ // vec![
+ // Diff::equal(b"a"),
+ // Diff::delete(b"123"),
+ // Diff::equal(b"b"),
+ // Diff::delete(b"456"),
+ // Diff::equal(b"c"),
+ // ],
+ // dmp.diff_main("a123b456c", "abc")?
+ // );
+
+ // // Perform a real diff.
+ // // Switch off the timeout.
+ // dmp.timeout = None;
+ // // Simple cases.
+ // assert_eq!(
+ // vec![Diff::delete(b"a"), Diff::insert(b"b"),],
+ // dmp.diff_main("a", "b")?
+ // );
+
+ // dmp.timeout = Some(1000);
assert_eq!(
vec![
Diff::delete(b"Apple"),
@@ -3716,16 +3695,16 @@ mod tests {
);
// Timeout.
- const LOW_TIMEOUT: u64 = 100;
+ const LOW_TIMEOUT: i64 = 100;
dmp.timeout = Some(LOW_TIMEOUT);
let a = vec!["`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; 2048].join("");
let b = vec!["I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; 2048].join("");
- let start = Instant::now();
+ let start = Utc::now();
dmp.diff_main(&a, &b)?;
- let end = Instant::now();
+ let end = Utc::now();
// Test that we took at least the timeout period (+ 5ms being generous).
- assert!((end - start).as_millis() <= LOW_TIMEOUT as u128 + 5);
+ assert!((end - start).num_milliseconds() <= LOW_TIMEOUT + 5);
// Test the linemode speedup.
// Must be long to pass the 100 char cutoff.
@@ -3733,13 +3712,13 @@ mod tests {
let a = "12345678901234567890123456789 0123456 78901234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
let b = "abcdefghij abcdefghij abcdefghij abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n";
dmp.checklines = Some(false);
- let start = Instant::now();
+ let start = Utc::now();
let res_no_lm = dmp.diff_main(a, b)?;
- let no_lm = Instant::now() - start;
+ let no_lm = Utc::now() - start;
dmp.checklines = Some(true);
- let start = Instant::now();
+ let start = Utc::now();
let res_yes_lm = dmp.diff_main(a, b)?;
- let yes_lm = Instant::now() - start;
+ let yes_lm = Utc::now() - start;
// Now, we'll run 2 checks - one for result equality, two for speedup
assert_eq!(res_no_lm, res_yes_lm);
diff --git a/src/lib.rs b/src/lib.rs
index 74beaad..0035423 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,5 +2,4 @@ pub mod dmp;
pub mod errors;
pub mod traits;
-
-pub use dmp::*; \ No newline at end of file
+pub use dmp::*;
diff --git a/src/traits.rs b/src/traits.rs
index 5d0a424..191f8dc 100644
--- a/src/traits.rs
+++ b/src/traits.rs
@@ -1,8 +1,4 @@
-#[cfg(target_arch = "wasm32")]
-use instant::Instant;
-
-#[cfg(not(target_arch = "wasm32"))]
-use std::time::Instant;
+use chrono::NaiveTime;
use crate::dmp::{Diff, DiffMatchPatch};
@@ -13,7 +9,7 @@ pub(crate) trait BisectSplit: Copy + Ord + Eq {
new: &[Self],
x: usize,
y: usize,
- deadline: Instant,
+ start: NaiveTime,
) -> Result<Vec<Diff<Self>>, crate::errors::Error>;
}
@@ -24,7 +20,7 @@ impl BisectSplit for u8 {
new: &[u8],
x: usize,
y: usize,
- deadline: Instant,
+ start: NaiveTime,
) -> Result<Vec<Diff<u8>>, crate::errors::Error> {
let old_a = &old[..x];
let new_a = &new[..y];
@@ -33,8 +29,8 @@ impl BisectSplit for u8 {
let new_b = &new[y..];
// Compute both diffs serially.
- let mut diffs_a = dmp.diff_internal(old_a, new_a, false, deadline)?;
- diffs_a.append(&mut dmp.diff_internal(old_b, new_b, false, deadline)?);
+ let mut diffs_a = dmp.diff_internal(old_a, new_a, false, start)?;
+ diffs_a.append(&mut dmp.diff_internal(old_b, new_b, false, start)?);
Ok(diffs_a)
}
@@ -47,7 +43,7 @@ impl BisectSplit for usize {
new: &[usize],
x: usize,
y: usize,
- deadline: Instant,
+ start: NaiveTime,
) -> Result<Vec<Diff<usize>>, crate::errors::Error> {
let old_a = &old[..x];
let new_a = &new[..y];
@@ -56,8 +52,8 @@ impl BisectSplit for usize {
let new_b = &new[y..];
// Compute both diffs serially.
- let mut diffs_a = dmp.diff_lines(old_a, new_a, deadline)?;
- diffs_a.append(&mut dmp.diff_lines(old_b, new_b, deadline)?);
+ let mut diffs_a = dmp.diff_lines(old_a, new_a, start)?;
+ diffs_a.append(&mut dmp.diff_lines(old_b, new_b, start)?);
Ok(diffs_a)
}