use core::str; use std::{char, collections::HashMap, fmt::Display}; #[cfg(target_arch = "wasm32")] use chrono::{NaiveTime, TimeDelta, Utc}; use serde_derive::{Deserialize, Serialize}; #[cfg(not(target_arch = "wasm32"))] use std::time::{Duration, Instant}; #[cfg(target_arch = "wasm32")] pub(crate) type Time = NaiveTime; #[cfg(not(target_arch = "wasm32"))] pub(crate) type Time = Instant; use crate::{errors::Error, html::HtmlConfig, DType, PatchInput}; /// Enum representing the different ops of diff #[derive(Debug, PartialEq, Eq, Clone, Copy, Serialize, Deserialize)] pub enum Ops { Delete = -1, Equal, Insert, } /// A structure representing a diff /// (Ops::Delete, String::new("Hello")) means delete `Hello` /// (Ops::Insert, String::new("Goodbye")) means add `Goodbye` /// (Ops::Equal, String::new("World")) means keep world #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct Diff(pub Ops, pub Vec); impl Display for Diff { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, "({:?}, {})", self.op(), std::str::from_utf8(self.data()).unwrap() ) } } impl Display for Diff { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, "({:?}, {})", self.op(), self.data().iter().collect::() ) } } impl Diff { /// Create a new diff object pub fn new(op: Ops, data: &[T]) -> Self { Self(op, data.to_vec()) } /// helper functions to create op::Delete pub fn delete(data: &[T]) -> Self { Self::new(Ops::Delete, data) } /// helper functions to create op::Insert pub fn insert(data: &[T]) -> Self { Self::new(Ops::Insert, data) } /// helper functions to create op::Equal pub fn equal(data: &[T]) -> Self { Self::new(Ops::Equal, data) } /// returns the operation of the current diff pub fn op(&self) -> Ops { self.0 } /// returns slice of the data pub fn data(&self) -> &[T] { &self.1[..] } // returns length of data pub fn size(&self) -> usize { self.1.len() } } pub struct DiffMatchPatch { /// a speedup flag, If present and false, then don't run /// a line-level diff first to identify the changed areas. /// Defaults to true, which does a faster, slightly less optimal diff. pub checklines: bool, /// A default timeout in num milliseconda, defaults to 1000 (1 second) pub timeout: Option, // Cost of an empty edit operation in terms of edit characters. Defaults to 4 pub edit_cost: usize, /// At what point is no match declared (0.0 = perfection, 1.0 = very loose). pub match_threshold: f32, /// How far to search for a match (0 = exact location, 1000+ = broad match). /// A match this many characters away from the expected location will add /// 1.0 to the score (0.0 is a perfect match). /// int Match_Distance; pub match_distance: usize, /// The number of bits in an int. pub match_max_bits: usize, /// When deleting a large block of text (over ~64 characters), how close does /// the contents have to match the expected contents. (0.0 = perfection, /// 1.0 = very loose). Note that `match_threshold` controls how closely the /// end points of a delete need to match. pub delete_threshold: f32, /// Chunk size for context length. pub patch_margin: u8, } impl Default for DiffMatchPatch { fn default() -> Self { Self { checklines: true, timeout: Some(1000), edit_cost: 4, match_threshold: 0.5, match_distance: 1000, match_max_bits: 32, patch_margin: 4, delete_threshold: 0.5, } } } #[derive(Debug, PartialEq, Eq)] struct HalfMatch<'a, T: DType> { prefix_long: &'a [T], suffix_long: &'a [T], prefix_short: &'a [T], suffix_short: &'a [T], common: &'a [T], } impl DiffMatchPatch { fn checklines(&self) -> bool { self.checklines } /// Enables or disables `line mode` optimization. /// When enabled, the diff algorithm tries to find the `lines` that have changes and apply diff on the same /// /// This optimization makes sense for text with many lines (~100s), defaults to `true` pub fn set_checklines(&mut self, checklines: bool) { self.checklines = checklines; } // returns the configured timeout, defaults to `1`, None or `0` would mean infinite timeout fn timeout(&self) -> Option { self.timeout.map(|t| t as i64) } // returns the current edit cost saved fn edit_cost(&self) -> usize { self.edit_cost } /// Update edit cost pub fn set_edit_cost(&mut self, edit_cost: usize) { self.edit_cost = edit_cost; } /// Set a timeout in number of `milliseconds`. This creates a cutoff for internal `recursive` function calls /// /// Defaults to `1000ms` (1 second) /// /// None means `infinite time` pub fn set_timeout(&mut self, tout: Option) { self.timeout = tout; } /// creates a deadline from the given timeout #[cfg(target_arch = "wasm32")] pub fn deadline(&self) -> Option