my fork of dmp
Diffstat (limited to 'src/traits.rs')
-rw-r--r--src/traits.rs200
1 files changed, 197 insertions, 3 deletions
diff --git a/src/traits.rs b/src/traits.rs
index 8a5c9bb..3818837 100644
--- a/src/traits.rs
+++ b/src/traits.rs
@@ -1,8 +1,30 @@
+use std::hash::Hash;
+
use chrono::NaiveTime;
+use percent_encoding::{percent_decode, AsciiSet, CONTROLS};
use crate::dmp::{Diff, DiffMatchPatch};
-pub trait BisectSplit: Copy + Ord + Eq {
+pub type Efficient = u8;
+pub type Compat = char;
+
+// Appending controls to ensure exact same encoding as cpp variant
+const ENCODE_SET: &AsciiSet = &CONTROLS
+ .add(b'"')
+ .add(b'<')
+ .add(b'>')
+ .add(b'`')
+ .add(b'{')
+ .add(b'}')
+ .add(b'%')
+ .add(b'[')
+ .add(b'\\')
+ .add(b']')
+ .add(b'^')
+ .add(b'|');
+
+pub trait DType: Copy + Ord + Eq + Hash {
+ // fn differ(dmp: &DiffMatchPatch, txt_old: &str, txt_new: &str) -> Result<Vec<Diff<Self>>, crate::errors::Error>;
fn bisect_split(
dmp: &DiffMatchPatch,
old: &[Self],
@@ -11,9 +33,20 @@ pub trait BisectSplit: Copy + Ord + Eq {
y: usize,
deadline: Option<NaiveTime>,
) -> Result<Vec<Diff<Self>>, crate::errors::Error>;
+
+ fn from_char(c: char) -> Self;
+ fn as_char(&self) -> Option<char>;
+ fn from_str(str: &str) -> Vec<Self>;
+ fn to_string(data: &[Self]) -> Result<String, crate::Error>;
+
+ fn is_linebreak_end(input: &[Self]) -> bool;
+ fn is_linebreak_start(input: &[Self]) -> bool;
+
+ fn percent_encode(input: &[Self]) -> Vec<Self>;
+ fn percent_decode(input: &[Self]) -> Vec<Self>;
}
-impl BisectSplit for u8 {
+impl DType for u8 {
fn bisect_split(
dmp: &DiffMatchPatch,
old: &[u8],
@@ -34,9 +67,135 @@ impl BisectSplit for u8 {
Ok(diffs_a)
}
+
+ fn from_char(c: char) -> Self {
+ c as u8
+ }
+
+ fn as_char(&self) -> Option<char> {
+ Some(*self as char)
+ }
+
+ fn from_str(str: &str) -> Vec<Self> {
+ str.as_bytes().to_vec()
+ }
+
+ #[inline]
+ fn to_string(data: &[Self]) -> Result<String, crate::Error> {
+ std::str::from_utf8(data)
+ .map_err(|_| crate::Error::Utf8Error)
+ .map(|s| s.to_string())
+ }
+
+ #[inline]
+ fn is_linebreak_end(input: &[Self]) -> bool {
+ input.ends_with(b"\n\n") || input.ends_with(b"\n\r\n")
+ }
+
+ #[inline]
+ fn is_linebreak_start(input: &[Self]) -> bool {
+ input.starts_with(b"\r\n\n")
+ || input.starts_with(b"\r\n\r\n")
+ || input.starts_with(b"\n\r\n")
+ || input.starts_with(b"\n\n")
+ }
+
+ #[inline]
+ fn percent_encode(input: &[Self]) -> Vec<Self> {
+ percent_encoding::percent_encode(input, ENCODE_SET)
+ .collect::<String>()
+ .as_bytes()
+ .to_vec()
+ }
+
+ #[inline]
+ fn percent_decode(input: &[Self]) -> Vec<Self> {
+ percent_decode(input).collect()
+ }
+}
+
+impl DType for char {
+ fn bisect_split(
+ dmp: &DiffMatchPatch,
+ old: &[char],
+ new: &[char],
+ x: usize,
+ y: usize,
+ deadline: Option<NaiveTime>,
+ ) -> Result<Vec<Diff<char>>, crate::errors::Error> {
+ let old_a = &old[..x];
+ let new_a = &new[..y];
+
+ let old_b = &old[x..];
+ let new_b = &new[y..];
+
+ // Compute both diffs serially.
+ let mut diffs_a = dmp.diff_internal(old_a, new_a, false, deadline)?;
+ diffs_a.append(&mut dmp.diff_internal(old_b, new_b, false, deadline)?);
+
+ Ok(diffs_a)
+ }
+
+ fn from_char(c: char) -> Self {
+ c
+ }
+
+ fn as_char(&self) -> Option<char> {
+ Some(*self)
+ }
+
+ fn from_str(str: &str) -> Vec<Self> {
+ str.chars().collect::<Vec<_>>()
+ }
+
+ #[inline]
+ fn to_string(data: &[Self]) -> Result<String, crate::Error> {
+ Ok(data.iter().collect::<String>())
+ }
+
+ #[inline]
+ fn is_linebreak_end(input: &[Self]) -> bool {
+ input.ends_with(&['\n', '\n']) || input.ends_with(&['\n', '\r', '\n'])
+ }
+
+ #[inline]
+ fn is_linebreak_start(input: &[Self]) -> bool {
+ input.starts_with(&['\r', '\n', '\n'])
+ || input.starts_with(&['\r', '\n', '\r', '\n'])
+ || input.starts_with(&['\n', '\r', '\n'])
+ || input.starts_with(&['\n', '\n'])
+ }
+
+ #[inline]
+ fn percent_encode(input: &[Self]) -> Vec<Self> {
+ let d = input
+ .iter()
+ .map(|c| {
+ let mut b = vec![0; c.len_utf8()];
+ c.encode_utf8(&mut b);
+
+ b
+ })
+ .collect::<Vec<_>>()
+ .concat();
+
+ let encoded = percent_encoding::percent_encode(&d[..], ENCODE_SET).collect::<String>();
+
+ Self::from_str(&encoded)
+ }
+
+ #[inline]
+ fn percent_decode(input: &[Self]) -> Vec<Self> {
+ let ip = input.iter().collect::<String>();
+ percent_decode(ip.as_bytes())
+ .decode_utf8()
+ .unwrap()
+ .chars()
+ .collect()
+ }
}
-impl BisectSplit for usize {
+impl DType for usize {
fn bisect_split(
dmp: &DiffMatchPatch,
old: &[usize],
@@ -57,4 +216,39 @@ impl BisectSplit for usize {
Ok(diffs_a)
}
+
+ fn from_char(c: char) -> Self {
+ (c as u8) as usize
+ }
+
+ fn as_char(&self) -> Option<char> {
+ char::from_digit(*self as u32, 10)
+ }
+
+ fn from_str(_: &str) -> Vec<Self> {
+ unimplemented!()
+ }
+
+ fn to_string(_: &[Self]) -> Result<String, crate::Error> {
+ unimplemented!()
+ }
+
+ fn is_linebreak_end(_: &[Self]) -> bool {
+ unimplemented!()
+ }
+
+ #[inline]
+ fn is_linebreak_start(_: &[Self]) -> bool {
+ unimplemented!()
+ }
+
+ #[inline]
+ fn percent_encode(_: &[Self]) -> Vec<Self> {
+ unimplemented!()
+ }
+
+ #[inline]
+ fn percent_decode(_: &[Self]) -> Vec<Self> {
+ unimplemented!()
+ }
}