my fork of dmp
Diffstat (limited to 'src/dmp.rs')
-rw-r--r-- src/dmp.rs 91
1 file changed, 46 insertions, 45 deletions
diff --git a/src/dmp.rs b/src/dmp.rs
index fef0ebb..223e232 100644
--- a/src/dmp.rs
+++ b/src/dmp.rs
@@ -449,10 +449,10 @@ impl DiffMatchPatch {
// Quick line-level diff on both strings, then rediff the parts for greater accuracy
// This speedup can produce non-minimal diffs
- fn line_mode<'a, T: DType>(
+ fn line_mode<T: DType>(
&self,
- old: &'a [T],
- new: &'a [T],
+ old: &[T],
+ new: &[T],
deadline: Option<Time>,
) -> Result<Vec<Diff<T>>, crate::errors::Error> {
let mut diffs = {
@@ -530,10 +530,10 @@ impl DiffMatchPatch {
Ok(diffs)
}
- pub(crate) fn diff_lines<'a>(
+ pub(crate) fn diff_lines(
&self,
- old: &'a [usize],
- new: &'a [usize],
+ old: &[usize],
+ new: &[usize],
deadline: Option<Time>,
) -> Result<Vec<Diff<usize>>, crate::errors::Error> {
if old == new {
@@ -586,10 +586,10 @@ impl DiffMatchPatch {
Ok(diffs)
}
- fn compute_lines<'a>(
+ fn compute_lines(
&self,
- old: &'a [usize],
- new: &'a [usize],
+ old: &[usize],
+ new: &[usize],
deadline: Option<Time>,
) -> Result<Vec<Diff<usize>>, crate::errors::Error> {
// returning all of the new part
@@ -662,31 +662,38 @@ impl DiffMatchPatch {
// Find the 'middle snake' of a diff, split the problem in two
// and return the recursively constructed diff.
// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
- pub fn bisect<'a, T: DType>(
+ pub fn bisect<T: DType>(
&self,
- old: &'a [T],
- new: &'a [T],
+ old: &[T],
+ new: &[T],
deadline: Option<Time>,
) -> Result<Vec<Diff<T>>, crate::errors::Error> {
- let old_len = old.len() as isize;
- let new_len = new.len() as isize;
-
- let v_offset = (old_len + new_len + 1) >> 1; // same as `max_d`, (/ 2)
- let v_len = v_offset << 1; // (* 2)
-
- let mut v = vec![-1_isize; (v_len << 1) as usize];
- let (v1, v2) = v.split_at_mut(v_len as usize);
-
- {
- let v_trg = v_offset as usize + 1;
+ // Micro optimization:
+ // Do all setup before casting to isize
+ let mut v;
+ let (v_offset, v_len, v1, v2, old_len, new_len) = {
+ let v_offset = (old.len() + new.len() + 1) / 2;
+ let v_len = v_offset * 2;
+
+ v = vec![-1_isize; v_len * 2];
+ let (v1, v2) = v.split_at_mut(v_len);
+ let v_trg = v_offset + 1;
if v_trg < v1.len() {
v1[v_trg] = 0;
}
-
if v_trg < v2.len() {
v2[v_trg] = 0;
}
- }
+
+ (
+ v_offset as isize,
+ v_len as isize,
+ v1,
+ v2,
+ old.len() as isize,
+ new.len() as isize,
+ )
+ };
let delta = old_len - new_len;
// If the total number of characters is odd, then the front path will collide
@@ -856,9 +863,9 @@ impl DiffMatchPatch {
Ok(vec![Diff::delete(old), Diff::insert(new)])
}
- fn bisect_fwd_path_i<'a, T: DType>(
- old: &'a [T],
- new: &'a [T],
+ fn bisect_fwd_path_i<T: DType>(
+ old: &[T],
+ new: &[T],
x1: usize,
y1: usize,
) -> (usize, usize) {
@@ -877,9 +884,9 @@ impl DiffMatchPatch {
(x1, y1)
}
- fn bisect_rev_path_i<'a, T: DType>(
- old: &'a [T],
- new: &'a [T],
+ fn bisect_rev_path_i<T: DType>(
+ old: &[T],
+ new: &[T],
x2: usize,
y2: usize,
) -> (usize, usize) {
@@ -1300,11 +1307,7 @@ impl DiffMatchPatch {
// Scores range from 6 (best) to 0 (worst)
fn cleanup_semantic_score<T: DType>(one: &[T], two: &[T]) -> u8 {
let (char1, char2) = if let (Some(&char1), Some(&char2)) = (one.last(), two.first()) {
- if let (Some(c1), Some(c2)) = (char1.as_char(), char2.as_char()) {
- (c1, c2)
- } else {
- return 6;
- }
+ (char1, char2)
} else {
return 6;
};
@@ -1318,8 +1321,8 @@ impl DiffMatchPatch {
let whitespace_1 = char1.is_whitespace();
let whitespace_2 = char2.is_whitespace();
- let linebreak_1 = whitespace_1 && (char1 == '\n' || char1 == '\r');
- let linebreak_2 = whitespace_2 && (char2 == '\n' || char2 == '\r');
+ let linebreak_1 = whitespace_1 && (char1.is_newline() || char1.is_carriage());
+ let linebreak_2 = whitespace_2 && (char2.is_newline() || char2.is_carriage());
let blankline_1 = linebreak_1 && T::is_linebreak_end(one);
let blankline_2 = linebreak_2 && T::is_linebreak_start(two);
@@ -1330,13 +1333,13 @@ impl DiffMatchPatch {
} else if linebreak_1 || linebreak_2 {
// Four points for line breaks.
4
- } else if !char1.is_alphanumeric() && !whitespace_1 && whitespace_2 {
+ } else if !char1.is_alphanum() && !whitespace_1 && whitespace_2 {
// Three points for end of sentences.
3
} else if whitespace_1 || whitespace_2 {
// 2 for whitespace
2
- } else if !char1.is_alphanumeric() || !char2.is_alphanumeric() {
+ } else if !char1.is_alphanum() || !char2.is_alphanum() {
// 1 for not alphanumeric
1
} else {
@@ -1871,12 +1874,10 @@ impl DiffMatchPatch {
let mut linehash: HashMap<&'a [T], usize> = HashMap::new();
// Allocate 2/3rds of the UTF16::MAX (65535) value space for text1, the rest for text2.
- let mut maxlines = 40000;
- let chars_old = Self::lines_to_chars_internal(old, &mut lines, &mut linehash, maxlines);
+ let chars_old = Self::lines_to_chars_internal(old, &mut lines, &mut linehash, 40000);
// This basically represents the U16::MAX value
- maxlines = 65535;
- let chars_new = Self::lines_to_chars_internal(new, &mut lines, &mut linehash, maxlines);
+ let chars_new = Self::lines_to_chars_internal(new, &mut lines, &mut linehash, 65535);
LineToChars {
chars_old,
@@ -1898,7 +1899,7 @@ impl DiffMatchPatch {
let mut broke = false;
let mut cursor = 0;
- text.split_inclusive(|u| *u == T::from_char('\n'))
+ text.split_inclusive(|u| u.is_newline())
.enumerate()
.take(take)
.for_each(|(idx, line)| {