my fork of dmp
| -rw-r--r-- | Cargo.lock | 597 | ||||
| -rw-r--r-- | Cargo.toml | 7 | ||||
| -rw-r--r-- | benches/Bench.xlsx | bin | 0 -> 14975 bytes | |||
| -rw-r--r-- | benches/prefix.rs | 72 | ||||
| -rw-r--r-- | src/dmp.rs | 393 |
5 files changed, 1061 insertions, 8 deletions
@@ -3,5 +3,602 @@ version = 3 [[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstyle" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] name = "diff-match-patch-rs" version = "0.1.0" +dependencies = [ + "criterion", +] + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", +] + +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "is-terminal" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "oorandom" +version = "11.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" + +[[package]] +name = "plotters" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" + +[[package]] +name = "plotters-svg" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "regex" +version = "1.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "serde" +version = "1.0.204" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.204" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "2.0.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "web-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" @@ -4,3 +4,10 @@ version = "0.1.0" edition = "2021" [dependencies] + +[dev-dependencies] +criterion = "0" + +[[bench]] +name = "prefix" +harness = false
\ No newline at end of file diff --git a/benches/Bench.xlsx b/benches/Bench.xlsx Binary files differnew file mode 100644 index 0000000..e0ec0b6 --- /dev/null +++ b/benches/Bench.xlsx diff --git a/benches/prefix.rs b/benches/prefix.rs new file mode 100644 index 0000000..4ea6650 --- /dev/null +++ b/benches/prefix.rs @@ -0,0 +1,72 @@ +use core::panic; +use std::path::Path; + +use criterion::{criterion_group, criterion_main, Criterion}; + +fn prefix_linear(a: &str, b: &str, res: usize) { + let found = a.bytes() + .zip(b.bytes()) + .take_while(|(a, b)| a == b ) + .count(); + + if found != res { + panic!() + } +} + +fn prefix_binary(a: &str, b: &str, res: usize) { + let a = a.as_bytes(); + let b = b.as_bytes(); + + let mut pointmin = 0; + let mut pointmax = a.len().min(b.len()); + let mut pointmid = pointmax; + + let mut pointstart = 0; + + while pointmin < pointmid { + if a[pointstart .. pointmid] == + b[pointstart .. pointmid] { + pointmin = pointmid; + pointstart = pointmin; + } else { + pointmax = pointmid; + } + + pointmid = (pointmax - pointmin) / 2 + pointmin; + } + + if pointmid != res { + panic!("Not desired res") + } +} + +fn create_data() -> Vec<(String, String, usize, usize)> { + let basedir = Path::new("testdata"); + let data = [100, 1000, 10000, 100000, 1000000, 10000000] + .iter() + .map(|&n| { + let old = std::fs::read_to_string(basedir.join(format!("old_{n}.txt"))).unwrap(); + let new = std::fs::read_to_string(basedir.join(format!("new_{n}.txt"))).unwrap(); + + let res = std::fs::read_to_string(basedir.join(format!("ans_{n}.txt"))).unwrap().parse::<usize>().unwrap(); + + (old, new, res, n) + }).collect(); + + data +} + +pub fn prefix_bench(c: &mut Criterion) { + let d = create_data(); + + for (old, new, res, n) in d.iter() { + println!("For N={n}"); + c.bench_function("prefix linear", |bencher| bencher.iter(|| prefix_linear(old, new, *res))); + c.bench_function("prefix binary", |bencher| bencher.iter(|| prefix_binary(old, new, *res))); + } +} + + +criterion_group!(prefix, prefix_bench); +criterion_main!(prefix);
\ No newline at end of file @@ -1,3 +1,5 @@ +use std::{time::{Duration, Instant}, u32}; + /** * The data structure representing a diff is an array of tuples: @@ -6,9 +8,10 @@ */ /// Enum representing the different ops of diff -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +#[repr(i8)] pub enum Ops { - Delete, + Delete = -1, Insert, Equal } @@ -22,22 +25,174 @@ pub struct Diff(Ops, String); impl Diff { /// Create a new diff object - pub fn new(op: Ops, text: &str) -> Self { - Self(op, text.to_string()) + pub fn new(op: Ops, text: &[u8]) -> Self { + Self(op, String::from_utf8(text.to_vec()).unwrap()) } } +pub struct Patch {} + +pub type Patches = Vec<Patch>; + pub struct DiffMatchPatch { - checklines: Option<bool> + /// a speedup flag, If present and false, then don't run + /// a line-level diff first to identify the changed areas. + /// Defaults to true, which does a faster, slightly less optimal diff. + checklines: Option<bool>, + /// A default timeout in num seconds, defaults to 1 + timeout: Option<u64> +} + +impl Default for DiffMatchPatch { + fn default() -> Self { + Self { + checklines: Some(true), + timeout: Some(1) + } + } } impl DiffMatchPatch { fn checklines(&self) -> bool { self.checklines.map_or(true, |c| c) } + + // returns the configured timeout, defaults to `1`, None or `0` would mean infinite timeout + fn timeout(&self) -> u64 { + self.timeout.map_or(u64::MAX, |tout| if tout > 0 { tout } else { u64::MAX }) + } + + + fn diff_compute(&self, old: &[u8], new: &[u8]) -> Vec<Diff> { + // returning all of the new part + if old.is_empty() { + return vec![Diff::new(Ops::Insert, new)] + } + + // return everything deleted + if new.is_empty() { + return vec![Diff::new(Ops::Delete, old)] + } + + let (long, short, old_gt_new) = if old.len() > new.len() { (old, new, true) } else { (new, old, false) }; + + let idx = long.windows(short.len()).step_by(1).position(|k| k == short); + // found a subsequence which contains the short text + if let Some(idx) = idx { + // Shorter text is inside the longer text (speedup). + let op = if old_gt_new { Ops::Delete } else { Ops::Insert }; + let diffs = vec![ + Diff::new(op, &long[0 .. idx]), + Diff::new(Ops::Equal, short), + Diff::new(op, &long[idx .. short.len()]) + ]; + + return diffs; + } + + if short.len() == 1 { + // After previous case, this can't be an equality + return vec![Diff::new(Ops::Delete, old), Diff::new(Ops::Insert, new)]; + } + + // Check if the problem can be split in two + if let Some(half_match) = self.diff_half_match(old, new) { + + } + // // Check to see if the problem can be split in two. + // var hm = this.diff_halfMatch_(text1, text2); + // if (hm) { + // // A half-match was found, sort out the return data. + // var text1_a = hm[0]; + // var text1_b = hm[1]; + // var text2_a = hm[2]; + // var text2_b = hm[3]; + // var mid_common = hm[4]; + // // Send both pairs off for separate processing. + // var diffs_a = this.diff_main(text1_a, text2_a, checklines, deadline); + // var diffs_b = this.diff_main(text1_b, text2_b, checklines, deadline); + // // Merge the results. + // return diffs_a.concat([new diff_match_patch.Diff(DIFF_EQUAL, mid_common)], + // diffs_b); + // } + + // if (checklines && text1.length > 100 && text2.length > 100) { + // return this.diff_lineMode_(text1, text2, deadline); + // } + + // return this.diff_bisect_(text1, text2, deadline); + + todo!() + } + + fn diff_half_match(&self, old: &[u8], new: &[u8]) -> Option<()> { + // Don't risk returning a suboptimal diff when we have unlimited time + if self.timeout() == u64::MAX { + return None; + } + + let (long, short, old_gt_new) = if old.len() > new.len() { (old, new, true) } else { (new, old, false) }; + // pointless - two small for this algo + if long.len() < 4 || short.len() * 2 < long.len() { + return None; + } + + + None + } + + // returns the number of bytes common in both the str - this is the position in bytes not chars, [0 .. n] is your prefix + // We are doing a binary search here, and I've observed similar performance as noted by https://neil.fraser.name/news/2007/10/09/ + // Some benchmark code can be found in benches/prefix.rs + // Reverse prefix is suffix + // TODO: investigate this further + fn common_prefix(lhs: &[u8], rhs: &[u8], reverse: bool) -> usize { + if lhs.is_empty() || rhs.is_empty() || + (!reverse && (lhs.first() != rhs.first())) || + (reverse && (lhs.last() != rhs.last())) { + return 0; + } + + + let mut pointmin = 0; + let mut pointmax = lhs.len().min(rhs.len()); + let mut pointmid = pointmax; + + let mut pointstart = 0; + + while pointmin < pointmid { + let (lhsrange, rhsrange) = if !reverse { + (pointstart .. pointmid, pointstart .. pointmid) + } else { + (lhs.len() - pointmid .. lhs.len() - pointstart, rhs.len() - pointmid .. rhs.len() - pointstart) + }; + + if lhs[lhsrange] == rhs[rhsrange] { + pointmin = pointmid; + pointstart = pointmin; + } else { + pointmax = pointmid; + } + + pointmid = (pointmax - pointmin) / 2 + pointmin; + } + + pointmid + } + + } impl DiffMatchPatch { + /// Find the differences between two texts. Simplifies the problem by stripping any common prefix or suffix off the texts before diffing. + /// Args: + /// old: Old string to be diffed. + /// new: New string to be diffed. + /// deadline: Optional time when the diff should be complete by. Used + /// internally for recursive calls. Users should set DiffTimeout instead. + /// + /// Returns: + /// Vec of changes (Diff). pub fn diff_main(&self, old: &str, new: &str) -> Vec<Diff> { // First, check if lhs and rhs are equal if old == new { @@ -45,18 +200,240 @@ impl DiffMatchPatch { return Vec::new(); } - return vec![Diff::new(Ops::Equal, old)]; + return vec![Diff::new(Ops::Equal, old.as_bytes())]; } if old.is_empty() { - return vec![Diff::new(Ops::Insert, new)] + return vec![Diff::new(Ops::Insert, new.as_bytes())] } if new.is_empty() { - return vec![Diff::new(Ops::Delete, old)] + return vec![Diff::new(Ops::Delete, old.as_bytes())] } + let deadline = Instant::now().checked_add(Duration::from_secs(self.timeout())).unwrap(); + + let old_bytes = old.as_bytes(); + let new_bytes = new.as_bytes(); + + // Trim common prefix + let common_prefix = Self::common_prefix(old_bytes, new_bytes, false); + let common_suffix = Self::common_prefix(&old_bytes[common_prefix..], &new_bytes[common_prefix..], true); + + let diffs = self.diff_compute(&old_bytes[common_prefix .. old_bytes.len() - common_suffix], &new_bytes[common_prefix .. new_bytes.len() - common_suffix]); + + todo!() + } + + pub fn diff_cleanup_semantic(diffs: &mut [Diff]) { + todo!() + } + + pub fn diff_cleanup_efficiency(diffs: &mut [Diff]) { + todo!() + } + + pub fn diff_levenshtein(diffs: &[Diff]) -> u32 { + todo!() + } + + pub fn diff_pretty_html(diffs: &[Diff]) -> String { + todo!() + } + + pub fn match_main(text: &str, pattern: &str, loc: ()) -> () { + todo!() + } + + pub fn patch_make_text_text(text1: &str, text2: &str) -> Patches { + todo!() + } + + pub fn patch_make_diff(diffs: &[Diff]) -> Patches { + todo!() + } + pub fn patch_make_text_diff(text1: &str, diffs: &[Diff]) -> Patches { todo!() } + + pub fn patch_to_text(patches: Patches) -> String { + todo!() + } + + pub fn patch_from_text(text: &str) -> Patches { + todo!() + } + + pub fn patch_apply(patches: &[Patch], text: &str) -> (String, ()) { + todo!() + } +} + + +#[cfg(test)] +mod tests { + use crate::dmp::{Diff, Ops}; + + use super::DiffMatchPatch; + + // const tests = [ + // 'testDiffIsDestructurable', // TODO + // 'testDiffCommonOverlap', + // 'testDiffHalfMatch', + // 'testDiffLinesToChars', + // 'testDiffCharsToLines', + // 'testDiffCleanupMerge', + // 'testDiffCleanupSemanticLossless', + // 'testDiffCleanupSemantic', + // 'testDiffCleanupEfficiency', + // 'testDiffPrettyHtml', + // 'testDiffText', + // 'testDiffDelta', + // 'testDiffXIndex', + // 'testDiffLevenshtein', + // 'testDiffBisect', + // 'testMatchAlphabet', + // 'testMatchBitap', + // 'testMatchMain', + // 'testPatchObj', + // 'testPatchFromText', + // 'testPatchToText', + // 'testPatchAddContext', + // 'testPatchMake', + // 'testPatchSplitMax', + // 'testPatchAddPadding', + // 'testPatchApply' + // ]; + + + #[test] + fn test_diff_is_destructurable() { + + } + + #[test] + fn test_prefix() { + // Detect any common prefix. + // Null case. + assert_eq!(0, DiffMatchPatch::common_prefix("abc".as_bytes(), "xyz".as_bytes(), false)); + + // Non-null case. + assert_eq!(4, DiffMatchPatch::common_prefix("1234abcdef".as_bytes(), "1234xyz".as_bytes(), false)); + + // Whole case. + assert_eq!(4, DiffMatchPatch::common_prefix("1234".as_bytes(), "1234xyz".as_bytes(), false)); + } + + #[test] + fn test_suffix() { + // Detect any common suffix. + // Null case. + assert_eq!(0, DiffMatchPatch::common_prefix("abc".as_bytes(), "xyz".as_bytes(), true)); + + // Non-null case. + assert_eq!(4, DiffMatchPatch::common_prefix("abcdef1234".as_bytes(), "xyz1234".as_bytes(), true)); + + // Whole case. + assert_eq!(4, DiffMatchPatch::common_prefix("1234".as_bytes(), "xyz1234".as_bytes(), true)); + } + + #[test] + fn test_diff_main() { + let dmp = DiffMatchPatch::default(); + + // Perform a trivial diff. + // Null case. + assert!(dmp.diff_main("", "").is_empty()); + + // Equality + assert_eq!(vec![Diff::new(Ops::Equal, "abc".as_bytes())], dmp.diff_main("abc", "abc")); + + // Simple insert + assert_eq!( + vec![Diff::new(Ops::Equal, "ab".as_bytes()), Diff::new(Ops::Insert, "123".as_bytes()), Diff::new(Ops::Equal, "c".as_bytes())], + dmp.diff_main("abc", "ab123c") + ); + + // Simple delete + assert_eq!( + vec![Diff::new(Ops::Equal, "a".as_bytes()), Diff::new(Ops::Delete, "123".as_bytes()), Diff::new(Ops::Equal, "bc".as_bytes())], + dmp.diff_main("a123bc", "abc") + ); + + + +// // Two insertions. +// assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_INSERT, '123'], [DIFF_EQUAL, 'b'], [DIFF_INSERT, '456'], [DIFF_EQUAL, 'c']], dmp.diff_main('abc', 'a123b456c', false)); + +// // Two deletions. +// assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_DELETE, '123'], [DIFF_EQUAL, 'b'], [DIFF_DELETE, '456'], [DIFF_EQUAL, 'c']], dmp.diff_main('a123b456c', 'abc', false)); + +// // Perform a real diff. +// // Switch off the timeout. +// dmp.Diff_Timeout = 0; +// // Simple cases. +// assertEquivalent([[DIFF_DELETE, 'a'], [DIFF_INSERT, 'b']], dmp.diff_main('a', 'b', false)); + +// assertEquivalent([[DIFF_DELETE, 'Apple'], [DIFF_INSERT, 'Banana'], [DIFF_EQUAL, 's are a'], [DIFF_INSERT, 'lso'], [DIFF_EQUAL, ' fruit.']], dmp.diff_main('Apples are a fruit.', 'Bananas are also fruit.', false)); + +// assertEquivalent([[DIFF_DELETE, 'a'], [DIFF_INSERT, '\u0680'], [DIFF_EQUAL, 'x'], [DIFF_DELETE, '\t'], [DIFF_INSERT, '\0']], dmp.diff_main('ax\t', '\u0680x\0', false)); + +// // Overlaps. +// assertEquivalent([[DIFF_DELETE, '1'], [DIFF_EQUAL, 'a'], [DIFF_DELETE, 'y'], [DIFF_EQUAL, 'b'], [DIFF_DELETE, '2'], [DIFF_INSERT, 'xab']], dmp.diff_main('1ayb2', 'abxab', false)); + +// assertEquivalent([[DIFF_INSERT, 'xaxcx'], [DIFF_EQUAL, 'abc'], [DIFF_DELETE, 'y']], dmp.diff_main('abcy', 'xaxcxabc', false)); + +// assertEquivalent([[DIFF_DELETE, 'ABCD'], [DIFF_EQUAL, 'a'], [DIFF_DELETE, '='], [DIFF_INSERT, '-'], [DIFF_EQUAL, 'bcd'], [DIFF_DELETE, '='], [DIFF_INSERT, '-'], [DIFF_EQUAL, 'efghijklmnopqrs'], [DIFF_DELETE, 'EFGHIJKLMNOefg']], dmp.diff_main('ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg', 'a-bcd-efghijklmnopqrs', false)); + +// // Large equality. +// assertEquivalent([[DIFF_INSERT, ' '], [DIFF_EQUAL, 'a'], [DIFF_INSERT, 'nd'], [DIFF_EQUAL, ' [[Pennsylvania]]'], [DIFF_DELETE, ' and [[New']], dmp.diff_main('a [[Pennsylvania]] and [[New', ' and [[Pennsylvania]]', false)); + +// // Timeout. +// dmp.Diff_Timeout = 0.1; // 100ms +// var a = '`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n'; +// var b = 'I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n'; +// // Increase the text lengths by 1024 times to ensure a timeout. +// for (var i = 0; i < 10; i++) { +// a += a; +// b += b; +// } +// var startTime = (new Date()).getTime(); +// dmp.diff_main(a, b); +// var endTime = (new Date()).getTime(); +// // Test that we took at least the timeout period. +// assertTrue(dmp.Diff_Timeout * 1000 <= endTime - startTime); +// // Test that we didn't take forever (be forgiving). +// // Theoretically this test could fail very occasionally if the +// // OS task swaps or locks up for a second at the wrong moment. +// assertTrue(dmp.Diff_Timeout * 1000 * 2 > endTime - startTime); +// dmp.Diff_Timeout = 0; + +// // Test the linemode speedup. +// // Must be long to pass the 100 char cutoff. +// // Simple line-mode. +// a = '1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n'; +// b = 'abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n'; +// assertEquivalent(dmp.diff_main(a, b, false), dmp.diff_main(a, b, true)); + +// // Single line-mode. +// a = '1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890'; +// b = 'abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij'; +// assertEquivalent(dmp.diff_main(a, b, false), dmp.diff_main(a, b, true)); + +// // Overlap line-mode. +// a = '1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n'; +// b = 'abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n'; +// var texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true)); +// var texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false)); +// assertEquivalent(texts_textmode, texts_linemode); + +// // Test null inputs. +// try { +// dmp.diff_main(null, null); +// assertEquals(Error, null); +// } catch (e) { +// // Exception expected. +// } + } }
\ No newline at end of file |