my fork of dmp
Anubhab Bandyopadhyay 2024-08-04
parent 8fc7be0 · commit 9e9f686
-rw-r--r--Cargo.lock597
-rw-r--r--Cargo.toml7
-rw-r--r--benches/Bench.xlsxbin0 -> 14975 bytes
-rw-r--r--benches/prefix.rs72
-rw-r--r--src/dmp.rs393
5 files changed, 1061 insertions, 8 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 6e73aa2..bb2d6a4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3,5 +3,602 @@
version = 3
[[package]]
+name = "aho-corasick"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "anes"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
+
+[[package]]
+name = "anstyle"
+version = "1.0.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1"
+
+[[package]]
+name = "autocfg"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
+
+[[package]]
+name = "bumpalo"
+version = "3.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
+
+[[package]]
+name = "cast"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "ciborium"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
+dependencies = [
+ "ciborium-io",
+ "ciborium-ll",
+ "serde",
+]
+
+[[package]]
+name = "ciborium-io"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
+
+[[package]]
+name = "ciborium-ll"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
+dependencies = [
+ "ciborium-io",
+ "half",
+]
+
+[[package]]
+name = "clap"
+version = "4.5.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc"
+dependencies = [
+ "clap_builder",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.5.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99"
+dependencies = [
+ "anstyle",
+ "clap_lex",
+]
+
+[[package]]
+name = "clap_lex"
+version = "0.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
+
+[[package]]
+name = "criterion"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
+dependencies = [
+ "anes",
+ "cast",
+ "ciborium",
+ "clap",
+ "criterion-plot",
+ "is-terminal",
+ "itertools",
+ "num-traits",
+ "once_cell",
+ "oorandom",
+ "plotters",
+ "rayon",
+ "regex",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "tinytemplate",
+ "walkdir",
+]
+
+[[package]]
+name = "criterion-plot"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
+dependencies = [
+ "cast",
+ "itertools",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
+
+[[package]]
+name = "crunchy"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
+
+[[package]]
name = "diff-match-patch-rs"
version = "0.1.0"
+dependencies = [
+ "criterion",
+]
+
+[[package]]
+name = "either"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
+
+[[package]]
+name = "half"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888"
+dependencies = [
+ "cfg-if",
+ "crunchy",
+]
+
+[[package]]
+name = "hermit-abi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
+
+[[package]]
+name = "is-terminal"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "itertools"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
+
+[[package]]
+name = "js-sys"
+version = "0.3.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d"
+dependencies = [
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.155"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
+
+[[package]]
+name = "log"
+version = "0.4.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
+
+[[package]]
+name = "memchr"
+version = "2.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
+
+[[package]]
+name = "num-traits"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
+
+[[package]]
+name = "oorandom"
+version = "11.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9"
+
+[[package]]
+name = "plotters"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3"
+dependencies = [
+ "num-traits",
+ "plotters-backend",
+ "plotters-svg",
+ "wasm-bindgen",
+ "web-sys",
+]
+
+[[package]]
+name = "plotters-backend"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7"
+
+[[package]]
+name = "plotters-svg"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705"
+dependencies = [
+ "plotters-backend",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.86"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rayon"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "regex"
+version = "1.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
+
+[[package]]
+name = "ryu"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
+
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "serde"
+version = "1.0.204"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.204"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.122"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da"
+dependencies = [
+ "itoa",
+ "memchr",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.72"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "tinytemplate"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
+dependencies = [
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
+
+[[package]]
+name = "walkdir"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.92"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8"
+dependencies = [
+ "cfg-if",
+ "wasm-bindgen-macro",
+]
+
+[[package]]
+name = "wasm-bindgen-backend"
+version = "0.2.92"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da"
+dependencies = [
+ "bumpalo",
+ "log",
+ "once_cell",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.92"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.92"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-backend",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.92"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96"
+
+[[package]]
+name = "web-sys"
+version = "0.3.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "winapi-util"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
+dependencies = [
+ "windows-sys 0.59.0",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
+dependencies = [
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.59.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
+dependencies = [
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
diff --git a/Cargo.toml b/Cargo.toml
index 6c47647..4a60b82 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,3 +4,10 @@ version = "0.1.0"
edition = "2021"
[dependencies]
+
+[dev-dependencies]
+criterion = "0"
+
+[[bench]]
+name = "prefix"
+harness = false \ No newline at end of file
diff --git a/benches/Bench.xlsx b/benches/Bench.xlsx
new file mode 100644
index 0000000..e0ec0b6
--- /dev/null
+++ b/benches/Bench.xlsx
Binary files differ
diff --git a/benches/prefix.rs b/benches/prefix.rs
new file mode 100644
index 0000000..4ea6650
--- /dev/null
+++ b/benches/prefix.rs
@@ -0,0 +1,72 @@
+use core::panic;
+use std::path::Path;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+
+fn prefix_linear(a: &str, b: &str, res: usize) {
+ let found = a.bytes()
+ .zip(b.bytes())
+ .take_while(|(a, b)| a == b )
+ .count();
+
+ if found != res {
+ panic!()
+ }
+}
+
+fn prefix_binary(a: &str, b: &str, res: usize) {
+ let a = a.as_bytes();
+ let b = b.as_bytes();
+
+ let mut pointmin = 0;
+ let mut pointmax = a.len().min(b.len());
+ let mut pointmid = pointmax;
+
+ let mut pointstart = 0;
+
+ while pointmin < pointmid {
+ if a[pointstart .. pointmid] ==
+ b[pointstart .. pointmid] {
+ pointmin = pointmid;
+ pointstart = pointmin;
+ } else {
+ pointmax = pointmid;
+ }
+
+ pointmid = (pointmax - pointmin) / 2 + pointmin;
+ }
+
+ if pointmid != res {
+ panic!("Not desired res")
+ }
+}
+
+fn create_data() -> Vec<(String, String, usize, usize)> {
+ let basedir = Path::new("testdata");
+ let data = [100, 1000, 10000, 100000, 1000000, 10000000]
+ .iter()
+ .map(|&n| {
+ let old = std::fs::read_to_string(basedir.join(format!("old_{n}.txt"))).unwrap();
+ let new = std::fs::read_to_string(basedir.join(format!("new_{n}.txt"))).unwrap();
+
+ let res = std::fs::read_to_string(basedir.join(format!("ans_{n}.txt"))).unwrap().parse::<usize>().unwrap();
+
+ (old, new, res, n)
+ }).collect();
+
+ data
+}
+
+pub fn prefix_bench(c: &mut Criterion) {
+ let d = create_data();
+
+ for (old, new, res, n) in d.iter() {
+ println!("For N={n}");
+ c.bench_function("prefix linear", |bencher| bencher.iter(|| prefix_linear(old, new, *res)));
+ c.bench_function("prefix binary", |bencher| bencher.iter(|| prefix_binary(old, new, *res)));
+ }
+}
+
+
+criterion_group!(prefix, prefix_bench);
+criterion_main!(prefix); \ No newline at end of file
diff --git a/src/dmp.rs b/src/dmp.rs
index 72af7cf..71f1c73 100644
--- a/src/dmp.rs
+++ b/src/dmp.rs
@@ -1,3 +1,5 @@
+use std::{time::{Duration, Instant}, u32};
+
/**
* The data structure representing a diff is an array of tuples:
@@ -6,9 +8,10 @@
*/
/// Enum representing the different ops of diff
-#[derive(Debug, PartialEq, Eq)]
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+#[repr(i8)]
pub enum Ops {
- Delete,
+ Delete = -1,
Insert,
Equal
}
@@ -22,22 +25,174 @@ pub struct Diff(Ops, String);
impl Diff {
/// Create a new diff object
- pub fn new(op: Ops, text: &str) -> Self {
- Self(op, text.to_string())
+ pub fn new(op: Ops, text: &[u8]) -> Self {
+ Self(op, String::from_utf8(text.to_vec()).unwrap())
}
}
+pub struct Patch {}
+
+pub type Patches = Vec<Patch>;
+
pub struct DiffMatchPatch {
- checklines: Option<bool>
+ /// a speedup flag, If present and false, then don't run
+ /// a line-level diff first to identify the changed areas.
+ /// Defaults to true, which does a faster, slightly less optimal diff.
+ checklines: Option<bool>,
+ /// A default timeout in num seconds, defaults to 1
+ timeout: Option<u64>
+}
+
+impl Default for DiffMatchPatch {
+ fn default() -> Self {
+ Self {
+ checklines: Some(true),
+ timeout: Some(1)
+ }
+ }
}
impl DiffMatchPatch {
fn checklines(&self) -> bool {
self.checklines.map_or(true, |c| c)
}
+
+ // returns the configured timeout, defaults to `1`, None or `0` would mean infinite timeout
+ fn timeout(&self) -> u64 {
+ self.timeout.map_or(u64::MAX, |tout| if tout > 0 { tout } else { u64::MAX })
+ }
+
+
+ fn diff_compute(&self, old: &[u8], new: &[u8]) -> Vec<Diff> {
+ // returning all of the new part
+ if old.is_empty() {
+ return vec![Diff::new(Ops::Insert, new)]
+ }
+
+ // return everything deleted
+ if new.is_empty() {
+ return vec![Diff::new(Ops::Delete, old)]
+ }
+
+ let (long, short, old_gt_new) = if old.len() > new.len() { (old, new, true) } else { (new, old, false) };
+
+ let idx = long.windows(short.len()).step_by(1).position(|k| k == short);
+ // found a subsequence which contains the short text
+ if let Some(idx) = idx {
+ // Shorter text is inside the longer text (speedup).
+ let op = if old_gt_new { Ops::Delete } else { Ops::Insert };
+ let diffs = vec![
+ Diff::new(op, &long[0 .. idx]),
+ Diff::new(Ops::Equal, short),
+ Diff::new(op, &long[idx .. short.len()])
+ ];
+
+ return diffs;
+ }
+
+ if short.len() == 1 {
+ // After previous case, this can't be an equality
+ return vec![Diff::new(Ops::Delete, old), Diff::new(Ops::Insert, new)];
+ }
+
+ // Check if the problem can be split in two
+ if let Some(half_match) = self.diff_half_match(old, new) {
+
+ }
+ // // Check to see if the problem can be split in two.
+ // var hm = this.diff_halfMatch_(text1, text2);
+ // if (hm) {
+ // // A half-match was found, sort out the return data.
+ // var text1_a = hm[0];
+ // var text1_b = hm[1];
+ // var text2_a = hm[2];
+ // var text2_b = hm[3];
+ // var mid_common = hm[4];
+ // // Send both pairs off for separate processing.
+ // var diffs_a = this.diff_main(text1_a, text2_a, checklines, deadline);
+ // var diffs_b = this.diff_main(text1_b, text2_b, checklines, deadline);
+ // // Merge the results.
+ // return diffs_a.concat([new diff_match_patch.Diff(DIFF_EQUAL, mid_common)],
+ // diffs_b);
+ // }
+
+ // if (checklines && text1.length > 100 && text2.length > 100) {
+ // return this.diff_lineMode_(text1, text2, deadline);
+ // }
+
+ // return this.diff_bisect_(text1, text2, deadline);
+
+ todo!()
+ }
+
+ fn diff_half_match(&self, old: &[u8], new: &[u8]) -> Option<()> {
+ // Don't risk returning a suboptimal diff when we have unlimited time
+ if self.timeout() == u64::MAX {
+ return None;
+ }
+
+ let (long, short, old_gt_new) = if old.len() > new.len() { (old, new, true) } else { (new, old, false) };
+ // pointless - two small for this algo
+ if long.len() < 4 || short.len() * 2 < long.len() {
+ return None;
+ }
+
+
+ None
+ }
+
+ // returns the number of bytes common in both the str - this is the position in bytes not chars, [0 .. n] is your prefix
+ // We are doing a binary search here, and I've observed similar performance as noted by https://neil.fraser.name/news/2007/10/09/
+ // Some benchmark code can be found in benches/prefix.rs
+ // Reverse prefix is suffix
+ // TODO: investigate this further
+ fn common_prefix(lhs: &[u8], rhs: &[u8], reverse: bool) -> usize {
+ if lhs.is_empty() || rhs.is_empty() ||
+ (!reverse && (lhs.first() != rhs.first())) ||
+ (reverse && (lhs.last() != rhs.last())) {
+ return 0;
+ }
+
+
+ let mut pointmin = 0;
+ let mut pointmax = lhs.len().min(rhs.len());
+ let mut pointmid = pointmax;
+
+ let mut pointstart = 0;
+
+ while pointmin < pointmid {
+ let (lhsrange, rhsrange) = if !reverse {
+ (pointstart .. pointmid, pointstart .. pointmid)
+ } else {
+ (lhs.len() - pointmid .. lhs.len() - pointstart, rhs.len() - pointmid .. rhs.len() - pointstart)
+ };
+
+ if lhs[lhsrange] == rhs[rhsrange] {
+ pointmin = pointmid;
+ pointstart = pointmin;
+ } else {
+ pointmax = pointmid;
+ }
+
+ pointmid = (pointmax - pointmin) / 2 + pointmin;
+ }
+
+ pointmid
+ }
+
+
}
impl DiffMatchPatch {
+ /// Find the differences between two texts. Simplifies the problem by stripping any common prefix or suffix off the texts before diffing.
+ /// Args:
+ /// old: Old string to be diffed.
+ /// new: New string to be diffed.
+ /// deadline: Optional time when the diff should be complete by. Used
+ /// internally for recursive calls. Users should set DiffTimeout instead.
+ ///
+ /// Returns:
+ /// Vec of changes (Diff).
pub fn diff_main(&self, old: &str, new: &str) -> Vec<Diff> {
// First, check if lhs and rhs are equal
if old == new {
@@ -45,18 +200,240 @@ impl DiffMatchPatch {
return Vec::new();
}
- return vec![Diff::new(Ops::Equal, old)];
+ return vec![Diff::new(Ops::Equal, old.as_bytes())];
}
if old.is_empty() {
- return vec![Diff::new(Ops::Insert, new)]
+ return vec![Diff::new(Ops::Insert, new.as_bytes())]
}
if new.is_empty() {
- return vec![Diff::new(Ops::Delete, old)]
+ return vec![Diff::new(Ops::Delete, old.as_bytes())]
}
+ let deadline = Instant::now().checked_add(Duration::from_secs(self.timeout())).unwrap();
+
+ let old_bytes = old.as_bytes();
+ let new_bytes = new.as_bytes();
+
+ // Trim common prefix
+ let common_prefix = Self::common_prefix(old_bytes, new_bytes, false);
+ let common_suffix = Self::common_prefix(&old_bytes[common_prefix..], &new_bytes[common_prefix..], true);
+
+ let diffs = self.diff_compute(&old_bytes[common_prefix .. old_bytes.len() - common_suffix], &new_bytes[common_prefix .. new_bytes.len() - common_suffix]);
+
+ todo!()
+ }
+
+ pub fn diff_cleanup_semantic(diffs: &mut [Diff]) {
+ todo!()
+ }
+
+ pub fn diff_cleanup_efficiency(diffs: &mut [Diff]) {
+ todo!()
+ }
+
+ pub fn diff_levenshtein(diffs: &[Diff]) -> u32 {
+ todo!()
+ }
+
+ pub fn diff_pretty_html(diffs: &[Diff]) -> String {
+ todo!()
+ }
+
+ pub fn match_main(text: &str, pattern: &str, loc: ()) -> () {
+ todo!()
+ }
+
+ pub fn patch_make_text_text(text1: &str, text2: &str) -> Patches {
+ todo!()
+ }
+
+ pub fn patch_make_diff(diffs: &[Diff]) -> Patches {
+ todo!()
+ }
+ pub fn patch_make_text_diff(text1: &str, diffs: &[Diff]) -> Patches {
todo!()
}
+
+ pub fn patch_to_text(patches: Patches) -> String {
+ todo!()
+ }
+
+ pub fn patch_from_text(text: &str) -> Patches {
+ todo!()
+ }
+
+ pub fn patch_apply(patches: &[Patch], text: &str) -> (String, ()) {
+ todo!()
+ }
+}
+
+
+#[cfg(test)]
+mod tests {
+ use crate::dmp::{Diff, Ops};
+
+ use super::DiffMatchPatch;
+
+ // const tests = [
+ // 'testDiffIsDestructurable', // TODO
+ // 'testDiffCommonOverlap',
+ // 'testDiffHalfMatch',
+ // 'testDiffLinesToChars',
+ // 'testDiffCharsToLines',
+ // 'testDiffCleanupMerge',
+ // 'testDiffCleanupSemanticLossless',
+ // 'testDiffCleanupSemantic',
+ // 'testDiffCleanupEfficiency',
+ // 'testDiffPrettyHtml',
+ // 'testDiffText',
+ // 'testDiffDelta',
+ // 'testDiffXIndex',
+ // 'testDiffLevenshtein',
+ // 'testDiffBisect',
+ // 'testMatchAlphabet',
+ // 'testMatchBitap',
+ // 'testMatchMain',
+ // 'testPatchObj',
+ // 'testPatchFromText',
+ // 'testPatchToText',
+ // 'testPatchAddContext',
+ // 'testPatchMake',
+ // 'testPatchSplitMax',
+ // 'testPatchAddPadding',
+ // 'testPatchApply'
+ // ];
+
+
+ #[test]
+ fn test_diff_is_destructurable() {
+
+ }
+
+ #[test]
+ fn test_prefix() {
+ // Detect any common prefix.
+ // Null case.
+ assert_eq!(0, DiffMatchPatch::common_prefix("abc".as_bytes(), "xyz".as_bytes(), false));
+
+ // Non-null case.
+ assert_eq!(4, DiffMatchPatch::common_prefix("1234abcdef".as_bytes(), "1234xyz".as_bytes(), false));
+
+ // Whole case.
+ assert_eq!(4, DiffMatchPatch::common_prefix("1234".as_bytes(), "1234xyz".as_bytes(), false));
+ }
+
+ #[test]
+ fn test_suffix() {
+ // Detect any common suffix.
+ // Null case.
+ assert_eq!(0, DiffMatchPatch::common_prefix("abc".as_bytes(), "xyz".as_bytes(), true));
+
+ // Non-null case.
+ assert_eq!(4, DiffMatchPatch::common_prefix("abcdef1234".as_bytes(), "xyz1234".as_bytes(), true));
+
+ // Whole case.
+ assert_eq!(4, DiffMatchPatch::common_prefix("1234".as_bytes(), "xyz1234".as_bytes(), true));
+ }
+
+ #[test]
+ fn test_diff_main() {
+ let dmp = DiffMatchPatch::default();
+
+ // Perform a trivial diff.
+ // Null case.
+ assert!(dmp.diff_main("", "").is_empty());
+
+ // Equality
+ assert_eq!(vec![Diff::new(Ops::Equal, "abc".as_bytes())], dmp.diff_main("abc", "abc"));
+
+ // Simple insert
+ assert_eq!(
+ vec![Diff::new(Ops::Equal, "ab".as_bytes()), Diff::new(Ops::Insert, "123".as_bytes()), Diff::new(Ops::Equal, "c".as_bytes())],
+ dmp.diff_main("abc", "ab123c")
+ );
+
+ // Simple delete
+ assert_eq!(
+ vec![Diff::new(Ops::Equal, "a".as_bytes()), Diff::new(Ops::Delete, "123".as_bytes()), Diff::new(Ops::Equal, "bc".as_bytes())],
+ dmp.diff_main("a123bc", "abc")
+ );
+
+
+
+// // Two insertions.
+// assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_INSERT, '123'], [DIFF_EQUAL, 'b'], [DIFF_INSERT, '456'], [DIFF_EQUAL, 'c']], dmp.diff_main('abc', 'a123b456c', false));
+
+// // Two deletions.
+// assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_DELETE, '123'], [DIFF_EQUAL, 'b'], [DIFF_DELETE, '456'], [DIFF_EQUAL, 'c']], dmp.diff_main('a123b456c', 'abc', false));
+
+// // Perform a real diff.
+// // Switch off the timeout.
+// dmp.Diff_Timeout = 0;
+// // Simple cases.
+// assertEquivalent([[DIFF_DELETE, 'a'], [DIFF_INSERT, 'b']], dmp.diff_main('a', 'b', false));
+
+// assertEquivalent([[DIFF_DELETE, 'Apple'], [DIFF_INSERT, 'Banana'], [DIFF_EQUAL, 's are a'], [DIFF_INSERT, 'lso'], [DIFF_EQUAL, ' fruit.']], dmp.diff_main('Apples are a fruit.', 'Bananas are also fruit.', false));
+
+// assertEquivalent([[DIFF_DELETE, 'a'], [DIFF_INSERT, '\u0680'], [DIFF_EQUAL, 'x'], [DIFF_DELETE, '\t'], [DIFF_INSERT, '\0']], dmp.diff_main('ax\t', '\u0680x\0', false));
+
+// // Overlaps.
+// assertEquivalent([[DIFF_DELETE, '1'], [DIFF_EQUAL, 'a'], [DIFF_DELETE, 'y'], [DIFF_EQUAL, 'b'], [DIFF_DELETE, '2'], [DIFF_INSERT, 'xab']], dmp.diff_main('1ayb2', 'abxab', false));
+
+// assertEquivalent([[DIFF_INSERT, 'xaxcx'], [DIFF_EQUAL, 'abc'], [DIFF_DELETE, 'y']], dmp.diff_main('abcy', 'xaxcxabc', false));
+
+// assertEquivalent([[DIFF_DELETE, 'ABCD'], [DIFF_EQUAL, 'a'], [DIFF_DELETE, '='], [DIFF_INSERT, '-'], [DIFF_EQUAL, 'bcd'], [DIFF_DELETE, '='], [DIFF_INSERT, '-'], [DIFF_EQUAL, 'efghijklmnopqrs'], [DIFF_DELETE, 'EFGHIJKLMNOefg']], dmp.diff_main('ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg', 'a-bcd-efghijklmnopqrs', false));
+
+// // Large equality.
+// assertEquivalent([[DIFF_INSERT, ' '], [DIFF_EQUAL, 'a'], [DIFF_INSERT, 'nd'], [DIFF_EQUAL, ' [[Pennsylvania]]'], [DIFF_DELETE, ' and [[New']], dmp.diff_main('a [[Pennsylvania]] and [[New', ' and [[Pennsylvania]]', false));
+
+// // Timeout.
+// dmp.Diff_Timeout = 0.1; // 100ms
+// var a = '`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n';
+// var b = 'I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n';
+// // Increase the text lengths by 1024 times to ensure a timeout.
+// for (var i = 0; i < 10; i++) {
+// a += a;
+// b += b;
+// }
+// var startTime = (new Date()).getTime();
+// dmp.diff_main(a, b);
+// var endTime = (new Date()).getTime();
+// // Test that we took at least the timeout period.
+// assertTrue(dmp.Diff_Timeout * 1000 <= endTime - startTime);
+// // Test that we didn't take forever (be forgiving).
+// // Theoretically this test could fail very occasionally if the
+// // OS task swaps or locks up for a second at the wrong moment.
+// assertTrue(dmp.Diff_Timeout * 1000 * 2 > endTime - startTime);
+// dmp.Diff_Timeout = 0;
+
+// // Test the linemode speedup.
+// // Must be long to pass the 100 char cutoff.
+// // Simple line-mode.
+// a = '1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n';
+// b = 'abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n';
+// assertEquivalent(dmp.diff_main(a, b, false), dmp.diff_main(a, b, true));
+
+// // Single line-mode.
+// a = '1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890';
+// b = 'abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij';
+// assertEquivalent(dmp.diff_main(a, b, false), dmp.diff_main(a, b, true));
+
+// // Overlap line-mode.
+// a = '1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n';
+// b = 'abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n';
+// var texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true));
+// var texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false));
+// assertEquivalent(texts_textmode, texts_linemode);
+
+// // Test null inputs.
+// try {
+// dmp.diff_main(null, null);
+// assertEquals(Error, null);
+// } catch (e) {
+// // Exception expected.
+// }
+ }
} \ No newline at end of file