Unnamed repository; edit this file 'description' to name the repository.
Add SmolStr vs String benchmarks
Alex Butler 6 months ago
parent f8612bc · commit 09ecb46
-rw-r--r--lib/smol_str/Cargo.toml9
-rw-r--r--lib/smol_str/README.md6
-rw-r--r--lib/smol_str/benches/bench.rs157
3 files changed, 172 insertions, 0 deletions
diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml
index d938e40ac2..e6f10a2715 100644
--- a/lib/smol_str/Cargo.toml
+++ b/lib/smol_str/Cargo.toml
@@ -20,8 +20,17 @@ arbitrary = { version = "1.3", optional = true }
proptest = "1.5"
serde_json = "1.0"
serde = { version = "1.0", features = ["derive"] }
+criterion = "0.7"
+rand = "0.9.2"
[features]
default = ["std"]
std = ["serde_core?/std", "borsh?/std"]
serde = ["dep:serde_core"]
+
+[[bench]]
+name = "bench"
+harness = false
+
+[profile.bench]
+lto = "fat"
diff --git a/lib/smol_str/README.md b/lib/smol_str/README.md
index ce16759e81..56296fb53f 100644
--- a/lib/smol_str/README.md
+++ b/lib/smol_str/README.md
@@ -22,6 +22,12 @@ languages. Strings consisting of a series of newlines, followed by a series of
whitespace are a typical pattern in computer programs because of indentation.
Note that a specialized interner might be a better solution for some use cases.
+## Benchmarks
+Run criterion benches with
+```sh
+cargo bench --bench \* -- --quick
+```
+
## MSRV Policy
Minimal Supported Rust Version: latest stable.
diff --git a/lib/smol_str/benches/bench.rs b/lib/smol_str/benches/bench.rs
new file mode 100644
index 0000000000..fa4c58832d
--- /dev/null
+++ b/lib/smol_str/benches/bench.rs
@@ -0,0 +1,157 @@
+//! SmolStr vs String benchmarks.
+use criterion::{criterion_group, criterion_main, Criterion};
+use rand::distr::{Alphanumeric, SampleString};
+use smol_str::{format_smolstr, SmolStr, StrExt, ToSmolStr};
+use std::hint::black_box;
+
+/// String lengths, in bytes, that every benchmark in this file iterates over.
+///
+/// - 12: small (inline)
+/// - 50: medium (heap)
+/// - 1000: large (heap)
+const TEST_LENS: [usize; 3] = [12, 50, 1000];
+
+/// Benchmarks formatting `"{str}-{n}"` with `format_smolstr!` versus `std`'s
+/// `format!`, once per length in `TEST_LENS`.
+///
+/// `str` is a random alphanumeric prefix and `n` a random 5-digit number; the
+/// prefix is sized so that the formatted result is exactly `len` bytes long.
+///
+/// # Panics
+///
+/// Panics if a `TEST_LENS` entry is too short to hold the `-{n}` suffix, or if
+/// a formatted result differs from the expected string.
+fn format_bench(c: &mut Criterion) {
+    for len in TEST_LENS {
+        let n = rand::random_range(10000..99999);
+        // Leave room for the digits of `n` *and* the `-` separator; subtracting
+        // only the digits would benchmark a string of `len + 1` bytes while the
+        // label still claims `len`.
+        let suffix_len = n.to_smolstr().len() + "-".len();
+        let str_len = len
+            .checked_sub(suffix_len)
+            .expect("TEST_LENS entry is too short for the `-{n}` suffix");
+        let str = Alphanumeric.sample_string(&mut rand::rng(), str_len);
+
+        // Built once, outside the measured closures, and used to verify both results.
+        let expected = format!("{str}-{n}");
+        assert_eq!(expected.len(), len);
+
+        c.bench_function(&format!("SmolStr format_smolstr! len={len}"), |b| {
+            let mut v = <_>::default();
+            b.iter(|| v = format_smolstr!("{str}-{n}"));
+            assert_eq!(v, expected);
+        });
+        c.bench_function(&format!("std format! len={len}"), |b| {
+            let mut v = <_>::default();
+            b.iter(|| v = format!("{str}-{n}"));
+            assert_eq!(v, expected);
+        });
+    }
+}
+
+/// Measures building an owned string from a borrowed `&String`:
+/// `SmolStr::from` against `String::from`, for each length in `TEST_LENS`.
+///
+/// The input is a fresh random alphanumeric string and is passed through
+/// `black_box` on every iteration.
+fn from_str_bench(c: &mut Criterion) {
+    for &len in &TEST_LENS {
+        let source = Alphanumeric.sample_string(&mut rand::rng(), len);
+
+        let smol_label = format!("SmolStr::from len={len}");
+        c.bench_function(&smol_label, |bencher| {
+            let mut built = SmolStr::default();
+            bencher.iter(|| built = SmolStr::from(black_box(&source)));
+            // Sanity check on the last value produced by the loop.
+            assert_eq!(built, source);
+        });
+
+        let std_label = format!("std String::from len={len}");
+        c.bench_function(&std_label, |bencher| {
+            let mut built = String::new();
+            bencher.iter(|| built = String::from(black_box(&source)));
+            assert_eq!(built, source);
+        });
+    }
+}
+
+/// Compares cloning a `SmolStr` with cloning a `String` that holds the same
+/// random alphanumeric text, for each length in `TEST_LENS`.
+fn clone_bench(c: &mut Criterion) {
+    for &len in &TEST_LENS {
+        let text = Alphanumeric.sample_string(&mut rand::rng(), len);
+        let smol = SmolStr::new(&text);
+
+        let smol_label = format!("SmolStr::clone len={len}");
+        c.bench_function(&smol_label, |bencher| {
+            let mut copy = SmolStr::default();
+            bencher.iter(|| copy = smol.clone());
+            // The last clone must still equal the source text.
+            assert_eq!(copy, text);
+        });
+
+        let std_label = format!("std String::clone len={len}");
+        c.bench_function(&std_label, |bencher| {
+            let mut copy = String::new();
+            bencher.iter(|| copy = text.clone());
+            assert_eq!(copy, text);
+        });
+    }
+}
+
+/// Measures equality checks for each length in `TEST_LENS`.
+///
+/// The `SmolStr` case compares a `SmolStr` against the `String` it was built
+/// from; the `std` case compares that `String` against itself. The right-hand
+/// operand goes through `black_box` in both cases.
+fn eq_bench(c: &mut Criterion) {
+    for &len in &TEST_LENS {
+        let text = Alphanumeric.sample_string(&mut rand::rng(), len);
+        let smol = SmolStr::new(&text);
+
+        let smol_label = format!("SmolStr::eq len={len}");
+        c.bench_function(&smol_label, |bencher| {
+            let mut equal = false;
+            bencher.iter(|| equal = smol == black_box(&text));
+            // Both sides hold the same text, so the comparison must succeed.
+            assert!(equal);
+        });
+
+        let std_label = format!("std String::eq len={len}");
+        c.bench_function(&std_label, |bencher| {
+            let mut equal = false;
+            bencher.iter(|| equal = &text == black_box(&text));
+            assert!(equal);
+        });
+    }
+}
+
+/// Benchmarks Unicode-aware lowercasing: `to_lowercase_smolstr` against
+/// `str::to_lowercase`, for each length in `TEST_LENS`.
+///
+/// Each input is `len` bytes long: random ASCII alphanumerics followed by one
+/// non-ASCII character.
+fn to_lowercase_bench(c: &mut Criterion) {
+    const END_CHAR: char = 'İ';
+
+    for &len in &TEST_LENS {
+        // Mostly ASCII, with a single non-ASCII character appended at the end.
+        let ascii_len = len - END_CHAR.len_utf8();
+        let mut owned = Alphanumeric.sample_string(&mut rand::rng(), ascii_len);
+        owned.push(END_CHAR);
+        let text: &str = &owned;
+
+        let smol_label = format!("SmolStr to_lowercase_smolstr len={len}");
+        c.bench_function(&smol_label, |bencher| {
+            let mut lowered = SmolStr::default();
+            bencher.iter(|| lowered = text.to_lowercase_smolstr());
+            // `std`'s result is the reference for correctness.
+            assert_eq!(lowered, text.to_lowercase());
+        });
+
+        let std_label = format!("std to_lowercase len={len}");
+        c.bench_function(&std_label, |bencher| {
+            let mut lowered = String::new();
+            bencher.iter(|| lowered = text.to_lowercase());
+            assert_eq!(lowered, text.to_lowercase());
+        });
+    }
+}
+
+/// Benchmarks ASCII-only lowercasing: `to_ascii_lowercase_smolstr` against
+/// `str::to_ascii_lowercase`, for each length in `TEST_LENS`.
+///
+/// Inputs are random alphanumeric strings.
+fn to_ascii_lowercase_bench(c: &mut Criterion) {
+    for &len in &TEST_LENS {
+        let owned = Alphanumeric.sample_string(&mut rand::rng(), len);
+        let text: &str = &owned;
+
+        let smol_label = format!("SmolStr to_ascii_lowercase_smolstr len={len}");
+        c.bench_function(&smol_label, |bencher| {
+            let mut lowered = SmolStr::default();
+            bencher.iter(|| lowered = text.to_ascii_lowercase_smolstr());
+            // `std`'s result is the reference for correctness.
+            assert_eq!(lowered, text.to_ascii_lowercase());
+        });
+
+        let std_label = format!("std to_ascii_lowercase len={len}");
+        c.bench_function(&std_label, |bencher| {
+            let mut lowered = String::new();
+            bencher.iter(|| lowered = text.to_ascii_lowercase());
+            assert_eq!(lowered, text.to_ascii_lowercase());
+        });
+    }
+}
+
+/// Benchmarks substring replacement: `replace_smolstr` against `str::replace`,
+/// for each length in `TEST_LENS`.
+///
+/// Each input is `len` bytes long: two random alphanumeric halves joined by a
+/// single `-`, which the benchmark replaces with `_`.
+fn replace_bench(c: &mut Criterion) {
+    for &len in &TEST_LENS {
+        // `head` + "-" + `tail` adds up to exactly `len` bytes.
+        let head_len = len / 2;
+        let tail_len = len - 1 - head_len;
+        let head = Alphanumeric.sample_string(&mut rand::rng(), head_len);
+        let tail = Alphanumeric.sample_string(&mut rand::rng(), tail_len);
+        let joined = format!("{head}-{tail}");
+        let text: &str = &joined;
+
+        let smol_label = format!("SmolStr replace_smolstr len={len}");
+        c.bench_function(&smol_label, |bencher| {
+            let mut replaced = SmolStr::default();
+            bencher.iter(|| replaced = text.replace_smolstr("-", "_"));
+            // `std`'s result is the reference for correctness.
+            assert_eq!(replaced, text.replace("-", "_"));
+        });
+
+        let std_label = format!("std replace len={len}");
+        c.bench_function(&std_label, |bencher| {
+            let mut replaced = String::new();
+            bencher.iter(|| replaced = text.replace("-", "_"));
+            assert_eq!(replaced, text.replace("-", "_"));
+        });
+    }
+}
+
+// Collect every benchmark function in this file into a single Criterion group
+// named `benches`, then pass that group to `criterion_main!`.
+criterion_group!(
+ benches,
+ format_bench,
+ from_str_bench,
+ clone_bench,
+ eq_bench,
+ to_lowercase_bench,
+ to_ascii_lowercase_bench,
+ replace_bench,
+);
+criterion_main!(benches);