Unnamed repository; edit this file 'description' to name the repository.
Optimise replacen_smolstr for single ascii replace
Alex Butler 6 months ago
parent b4ce652 · commit a2b3d25
-rw-r--r--lib/smol_str/CHANGELOG.md2
-rw-r--r--lib/smol_str/src/lib.rs35
-rw-r--r--lib/smol_str/tests/test.rs7
3 files changed, 44 insertions, 0 deletions
diff --git a/lib/smol_str/CHANGELOG.md b/lib/smol_str/CHANGELOG.md
index 2577011ffe..c0193f6fcb 100644
--- a/lib/smol_str/CHANGELOG.md
+++ b/lib/smol_str/CHANGELOG.md
@@ -5,6 +5,8 @@
- Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr`
~2x speedup inline, ~4-22x for heap.
- Optimise `StrExt::to_lowercase_smolstr`, `StrExt::to_uppercase_smolstr` ~2x speedup inline, ~5-50x for heap.
+- Optimise `StrExt::replace_smolstr`, `StrExt::replacen_smolstr` when replacing a single ASCII character with another single ASCII character.
+ ~3x speedup inline, ~1.8x for heap (len=50).
## 0.3.2 - 2024-10-23
diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs
index 5ef6260f56..d55ba20522 100644
--- a/lib/smol_str/src/lib.rs
+++ b/lib/smol_str/src/lib.rs
@@ -715,6 +715,13 @@ impl StrExt for str {
#[inline]
fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr {
+ // Fast path for replacing a single ASCII character with another inline.
+ if let [from_u8] = from.as_bytes() {
+ if let [to_u8] = to.as_bytes() {
+ return replacen_1_ascii(self, *from_u8, *to_u8, count);
+ }
+ }
+
let mut result = SmolStrBuilder::new();
let mut last_end = 0;
for (start, part) in self.match_indices(from).take(count) {
@@ -731,6 +738,34 @@ impl StrExt for str {
}
}
+/// Replaces up to `count` occurrences of the byte `from` in `src` with the byte `to`,
+/// scanning left to right, and returns the result as a [`SmolStr`].
+///
+/// Inputs of at most `INLINE_CAP` bytes are written straight into an inline buffer; longer
+/// inputs are collected into a byte vector that is then converted via `String`.
+///
+/// Both `from` and `to` must be ASCII (`< 0x80`). The `unsafe` code below relies on this:
+/// an ASCII byte never appears inside a multi-byte UTF-8 sequence, so swapping one ASCII
+/// byte for another keeps the output valid UTF-8. Passing a non-ASCII byte would build a
+/// string from invalid UTF-8.
+#[inline]
+fn replacen_1_ascii(src: &str, from: u8, to: u8, count: usize) -> SmolStr {
+    // This is a safe fn whose soundness depends on the caller's bytes being ASCII, so catch
+    // violations of that precondition in debug builds before reaching the unchecked code.
+    debug_assert!(
+        from.is_ascii() && to.is_ascii(),
+        "replacen_1_ascii requires ASCII `from` and `to` bytes"
+    );
+
+    // Number of replacements performed so far; replacing stops once it reaches `count`.
+    let mut replaced = 0;
+    let mut ascii_replace = |b: &u8| {
+        if *b == from && replaced != count {
+            replaced += 1;
+            to
+        } else {
+            *b
+        }
+    };
+    if src.len() <= INLINE_CAP {
+        // Bytes past `src.len()` stay zeroed; only the first `src.len()` are written.
+        let mut buf = [0u8; INLINE_CAP];
+        for (idx, b) in src.as_bytes().iter().enumerate() {
+            buf[idx] = ascii_replace(b);
+        }
+        SmolStr(Repr::Inline {
+            // SAFETY: `len` is in bounds, `src.len() <= INLINE_CAP` was checked above.
+            len: unsafe { InlineSize::transmute_from_u8(src.len() as u8) },
+            buf,
+        })
+    } else {
+        let out = src.as_bytes().iter().map(ascii_replace).collect();
+        // SAFETY: We replaced ascii with ascii on valid utf8 strings.
+        unsafe { String::from_utf8_unchecked(out).into() }
+    }
+}
+
/// Inline version of std fn `convert_while_ascii`. `s` must have len <= 23.
#[inline]
fn inline_convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> ([u8; INLINE_CAP], &str) {
diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs
index 0070b3a5ec..8f7d9ec39a 100644
--- a/lib/smol_str/tests/test.rs
+++ b/lib/smol_str/tests/test.rs
@@ -389,6 +389,13 @@ mod test_str_ext {
assert_eq!(result, "foo_dor_baz");
assert!(!result.is_heap_allocated());
}
+
+    /// Replacing one ASCII character with another, limited to a single replacement, must
+    /// rewrite only the first match and keep the short result stored inline.
+    #[test]
+    fn replacen_1_ascii() {
+        let input = "foo_bar_baz";
+        let replaced_once = input.replacen_smolstr("o", "u", 1);
+        assert_eq!(replaced_once, "fuo_bar_baz");
+        assert!(!replaced_once.is_heap_allocated());
+    }
}
#[cfg(feature = "borsh")]