Unnamed repository; edit this file 'description' to name the repository.
Merge remote-tracking branch 'smol_str/master' into rebuild-master-again
Laurențiu Nicola 6 months ago
parent c823b06 · parent 2fcccbb · commit 13cfdb8
-rw-r--r--lib/smol_str/.github/ci.rs127
-rw-r--r--lib/smol_str/.github/workflows/ci.yaml36
-rw-r--r--lib/smol_str/.gitignore4
-rw-r--r--lib/smol_str/CHANGELOG.md38
-rw-r--r--lib/smol_str/Cargo.toml37
-rw-r--r--lib/smol_str/LICENSE-APACHE201
-rw-r--r--lib/smol_str/LICENSE-MIT23
-rw-r--r--lib/smol_str/README.md35
-rw-r--r--lib/smol_str/benches/bench.rs118
-rw-r--r--lib/smol_str/bors.toml2
-rw-r--r--lib/smol_str/src/borsh.rs40
-rw-r--r--lib/smol_str/src/lib.rs998
-rw-r--r--lib/smol_str/src/serde.rs97
-rw-r--r--lib/smol_str/tests/test.rs451
-rw-r--r--lib/smol_str/tests/tidy.rs47
15 files changed, 2254 insertions, 0 deletions
diff --git a/lib/smol_str/.github/ci.rs b/lib/smol_str/.github/ci.rs
new file mode 100644
index 0000000000..c594e8973c
--- /dev/null
+++ b/lib/smol_str/.github/ci.rs
@@ -0,0 +1,127 @@
+use std::{
+ env, fs,
+ process::{self, Command, ExitStatus, Stdio},
+ time::Instant,
+};
+
+type Error = Box<dyn std::error::Error>;
+type Result<T> = std::result::Result<T, Error>;
+
+fn main() {
+ if let Err(err) = try_main() {
+ eprintln!("{}", err);
+ process::exit(1);
+ }
+}
+
+fn try_main() -> Result<()> {
+ let cwd = env::current_dir()?;
+ let cargo_toml = cwd.join("Cargo.toml");
+ assert!(
+ cargo_toml.exists(),
+ "Cargo.toml not found, cwd: {}",
+ cwd.display()
+ );
+
+ {
+ let _s = Section::new("BUILD_NO_DEFAULT_FEATURES");
+ shell("cargo test --all-features --workspace --no-run --no-default-features")?;
+ }
+
+ {
+ let _s = Section::new("BUILD");
+ shell("cargo test --all-features --workspace --no-run")?;
+ }
+
+ {
+ let _s = Section::new("TEST");
+ shell("cargo test --all-features --workspace")?;
+ shell("cargo test --no-default-features --workspace")?;
+ }
+
+ {
+ let _s = Section::new("TEST_BENCHES");
+ shell("cargo test --benches --all-features")?;
+ }
+
+ let current_branch = shell_output("git branch --show-current")?;
+ if &current_branch == "master" {
+ let _s = Section::new("PUBLISH");
+ let manifest = fs::read_to_string(&cargo_toml)?;
+ let version = get_field(&manifest, "version")?;
+ let tag = format!("v{}", version);
+ let tags = shell_output("git tag --list")?;
+
+ if !tags.contains(&tag) {
+ let token = env::var("CRATES_IO_TOKEN").unwrap();
+ shell(&format!("git tag v{}", version))?;
+ shell(&format!("cargo publish --token {}", token))?;
+ shell("git push --tags")?;
+ }
+ }
+ Ok(())
+}
+
+fn get_field<'a>(text: &'a str, name: &str) -> Result<&'a str> {
+ for line in text.lines() {
+ let words = line.split_ascii_whitespace().collect::<Vec<_>>();
+ match words.as_slice() {
+ [n, "=", v, ..] if n.trim() == name => {
+ assert!(v.starts_with('"') && v.ends_with('"'));
+ return Ok(&v[1..v.len() - 1]);
+ }
+ _ => (),
+ }
+ }
+ Err(format!("can't find `{}` in\n----\n{}\n----\n", name, text))?
+}
+
+fn shell(cmd: &str) -> Result<()> {
+ let status = command(cmd).status()?;
+ check_status(status)
+}
+
+fn shell_output(cmd: &str) -> Result<String> {
+ let output = command(cmd).stderr(Stdio::inherit()).output()?;
+ check_status(output.status)?;
+ let res = String::from_utf8(output.stdout)?;
+ let res = res.trim().to_string();
+ println!("{}", res);
+ Ok(res)
+}
+
+fn command(cmd: &str) -> Command {
+ eprintln!("> {}", cmd);
+ let words = cmd.split_ascii_whitespace().collect::<Vec<_>>();
+ let (cmd, args) = words.split_first().unwrap();
+ let mut res = Command::new(cmd);
+ res.args(args);
+ res
+}
+
+fn check_status(status: ExitStatus) -> Result<()> {
+ if !status.success() {
+ Err(format!("$status: {}", status))?;
+ }
+ Ok(())
+}
+
+struct Section {
+ name: &'static str,
+ start: Instant,
+}
+
+impl Section {
+ fn new(name: &'static str) -> Section {
+ println!("::group::{}", name);
+ let start = Instant::now();
+ Section { name, start }
+ }
+}
+
+impl Drop for Section {
+ fn drop(&mut self) {
+ eprintln!("{}: {:.2?}", self.name, self.start.elapsed());
+ println!("::endgroup::");
+ }
+}
diff --git a/lib/smol_str/.github/workflows/ci.yaml b/lib/smol_str/.github/workflows/ci.yaml
new file mode 100644
index 0000000000..1c2e347374
--- /dev/null
+++ b/lib/smol_str/.github/workflows/ci.yaml
@@ -0,0 +1,36 @@
+name: CI
+on:
+ pull_request:
+ push:
+ branches:
+ - master
+ - staging
+ - trying
+
+env:
+ CARGO_INCREMENTAL: 0
+ CARGO_NET_RETRY: 10
+ CI: 1
+ RUST_BACKTRACE: short
+ RUSTFLAGS: -D warnings
+ RUSTUP_MAX_RETRIES: 10
+
+jobs:
+ rust:
+ name: Rust
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v2
+ with:
+ fetch-depth: 0
+
+ - name: Install Rust toolchain
+ uses: actions-rust-lang/setup-rust-toolchain@v1
+ with:
+ cache: false
+
+ - run: rustc ./.github/ci.rs && ./ci
+ env:
+ CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}
diff --git a/lib/smol_str/.gitignore b/lib/smol_str/.gitignore
new file mode 100644
index 0000000000..0c8227b253
--- /dev/null
+++ b/lib/smol_str/.gitignore
@@ -0,0 +1,4 @@
+/target
+/ci
+/.vscode
+Cargo.lock
diff --git a/lib/smol_str/CHANGELOG.md b/lib/smol_str/CHANGELOG.md
new file mode 100644
index 0000000000..fb65d88ad1
--- /dev/null
+++ b/lib/smol_str/CHANGELOG.md
@@ -0,0 +1,38 @@
+# Changelog
+
+## Unreleased
+
+## 0.3.4 - 2025-10-23
+
+- Added `rust-version` field to `Cargo.toml`
+
+## 0.3.3 - 2025-10-23
+
+- Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr`:
+ ~2x speedup inline, ~4-22x for heap.
+- Optimise `StrExt::to_lowercase_smolstr`, `StrExt::to_uppercase_smolstr`: ~2x speedup inline, ~5-50x for heap.
+- Optimise `StrExt::replace_smolstr`, `StrExt::replacen_smolstr` for single ASCII replace:
+ ~3x speedup inline & heap.
+
+## 0.3.2 - 2024-10-23
+
+- Fix `SmolStrBuilder::push` incorrectly padding null bytes when spilling onto the heap on a
+ multibyte character push
+
+## 0.3.1 - 2024-09-04
+
+- Fix `SmolStrBuilder` leaking implementation details
+
+## 0.3.0 - 2024-09-04
+
+- Remove deprecated `SmolStr::new_inline_from_ascii` function
+- Remove `SmolStr::to_string` in favor of `ToString::to_string`
+- Add `impl AsRef<[u8]> for SmolStr`
+- Add `impl AsRef<OsStr> for SmolStr`
+- Add `impl AsRef<Path> for SmolStr`
+- Add `SmolStrBuilder`
+
+## 0.2.2 - 2024-05-14
+
+- Add `StrExt` trait providing `to_lowercase_smolstr`, `replace_smolstr` and similar
+- Add `PartialEq` optimization for `ptr_eq`-able representations
diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml
new file mode 100644
index 0000000000..4752a84ed4
--- /dev/null
+++ b/lib/smol_str/Cargo.toml
@@ -0,0 +1,37 @@
+[package]
+name = "smol_str"
+version = "0.3.4"
+description = "small-string optimized string type with O(1) clone"
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/rust-analyzer/smol_str"
+authors = ["Aleksey Kladov <[email protected]>", "Lukas Wirth <[email protected]>"]
+edition = "2021"
+rust-version = "1.89"
+
+[package.metadata.docs.rs]
+rustdoc-args = ["--cfg", "docsrs"]
+all-features = true
+
+[dependencies]
+serde_core = { version = "1.0.220", optional = true, default-features = false }
+borsh = { version = "1.4.0", optional = true, default-features = false }
+arbitrary = { version = "1.3", optional = true }
+
+[dev-dependencies]
+proptest = "1.5"
+serde_json = "1.0"
+serde = { version = "1.0", features = ["derive"] }
+criterion = "0.7"
+rand = "0.9.2"
+
+[features]
+default = ["std"]
+std = ["serde_core?/std", "borsh?/std"]
+serde = ["dep:serde_core"]
+
+[[bench]]
+name = "bench"
+harness = false
+
+[profile.bench]
+lto = "fat"
diff --git a/lib/smol_str/LICENSE-APACHE b/lib/smol_str/LICENSE-APACHE
new file mode 100644
index 0000000000..16fe87b06e
--- /dev/null
+++ b/lib/smol_str/LICENSE-APACHE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/lib/smol_str/LICENSE-MIT b/lib/smol_str/LICENSE-MIT
new file mode 100644
index 0000000000..31aa79387f
--- /dev/null
+++ b/lib/smol_str/LICENSE-MIT
@@ -0,0 +1,23 @@
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/lib/smol_str/README.md b/lib/smol_str/README.md
new file mode 100644
index 0000000000..56296fb53f
--- /dev/null
+++ b/lib/smol_str/README.md
@@ -0,0 +1,35 @@
+# smol_str
+
+[![CI](https://github.com/rust-analyzer/smol_str/workflows/CI/badge.svg)](https://github.com/rust-analyzer/smol_str/actions?query=branch%3Amaster+workflow%3ACI)
+[![Crates.io](https://img.shields.io/crates/v/smol_str.svg)](https://crates.io/crates/smol_str)
+[![API reference](https://docs.rs/smol_str/badge.svg)](https://docs.rs/smol_str/)
+
+
+A `SmolStr` is a string type that has the following properties:
+
+* `size_of::<SmolStr>() == 24` (therefore `== size_of::<String>()` on 64 bit platforms)
+* `Clone` is `O(1)`
+* Strings are stack-allocated if they are:
+ * Up to 23 bytes long
+ * Longer than 23 bytes, but substrings of `WS` (see `src/lib.rs`). Such strings consist
+ solely of consecutive newlines, followed by consecutive spaces
+* If a string does not satisfy the aforementioned conditions, it is heap-allocated
+* Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation
+
+Unlike `String`, however, `SmolStr` is immutable. The primary use case for
+`SmolStr` is a good enough default storage for tokens of typical programming
+languages. Strings consisting of a series of newlines, followed by a series of
+spaces, are a typical pattern in computer programs because of indentation.
+Note that a specialized interner might be a better solution for some use cases.
+
+## Benchmarks
+Run criterion benches with
+```sh
+cargo bench --bench \* -- --quick
+```
+
+## MSRV Policy
+
+Minimum Supported Rust Version: latest stable.
+
+Bumping MSRV is not considered a semver-breaking change.
diff --git a/lib/smol_str/benches/bench.rs b/lib/smol_str/benches/bench.rs
new file mode 100644
index 0000000000..2643b02557
--- /dev/null
+++ b/lib/smol_str/benches/bench.rs
@@ -0,0 +1,118 @@
+use criterion::{criterion_group, criterion_main, Criterion};
+use rand::distr::{Alphanumeric, SampleString};
+use smol_str::{format_smolstr, SmolStr, StrExt, ToSmolStr};
+use std::hint::black_box;
+
+/// 12: small (inline)
+/// 50: medium (heap)
+/// 1000: large (heap)
+const TEST_LENS: [usize; 3] = [12, 50, 1000];
+
+fn format_bench(c: &mut Criterion) {
+ for len in TEST_LENS {
+ let n = rand::random_range(10000..99999);
+ let str_len = len.checked_sub(n.to_smolstr().len()).unwrap();
+ let str = Alphanumeric.sample_string(&mut rand::rng(), str_len);
+
+ c.bench_function(&format!("format_smolstr! len={len}"), |b| {
+ let mut v = <_>::default();
+ b.iter(|| v = format_smolstr!("{str}-{n}"));
+ assert_eq!(v, format!("{str}-{n}"));
+ });
+ }
+}
+
+fn from_str_bench(c: &mut Criterion) {
+ for len in TEST_LENS {
+ let str = Alphanumeric.sample_string(&mut rand::rng(), len);
+
+ c.bench_function(&format!("SmolStr::from len={len}"), |b| {
+ let mut v = <_>::default();
+ b.iter(|| v = SmolStr::from(black_box(&str)));
+ assert_eq!(v, str);
+ });
+ }
+}
+
+fn clone_bench(c: &mut Criterion) {
+ for len in TEST_LENS {
+ let str = Alphanumeric.sample_string(&mut rand::rng(), len);
+ let smolstr = SmolStr::new(&str);
+
+ c.bench_function(&format!("SmolStr::clone len={len}"), |b| {
+ let mut v = <_>::default();
+ b.iter(|| v = smolstr.clone());
+ assert_eq!(v, str);
+ });
+ }
+}
+
+fn eq_bench(c: &mut Criterion) {
+ for len in TEST_LENS {
+ let str = Alphanumeric.sample_string(&mut rand::rng(), len);
+ let smolstr = SmolStr::new(&str);
+
+ c.bench_function(&format!("SmolStr::eq len={len}"), |b| {
+ let mut v = false;
+ b.iter(|| v = smolstr == black_box(&str));
+ assert!(v);
+ });
+ }
+}
+
+fn to_lowercase_bench(c: &mut Criterion) {
+ const END_CHAR: char = 'İ';
+
+ for len in TEST_LENS {
+ // mostly ascii seq with some non-ascii at the end
+ let mut str = Alphanumeric.sample_string(&mut rand::rng(), len - END_CHAR.len_utf8());
+ str.push(END_CHAR);
+ let str = str.as_str();
+
+ c.bench_function(&format!("to_lowercase_smolstr len={len}"), |b| {
+ let mut v = <_>::default();
+ b.iter(|| v = str.to_lowercase_smolstr());
+ assert_eq!(v, str.to_lowercase());
+ });
+ }
+}
+
+fn to_ascii_lowercase_bench(c: &mut Criterion) {
+ for len in TEST_LENS {
+ let str = Alphanumeric.sample_string(&mut rand::rng(), len);
+ let str = str.as_str();
+
+ c.bench_function(&format!("to_ascii_lowercase_smolstr len={len}"), |b| {
+ let mut v = <_>::default();
+ b.iter(|| v = str.to_ascii_lowercase_smolstr());
+ assert_eq!(v, str.to_ascii_lowercase());
+ });
+ }
+}
+
+fn replace_bench(c: &mut Criterion) {
+ for len in TEST_LENS {
+ let s_dash_s = Alphanumeric.sample_string(&mut rand::rng(), len / 2)
+ + "-"
+ + &Alphanumeric.sample_string(&mut rand::rng(), len - 1 - len / 2);
+ let str = s_dash_s.as_str();
+
+ c.bench_function(&format!("replace_smolstr len={len}"), |b| {
+ let mut v = <_>::default();
+ b.iter(|| v = str.replace_smolstr("-", "_"));
+ assert_eq!(v, str.replace("-", "_"));
+ });
+ }
+}
+
+criterion_group!(
+ benches,
+ format_bench,
+ from_str_bench,
+ clone_bench,
+ eq_bench,
+ to_lowercase_bench,
+ to_ascii_lowercase_bench,
+ replace_bench,
+);
+criterion_main!(benches);
diff --git a/lib/smol_str/bors.toml b/lib/smol_str/bors.toml
new file mode 100644
index 0000000000..b92b99ac30
--- /dev/null
+++ b/lib/smol_str/bors.toml
@@ -0,0 +1,2 @@
+status = [ "Rust" ]
+delete_merged_branches = true
diff --git a/lib/smol_str/src/borsh.rs b/lib/smol_str/src/borsh.rs
new file mode 100644
index 0000000000..ebb20d71a0
--- /dev/null
+++ b/lib/smol_str/src/borsh.rs
@@ -0,0 +1,40 @@
+use crate::{Repr, SmolStr, INLINE_CAP};
+use alloc::string::{String, ToString};
+use borsh::{
+ io::{Error, ErrorKind, Read, Write},
+ BorshDeserialize, BorshSerialize,
+};
+use core::mem::transmute;
+
+impl BorshSerialize for SmolStr {
+ fn serialize<W: Write>(&self, writer: &mut W) -> borsh::io::Result<()> {
+ self.as_str().serialize(writer)
+ }
+}
+
+impl BorshDeserialize for SmolStr {
+ #[inline]
+ fn deserialize_reader<R: Read>(reader: &mut R) -> borsh::io::Result<Self> {
+ let len = u32::deserialize_reader(reader)?;
+ if (len as usize) < INLINE_CAP {
+ let mut buf = [0u8; INLINE_CAP];
+ reader.read_exact(&mut buf[..len as usize])?;
+ _ = core::str::from_utf8(&buf[..len as usize]).map_err(|err| {
+ let msg = err.to_string();
+ Error::new(ErrorKind::InvalidData, msg)
+ })?;
+ Ok(SmolStr(Repr::Inline {
+ len: unsafe { transmute::<u8, crate::InlineSize>(len as u8) },
+ buf,
+ }))
+ } else {
+ // u8::vec_from_reader always returns Some on success in current implementation
+ let vec = u8::vec_from_reader(len, reader)?
+ .ok_or_else(|| Error::other("u8::vec_from_reader unexpectedly returned None"))?;
+ Ok(SmolStr::from(String::from_utf8(vec).map_err(|err| {
+ let msg = err.to_string();
+ Error::new(ErrorKind::InvalidData, msg)
+ })?))
+ }
+ }
+}
diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs
new file mode 100644
index 0000000000..ded07c61c6
--- /dev/null
+++ b/lib/smol_str/src/lib.rs
@@ -0,0 +1,998 @@
+#![cfg_attr(not(feature = "std"), no_std)]
+#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+
+extern crate alloc;
+
+use alloc::{borrow::Cow, boxed::Box, string::String, sync::Arc};
+use core::{
+ borrow::Borrow,
+ cmp::{self, Ordering},
+ convert::Infallible,
+ fmt, hash, iter, mem, ops,
+ str::FromStr,
+};
+
+/// A `SmolStr` is a string type that has the following properties:
+///
+/// * `size_of::<SmolStr>() == 24` (therefore `== size_of::<String>()` on 64 bit platforms)
+/// * `Clone` is `O(1)`
+/// * Strings are stack-allocated if they are:
+///     * Up to 23 bytes long
+///     * Longer than 23 bytes, but substrings of `WS` (see below). Such strings consist
+///       solely of consecutive newlines, followed by consecutive spaces
+/// * If a string does not satisfy the aforementioned conditions, it is heap-allocated
+/// * Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation
+///
+/// Unlike `String`, however, `SmolStr` is immutable. The primary use case for
+/// `SmolStr` is a good enough default storage for tokens of typical programming
+/// languages. Strings consisting of a series of newlines, followed by a series of
+/// spaces are a typical pattern in computer programs because of indentation.
+/// Note that a specialized interner might be a better solution for some use cases.
+///
+/// `WS`: A string of 32 newlines followed by 128 spaces.
+pub struct SmolStr(Repr);
+
+impl SmolStr {
+ /// Constructs an inline variant of `SmolStr`.
+ ///
+ /// This never allocates.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `text.len() > 23`.
+ #[inline]
+ pub const fn new_inline(text: &str) -> SmolStr {
+ assert!(text.len() <= INLINE_CAP); // avoids bounds checks in loop
+
+ let text = text.as_bytes();
+ let mut buf = [0; INLINE_CAP];
+ let mut i = 0;
+ while i < text.len() {
+ buf[i] = text[i];
+ i += 1
+ }
+ SmolStr(Repr::Inline {
+ // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`
+ // as we asserted it.
+ len: unsafe { InlineSize::transmute_from_u8(text.len() as u8) },
+ buf,
+ })
+ }
+
+ /// Constructs a `SmolStr` from a statically allocated string.
+ ///
+ /// This never allocates.
+ #[inline(always)]
+ pub const fn new_static(text: &'static str) -> SmolStr {
+ // NOTE: this never uses the inline storage; if a canonical
+ // representation is needed, we could check for `len() <= INLINE_CAP`
+ // and call `new_inline`, but this would mean an extra branch.
+ SmolStr(Repr::Static(text))
+ }
+
+ /// Constructs a `SmolStr` from a `str`, heap-allocating if necessary.
+ #[inline(always)]
+ pub fn new(text: impl AsRef<str>) -> SmolStr {
+ SmolStr(Repr::new(text.as_ref()))
+ }
+
+ /// Returns a `&str` slice of this `SmolStr`.
+ #[inline(always)]
+ pub fn as_str(&self) -> &str {
+ self.0.as_str()
+ }
+
+ /// Returns the length of `self` in bytes.
+ #[inline(always)]
+ pub fn len(&self) -> usize {
+ self.0.len()
+ }
+
+ /// Returns `true` if `self` has a length of zero bytes.
+ #[inline(always)]
+ pub fn is_empty(&self) -> bool {
+ self.0.is_empty()
+ }
+
+ /// Returns `true` if `self` is heap-allocated.
+ #[inline(always)]
+ pub const fn is_heap_allocated(&self) -> bool {
+ matches!(self.0, Repr::Heap(..))
+ }
+}
+
+impl Clone for SmolStr {
+ #[inline]
+ fn clone(&self) -> Self {
+ if !self.is_heap_allocated() {
+ // SAFETY: We verified that the payload of `Repr` is a POD
+ return unsafe { core::ptr::read(self as *const SmolStr) };
+ }
+ Self(self.0.clone())
+ }
+}
+
+impl Default for SmolStr {
+ #[inline(always)]
+ fn default() -> SmolStr {
+ SmolStr(Repr::Inline {
+ len: InlineSize::_V0,
+ buf: [0; INLINE_CAP],
+ })
+ }
+}
+
+impl ops::Deref for SmolStr {
+ type Target = str;
+
+ #[inline(always)]
+ fn deref(&self) -> &str {
+ self.as_str()
+ }
+}
+
+// region: PartialEq implementations
+
+impl Eq for SmolStr {}
+impl PartialEq<SmolStr> for SmolStr {
+ fn eq(&self, other: &SmolStr) -> bool {
+ self.0.ptr_eq(&other.0) || self.as_str() == other.as_str()
+ }
+}
+
+impl PartialEq<str> for SmolStr {
+ #[inline(always)]
+ fn eq(&self, other: &str) -> bool {
+ self.as_str() == other
+ }
+}
+
+impl PartialEq<SmolStr> for str {
+ #[inline(always)]
+ fn eq(&self, other: &SmolStr) -> bool {
+ other == self
+ }
+}
+
+impl<'a> PartialEq<&'a str> for SmolStr {
+ #[inline(always)]
+ fn eq(&self, other: &&'a str) -> bool {
+ self == *other
+ }
+}
+
+impl PartialEq<SmolStr> for &str {
+ #[inline(always)]
+ fn eq(&self, other: &SmolStr) -> bool {
+ *self == other
+ }
+}
+
+impl PartialEq<String> for SmolStr {
+ #[inline(always)]
+ fn eq(&self, other: &String) -> bool {
+ self.as_str() == other
+ }
+}
+
+impl PartialEq<SmolStr> for String {
+ #[inline(always)]
+ fn eq(&self, other: &SmolStr) -> bool {
+ other == self
+ }
+}
+
+impl<'a> PartialEq<&'a String> for SmolStr {
+ #[inline(always)]
+ fn eq(&self, other: &&'a String) -> bool {
+ self == *other
+ }
+}
+
+impl PartialEq<SmolStr> for &String {
+ #[inline(always)]
+ fn eq(&self, other: &SmolStr) -> bool {
+ *self == other
+ }
+}
+// endregion: PartialEq implementations
+
+impl Ord for SmolStr {
+ fn cmp(&self, other: &SmolStr) -> Ordering {
+ self.as_str().cmp(other.as_str())
+ }
+}
+
+impl PartialOrd for SmolStr {
+ fn partial_cmp(&self, other: &SmolStr) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl hash::Hash for SmolStr {
+ fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
+ self.as_str().hash(hasher);
+ }
+}
+
+impl fmt::Debug for SmolStr {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Debug::fmt(self.as_str(), f)
+ }
+}
+
+impl fmt::Display for SmolStr {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Display::fmt(self.as_str(), f)
+ }
+}
+
+impl iter::FromIterator<char> for SmolStr {
+ fn from_iter<I: iter::IntoIterator<Item = char>>(iter: I) -> SmolStr {
+ from_char_iter(iter.into_iter())
+ }
+}
+
+#[inline]
+fn from_char_iter(iter: impl Iterator<Item = char>) -> SmolStr {
+ from_buf_and_chars([0; _], 0, iter)
+}
+
+/// Builds a `SmolStr` from the first `buf_len` bytes of `buf` followed by every `char`
+/// yielded by `iter`.
+///
+/// The result is stored inline when the total length is at most `INLINE_CAP` bytes and
+/// on the heap otherwise.
+///
+/// `buf[..buf_len]` must be valid UTF-8: the heap paths panic if it is not, and the
+/// inline path stores the bytes without re-checking them.
+///
+/// # Panics
+///
+/// Panics if `buf_len` is greater than `INLINE_CAP`, or if a heap path is taken and
+/// `buf[..buf_len]` is not valid UTF-8.
+fn from_buf_and_chars(
+ mut buf: [u8; INLINE_CAP],
+ buf_len: usize,
+ mut iter: impl Iterator<Item = char>,
+) -> SmolStr {
+ // Every `char` encodes to at least one byte, so the lower bound of the size hint is
+ // also a lower bound on the number of bytes still to come.
+ let min_size = iter.size_hint().0 + buf_len;
+ if min_size > INLINE_CAP {
+ // The hint says the result cannot fit inline: collect straight into a `String`.
+ let heap: String = core::str::from_utf8(&buf[..buf_len])
+ .unwrap()
+ .chars()
+ .chain(iter)
+ .collect();
+ if heap.len() <= INLINE_CAP {
+ // size hint lied
+ return SmolStr::new_inline(&heap);
+ }
+ return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
+ }
+ // Optimistically keep encoding into the inline buffer until a char no longer fits.
+ let mut len = buf_len;
+ while let Some(ch) = iter.next() {
+ let size = ch.len_utf8();
+ if size + len > INLINE_CAP {
+ // Spill to the heap: copy what was encoded so far, then the char that did not
+ // fit, then whatever the iterator still yields.
+ let (min_remaining, _) = iter.size_hint();
+ let mut heap = String::with_capacity(size + len + min_remaining);
+ heap.push_str(core::str::from_utf8(&buf[..len]).unwrap());
+ heap.push(ch);
+ heap.extend(iter);
+ return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
+ }
+ ch.encode_utf8(&mut buf[len..]);
+ len += size;
+ }
+ SmolStr(Repr::Inline {
+ // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`
+ // as we otherwise return early.
+ len: unsafe { InlineSize::transmute_from_u8(len as u8) },
+ buf,
+ })
+}
+
+fn build_from_str_iter<T>(mut iter: impl Iterator<Item = T>) -> SmolStr
+where
+ T: AsRef<str>,
+ String: iter::Extend<T>,
+{
+ let mut len = 0;
+ let mut buf = [0u8; INLINE_CAP];
+ while let Some(slice) = iter.next() {
+ let slice = slice.as_ref();
+ let size = slice.len();
+ if size + len > INLINE_CAP {
+ let mut heap = String::with_capacity(size + len);
+ heap.push_str(core::str::from_utf8(&buf[..len]).unwrap());
+ heap.push_str(slice);
+ heap.extend(iter);
+ return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
+ }
+ buf[len..][..size].copy_from_slice(slice.as_bytes());
+ len += size;
+ }
+ SmolStr(Repr::Inline {
+ // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`
+ // as we otherwise return early.
+ len: unsafe { InlineSize::transmute_from_u8(len as u8) },
+ buf,
+ })
+}
+
+impl iter::FromIterator<String> for SmolStr {
+ fn from_iter<I: iter::IntoIterator<Item = String>>(iter: I) -> SmolStr {
+ build_from_str_iter(iter.into_iter())
+ }
+}
+
+impl<'a> iter::FromIterator<&'a String> for SmolStr {
+ fn from_iter<I: iter::IntoIterator<Item = &'a String>>(iter: I) -> SmolStr {
+ SmolStr::from_iter(iter.into_iter().map(|x| x.as_str()))
+ }
+}
+
+impl<'a> iter::FromIterator<&'a str> for SmolStr {
+ fn from_iter<I: iter::IntoIterator<Item = &'a str>>(iter: I) -> SmolStr {
+ build_from_str_iter(iter.into_iter())
+ }
+}
+
+impl AsRef<str> for SmolStr {
+ #[inline(always)]
+ fn as_ref(&self) -> &str {
+ self.as_str()
+ }
+}
+
+impl AsRef<[u8]> for SmolStr {
+ #[inline(always)]
+ fn as_ref(&self) -> &[u8] {
+ self.as_str().as_bytes()
+ }
+}
+
+#[cfg(feature = "std")]
+impl AsRef<std::ffi::OsStr> for SmolStr {
+ #[inline(always)]
+ fn as_ref(&self) -> &std::ffi::OsStr {
+ AsRef::<std::ffi::OsStr>::as_ref(self.as_str())
+ }
+}
+
+#[cfg(feature = "std")]
+impl AsRef<std::path::Path> for SmolStr {
+ #[inline(always)]
+ fn as_ref(&self) -> &std::path::Path {
+ AsRef::<std::path::Path>::as_ref(self.as_str())
+ }
+}
+
+impl From<&str> for SmolStr {
+ #[inline]
+ fn from(s: &str) -> SmolStr {
+ SmolStr::new(s)
+ }
+}
+
+impl From<&mut str> for SmolStr {
+ #[inline]
+ fn from(s: &mut str) -> SmolStr {
+ SmolStr::new(s)
+ }
+}
+
+impl From<&String> for SmolStr {
+ #[inline]
+ fn from(s: &String) -> SmolStr {
+ SmolStr::new(s)
+ }
+}
+
+impl From<String> for SmolStr {
+ #[inline(always)]
+ fn from(text: String) -> Self {
+ Self::new(text)
+ }
+}
+
+impl From<Box<str>> for SmolStr {
+ #[inline]
+ fn from(s: Box<str>) -> SmolStr {
+ SmolStr::new(s)
+ }
+}
+
+impl From<Arc<str>> for SmolStr {
+ #[inline]
+ fn from(s: Arc<str>) -> SmolStr {
+ let repr = Repr::new_on_stack(s.as_ref()).unwrap_or(Repr::Heap(s));
+ Self(repr)
+ }
+}
+
+impl<'a> From<Cow<'a, str>> for SmolStr {
+ #[inline]
+ fn from(s: Cow<'a, str>) -> SmolStr {
+ SmolStr::new(s)
+ }
+}
+
+impl From<SmolStr> for Arc<str> {
+ #[inline(always)]
+ fn from(text: SmolStr) -> Self {
+ match text.0 {
+ Repr::Heap(data) => data,
+ _ => text.as_str().into(),
+ }
+ }
+}
+
+impl From<SmolStr> for String {
+ #[inline(always)]
+ fn from(text: SmolStr) -> Self {
+ text.as_str().into()
+ }
+}
+
+impl Borrow<str> for SmolStr {
+ #[inline(always)]
+ fn borrow(&self) -> &str {
+ self.as_str()
+ }
+}
+
+impl FromStr for SmolStr {
+ type Err = Infallible;
+
+ #[inline]
+ fn from_str(s: &str) -> Result<SmolStr, Self::Err> {
+ Ok(SmolStr::from(s))
+ }
+}
+
+const INLINE_CAP: usize = InlineSize::_V23 as usize;
+const N_NEWLINES: usize = 32;
+const N_SPACES: usize = 128;
+const WS: &str =
+ "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n ";
+const _: () = {
+ assert!(WS.len() == N_NEWLINES + N_SPACES);
+ assert!(WS.as_bytes()[N_NEWLINES - 1] == b'\n');
+ assert!(WS.as_bytes()[N_NEWLINES] == b' ');
+};
+
+/// A [`u8`] with a bunch of niches.
+#[derive(Clone, Copy, Debug, PartialEq)]
+#[repr(u8)]
+enum InlineSize {
+ _V0 = 0,
+ _V1,
+ _V2,
+ _V3,
+ _V4,
+ _V5,
+ _V6,
+ _V7,
+ _V8,
+ _V9,
+ _V10,
+ _V11,
+ _V12,
+ _V13,
+ _V14,
+ _V15,
+ _V16,
+ _V17,
+ _V18,
+ _V19,
+ _V20,
+ _V21,
+ _V22,
+ _V23,
+}
+
+impl InlineSize {
+ /// SAFETY: `value` must be less than or equal to [`INLINE_CAP`]
+ #[inline(always)]
+ const unsafe fn transmute_from_u8(value: u8) -> Self {
+ debug_assert!(value <= InlineSize::_V23 as u8);
+ // SAFETY: The caller is responsible to uphold this invariant
+ unsafe { mem::transmute::<u8, Self>(value) }
+ }
+}
+
+#[derive(Clone, Debug)]
+enum Repr {
+ Inline {
+ len: InlineSize,
+ buf: [u8; INLINE_CAP],
+ },
+ Static(&'static str),
+ Heap(Arc<str>),
+}
+
+impl Repr {
+ /// This function tries to create a new Repr::Inline or Repr::Static
+ /// If it isn't possible, this function returns None
+ fn new_on_stack<T>(text: T) -> Option<Self>
+ where
+ T: AsRef<str>,
+ {
+ let text = text.as_ref();
+
+ let len = text.len();
+ if len <= INLINE_CAP {
+ let mut buf = [0; INLINE_CAP];
+ buf[..len].copy_from_slice(text.as_bytes());
+ return Some(Repr::Inline {
+ // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`
+ len: unsafe { InlineSize::transmute_from_u8(len as u8) },
+ buf,
+ });
+ }
+
+ if len <= N_NEWLINES + N_SPACES {
+ let bytes = text.as_bytes();
+ let possible_newline_count = cmp::min(len, N_NEWLINES);
+ let newlines = bytes[..possible_newline_count]
+ .iter()
+ .take_while(|&&b| b == b'\n')
+ .count();
+ let possible_space_count = len - newlines;
+ if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') {
+ let spaces = possible_space_count;
+ let substring = &WS[N_NEWLINES - newlines..N_NEWLINES + spaces];
+ return Some(Repr::Static(substring));
+ }
+ }
+ None
+ }
+
+ fn new(text: &str) -> Self {
+ Self::new_on_stack(text).unwrap_or_else(|| Repr::Heap(Arc::from(text)))
+ }
+
+ #[inline(always)]
+ fn len(&self) -> usize {
+ match self {
+ Repr::Heap(data) => data.len(),
+ Repr::Static(data) => data.len(),
+ Repr::Inline { len, .. } => *len as usize,
+ }
+ }
+
+ #[inline(always)]
+ fn is_empty(&self) -> bool {
+ match self {
+ Repr::Heap(data) => data.is_empty(),
+ Repr::Static(data) => data.is_empty(),
+ &Repr::Inline { len, .. } => len as u8 == 0,
+ }
+ }
+
+ #[inline]
+ fn as_str(&self) -> &str {
+ match self {
+ Repr::Heap(data) => data,
+ Repr::Static(data) => data,
+ Repr::Inline { len, buf } => {
+ let len = *len as usize;
+ // SAFETY: len is guaranteed to be <= INLINE_CAP
+ let buf = unsafe { buf.get_unchecked(..len) };
+ // SAFETY: buf is guaranteed to be valid utf8 for ..len bytes
+ unsafe { ::core::str::from_utf8_unchecked(buf) }
+ }
+ }
+ }
+
+    /// Cheap identity check used as a fast path before comparing string contents.
+    ///
+    /// Returns `true` when both values share the same heap allocation, point at the same
+    /// static string (same address and length), or hold identical inline data. A `false`
+    /// result does not mean the strings differ; callers must still compare the text.
+    fn ptr_eq(&self, other: &Self) -> bool {
+        match (self, other) {
+            (Self::Heap(l0), Self::Heap(r0)) => Arc::ptr_eq(l0, r0),
+            // `l0` and `r0` are `&&'static str` here. Dereference once so that the `str`
+            // pointers themselves (address and length) are compared; passing the bindings
+            // as-is would compare the addresses of the two `Repr` fields, which are only
+            // equal when `self` and `other` are the very same object.
+            (Self::Static(l0), Self::Static(r0)) => core::ptr::eq(*l0, *r0),
+            (
+                Self::Inline {
+                    len: l_len,
+                    buf: l_buf,
+                },
+                Self::Inline {
+                    len: r_len,
+                    buf: r_buf,
+                },
+            ) => l_len == r_len && l_buf == r_buf,
+            _ => false,
+        }
+    }
+}
+
+/// Convert value to [`SmolStr`] using [`fmt::Display`], potentially without allocating.
+///
+/// Almost identical to [`ToString`], but converts to `SmolStr` instead.
+pub trait ToSmolStr {
+ fn to_smolstr(&self) -> SmolStr;
+}
+
+/// [`str`] methods producing [`SmolStr`]s.
+pub trait StrExt: private::Sealed {
+ /// Returns the lowercase equivalent of this string slice as a new [`SmolStr`],
+ /// potentially without allocating.
+ ///
+ /// See [`str::to_lowercase`].
+ #[must_use = "this returns a new SmolStr without modifying the original"]
+ fn to_lowercase_smolstr(&self) -> SmolStr;
+
+ /// Returns the uppercase equivalent of this string slice as a new [`SmolStr`],
+ /// potentially without allocating.
+ ///
+ /// See [`str::to_uppercase`].
+ #[must_use = "this returns a new SmolStr without modifying the original"]
+ fn to_uppercase_smolstr(&self) -> SmolStr;
+
+ /// Returns the ASCII lowercase equivalent of this string slice as a new [`SmolStr`],
+ /// potentially without allocating.
+ ///
+ /// See [`str::to_ascii_lowercase`].
+ #[must_use = "this returns a new SmolStr without modifying the original"]
+ fn to_ascii_lowercase_smolstr(&self) -> SmolStr;
+
+ /// Returns the ASCII uppercase equivalent of this string slice as a new [`SmolStr`],
+ /// potentially without allocating.
+ ///
+ /// See [`str::to_ascii_uppercase`].
+ #[must_use = "this returns a new SmolStr without modifying the original"]
+ fn to_ascii_uppercase_smolstr(&self) -> SmolStr;
+
+ /// Replaces all matches of a &str with another &str returning a new [`SmolStr`],
+ /// potentially without allocating.
+ ///
+ /// See [`str::replace`].
+ #[must_use = "this returns a new SmolStr without modifying the original"]
+ fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr;
+
+ /// Replaces first N matches of a &str with another &str returning a new [`SmolStr`],
+ /// potentially without allocating.
+ ///
+ /// See [`str::replacen`].
+ #[must_use = "this returns a new SmolStr without modifying the original"]
+ fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr;
+}
+
+impl StrExt for str {
+    // Lowercases like `str::to_lowercase`, keeping the result inline when it fits.
+    #[inline]
+    fn to_lowercase_smolstr(&self) -> SmolStr {
+        let len = self.len();
+        if len <= INLINE_CAP {
+            let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_lowercase);
+            // `str::to_lowercase` lowercases 'Σ' depending on context ('ς' at the end of
+            // a word, 'σ' elsewhere), which a per-`char` mapping cannot reproduce. Defer
+            // to it for the whole string so that short and long inputs agree; the result
+            // is still stored inline when it fits.
+            if rest.contains('Σ') {
+                return self.to_lowercase().into();
+            }
+            from_buf_and_chars(
+                buf,
+                len - rest.len(),
+                rest.chars().flat_map(|c| c.to_lowercase()),
+            )
+        } else {
+            self.to_lowercase().into()
+        }
+    }
+
+ #[inline]
+ fn to_uppercase_smolstr(&self) -> SmolStr {
+ let len = self.len();
+ if len <= INLINE_CAP {
+ let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_uppercase);
+ from_buf_and_chars(
+ buf,
+ len - rest.len(),
+ rest.chars().flat_map(|c| c.to_uppercase()),
+ )
+ } else {
+ self.to_uppercase().into()
+ }
+ }
+
+ #[inline]
+ fn to_ascii_lowercase_smolstr(&self) -> SmolStr {
+ let len = self.len();
+ if len <= INLINE_CAP {
+ let mut buf = [0u8; INLINE_CAP];
+ buf[..len].copy_from_slice(self.as_bytes());
+ buf[..len].make_ascii_lowercase();
+ SmolStr(Repr::Inline {
+ // SAFETY: `len` is in bounds
+ len: unsafe { InlineSize::transmute_from_u8(len as u8) },
+ buf,
+ })
+ } else {
+ self.to_ascii_lowercase().into()
+ }
+ }
+
+ #[inline]
+ fn to_ascii_uppercase_smolstr(&self) -> SmolStr {
+ let len = self.len();
+ if len <= INLINE_CAP {
+ let mut buf = [0u8; INLINE_CAP];
+ buf[..len].copy_from_slice(self.as_bytes());
+ buf[..len].make_ascii_uppercase();
+ SmolStr(Repr::Inline {
+ // SAFETY: `len` is in bounds
+ len: unsafe { InlineSize::transmute_from_u8(len as u8) },
+ buf,
+ })
+ } else {
+ self.to_ascii_uppercase().into()
+ }
+ }
+
+ #[inline]
+ fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr {
+ self.replacen_smolstr(from, to, usize::MAX)
+ }
+
+ #[inline]
+ fn replacen_smolstr(&self, from: &str, to: &str, mut count: usize) -> SmolStr {
+ // Fast path for replacing a single ASCII character with another inline.
+ if let [from_u8] = from.as_bytes() {
+ if let [to_u8] = to.as_bytes() {
+ return if self.len() <= count {
+ // SAFETY: `from_u8` & `to_u8` are ascii
+ unsafe { replacen_1_ascii(self, |b| if b == from_u8 { *to_u8 } else { *b }) }
+ } else {
+ unsafe {
+ replacen_1_ascii(self, |b| {
+ if b == from_u8 && count != 0 {
+ count -= 1;
+ *to_u8
+ } else {
+ *b
+ }
+ })
+ }
+ };
+ }
+ }
+
+ let mut result = SmolStrBuilder::new();
+ let mut last_end = 0;
+ for (start, part) in self.match_indices(from).take(count) {
+ // SAFETY: `start` is guaranteed to be within the bounds of `self` as per
+ // `match_indices` and last_end is always less than or equal to `start`
+ result.push_str(unsafe { self.get_unchecked(last_end..start) });
+ result.push_str(to);
+ last_end = start + part.len();
+ }
+ // SAFETY: `self.len()` is guaranteed to be within the bounds of `self` and last_end is
+ // always less than or equal to `self.len()`
+ result.push_str(unsafe { self.get_unchecked(last_end..self.len()) });
+ SmolStr::from(result)
+ }
+}
+
+/// SAFETY: `map` fn must only replace ascii with ascii or return unchanged bytes.
+#[inline]
+unsafe fn replacen_1_ascii(src: &str, mut map: impl FnMut(&u8) -> u8) -> SmolStr {
+ if src.len() <= INLINE_CAP {
+ let mut buf = [0u8; INLINE_CAP];
+ for (idx, b) in src.as_bytes().iter().enumerate() {
+ buf[idx] = map(b);
+ }
+ SmolStr(Repr::Inline {
+ // SAFETY: `len` is in bounds
+ len: unsafe { InlineSize::transmute_from_u8(src.len() as u8) },
+ buf,
+ })
+ } else {
+ let out = src.as_bytes().iter().map(map).collect();
+ // SAFETY: We replaced ascii with ascii on valid utf8 strings.
+ unsafe { String::from_utf8_unchecked(out).into() }
+ }
+}
+
+/// Inline version of std fn `convert_while_ascii`. `s` must have len <= 23.
+///
+/// Applies `convert` to the leading ASCII bytes of `s` and writes the results to the
+/// start of the returned buffer; bytes that were not converted stay zero. The returned
+/// `&str` is the unconverted tail of `s`, which is either empty or starts at the first
+/// non-ASCII byte.
+///
+/// # Panics
+///
+/// Panics if `s` is longer than `INLINE_CAP` bytes.
+#[inline]
+fn inline_convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> ([u8; INLINE_CAP], &str) {
+ // Process the input in chunks of 16 bytes to enable auto-vectorization.
+ // Previously the chunk size depended on the size of `usize`,
+ // but 16 bytes is also the better choice on 32-bit platforms with SSE or NEON.
+ // The only downside on other platforms would be a bit more loop-unrolling.
+ const N: usize = 16;
+
+ debug_assert!(s.len() <= INLINE_CAP, "only for inline-able strings");
+
+ let mut slice = s.as_bytes();
+ let mut out = [0u8; INLINE_CAP];
+ // `out_slice` starts with the same length as `slice` and is always advanced by the
+ // same amount, so both keep the same remaining length throughout.
+ let mut out_slice = &mut out[..slice.len()];
+ let mut is_ascii = [false; N];
+
+ while slice.len() >= N {
+ // SAFETY: checked in loop condition
+ let chunk = unsafe { slice.get_unchecked(..N) };
+ // SAFETY: out_slice has at least same length as input slice and gets sliced with the same offsets
+ let out_chunk = unsafe { out_slice.get_unchecked_mut(..N) };
+
+ for j in 0..N {
+ is_ascii[j] = chunk[j] <= 127;
+ }
+
+ // Auto-vectorization for this check is a bit fragile, sum and comparing against the chunk
+ // size gives the best result, specifically a pmovmsk instruction on x86.
+ // See https://github.com/llvm/llvm-project/issues/96395 for why llvm does not
+ // currently recognize other similar idioms.
+ if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N {
+ // The chunk contains a non-ASCII byte; the byte-wise loop below converts the
+ // ASCII bytes in front of it.
+ break;
+ }
+
+ for j in 0..N {
+ out_chunk[j] = convert(&chunk[j]);
+ }
+
+ // SAFETY: `slice.len() >= N` was checked in the loop condition
+ slice = unsafe { slice.get_unchecked(N..) };
+ // SAFETY: out_slice has the same remaining length as slice
+ out_slice = unsafe { out_slice.get_unchecked_mut(N..) };
+ }
+
+ // handle the remainder as individual bytes
+ while !slice.is_empty() {
+ let byte = slice[0];
+ if byte > 127 {
+ break;
+ }
+ // SAFETY: out_slice has at least same length as input slice
+ unsafe {
+ *out_slice.get_unchecked_mut(0) = convert(&byte);
+ }
+ // SAFETY: slice is non-empty, as checked in the loop condition
+ slice = unsafe { slice.get_unchecked(1..) };
+ // SAFETY: out_slice has the same remaining length as slice
+ out_slice = unsafe { out_slice.get_unchecked_mut(1..) };
+ }
+
+ unsafe {
+ // SAFETY: we know this is a valid char boundary
+ // since we only skipped over leading ascii bytes
+ let rest = core::str::from_utf8_unchecked(slice);
+ (out, rest)
+ }
+}
+
+impl<T> ToSmolStr for T
+where
+ T: fmt::Display + ?Sized,
+{
+ fn to_smolstr(&self) -> SmolStr {
+ format_smolstr!("{}", self)
+ }
+}
+
+mod private {
+ /// No downstream impls allowed.
+ pub trait Sealed {}
+ impl Sealed for str {}
+}
+
+/// Formats arguments to a [`SmolStr`], potentially without allocating.
+///
+/// See [`alloc::format!`] or [`format_args!`] for syntax documentation.
+#[macro_export]
+macro_rules! format_smolstr {
+ ($($tt:tt)*) => {{
+ let mut w = $crate::SmolStrBuilder::new();
+ ::core::fmt::Write::write_fmt(&mut w, format_args!($($tt)*)).expect("a formatting trait implementation returned an error");
+ w.finish()
+ }};
+}
+
+/// A builder that can be used to efficiently build a [`SmolStr`].
+///
+/// This won't allocate if the final string fits into the inline buffer.
+#[derive(Clone, Default, Debug, PartialEq, Eq)]
+pub struct SmolStrBuilder(SmolStrBuilderRepr);
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+enum SmolStrBuilderRepr {
+ Inline { len: usize, buf: [u8; INLINE_CAP] },
+ Heap(String),
+}
+
+impl Default for SmolStrBuilderRepr {
+ #[inline]
+ fn default() -> Self {
+ SmolStrBuilderRepr::Inline {
+ buf: [0; INLINE_CAP],
+ len: 0,
+ }
+ }
+}
+
+impl SmolStrBuilder {
+ /// Creates a new empty [`SmolStrBuilder`].
+ #[must_use]
+ pub const fn new() -> Self {
+ Self(SmolStrBuilderRepr::Inline {
+ buf: [0; INLINE_CAP],
+ len: 0,
+ })
+ }
+
+ /// Builds a [`SmolStr`] from `self`.
+ ///
+ /// The builder is only borrowed, so it stays usable afterwards. Inline contents are
+ /// copied into an inline `SmolStr`; heap contents go through `Repr::new`.
+ #[must_use]
+ pub fn finish(&self) -> SmolStr {
+ SmolStr(match &self.0 {
+ &SmolStrBuilderRepr::Inline { len, buf } => {
+ debug_assert!(len <= INLINE_CAP);
+ Repr::Inline {
+ // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`,
+ // because `push` and `push_str` switch to the heap representation before exceeding it.
+ len: unsafe { InlineSize::transmute_from_u8(len as u8) },
+ buf,
+ }
+ }
+ SmolStrBuilderRepr::Heap(heap) => Repr::new(heap),
+ })
+ }
+
+ /// Appends the given [`char`] to the end of `self`'s buffer.
+ ///
+ /// The inline buffer is used while the contents fit in `INLINE_CAP` bytes; the first
+ /// `char` that does not fit moves everything into a heap-allocated `String`.
+ pub fn push(&mut self, c: char) {
+ match &mut self.0 {
+ SmolStrBuilderRepr::Inline { len, buf } => {
+ let char_len = c.len_utf8();
+ let new_len = *len + char_len;
+ if new_len <= INLINE_CAP {
+ c.encode_utf8(&mut buf[*len..]);
+ *len += char_len;
+ } else {
+ let mut heap = String::with_capacity(new_len);
+ // copy existing inline bytes over to the heap
+ // SAFETY: inline data is guaranteed to be valid utf8 for `*len` bytes
+ unsafe { heap.as_mut_vec().extend_from_slice(&buf[..*len]) };
+ heap.push(c);
+ self.0 = SmolStrBuilderRepr::Heap(heap);
+ }
+ }
+ SmolStrBuilderRepr::Heap(h) => h.push(c),
+ }
+ }
+
+ /// Appends a given string slice onto the end of `self`'s buffer.
+ pub fn push_str(&mut self, s: &str) {
+ match &mut self.0 {
+ SmolStrBuilderRepr::Inline { len, buf } => {
+ let old_len = *len;
+ *len += s.len();
+
+ // if the new length will fit on the stack (even if it fills it entirely)
+ if *len <= INLINE_CAP {
+ buf[old_len..*len].copy_from_slice(s.as_bytes());
+ return; // skip the heap push below
+ }
+
+ let mut heap = String::with_capacity(*len);
+
+ // copy existing inline bytes over to the heap
+ // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes
+ unsafe { heap.as_mut_vec().extend_from_slice(&buf[..old_len]) };
+ heap.push_str(s);
+ self.0 = SmolStrBuilderRepr::Heap(heap);
+ }
+ SmolStrBuilderRepr::Heap(heap) => heap.push_str(s),
+ }
+ }
+}
+
+impl fmt::Write for SmolStrBuilder {
+ #[inline]
+ fn write_str(&mut self, s: &str) -> fmt::Result {
+ self.push_str(s);
+ Ok(())
+ }
+}
+
+impl From<SmolStrBuilder> for SmolStr {
+ fn from(value: SmolStrBuilder) -> Self {
+ value.finish()
+ }
+}
+
+#[cfg(feature = "arbitrary")]
+impl<'a> arbitrary::Arbitrary<'a> for SmolStr {
+ fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result<Self, arbitrary::Error> {
+ let s = <&str>::arbitrary(u)?;
+ Ok(SmolStr::new(s))
+ }
+}
+
+#[cfg(feature = "borsh")]
+mod borsh;
+#[cfg(feature = "serde")]
+mod serde;
+
+#[test]
+fn from_buf_and_chars_size_hinted_heap() {
+ let str = from_buf_and_chars(
+ *b"abcdefghijklmnopqr00000",
+ 18,
+ "_0x1x2x3x4x5x6x7x8x9x10x11x12x13".chars(),
+ );
+
+ assert_eq!(str, "abcdefghijklmnopqr_0x1x2x3x4x5x6x7x8x9x10x11x12x13");
+}
diff --git a/lib/smol_str/src/serde.rs b/lib/smol_str/src/serde.rs
new file mode 100644
index 0000000000..4f08b444c5
--- /dev/null
+++ b/lib/smol_str/src/serde.rs
@@ -0,0 +1,97 @@
+use alloc::{string::String, vec::Vec};
+use core::fmt;
+
+use serde::de::{Deserializer, Error, Unexpected, Visitor};
+use serde_core as serde;
+
+use crate::SmolStr;
+
+// https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125
+fn smol_str<'de: 'a, 'a, D>(deserializer: D) -> Result<SmolStr, D::Error>
+where
+ D: Deserializer<'de>,
+{
+ struct SmolStrVisitor;
+
+ impl<'a> Visitor<'a> for SmolStrVisitor {
+ type Value = SmolStr;
+
+ fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+ formatter.write_str("a string")
+ }
+
+ fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
+ where
+ E: Error,
+ {
+ Ok(SmolStr::from(v))
+ }
+
+ fn visit_borrowed_str<E>(self, v: &'a str) -> Result<Self::Value, E>
+ where
+ E: Error,
+ {
+ Ok(SmolStr::from(v))
+ }
+
+ fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
+ where
+ E: Error,
+ {
+ Ok(SmolStr::from(v))
+ }
+
+ fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
+ where
+ E: Error,
+ {
+ match core::str::from_utf8(v) {
+ Ok(s) => Ok(SmolStr::from(s)),
+ Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)),
+ }
+ }
+
+ fn visit_borrowed_bytes<E>(self, v: &'a [u8]) -> Result<Self::Value, E>
+ where
+ E: Error,
+ {
+ match core::str::from_utf8(v) {
+ Ok(s) => Ok(SmolStr::from(s)),
+ Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)),
+ }
+ }
+
+ fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
+ where
+ E: Error,
+ {
+ match String::from_utf8(v) {
+ Ok(s) => Ok(SmolStr::from(s)),
+ Err(e) => Err(Error::invalid_value(
+ Unexpected::Bytes(&e.into_bytes()),
+ &self,
+ )),
+ }
+ }
+ }
+
+ deserializer.deserialize_str(SmolStrVisitor)
+}
+
+impl serde::Serialize for SmolStr {
+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+ where
+ S: serde::Serializer,
+ {
+ self.as_str().serialize(serializer)
+ }
+}
+
+impl<'de> serde::Deserialize<'de> for SmolStr {
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: serde::Deserializer<'de>,
+ {
+ smol_str(deserializer)
+ }
+}
diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs
new file mode 100644
index 0000000000..8f7d9ec39a
--- /dev/null
+++ b/lib/smol_str/tests/test.rs
@@ -0,0 +1,451 @@
+use std::sync::Arc;
+
+#[cfg(not(miri))]
+use proptest::{prop_assert, prop_assert_eq, proptest};
+
+use smol_str::{SmolStr, SmolStrBuilder};
+
+// On 64-bit targets a `SmolStr` must occupy exactly as many bytes as a `String`.
+#[test]
+#[cfg(target_pointer_width = "64")]
+fn smol_str_is_smol() {
+    use std::mem::size_of;
+
+    let smol_size = size_of::<SmolStr>();
+    let string_size = size_of::<String>();
+    assert_eq!(smol_size, string_size);
+}
+
+// Compile-time check that `SmolStr` is `Send + Sync + Debug + Clone`.
+#[test]
+fn assert_traits() {
+    fn requires_traits<T>()
+    where
+        T: Send + Sync + ::std::fmt::Debug + Clone,
+    {
+    }
+
+    requires_traits::<SmolStr>();
+}
+
+// Round-trips a string through `SmolStr` into `String` and into `Arc<str>`.
+#[test]
+fn conversions() {
+    const TEXT: &str = "Hello, World!";
+
+    // &str -> SmolStr -> String
+    let smol: SmolStr = TEXT.into();
+    let owned: String = smol.into();
+    assert_eq!(owned, TEXT);
+
+    // Arc<str> -> SmolStr -> Arc<str>
+    let smol: SmolStr = Arc::<str>::from(TEXT).into();
+    let shared: Arc<str> = smol.into();
+    assert_eq!(shared.as_ref(), TEXT);
+}
+
+// `SmolStr::new_inline` must be usable in `const` context and agree with `SmolStr::from`.
+#[test]
+fn const_fn_ctor() {
+    const EMPTY: SmolStr = SmolStr::new_inline("");
+    const A: SmolStr = SmolStr::new_inline("A");
+    const HELLO: SmolStr = SmolStr::new_inline("HELLO");
+    const LONG: SmolStr = SmolStr::new_inline("ABCDEFGHIZKLMNOPQRSTUVW");
+
+    // Each constant is paired with the text it was built from.
+    let cases = [
+        (EMPTY, ""),
+        (A, "A"),
+        (HELLO, "HELLO"),
+        (LONG, "ABCDEFGHIZKLMNOPQRSTUVW"),
+    ];
+    for (inline, text) in cases.iter() {
+        assert_eq!(*inline, SmolStr::from(*text));
+    }
+}
+
+// Shared property check: `smol` must mirror `std_str` in content, length and emptiness,
+// and must not be heap allocated when it is at most 23 bytes long.
+#[cfg(not(miri))]
+fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner::TestCaseError> {
+    prop_assert_eq!(smol.as_str(), std_str);
+    prop_assert_eq!(smol.len(), std_str.len());
+    prop_assert_eq!(smol.is_empty(), std_str.is_empty());
+
+    let short_enough_for_inline = smol.len() <= 23;
+    if short_enough_for_inline {
+        prop_assert!(!smol.is_heap_allocated());
+    }
+
+    Ok(())
+}
+
+// Property tests: every generated string is converted to a `SmolStr` and validated with
+// `check_props`. Compiled out under miri.
+#[cfg(not(miri))]
+proptest! {
+ // Arbitrary strings.
+ #[test]
+ fn roundtrip(s: String) {
+ check_props(s.as_str(), SmolStr::new(s.clone()))?;
+ }
+
+ // Strings consisting only of spaces.
+ #[test]
+ fn roundtrip_spaces(s in r"( )*") {
+ check_props(s.as_str(), SmolStr::new(s.clone()))?;
+ }
+
+ // Strings consisting only of newlines.
+ #[test]
+ fn roundtrip_newlines(s in r"\n*") {
+ check_props(s.as_str(), SmolStr::new(s.clone()))?;
+ }
+
+ // Strings mixing spaces and newlines.
+ #[test]
+ fn roundtrip_ws(s in r"( |\n)*") {
+ check_props(s.as_str(), SmolStr::new(s.clone()))?;
+ }
+
+ // Collecting an iterator of owned `String`s must match concatenating them.
+ #[test]
+ fn from_string_iter(slices in proptest::collection::vec(".*", 1..100)) {
+ let string: String = slices.iter().map(|x| x.as_str()).collect();
+ let smol: SmolStr = slices.into_iter().collect();
+ check_props(string.as_str(), smol)?;
+ }
+
+ // Collecting an iterator of borrowed strings must match concatenating them.
+ #[test]
+ fn from_str_iter(slices in proptest::collection::vec(".*", 1..100)) {
+ let string: String = slices.iter().map(|x| x.as_str()).collect();
+ let smol: SmolStr = slices.iter().collect();
+ check_props(string.as_str(), smol)?;
+ }
+}
+
+// JSON round-trip tests for the serde integration, covering both the `from_str` and the
+// `from_reader` deserialization entry points.
+#[cfg(feature = "serde")]
+mod serde_tests {
+    use super::*;
+    use serde::{Deserialize, Serialize};
+    use std::collections::HashMap;
+    use std::io::Cursor;
+
+    #[derive(Serialize, Deserialize)]
+    struct SmolStrStruct {
+        pub(crate) s: SmolStr,
+        pub(crate) vec: Vec<SmolStr>,
+        pub(crate) map: HashMap<SmolStr, SmolStr>,
+    }
+
+    // Single-entry map shared by the struct and hashmap tests.
+    fn sample_map() -> HashMap<SmolStr, SmolStr> {
+        let mut map = HashMap::new();
+        map.insert(SmolStr::new("a"), SmolStr::new("ohno"));
+        map
+    }
+
+    // Struct fixture exercising a plain field, a vector and a map of `SmolStr`.
+    fn sample_struct() -> SmolStrStruct {
+        SmolStrStruct {
+            s: SmolStr::new("Hello, World"),
+            vec: vec![SmolStr::new("Hello, World"), SmolStr::new("Hello, World")],
+            map: sample_map(),
+        }
+    }
+
+    // Vector fixture containing an empty and a non-empty string.
+    fn sample_vec() -> Vec<SmolStr> {
+        vec![SmolStr::new(""), SmolStr::new("b")]
+    }
+
+    #[test]
+    fn test_serde() {
+        let json = serde_json::to_string(&SmolStr::new("Hello, World")).unwrap();
+        assert_eq!(json, "\"Hello, World\"");
+        let decoded: SmolStr = serde_json::from_str(&json).unwrap();
+        assert_eq!(decoded, "Hello, World");
+    }
+
+    #[test]
+    fn test_serde_reader() {
+        let json = serde_json::to_string(&SmolStr::new("Hello, World")).unwrap();
+        assert_eq!(json, "\"Hello, World\"");
+        let decoded: SmolStr = serde_json::from_reader(Cursor::new(json)).unwrap();
+        assert_eq!(decoded, "Hello, World");
+    }
+
+    #[test]
+    fn test_serde_struct() {
+        let json = serde_json::to_string(&sample_struct()).unwrap();
+        let _decoded: SmolStrStruct = serde_json::from_str(&json).unwrap();
+    }
+
+    #[test]
+    fn test_serde_struct_reader() {
+        let json = serde_json::to_string(&sample_struct()).unwrap();
+        let _decoded: SmolStrStruct = serde_json::from_reader(Cursor::new(json)).unwrap();
+    }
+
+    #[test]
+    fn test_serde_hashmap() {
+        let json = serde_json::to_string(&sample_map()).unwrap();
+        let _decoded: HashMap<SmolStr, SmolStr> = serde_json::from_str(&json).unwrap();
+    }
+
+    #[test]
+    fn test_serde_hashmap_reader() {
+        let json = serde_json::to_string(&sample_map()).unwrap();
+        let _decoded: HashMap<SmolStr, SmolStr> =
+            serde_json::from_reader(Cursor::new(json)).unwrap();
+    }
+
+    #[test]
+    fn test_serde_vec() {
+        let json = serde_json::to_string(&sample_vec()).unwrap();
+        let _decoded: Vec<SmolStr> = serde_json::from_str(&json).unwrap();
+    }
+
+    #[test]
+    fn test_serde_vec_reader() {
+        let json = serde_json::to_string(&sample_vec()).unwrap();
+        let _decoded: Vec<SmolStr> = serde_json::from_reader(Cursor::new(json)).unwrap();
+    }
+}
+
+// A `HashMap` keyed by `SmolStr` must be searchable with a plain `&str`.
+#[test]
+fn test_search_in_hashmap() {
+    use std::collections::HashMap;
+
+    let mut lookup: HashMap<SmolStr, i32> = HashMap::new();
+    lookup.insert("aaa".into(), 17);
+
+    let found = *lookup.get("aaa").unwrap();
+    assert_eq!(17, found);
+}
+
+// Collecting `char`s into a `SmolStr` must reproduce the source text; each example also
+// records whether the result is expected to be heap allocated.
+#[test]
+fn test_from_char_iterator() {
+ let examples = [
+ // Simple keyword-like strings
+ ("if", false),
+ ("for", false),
+ ("impl", false),
+ // Strings containing multi-byte (in UTF-8) characters
+ ("パーティーへ行かないか", true),
+ ("パーティーへ行か", true),
+ ("パーティーへ行_", false),
+ ("和製漢語", false),
+ ("部落格", false),
+ ("사회과학원 어학연구소", true),
+ // String containing diverse characters
+ ("表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀", true),
+ ];
+ for (raw, is_heap) in &examples {
+ let s: SmolStr = raw.chars().collect();
+ assert_eq!(s.as_str(), *raw);
+ assert_eq!(s.is_heap_allocated(), *is_heap);
+ }
+ // String which has too many characters to even consider inlining: Chars::size_hint uses
+ // (`len` + 3) / 4 as its lower bound. With `len` = 23 * 4 + 1 = 93, this results in 24, which
+ // is more than the 23 bytes that fit inline, so `from_iter` is expected to heap allocate
+ // immediately
+ let raw = "a".repeat(23 * 4 + 1);
+ let s: SmolStr = raw.chars().collect();
+ assert_eq!(s.as_str(), raw);
+ assert!(s.is_heap_allocated());
+}
+
+// A wildly overestimated `size_hint` must not force a short string onto the heap.
+#[test]
+fn test_bad_size_hint_char_iter() {
+    // Iterator adaptor that forwards items unchanged but reports a bogus lower bound.
+    struct BadSizeHint<I>(I);
+
+    impl<I: Iterator> Iterator for BadSizeHint<I> {
+        type Item = I::Item;
+
+        fn next(&mut self) -> Option<I::Item> {
+            self.0.next()
+        }
+
+        fn size_hint(&self) -> (usize, Option<usize>) {
+            (1024, None)
+        }
+    }
+
+    let data = "testing";
+    let from_new = SmolStr::new(data);
+    let from_iter: SmolStr = BadSizeHint(data.chars()).collect();
+
+    assert!(!from_iter.is_heap_allocated());
+    assert!(!from_new.is_heap_allocated());
+    assert_eq!(from_new, from_iter);
+}
+
+// `to_smolstr` and `format_smolstr!` must reproduce every suffix of the alphabet, from the
+// full 26 letters down to a single one.
+#[test]
+fn test_to_smolstr() {
+    use smol_str::ToSmolStr;
+
+    let alphabet = "abcdefghijklmnopqrstuvwxyz";
+    for start in 0..26 {
+        let suffix = &alphabet[start..];
+
+        assert_eq!(suffix, suffix.to_smolstr());
+        assert_eq!(suffix, smol_str::format_smolstr!("{}", suffix));
+    }
+}
+
+// Exercises `SmolStrBuilder::push_str` around the 23-byte boundary between inline and
+// heap-allocated results.
+#[test]
+fn test_builder_push_str() {
+    // Feeds every chunk to a fresh builder via `push_str` and returns the finished string.
+    fn build(chunks: &[&str]) -> SmolStr {
+        let mut builder = SmolStrBuilder::new();
+        for chunk in chunks {
+            builder.push_str(chunk);
+        }
+        builder.finish()
+    }
+
+    let max_inline = "a".repeat(23);
+    let just_too_long = "a".repeat(24);
+
+    // empty
+    assert_eq!("", build(&[]));
+
+    // inline push
+    let s = build(&["a", "b"]);
+    assert!(!s.is_heap_allocated());
+    assert_eq!("ab", s);
+
+    // inline max push
+    let s = build(&[max_inline.as_str()]);
+    assert!(!s.is_heap_allocated());
+    assert_eq!("a".repeat(23), s);
+
+    // heap push immediate
+    let s = build(&[just_too_long.as_str()]);
+    assert!(s.is_heap_allocated());
+    assert_eq!("a".repeat(24), s);
+
+    // heap push succession
+    let s = build(&[max_inline.as_str(), max_inline.as_str()]);
+    assert!(s.is_heap_allocated());
+    assert_eq!("a".repeat(46), s);
+
+    // heap push on multibyte char
+    let mut builder = SmolStrBuilder::new();
+    builder.push_str("ohnonononononononono!");
+    builder.push('🤯');
+    let s = builder.finish();
+    assert!(s.is_heap_allocated());
+    assert_eq!("ohnonononononononono!🤯", s);
+}
+
+// Exercises `SmolStrBuilder::push` (one `char` at a time) around the 23-byte boundary
+// between inline and heap-allocated results.
+#[test]
+fn test_builder_push() {
+    // Pushes the character 'a' `count` times into a fresh builder and finishes it.
+    fn build_repeated(count: usize) -> SmolStr {
+        let mut builder = SmolStrBuilder::new();
+        for _ in 0..count {
+            builder.push('a');
+        }
+        builder.finish()
+    }
+
+    // empty
+    assert_eq!("", SmolStrBuilder::new().finish());
+
+    // inline push
+    let mut builder = SmolStrBuilder::new();
+    builder.push('a');
+    builder.push('b');
+    let s = builder.finish();
+    assert!(!s.is_heap_allocated());
+    assert_eq!("ab", s);
+
+    // inline max push
+    let s = build_repeated(23);
+    assert!(!s.is_heap_allocated());
+    assert_eq!("a".repeat(23), s);
+
+    // heap push
+    let s = build_repeated(24);
+    assert!(s.is_heap_allocated());
+    assert_eq!("a".repeat(24), s);
+}
+
+// Tests for the `StrExt` helpers that produce a `SmolStr` directly from a `&str`.
+#[cfg(test)]
+mod test_str_ext {
+    use smol_str::StrExt;
+
+    // A 50-byte input must come back lowercased and heap allocated.
+    #[test]
+    fn large() {
+        let lowered = "aaaaaaAAAAAaaaaaaaaaaaaaaaaaaaaaAAAAaaaaaaaaaaaaaa".to_lowercase_smolstr();
+        assert_eq!(
+            lowered,
+            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+        );
+        assert!(lowered.is_heap_allocated());
+    }
+
+    #[test]
+    fn to_lowercase() {
+        let lowered = "aßΔC".to_lowercase_smolstr();
+        assert_eq!(lowered, "aßδc");
+        assert!(!lowered.is_heap_allocated());
+    }
+
+    #[test]
+    fn to_uppercase() {
+        let raised = "aßΔC".to_uppercase_smolstr();
+        assert_eq!(raised, "ASSΔC");
+        assert!(!raised.is_heap_allocated());
+    }
+
+    // Only ASCII letters change; non-ASCII characters are left untouched.
+    #[test]
+    fn to_ascii_lowercase() {
+        let lowered = "aßΔC".to_ascii_lowercase_smolstr();
+        assert_eq!(lowered, "aßΔc");
+        assert!(!lowered.is_heap_allocated());
+    }
+
+    // Only ASCII letters change; non-ASCII characters are left untouched.
+    #[test]
+    fn to_ascii_uppercase() {
+        let raised = "aßΔC".to_ascii_uppercase_smolstr();
+        assert_eq!(raised, "AßΔC");
+        assert!(!raised.is_heap_allocated());
+    }
+
+    #[test]
+    fn replace() {
+        let replaced = "foo_bar_baz".replace_smolstr("ba", "do");
+        assert_eq!(replaced, "foo_dor_doz");
+        assert!(!replaced.is_heap_allocated());
+    }
+
+    // Only the first occurrence is replaced when the count is 1.
+    #[test]
+    fn replacen() {
+        let replaced = "foo_bar_baz".replacen_smolstr("ba", "do", 1);
+        assert_eq!(replaced, "foo_dor_baz");
+        assert!(!replaced.is_heap_allocated());
+    }
+
+    // Same as `replacen`, with single-byte ASCII pattern and replacement.
+    #[test]
+    fn replacen_1_ascii() {
+        let replaced = "foo_bar_baz".replacen_smolstr("o", "u", 1);
+        assert_eq!(replaced, "fuo_bar_baz");
+        assert!(!replaced.is_heap_allocated());
+    }
+}
+
+// Borsh round-trip tests, plus checks that deserialization rejects payloads whose bytes are
+// not valid UTF-8. "stack" cases are short inputs, "heap" cases are long ones.
+#[cfg(feature = "borsh")]
+mod borsh_tests {
+    use borsh::BorshDeserialize;
+    use smol_str::{SmolStr, ToSmolStr};
+    use std::io::Cursor;
+
+    #[test]
+    fn borsh_serialize_stack() {
+        let smolstr_on_stack = "aßΔCaßδc".to_smolstr();
+        let mut buffer = Vec::new();
+        borsh::BorshSerialize::serialize(&smolstr_on_stack, &mut buffer).unwrap();
+        let mut cursor = Cursor::new(buffer);
+        let decoded: SmolStr = borsh::BorshDeserialize::deserialize_reader(&mut cursor).unwrap();
+        assert_eq!(smolstr_on_stack, decoded);
+    }
+
+    #[test]
+    fn borsh_serialize_heap() {
+        let smolstr_on_heap = "aßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδc".to_smolstr();
+        let mut buffer = Vec::new();
+        borsh::BorshSerialize::serialize(&smolstr_on_heap, &mut buffer).unwrap();
+        let mut cursor = Cursor::new(buffer);
+        let decoded: SmolStr = borsh::BorshDeserialize::deserialize_reader(&mut cursor).unwrap();
+        assert_eq!(smolstr_on_heap, decoded);
+    }
+
+    #[test]
+    fn borsh_non_utf8_stack() {
+        let invalid_utf8: Vec<u8> = vec![0xF0, 0x9F, 0x8F]; // Incomplete UTF-8 sequence
+
+        // Serialize the raw bytes directly instead of smuggling them into a `String` with
+        // `from_utf8_unchecked`, which would violate `String`'s safety contract. Borsh encodes
+        // a string as a u32 length followed by its bytes, i.e. exactly like a `Vec<u8>`.
+        let mut buffer = Vec::new();
+        borsh::BorshSerialize::serialize(&invalid_utf8, &mut buffer).unwrap();
+        let mut cursor = Cursor::new(buffer);
+        let result = SmolStr::deserialize_reader(&mut cursor);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn borsh_non_utf8_heap() {
+        let invalid_utf8: Vec<u8> = vec![
+            0xC1, 0x8A, 0x5F, 0xE2, 0x3A, 0x9E, 0x3B, 0xAA, 0x01, 0x08, 0x6F, 0x2F, 0xC0, 0x32,
+            0xAB, 0xE1, 0x9A, 0x2F, 0x4A, 0x3F, 0x25, 0x0D, 0x8A, 0x2A, 0x19, 0x11, 0xF0, 0x7F,
+            0x0E, 0x80,
+        ];
+
+        // As above: the invalid payload is written as a byte vector, which shares the wire
+        // format of a Borsh string, so no invalid `String` is ever constructed.
+        let mut buffer = Vec::new();
+        borsh::BorshSerialize::serialize(&invalid_utf8, &mut buffer).unwrap();
+        let mut cursor = Cursor::new(buffer);
+        let result = SmolStr::deserialize_reader(&mut cursor);
+        assert!(result.is_err());
+    }
+}
diff --git a/lib/smol_str/tests/tidy.rs b/lib/smol_str/tests/tidy.rs
new file mode 100644
index 0000000000..e2d809e40f
--- /dev/null
+++ b/lib/smol_str/tests/tidy.rs
@@ -0,0 +1,47 @@
+#![cfg(not(miri))]
+use std::{
+ env,
+ path::{Path, PathBuf},
+ process::{Command, Stdio},
+};
+
+// Returns the crate root: the runtime `CARGO_MANIFEST_DIR` environment variable when it is
+// set, otherwise the value baked in at compile time.
+fn project_root() -> PathBuf {
+    match env::var("CARGO_MANIFEST_DIR") {
+        Ok(runtime_dir) => PathBuf::from(runtime_dir),
+        Err(_) => PathBuf::from(env!("CARGO_MANIFEST_DIR").to_owned()),
+    }
+}
+
+// Runs the whitespace-separated command line `cmd` inside `dir`.
+//
+// Standard input is closed, standard error is inherited and standard output is captured.
+// Returns `Ok(())` when the process exits successfully; otherwise the captured output is
+// printed and `Err(())` is returned. Failing to spawn the process also yields `Err(())`.
+fn run(cmd: &str, dir: impl AsRef<Path>) -> Result<(), ()> {
+    let mut words: Vec<_> = cmd.split_whitespace().collect();
+    let program = words.remove(0);
+    println!("> {}", cmd);
+
+    let mut command = Command::new(program);
+    command
+        .args(words)
+        .current_dir(dir)
+        .stdin(Stdio::null())
+        .stdout(Stdio::piped())
+        .stderr(Stdio::inherit());
+    let output = command.output().map_err(|_| ())?;
+
+    if !output.status.success() {
+        // Only show the captured output when the command failed.
+        let captured = String::from_utf8(output.stdout).map_err(|_| ())?;
+        print!("{}", captured);
+        return Err(());
+    }
+    Ok(())
+}
+
+// Fails when the stable `rustfmt` is unavailable or when `cargo fmt -- --check` reports
+// unformatted code.
+#[test]
+fn check_code_formatting() {
+    let root = project_root();
+
+    assert!(
+        run("rustfmt +stable --version", &root).is_ok(),
+        "failed to run rustfmt from toolchain 'stable'; \
+         please run `rustup component add rustfmt --toolchain stable` to install it.",
+    );
+    assert!(
+        run("cargo +stable fmt -- --check", &root).is_ok(),
+        "code is not properly formatted; please format the code by running `cargo fmt`"
+    );
+}