rust-analyzer

diff --git a/lib/smol_str/.github/ci.rs b/lib/smol_str/.github/ci.rs
new file mode 100644
index 0000000000..c594e8973c
--- /dev/null
+++ b/lib/smol_str/.github/ci.rs

@@ -0,0 +1,127 @@

+use std::{

+ env, fs,

+ process::{self, Command, ExitStatus, Stdio},

+ time::Instant,

+};

+type Error = Box<dyn std::error::Error>;

+type Result<T> = std::result::Result<T, Error>;

+fn main() {

+ if let Err(err) = try_main() {

+ eprintln!("{}", err);

+ process::exit(1);

+ }

+fn try_main() -> Result<()> {

+ let cwd = env::current_dir()?;

+ let cargo_toml = cwd.join("Cargo.toml");

+ assert!(

+ cargo_toml.exists(),

+ "Cargo.toml not found, cwd: {}",

+ cwd.display()

+ );

+ {

+ let _s = Section::new("BUILD_NO_DEFAULT_FEATURES");

+ shell("cargo test --all-features --workspace --no-run --no-default-features")?;

+ }

+ {

+ let _s = Section::new("BUILD");

+ shell("cargo test --all-features --workspace --no-run")?;

+ }

+ {

+ let _s = Section::new("TEST");

+ shell("cargo test --all-features --workspace")?;

+ shell("cargo test --no-default-features --workspace")?;

+ }

+ {

+ let _s = Section::new("TEST_BENCHES");

+ shell("cargo test --benches --all-features")?;

+ }

+ let current_branch = shell_output("git branch --show-current")?;

+ if &current_branch == "master" {

+ let _s = Section::new("PUBLISH");

+ let manifest = fs::read_to_string(&cargo_toml)?;

+ let version = get_field(&manifest, "version")?;

+ let tag = format!("v{}", version);

+ let tags = shell_output("git tag --list")?;

+ if !tags.contains(&tag) {

+ let token = env::var("CRATES_IO_TOKEN").unwrap();

+ shell(&format!("git tag v{}", version))?;

+ shell(&format!("cargo publish --token {}", token))?;

+ shell("git push --tags")?;

+ }

+ Ok(())

+fn get_field<'a>(text: &'a str, name: &str) -> Result<&'a str> {

+ for line in text.lines() {

+ let words = line.split_ascii_whitespace().collect::<Vec<_>>();

+ match words.as_slice() {

+ [n, "=", v, ..] if n.trim() == name => {

+ assert!(v.starts_with('"') && v.ends_with('"'));

+ return Ok(&v[1..v.len() - 1]);

+ }

+ _ => (),

+ }

+ Err(format!("can't find `{}` in\n----\n{}\n----\n", name, text))?

+fn shell(cmd: &str) -> Result<()> {

+ let status = command(cmd).status()?;

+ check_status(status)

+fn shell_output(cmd: &str) -> Result<String> {

+ let output = command(cmd).stderr(Stdio::inherit()).output()?;

+ check_status(output.status)?;

+ let res = String::from_utf8(output.stdout)?;

+ let res = res.trim().to_string();

+ println!("{}", res);

+ Ok(res)

+fn command(cmd: &str) -> Command {

+ eprintln!("> {}", cmd);

+ let words = cmd.split_ascii_whitespace().collect::<Vec<_>>();

+ let (cmd, args) = words.split_first().unwrap();

+ let mut res = Command::new(cmd);

+ res.args(args);

+ res

+fn check_status(status: ExitStatus) -> Result<()> {

+ if !status.success() {

+ Err(format!("$status: {}", status))?;

+ }

+ Ok(())

+struct Section {

+ name: &'static str,

+ start: Instant,

+impl Section {

+ fn new(name: &'static str) -> Section {

+ println!("::group::{}", name);

+ let start = Instant::now();

+ Section { name, start }

+ }

+impl Drop for Section {

+ fn drop(&mut self) {

+ eprintln!("{}: {:.2?}", self.name, self.start.elapsed());

+ println!("::endgroup::");

+ }

diff --git a/lib/smol_str/.github/workflows/ci.yaml b/lib/smol_str/.github/workflows/ci.yaml
new file mode 100644
index 0000000000..1c2e347374
--- /dev/null
+++ b/lib/smol_str/.github/workflows/ci.yaml

@@ -0,0 +1,36 @@

+name: CI

+on:

+ pull_request:

+ push:

+ branches:

+ - master

+ - staging

+ - trying

+env:

+ CARGO_INCREMENTAL: 0

+ CARGO_NET_RETRY: 10

+ CI: 1

+ RUST_BACKTRACE: short

+ RUSTFLAGS: -D warnings

+ RUSTUP_MAX_RETRIES: 10

+jobs:

+ rust:

+ name: Rust

+ runs-on: ubuntu-latest

+ steps:

+ - name: Checkout repository

+ uses: actions/checkout@v2

+ with:

+ fetch-depth: 0

+ - name: Install Rust toolchain

+ uses: actions-rust-lang/setup-rust-toolchain@v1

+ with:

+ cache: false

+ - run: rustc ./.github/ci.rs && ./ci

+ env:

+ CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}

diff --git a/lib/smol_str/.gitignore b/lib/smol_str/.gitignore
new file mode 100644
index 0000000000..0c8227b253
--- /dev/null
+++ b/lib/smol_str/.gitignore

@@ -0,0 +1,4 @@

+/target

+/ci

+/.vscode

+Cargo.lock

diff --git a/lib/smol_str/CHANGELOG.md b/lib/smol_str/CHANGELOG.md
new file mode 100644
index 0000000000..fb65d88ad1
--- /dev/null
+++ b/lib/smol_str/CHANGELOG.md

@@ -0,0 +1,38 @@

+# Changelog

+## Unreleased

+## 0.3.4 - 2025-10-23

+- Added `rust-version` field to `Cargo.toml`

+## 0.3.3 - 2025-10-23

+- Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr`

+ ~2x speedup inline, ~4-22x for heap.

+- Optimise `StrExt::to_lowercase_smolstr`, `StrExt::to_uppercase_smolstr` ~2x speedup inline, ~5-50x for heap.

+- Optimise `StrExt::replace_smolstr`, `StrExt::replacen_smolstr` for single ascii replace,

+ ~3x speedup inline & heap.

+## 0.3.2 - 2024-10-23

+- Fix `SmolStrBuilder::push` incorrectly padding null bytes when spilling onto the heap on a

+ multibyte character push

+## 0.3.1 - 2024-09-04

+- Fix `SmolStrBuilder` leaking implementation details

+## 0.3.0 - 2024-09-04

+- Remove deprecated `SmolStr::new_inline_from_ascii` function

+- Remove `SmolStr::to_string` in favor of `ToString::to_string`

+- Add `impl AsRef<[u8]> for SmolStr` impl

+- Add `impl AsRef<OsStr> for SmolStr` impl

+- Add `impl AsRef<Path> for SmolStr` impl

+- Add `SmolStrBuilder`

+## 0.2.2 - 2024-05-14

+- Add `StrExt` trait providing `to_lowercase_smolstr`, `replace_smolstr` and similar

+- Add `PartialEq` optimization for `ptr_eq`-able representations

diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml
new file mode 100644
index 0000000000..4752a84ed4
--- /dev/null
+++ b/lib/smol_str/Cargo.toml

@@ -0,0 +1,37 @@

+[package]

+name = "smol_str"

+version = "0.3.4"

+description = "small-string optimized string type with O(1) clone"

+license = "MIT OR Apache-2.0"

+repository = "https://github.com/rust-analyzer/smol_str"

+authors = ["Aleksey Kladov <[email protected]>", "Lukas Wirth <[email protected]>"]

+edition = "2021"

+rust-version = "1.89"

+[package.metadata.docs.rs]

+rustdoc-args = ["--cfg", "docsrs"]

+all-features = true

+[dependencies]

+serde_core = { version = "1.0.220", optional = true, default-features = false }

+borsh = { version = "1.4.0", optional = true, default-features = false }

+arbitrary = { version = "1.3", optional = true }

+[dev-dependencies]

+proptest = "1.5"

+serde_json = "1.0"

+serde = { version = "1.0", features = ["derive"] }

+criterion = "0.7"

+rand = "0.9.2"

+[features]

+default = ["std"]

+std = ["serde_core?/std", "borsh?/std"]

+serde = ["dep:serde_core"]

+[[bench]]

+name = "bench"

+harness = false

+[profile.bench]

+lto = "fat"

diff --git a/lib/smol_str/LICENSE-APACHE b/lib/smol_str/LICENSE-APACHE
new file mode 100644
index 0000000000..16fe87b06e
--- /dev/null
+++ b/lib/smol_str/LICENSE-APACHE

@@ -0,0 +1,201 @@

+ Apache License

+ Version 2.0, January 2004

+ http://www.apache.org/licenses/

+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

+1. Definitions.

+ "License" shall mean the terms and conditions for use, reproduction,

+ and distribution as defined by Sections 1 through 9 of this document.

+ "Licensor" shall mean the copyright owner or entity authorized by

+ the copyright owner that is granting the License.

+ "Legal Entity" shall mean the union of the acting entity and all

+ other entities that control, are controlled by, or are under common

+ control with that entity. For the purposes of this definition,

+ "control" means (i) the power, direct or indirect, to cause the

+ direction or management of such entity, whether by contract or

+ otherwise, or (ii) ownership of fifty percent (50%) or more of the

+ outstanding shares, or (iii) beneficial ownership of such entity.

+ "You" (or "Your") shall mean an individual or Legal Entity

+ exercising permissions granted by this License.

+ "Source" form shall mean the preferred form for making modifications,

+ including but not limited to software source code, documentation

+ source, and configuration files.

+ "Object" form shall mean any form resulting from mechanical

+ transformation or translation of a Source form, including but

+ not limited to compiled object code, generated documentation,

+ and conversions to other media types.

+ "Work" shall mean the work of authorship, whether in Source or

+ Object form, made available under the License, as indicated by a

+ copyright notice that is included in or attached to the work

+ (an example is provided in the Appendix below).

+ "Derivative Works" shall mean any work, whether in Source or Object

+ form, that is based on (or derived from) the Work and for which the

+ editorial revisions, annotations, elaborations, or other modifications

+ represent, as a whole, an original work of authorship. For the purposes

+ of this License, Derivative Works shall not include works that remain

+ separable from, or merely link (or bind by name) to the interfaces of,

+ the Work and Derivative Works thereof.

+ "Contribution" shall mean any work of authorship, including

+ the original version of the Work and any modifications or additions

+ to that Work or Derivative Works thereof, that is intentionally

+ submitted to Licensor for inclusion in the Work by the copyright owner

+ or by an individual or Legal Entity authorized to submit on behalf of

+ the copyright owner. For the purposes of this definition, "submitted"

+ means any form of electronic, verbal, or written communication sent

+ to the Licensor or its representatives, including but not limited to

+ communication on electronic mailing lists, source code control systems,

+ and issue tracking systems that are managed by, or on behalf of, the

+ Licensor for the purpose of discussing and improving the Work, but

+ excluding communication that is conspicuously marked or otherwise

+ designated in writing by the copyright owner as "Not a Contribution."

+ "Contributor" shall mean Licensor and any individual or Legal Entity

+ on behalf of whom a Contribution has been received by Licensor and

+ subsequently incorporated within the Work.

+2. Grant of Copyright License. Subject to the terms and conditions of

+ this License, each Contributor hereby grants to You a perpetual,

+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable

+ copyright license to reproduce, prepare Derivative Works of,

+ publicly display, publicly perform, sublicense, and distribute the

+ Work and such Derivative Works in Source or Object form.

+3. Grant of Patent License. Subject to the terms and conditions of

+ this License, each Contributor hereby grants to You a perpetual,

+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable

+ (except as stated in this section) patent license to make, have made,

+ use, offer to sell, sell, import, and otherwise transfer the Work,

+ where such license applies only to those patent claims licensable

+ by such Contributor that are necessarily infringed by their

+ Contribution(s) alone or by combination of their Contribution(s)

+ with the Work to which such Contribution(s) was submitted. If You

+ institute patent litigation against any entity (including a

+ cross-claim or counterclaim in a lawsuit) alleging that the Work

+ or a Contribution incorporated within the Work constitutes direct

+ or contributory patent infringement, then any patent licenses

+ granted to You under this License for that Work shall terminate

+ as of the date such litigation is filed.

+4. Redistribution. You may reproduce and distribute copies of the

+ Work or Derivative Works thereof in any medium, with or without

+ modifications, and in Source or Object form, provided that You

+ meet the following conditions:

+ (a) You must give any other recipients of the Work or

+ Derivative Works a copy of this License; and

+ (b) You must cause any modified files to carry prominent notices

+ stating that You changed the files; and

+ (c) You must retain, in the Source form of any Derivative Works

+ that You distribute, all copyright, patent, trademark, and

+ attribution notices from the Source form of the Work,

+ excluding those notices that do not pertain to any part of

+ the Derivative Works; and

+ (d) If the Work includes a "NOTICE" text file as part of its

+ distribution, then any Derivative Works that You distribute must

+ include a readable copy of the attribution notices contained

+ within such NOTICE file, excluding those notices that do not

+ pertain to any part of the Derivative Works, in at least one

+ of the following places: within a NOTICE text file distributed

+ as part of the Derivative Works; within the Source form or

+ documentation, if provided along with the Derivative Works; or,

+ within a display generated by the Derivative Works, if and

+ wherever such third-party notices normally appear. The contents

+ of the NOTICE file are for informational purposes only and

+ do not modify the License. You may add Your own attribution

+ notices within Derivative Works that You distribute, alongside

+ or as an addendum to the NOTICE text from the Work, provided

+ that such additional attribution notices cannot be construed

+ as modifying the License.

+ You may add Your own copyright statement to Your modifications and

+ may provide additional or different license terms and conditions

+ for use, reproduction, or distribution of Your modifications, or

+ for any such Derivative Works as a whole, provided Your use,

+ reproduction, and distribution of the Work otherwise complies with

+ the conditions stated in this License.

+5. Submission of Contributions. Unless You explicitly state otherwise,

+ any Contribution intentionally submitted for inclusion in the Work

+ by You to the Licensor shall be under the terms and conditions of

+ this License, without any additional terms or conditions.

+ Notwithstanding the above, nothing herein shall supersede or modify

+ the terms of any separate license agreement you may have executed

+ with Licensor regarding such Contributions.

+6. Trademarks. This License does not grant permission to use the trade

+ names, trademarks, service marks, or product names of the Licensor,

+ except as required for reasonable and customary use in describing the

+ origin of the Work and reproducing the content of the NOTICE file.

+7. Disclaimer of Warranty. Unless required by applicable law or

+ agreed to in writing, Licensor provides the Work (and each

+ Contributor provides its Contributions) on an "AS IS" BASIS,

+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or

+ implied, including, without limitation, any warranties or conditions

+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A

+ PARTICULAR PURPOSE. You are solely responsible for determining the

+ appropriateness of using or redistributing the Work and assume any

+ risks associated with Your exercise of permissions under this License.

+8. Limitation of Liability. In no event and under no legal theory,

+ whether in tort (including negligence), contract, or otherwise,

+ unless required by applicable law (such as deliberate and grossly

+ negligent acts) or agreed to in writing, shall any Contributor be

+ liable to You for damages, including any direct, indirect, special,

+ incidental, or consequential damages of any character arising as a

+ result of this License or out of the use or inability to use the

+ Work (including but not limited to damages for loss of goodwill,

+ work stoppage, computer failure or malfunction, or any and all

+ other commercial damages or losses), even if such Contributor

+ has been advised of the possibility of such damages.

+9. Accepting Warranty or Additional Liability. While redistributing

+ the Work or Derivative Works thereof, You may choose to offer,

+ and charge a fee for, acceptance of support, warranty, indemnity,

+ or other liability obligations and/or rights consistent with this

+ License. However, in accepting such obligations, You may act only

+ on Your own behalf and on Your sole responsibility, not on behalf

+ of any other Contributor, and only if You agree to indemnify,

+ defend, and hold each Contributor harmless for any liability

+ incurred by, or claims asserted against, such Contributor by reason

+ of your accepting any such warranty or additional liability.

+END OF TERMS AND CONDITIONS

+APPENDIX: How to apply the Apache License to your work.

+ To apply the Apache License to your work, attach the following

+ boilerplate notice, with the fields enclosed by brackets "[]"

+ replaced with your own identifying information. (Don't include

+ the brackets!) The text should be enclosed in the appropriate

+ comment syntax for the file format. We also recommend that a

+ file or class name and description of purpose be included on the

+ same "printed page" as the copyright notice for easier

+ identification within third-party archives.

+Copyright [yyyy] [name of copyright owner]

+Licensed under the Apache License, Version 2.0 (the "License");

+you may not use this file except in compliance with the License.

+You may obtain a copy of the License at

+ http://www.apache.org/licenses/LICENSE-2.0

+Unless required by applicable law or agreed to in writing, software

+distributed under the License is distributed on an "AS IS" BASIS,

+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+See the License for the specific language governing permissions and

+limitations under the License.

diff --git a/lib/smol_str/LICENSE-MIT b/lib/smol_str/LICENSE-MIT
new file mode 100644
index 0000000000..31aa79387f
--- /dev/null
+++ b/lib/smol_str/LICENSE-MIT

@@ -0,0 +1,23 @@

+Permission is hereby granted, free of charge, to any

+person obtaining a copy of this software and associated

+documentation files (the "Software"), to deal in the

+Software without restriction, including without

+limitation the rights to use, copy, modify, merge,

+publish, distribute, sublicense, and/or sell copies of

+the Software, and to permit persons to whom the Software

+is furnished to do so, subject to the following

+conditions:

+The above copyright notice and this permission notice

+shall be included in all copies or substantial portions

+of the Software.

+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF

+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED

+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A

+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT

+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY

+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION

+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR

+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER

+DEALINGS IN THE SOFTWARE.

diff --git a/lib/smol_str/README.md b/lib/smol_str/README.md
new file mode 100644
index 0000000000..56296fb53f
--- /dev/null
+++ b/lib/smol_str/README.md

@@ -0,0 +1,35 @@

+# smol_str

+[![CI](https://github.com/rust-analyzer/smol_str/workflows/CI/badge.svg)](https://github.com/rust-analyzer/smol_str/actions?query=branch%3Amaster+workflow%3ACI)

+[![Crates.io](https://img.shields.io/crates/v/smol_str.svg)](https://crates.io/crates/smol_str)

+[![API reference](https://docs.rs/smol_str/badge.svg)](https://docs.rs/smol_str/)

+A `SmolStr` is a string type that has the following properties:

+* `size_of::<SmolStr>() == 24` (therefore `== size_of::<String>()` on 64 bit platforms)

+* `Clone` is `O(1)`

+* Strings are stack-allocated if they are:

+ * Up to 23 bytes long

+ * Longer than 23 bytes, but substrings of `WS` (see `src/lib.rs`). Such strings consist

+ solely of consecutive newlines, followed by consecutive spaces

+* If a string does not satisfy the aforementioned conditions, it is heap-allocated

+* Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation

+Unlike `String`, however, `SmolStr` is immutable. The primary use case for

+`SmolStr` is a good enough default storage for tokens of typical programming

+languages. Strings consisting of a series of newlines, followed by a series of

+whitespace are a typical pattern in computer programs because of indentation.

+Note that a specialized interner might be a better solution for some use cases.

+## Benchmarks

+Run criterion benches with

+```sh

+cargo bench --bench \* -- --quick

+```

+## MSRV Policy

+Minimal Supported Rust Version: latest stable.

+Bumping MSRV is not considered a semver-breaking change.

diff --git a/lib/smol_str/benches/bench.rs b/lib/smol_str/benches/bench.rs
new file mode 100644
index 0000000000..2643b02557
--- /dev/null
+++ b/lib/smol_str/benches/bench.rs

@@ -0,0 +1,118 @@

+use criterion::{criterion_group, criterion_main, Criterion};

+use rand::distr::{Alphanumeric, SampleString};

+use smol_str::{format_smolstr, SmolStr, StrExt, ToSmolStr};

+use std::hint::black_box;

+/// 12: small (inline)

+/// 50: medium (heap)

+/// 1000: large (heap)

+const TEST_LENS: [usize; 3] = [12, 50, 1000];

+fn format_bench(c: &mut Criterion) {

+ for len in TEST_LENS {

+ let n = rand::random_range(10000..99999);

+ let str_len = len.checked_sub(n.to_smolstr().len()).unwrap();

+ let str = Alphanumeric.sample_string(&mut rand::rng(), str_len);

+ c.bench_function(&format!("format_smolstr! len={len}"), |b| {

+ let mut v = <_>::default();

+ b.iter(|| v = format_smolstr!("{str}-{n}"));

+ assert_eq!(v, format!("{str}-{n}"));

+ });

+ }

+fn from_str_bench(c: &mut Criterion) {

+ for len in TEST_LENS {

+ let str = Alphanumeric.sample_string(&mut rand::rng(), len);

+ c.bench_function(&format!("SmolStr::from len={len}"), |b| {

+ let mut v = <_>::default();

+ b.iter(|| v = SmolStr::from(black_box(&str)));

+ assert_eq!(v, str);

+ });

+ }

+fn clone_bench(c: &mut Criterion) {

+ for len in TEST_LENS {

+ let str = Alphanumeric.sample_string(&mut rand::rng(), len);

+ let smolstr = SmolStr::new(&str);

+ c.bench_function(&format!("SmolStr::clone len={len}"), |b| {

+ let mut v = <_>::default();

+ b.iter(|| v = smolstr.clone());

+ assert_eq!(v, str);

+ });

+ }

+fn eq_bench(c: &mut Criterion) {

+ for len in TEST_LENS {

+ let str = Alphanumeric.sample_string(&mut rand::rng(), len);

+ let smolstr = SmolStr::new(&str);

+ c.bench_function(&format!("SmolStr::eq len={len}"), |b| {

+ let mut v = false;

+ b.iter(|| v = smolstr == black_box(&str));

+ assert!(v);

+ });

+ }

+fn to_lowercase_bench(c: &mut Criterion) {

+ const END_CHAR: char = 'İ';

+ for len in TEST_LENS {

+ // mostly ascii seq with some non-ascii at the end

+ let mut str = Alphanumeric.sample_string(&mut rand::rng(), len - END_CHAR.len_utf8());

+ str.push(END_CHAR);

+ let str = str.as_str();

+ c.bench_function(&format!("to_lowercase_smolstr len={len}"), |b| {

+ let mut v = <_>::default();

+ b.iter(|| v = str.to_lowercase_smolstr());

+ assert_eq!(v, str.to_lowercase());

+ });

+ }

+fn to_ascii_lowercase_bench(c: &mut Criterion) {

+ for len in TEST_LENS {

+ let str = Alphanumeric.sample_string(&mut rand::rng(), len);

+ let str = str.as_str();

+ c.bench_function(&format!("to_ascii_lowercase_smolstr len={len}"), |b| {

+ let mut v = <_>::default();

+ b.iter(|| v = str.to_ascii_lowercase_smolstr());

+ assert_eq!(v, str.to_ascii_lowercase());

+ });

+ }

+fn replace_bench(c: &mut Criterion) {

+ for len in TEST_LENS {

+ let s_dash_s = Alphanumeric.sample_string(&mut rand::rng(), len / 2)

+ + "-"

+ + &Alphanumeric.sample_string(&mut rand::rng(), len - 1 - len / 2);

+ let str = s_dash_s.as_str();

+ c.bench_function(&format!("replace_smolstr len={len}"), |b| {

+ let mut v = <_>::default();

+ b.iter(|| v = str.replace_smolstr("-", "_"));

+ assert_eq!(v, str.replace("-", "_"));

+ });

+ }

+criterion_group!(

+ benches,

+ format_bench,

+ from_str_bench,

+ clone_bench,

+ eq_bench,

+ to_lowercase_bench,

+ to_ascii_lowercase_bench,

+ replace_bench,

+);

+criterion_main!(benches);

diff --git a/lib/smol_str/bors.toml b/lib/smol_str/bors.toml
new file mode 100644
index 0000000000..b92b99ac30
--- /dev/null
+++ b/lib/smol_str/bors.toml

@@ -0,0 +1,2 @@

+status = [ "Rust" ]

+delete_merged_branches = true

diff --git a/lib/smol_str/src/borsh.rs b/lib/smol_str/src/borsh.rs
new file mode 100644
index 0000000000..ebb20d71a0
--- /dev/null
+++ b/lib/smol_str/src/borsh.rs

@@ -0,0 +1,40 @@

+use crate::{Repr, SmolStr, INLINE_CAP};

+use alloc::string::{String, ToString};

+use borsh::{

+ io::{Error, ErrorKind, Read, Write},

+ BorshDeserialize, BorshSerialize,

+};

+use core::mem::transmute;

+impl BorshSerialize for SmolStr {

+ fn serialize<W: Write>(&self, writer: &mut W) -> borsh::io::Result<()> {

+ self.as_str().serialize(writer)

+ }

+impl BorshDeserialize for SmolStr {

+ #[inline]

+ fn deserialize_reader<R: Read>(reader: &mut R) -> borsh::io::Result<Self> {

+ let len = u32::deserialize_reader(reader)?;

+ if (len as usize) < INLINE_CAP {

+ let mut buf = [0u8; INLINE_CAP];

+ reader.read_exact(&mut buf[..len as usize])?;

+ _ = core::str::from_utf8(&buf[..len as usize]).map_err(|err| {

+ let msg = err.to_string();

+ Error::new(ErrorKind::InvalidData, msg)

+ })?;

+ Ok(SmolStr(Repr::Inline {

+ len: unsafe { transmute::<u8, crate::InlineSize>(len as u8) },

+ buf,

+ }))

+ } else {

+ // u8::vec_from_reader always returns Some on success in current implementation

+ let vec = u8::vec_from_reader(len, reader)?

+ .ok_or_else(|| Error::other("u8::vec_from_reader unexpectedly returned None"))?;

+ Ok(SmolStr::from(String::from_utf8(vec).map_err(|err| {

+ let msg = err.to_string();

+ Error::new(ErrorKind::InvalidData, msg)

+ })?))

+ }

diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs
new file mode 100644
index 0000000000..ded07c61c6
--- /dev/null
+++ b/lib/smol_str/src/lib.rs

@@ -0,0 +1,998 @@

+#![cfg_attr(not(feature = "std"), no_std)]

+#![cfg_attr(docsrs, feature(doc_auto_cfg))]

+extern crate alloc;

+use alloc::{borrow::Cow, boxed::Box, string::String, sync::Arc};

+use core::{

+ borrow::Borrow,

+ cmp::{self, Ordering},

+ convert::Infallible,

+ fmt, hash, iter, mem, ops,

+ str::FromStr,

+};

+/// A `SmolStr` is a string type that has the following properties:

+///

+/// * `size_of::<SmolStr>() == 24` (therefore `== size_of::<String>()` on 64 bit platforms)

+/// * `Clone` is `O(1)`

+/// * Strings are stack-allocated if they are:

+/// * Up to 23 bytes long

+/// * Longer than 23 bytes, but substrings of `WS` (see below). Such strings consist

+/// solely of consecutive newlines, followed by consecutive spaces

+/// * If a string does not satisfy the aforementioned conditions, it is heap-allocated

+/// * Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation

+///

+/// Unlike `String`, however, `SmolStr` is immutable. The primary use case for

+/// `SmolStr` is a good enough default storage for tokens of typical programming

+/// languages. Strings consisting of a series of newlines, followed by a series of

+/// whitespace are a typical pattern in computer programs because of indentation.

+/// Note that a specialized interner might be a better solution for some use cases.

+///

+/// `WS`: A string of 32 newlines followed by 128 spaces.

+pub struct SmolStr(Repr);

+impl SmolStr {

+ /// Constructs an inline variant of `SmolStr`.

+ ///

+ /// This never allocates.

+ ///

+ /// # Panics

+ ///

+ /// Panics if `text.len() > 23`.

+ #[inline]

+ pub const fn new_inline(text: &str) -> SmolStr {

+ assert!(text.len() <= INLINE_CAP); // avoids bounds checks in loop

+ let text = text.as_bytes();

+ let mut buf = [0; INLINE_CAP];

+ let mut i = 0;

+ while i < text.len() {

+ buf[i] = text[i];

+ i += 1

+ }

+ SmolStr(Repr::Inline {

+ // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`

+ // as we asserted it.

+ len: unsafe { InlineSize::transmute_from_u8(text.len() as u8) },

+ buf,

+ })

+ }

+ /// Constructs a `SmolStr` from a statically allocated string.

+ ///

+ /// This never allocates.

+ #[inline(always)]

+ pub const fn new_static(text: &'static str) -> SmolStr {

+ // NOTE: this never uses the inline storage; if a canonical

+ // representation is needed, we could check for `len() < INLINE_CAP`

+ // and call `new_inline`, but this would mean an extra branch.

+ SmolStr(Repr::Static(text))

+ }

+ /// Constructs a `SmolStr` from a `str`, heap-allocating if necessary.

+ #[inline(always)]

+ pub fn new(text: impl AsRef<str>) -> SmolStr {

+ SmolStr(Repr::new(text.as_ref()))

+ }

+ /// Returns a `&str` slice of this `SmolStr`.

+ #[inline(always)]

+ pub fn as_str(&self) -> &str {

+ self.0.as_str()

+ }

+ /// Returns the length of `self` in bytes.

+ #[inline(always)]

+ pub fn len(&self) -> usize {

+ self.0.len()

+ }

+ /// Returns `true` if `self` has a length of zero bytes.

+ #[inline(always)]

+ pub fn is_empty(&self) -> bool {

+ self.0.is_empty()

+ }

+ /// Returns `true` if `self` is heap-allocated.

+ #[inline(always)]

+ pub const fn is_heap_allocated(&self) -> bool {

+ matches!(self.0, Repr::Heap(..))

+ }

+impl Clone for SmolStr {

+ #[inline]

+ fn clone(&self) -> Self {

+ if !self.is_heap_allocated() {

+ // SAFETY: We verified that the payload of `Repr` is a POD

+ return unsafe { core::ptr::read(self as *const SmolStr) };

+ }

+ Self(self.0.clone())

+ }

+impl Default for SmolStr {

+ #[inline(always)]

+ fn default() -> SmolStr {

+ SmolStr(Repr::Inline {

+ len: InlineSize::_V0,

+ buf: [0; INLINE_CAP],

+ })

+ }

+impl ops::Deref for SmolStr {

+ type Target = str;

+ #[inline(always)]

+ fn deref(&self) -> &str {

+ self.as_str()

+ }

+// region: PartialEq implementations

+impl Eq for SmolStr {}

+impl PartialEq<SmolStr> for SmolStr {

+ fn eq(&self, other: &SmolStr) -> bool {

+ self.0.ptr_eq(&other.0) || self.as_str() == other.as_str()

+ }

+impl PartialEq<str> for SmolStr {

+ #[inline(always)]

+ fn eq(&self, other: &str) -> bool {

+ self.as_str() == other

+ }

+impl PartialEq<SmolStr> for str {

+ #[inline(always)]

+ fn eq(&self, other: &SmolStr) -> bool {

+ other == self

+ }

+impl<'a> PartialEq<&'a str> for SmolStr {

+ #[inline(always)]

+ fn eq(&self, other: &&'a str) -> bool {

+ self == *other

+ }

+impl PartialEq<SmolStr> for &str {

+ #[inline(always)]

+ fn eq(&self, other: &SmolStr) -> bool {

+ *self == other

+ }

+impl PartialEq<String> for SmolStr {

+ #[inline(always)]

+ fn eq(&self, other: &String) -> bool {

+ self.as_str() == other

+ }

+impl PartialEq<SmolStr> for String {

+ #[inline(always)]

+ fn eq(&self, other: &SmolStr) -> bool {

+ other == self

+ }

+impl<'a> PartialEq<&'a String> for SmolStr {

+ #[inline(always)]

+ fn eq(&self, other: &&'a String) -> bool {

+ self == *other

+ }

+impl PartialEq<SmolStr> for &String {

+ #[inline(always)]

+ fn eq(&self, other: &SmolStr) -> bool {

+ *self == other

+ }

+// endregion: PartialEq implementations

+impl Ord for SmolStr {

+ fn cmp(&self, other: &SmolStr) -> Ordering {

+ self.as_str().cmp(other.as_str())

+ }

+impl PartialOrd for SmolStr {

+ fn partial_cmp(&self, other: &SmolStr) -> Option<Ordering> {

+ Some(self.cmp(other))

+ }

+impl hash::Hash for SmolStr {

+ fn hash<H: hash::Hasher>(&self, hasher: &mut H) {

+ self.as_str().hash(hasher);

+ }

+impl fmt::Debug for SmolStr {

+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {

+ fmt::Debug::fmt(self.as_str(), f)

+ }

+impl fmt::Display for SmolStr {

+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {

+ fmt::Display::fmt(self.as_str(), f)

+ }

+impl iter::FromIterator<char> for SmolStr {

+ fn from_iter<I: iter::IntoIterator<Item = char>>(iter: I) -> SmolStr {

+ from_char_iter(iter.into_iter())

+ }

+#[inline]

+fn from_char_iter(iter: impl Iterator<Item = char>) -> SmolStr {

+ from_buf_and_chars([0; _], 0, iter)

+fn from_buf_and_chars(

+ mut buf: [u8; INLINE_CAP],

+ buf_len: usize,

+ mut iter: impl Iterator<Item = char>,

+) -> SmolStr {

+ let min_size = iter.size_hint().0 + buf_len;

+ if min_size > INLINE_CAP {

+ let heap: String = core::str::from_utf8(&buf[..buf_len])

+ .unwrap()

+ .chars()

+ .chain(iter)

+ .collect();

+ if heap.len() <= INLINE_CAP {

+ // size hint lied

+ return SmolStr::new_inline(&heap);

+ }

+ return SmolStr(Repr::Heap(heap.into_boxed_str().into()));

+ }

+ let mut len = buf_len;

+ while let Some(ch) = iter.next() {

+ let size = ch.len_utf8();

+ if size + len > INLINE_CAP {

+ let (min_remaining, _) = iter.size_hint();

+ let mut heap = String::with_capacity(size + len + min_remaining);

+ heap.push_str(core::str::from_utf8(&buf[..len]).unwrap());

+ heap.push(ch);

+ heap.extend(iter);

+ return SmolStr(Repr::Heap(heap.into_boxed_str().into()));

+ }

+ ch.encode_utf8(&mut buf[len..]);

+ len += size;

+ }

+ SmolStr(Repr::Inline {

+ // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`

+ // as we otherwise return early.

+ len: unsafe { InlineSize::transmute_from_u8(len as u8) },

+ buf,

+ })

+/// Concatenates the string pieces yielded by `iter` into a `SmolStr`.
+///
+/// Pieces are copied into an inline buffer until one no longer fits; from then
+/// on everything (buffered bytes, the current piece, the rest of the iterator)
+/// goes into a heap `String`.
+fn build_from_str_iter<T>(mut iter: impl Iterator<Item = T>) -> SmolStr
+where
+    T: AsRef<str>,
+    String: iter::Extend<T>,
+{
+    let mut buf = [0u8; INLINE_CAP];
+    let mut used = 0;
+    while let Some(item) = iter.next() {
+        let piece = item.as_ref();
+        let end = piece.len() + used;
+        if end > INLINE_CAP {
+            let mut heap = String::with_capacity(end);
+            heap.push_str(core::str::from_utf8(&buf[..used]).unwrap());
+            heap.push_str(piece);
+            heap.extend(iter);
+            return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
+        }
+        buf[used..end].copy_from_slice(piece.as_bytes());
+        used = end;
+    }
+    // SAFETY: `used` never exceeds `INLINE_CAP`, the maximum value of
+    // `InlineSize`; the loop returns early before it could.
+    let len = unsafe { InlineSize::transmute_from_u8(used as u8) };
+    SmolStr(Repr::Inline { len, buf })
+}

+impl iter::FromIterator<String> for SmolStr {

+ fn from_iter<I: iter::IntoIterator<Item = String>>(iter: I) -> SmolStr {

+ build_from_str_iter(iter.into_iter())

+ }

+impl<'a> iter::FromIterator<&'a String> for SmolStr {

+ fn from_iter<I: iter::IntoIterator<Item = &'a String>>(iter: I) -> SmolStr {

+ SmolStr::from_iter(iter.into_iter().map(|x| x.as_str()))

+ }

+impl<'a> iter::FromIterator<&'a str> for SmolStr {

+ fn from_iter<I: iter::IntoIterator<Item = &'a str>>(iter: I) -> SmolStr {

+ build_from_str_iter(iter.into_iter())

+ }

+impl AsRef<str> for SmolStr {

+ #[inline(always)]

+ fn as_ref(&self) -> &str {

+ self.as_str()

+ }

+impl AsRef<[u8]> for SmolStr {

+ #[inline(always)]

+ fn as_ref(&self) -> &[u8] {

+ self.as_str().as_bytes()

+ }

+#[cfg(feature = "std")]

+impl AsRef<std::ffi::OsStr> for SmolStr {

+ #[inline(always)]

+ fn as_ref(&self) -> &std::ffi::OsStr {

+ AsRef::<std::ffi::OsStr>::as_ref(self.as_str())

+ }

+#[cfg(feature = "std")]

+impl AsRef<std::path::Path> for SmolStr {

+ #[inline(always)]

+ fn as_ref(&self) -> &std::path::Path {

+ AsRef::<std::path::Path>::as_ref(self.as_str())

+ }

+impl From<&str> for SmolStr {

+ #[inline]

+ fn from(s: &str) -> SmolStr {

+ SmolStr::new(s)

+ }

+impl From<&mut str> for SmolStr {

+ #[inline]

+ fn from(s: &mut str) -> SmolStr {

+ SmolStr::new(s)

+ }

+impl From<&String> for SmolStr {

+ #[inline]

+ fn from(s: &String) -> SmolStr {

+ SmolStr::new(s)

+ }

+impl From<String> for SmolStr {

+ #[inline(always)]

+ fn from(text: String) -> Self {

+ Self::new(text)

+ }

+impl From<Box<str>> for SmolStr {

+ #[inline]

+ fn from(s: Box<str>) -> SmolStr {

+ SmolStr::new(s)

+ }

+impl From<Arc<str>> for SmolStr {

+ #[inline]

+ fn from(s: Arc<str>) -> SmolStr {

+ let repr = Repr::new_on_stack(s.as_ref()).unwrap_or(Repr::Heap(s));

+ Self(repr)

+ }

+impl<'a> From<Cow<'a, str>> for SmolStr {

+ #[inline]

+ fn from(s: Cow<'a, str>) -> SmolStr {

+ SmolStr::new(s)

+ }

+impl From<SmolStr> for Arc<str> {

+ #[inline(always)]

+ fn from(text: SmolStr) -> Self {

+ match text.0 {

+ Repr::Heap(data) => data,

+ _ => text.as_str().into(),

+ }

+impl From<SmolStr> for String {

+ #[inline(always)]

+ fn from(text: SmolStr) -> Self {

+ text.as_str().into()

+ }

+impl Borrow<str> for SmolStr {

+ #[inline(always)]

+ fn borrow(&self) -> &str {

+ self.as_str()

+ }

+impl FromStr for SmolStr {

+ type Err = Infallible;

+ #[inline]

+ fn from_str(s: &str) -> Result<SmolStr, Self::Err> {

+ Ok(SmolStr::from(s))

+ }

+/// Maximum number of bytes stored inline, i.e. the largest `InlineSize` value.
+const INLINE_CAP: usize = InlineSize::_V23 as usize;
+/// Number of `\n` bytes at the start of [`WS`].
+const N_NEWLINES: usize = 32;
+/// Number of space bytes following the newlines in [`WS`].
+const N_SPACES: usize = 128;
+/// Bytes backing [`WS`]: `N_NEWLINES` newlines followed by `N_SPACES` spaces.
+///
+/// The table is computed instead of spelled out as a literal so that the
+/// layout cannot silently drift from the two counts above.
+const WS_BYTES: [u8; N_NEWLINES + N_SPACES] = {
+    let mut bytes = [b' '; N_NEWLINES + N_SPACES];
+    let mut idx = 0;
+    while idx < N_NEWLINES {
+        bytes[idx] = b'\n';
+        idx += 1;
+    }
+    bytes
+};
+/// Static whitespace table: `N_NEWLINES` newlines followed by `N_SPACES` spaces.
+const WS: &str = match core::str::from_utf8(&WS_BYTES) {
+    Ok(ws) => ws,
+    Err(_) => panic!("whitespace table must be valid UTF-8"),
+};
+// Compile-time check that `WS` has the layout the two counts describe.
+const _: () = {
+    assert!(WS.len() == N_NEWLINES + N_SPACES);
+    assert!(WS.as_bytes()[N_NEWLINES - 1] == b'\n');
+    assert!(WS.as_bytes()[N_NEWLINES] == b' ');
+};

+/// A [`u8`] with a bunch of niches.

+#[derive(Clone, Copy, Debug, PartialEq)]

+#[repr(u8)]

+enum InlineSize {

+ _V0 = 0,

+ _V1,

+ _V2,

+ _V3,

+ _V4,

+ _V5,

+ _V6,

+ _V7,

+ _V8,

+ _V9,

+ _V10,

+ _V11,

+ _V12,

+ _V13,

+ _V14,

+ _V15,

+ _V16,

+ _V17,

+ _V18,

+ _V19,

+ _V20,

+ _V21,

+ _V22,

+ _V23,

+impl InlineSize {

+ /// SAFETY: `value` must be less than or equal to [`INLINE_CAP`]

+ #[inline(always)]

+ const unsafe fn transmute_from_u8(value: u8) -> Self {

+ debug_assert!(value <= InlineSize::_V23 as u8);

+ // SAFETY: The caller is responsible to uphold this invariant

+ unsafe { mem::transmute::<u8, Self>(value) }

+ }

+#[derive(Clone, Debug)]

+enum Repr {

+ Inline {

+ len: InlineSize,

+ buf: [u8; INLINE_CAP],

+ },

+ Static(&'static str),

+ Heap(Arc<str>),

+impl Repr {

+ /// This function tries to create a new Repr::Inline or Repr::Static

+ /// If it isn't possible, this function returns None

+ fn new_on_stack<T>(text: T) -> Option<Self>

+ where

+ T: AsRef<str>,

+ {

+ let text = text.as_ref();

+ let len = text.len();

+ if len <= INLINE_CAP {

+ let mut buf = [0; INLINE_CAP];

+ buf[..len].copy_from_slice(text.as_bytes());

+ return Some(Repr::Inline {

+ // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`

+ len: unsafe { InlineSize::transmute_from_u8(len as u8) },

+ buf,

+ });

+ }

+ if len <= N_NEWLINES + N_SPACES {

+ let bytes = text.as_bytes();

+ let possible_newline_count = cmp::min(len, N_NEWLINES);

+ let newlines = bytes[..possible_newline_count]

+ .iter()

+ .take_while(|&&b| b == b'\n')

+ .count();

+ let possible_space_count = len - newlines;

+ if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') {

+ let spaces = possible_space_count;

+ let substring = &WS[N_NEWLINES - newlines..N_NEWLINES + spaces];

+ return Some(Repr::Static(substring));

+ }

+ None

+ }

+ fn new(text: &str) -> Self {

+ Self::new_on_stack(text).unwrap_or_else(|| Repr::Heap(Arc::from(text)))

+ }

+ #[inline(always)]

+ fn len(&self) -> usize {

+ match self {

+ Repr::Heap(data) => data.len(),

+ Repr::Static(data) => data.len(),

+ Repr::Inline { len, .. } => *len as usize,

+ }

+ #[inline(always)]

+ fn is_empty(&self) -> bool {

+ match self {

+ Repr::Heap(data) => data.is_empty(),

+ Repr::Static(data) => data.is_empty(),

+ &Repr::Inline { len, .. } => len as u8 == 0,

+ }

+ #[inline]

+ fn as_str(&self) -> &str {

+ match self {

+ Repr::Heap(data) => data,

+ Repr::Static(data) => data,

+ Repr::Inline { len, buf } => {

+ let len = *len as usize;

+ // SAFETY: len is guaranteed to be <= INLINE_CAP

+ let buf = unsafe { buf.get_unchecked(..len) };

+ // SAFETY: buf is guaranteed to be valid utf8 for ..len bytes

+ unsafe { ::core::str::from_utf8_unchecked(buf) }

+ }

+ fn ptr_eq(&self, other: &Self) -> bool {

+ match (self, other) {

+ (Self::Heap(l0), Self::Heap(r0)) => Arc::ptr_eq(l0, r0),

+ (Self::Static(l0), Self::Static(r0)) => core::ptr::eq(l0, r0),

+ (

+ Self::Inline {

+ len: l_len,

+ buf: l_buf,

+ },

+ Self::Inline {

+ len: r_len,

+ buf: r_buf,

+ },

+ ) => l_len == r_len && l_buf == r_buf,

+ _ => false,

+ }

+/// Convert value to [`SmolStr`] using [`fmt::Display`], potentially without allocating.

+///

+/// Almost identical to [`ToString`], but converts to `SmolStr` instead.

+pub trait ToSmolStr {

+ fn to_smolstr(&self) -> SmolStr;

+/// [`str`] methods producing [`SmolStr`]s.

+pub trait StrExt: private::Sealed {

+ /// Returns the lowercase equivalent of this string slice as a new [`SmolStr`],

+ /// potentially without allocating.

+ ///

+ /// See [`str::to_lowercase`].

+ #[must_use = "this returns a new SmolStr without modifying the original"]

+ fn to_lowercase_smolstr(&self) -> SmolStr;

+ /// Returns the uppercase equivalent of this string slice as a new [`SmolStr`],

+ /// potentially without allocating.

+ ///

+ /// See [`str::to_uppercase`].

+ #[must_use = "this returns a new SmolStr without modifying the original"]

+ fn to_uppercase_smolstr(&self) -> SmolStr;

+ /// Returns the ASCII lowercase equivalent of this string slice as a new [`SmolStr`],

+ /// potentially without allocating.

+ ///

+ /// See [`str::to_ascii_lowercase`].

+ #[must_use = "this returns a new SmolStr without modifying the original"]

+ fn to_ascii_lowercase_smolstr(&self) -> SmolStr;

+ /// Returns the ASCII uppercase equivalent of this string slice as a new [`SmolStr`],

+ /// potentially without allocating.

+ ///

+ /// See [`str::to_ascii_uppercase`].

+ #[must_use = "this returns a new SmolStr without modifying the original"]

+ fn to_ascii_uppercase_smolstr(&self) -> SmolStr;

+ /// Replaces all matches of a &str with another &str returning a new [`SmolStr`],

+ /// potentially without allocating.

+ ///

+ /// See [`str::replace`].

+ #[must_use = "this returns a new SmolStr without modifying the original"]

+ fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr;

+ /// Replaces first N matches of a &str with another &str returning a new [`SmolStr`],

+ /// potentially without allocating.

+ ///

+ /// See [`str::replacen`].

+ #[must_use = "this returns a new SmolStr without modifying the original"]

+ fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr;

+impl StrExt for str {

+ #[inline]

+ fn to_lowercase_smolstr(&self) -> SmolStr {

+ let len = self.len();

+ if len <= INLINE_CAP {

+ let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_lowercase);

+ from_buf_and_chars(

+ buf,

+ len - rest.len(),

+ rest.chars().flat_map(|c| c.to_lowercase()),

+ )

+ } else {

+ self.to_lowercase().into()

+ }

+ #[inline]

+ fn to_uppercase_smolstr(&self) -> SmolStr {

+ let len = self.len();

+ if len <= INLINE_CAP {

+ let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_uppercase);

+ from_buf_and_chars(

+ buf,

+ len - rest.len(),

+ rest.chars().flat_map(|c| c.to_uppercase()),

+ )

+ } else {

+ self.to_uppercase().into()

+ }

+ #[inline]

+ fn to_ascii_lowercase_smolstr(&self) -> SmolStr {

+ let len = self.len();

+ if len <= INLINE_CAP {

+ let mut buf = [0u8; INLINE_CAP];

+ buf[..len].copy_from_slice(self.as_bytes());

+ buf[..len].make_ascii_lowercase();

+ SmolStr(Repr::Inline {

+ // SAFETY: `len` is in bounds

+ len: unsafe { InlineSize::transmute_from_u8(len as u8) },

+ buf,

+ })

+ } else {

+ self.to_ascii_lowercase().into()

+ }

+ #[inline]

+ fn to_ascii_uppercase_smolstr(&self) -> SmolStr {

+ let len = self.len();

+ if len <= INLINE_CAP {

+ let mut buf = [0u8; INLINE_CAP];

+ buf[..len].copy_from_slice(self.as_bytes());

+ buf[..len].make_ascii_uppercase();

+ SmolStr(Repr::Inline {

+ // SAFETY: `len` is in bounds

+ len: unsafe { InlineSize::transmute_from_u8(len as u8) },

+ buf,

+ })

+ } else {

+ self.to_ascii_uppercase().into()

+ }

+ #[inline]

+ fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr {

+ self.replacen_smolstr(from, to, usize::MAX)

+ }

+ #[inline]

+ fn replacen_smolstr(&self, from: &str, to: &str, mut count: usize) -> SmolStr {

+ // Fast path for replacing a single ASCII character with another inline.

+ if let [from_u8] = from.as_bytes() {

+ if let [to_u8] = to.as_bytes() {

+ return if self.len() <= count {

+ // SAFETY: `from_u8` & `to_u8` are ascii

+ unsafe { replacen_1_ascii(self, |b| if b == from_u8 { *to_u8 } else { *b }) }

+ } else {

+ unsafe {

+ replacen_1_ascii(self, |b| {

+ if b == from_u8 && count != 0 {

+ count -= 1;

+ *to_u8

+ } else {

+ *b

+ }

+ })

+ }

+ };

+ }

+ let mut result = SmolStrBuilder::new();

+ let mut last_end = 0;

+ for (start, part) in self.match_indices(from).take(count) {

+ // SAFETY: `start` is guaranteed to be within the bounds of `self` as per

+ // `match_indices` and last_end is always less than or equal to `start`

+ result.push_str(unsafe { self.get_unchecked(last_end..start) });

+ result.push_str(to);

+ last_end = start + part.len();

+ }

+ // SAFETY: `self.len()` is guaranteed to be within the bounds of `self` and last_end is

+ // always less than or equal to `self.len()`

+ result.push_str(unsafe { self.get_unchecked(last_end..self.len()) });

+ SmolStr::from(result)

+ }

+/// SAFETY: `map` fn must only replace ascii with ascii or return unchanged bytes.

+#[inline]

+unsafe fn replacen_1_ascii(src: &str, mut map: impl FnMut(&u8) -> u8) -> SmolStr {

+ if src.len() <= INLINE_CAP {

+ let mut buf = [0u8; INLINE_CAP];

+ for (idx, b) in src.as_bytes().iter().enumerate() {

+ buf[idx] = map(b);

+ }

+ SmolStr(Repr::Inline {

+ // SAFETY: `len` is in bounds

+ len: unsafe { InlineSize::transmute_from_u8(src.len() as u8) },

+ buf,

+ })

+ } else {

+ let out = src.as_bytes().iter().map(map).collect();

+ // SAFETY: We replaced ascii with ascii on valid utf8 strings.

+ unsafe { String::from_utf8_unchecked(out).into() }

+ }

+/// Inline version of std fn `convert_while_ascii`. `s` must have len <= 23.

+///

+/// Applies `convert` to the leading run of ASCII bytes of `s`, writing the

+/// results to the start of a zeroed `INLINE_CAP`-sized buffer. Returns that

+/// buffer together with the unconverted remainder of `s`. The remainder may

+/// start earlier than the first non-ASCII byte: when a full 16-byte chunk

+/// contains any non-ASCII byte, the scan stops at the start of that chunk.

+#[inline]

+fn inline_convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> ([u8; INLINE_CAP], &str) {

+ // Process the input in chunks of 16 bytes to enable auto-vectorization.

+ // Previously the chunk size depended on the size of `usize`,

+ // but on 32-bit platforms with sse or neon it is also the better choice.

+ // The only downside on other platforms would be a bit more loop-unrolling.

+ const N: usize = 16;

+ debug_assert!(s.len() <= INLINE_CAP, "only for inline-able strings");

+ let mut slice = s.as_bytes();

+ let mut out = [0u8; INLINE_CAP];

+ // `out_slice` is advanced in lockstep with `slice`, so both always have the same length.

+ let mut out_slice = &mut out[..slice.len()];

+ let mut is_ascii = [false; N];

+ while slice.len() >= N {

+ // SAFETY: checked in loop condition

+ let chunk = unsafe { slice.get_unchecked(..N) };

+ // SAFETY: out_slice has at least same length as input slice and gets sliced with the same offsets

+ let out_chunk = unsafe { out_slice.get_unchecked_mut(..N) };

+ for j in 0..N {

+ is_ascii[j] = chunk[j] <= 127;

+ }

+ // Auto-vectorization for this check is a bit fragile, sum and comparing against the chunk

+ // size gives the best result, specifically a pmovmsk instruction on x86.

+ // See https://github.com/llvm/llvm-project/issues/96395 for why llvm does not

+ // currently recognize other similar idioms.

+ if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N {

+ break;

+ }

+ for j in 0..N {

+ out_chunk[j] = convert(&chunk[j]);

+ }

+ // SAFETY: `slice.len() >= N` was checked in the loop condition, and `out_slice` has the same length

+ slice = unsafe { slice.get_unchecked(N..) };

+ out_slice = unsafe { out_slice.get_unchecked_mut(N..) };

+ }

+ // handle the remainder as individual bytes

+ while !slice.is_empty() {

+ let byte = slice[0];

+ if byte > 127 {

+ break;

+ }

+ // SAFETY: out_slice has at least same length as input slice

+ unsafe {

+ *out_slice.get_unchecked_mut(0) = convert(&byte);

+ }

+ // SAFETY: `slice` is non-empty (loop condition), and `out_slice` has the same length

+ slice = unsafe { slice.get_unchecked(1..) };

+ out_slice = unsafe { out_slice.get_unchecked_mut(1..) };

+ }

+ unsafe {

+ // SAFETY: we know this is a valid char boundary

+ // since we only skipped over leading ascii bytes

+ let rest = core::str::from_utf8_unchecked(slice);

+ (out, rest)

+ }

+impl<T> ToSmolStr for T
+where
+    T: fmt::Display + ?Sized,
+{
+    /// Writes the `Display` output of `self` into a `SmolStrBuilder` and
+    /// finishes it.
+    fn to_smolstr(&self) -> SmolStr {
+        let mut w = SmolStrBuilder::new();
+        fmt::Write::write_fmt(&mut w, format_args!("{}", self))
+            .expect("a formatting trait implementation returned an error");
+        w.finish()
+    }
+}

+mod private {

+ /// No downstream impls allowed.

+ pub trait Sealed {}

+ impl Sealed for str {}

+/// Formats arguments to a [`SmolStr`], potentially without allocating.

+///

+/// See [`alloc::format!`] or [`format_args!`] for syntax documentation.

+#[macro_export]

+macro_rules! format_smolstr {

+ ($($tt:tt)*) => {{

+ let mut w = $crate::SmolStrBuilder::new();

+ ::core::fmt::Write::write_fmt(&mut w, format_args!($($tt)*)).expect("a formatting trait implementation returned an error");

+ w.finish()

+ }};

+/// Incrementally assembles a [`SmolStr`].
+///
+/// No allocation happens as long as the final string fits into the inline buffer.
+#[derive(Clone, Default, Debug, PartialEq, Eq)]
+pub struct SmolStrBuilder(SmolStrBuilderRepr);

+#[derive(Clone, Debug, PartialEq, Eq)]

+enum SmolStrBuilderRepr {

+ Inline { len: usize, buf: [u8; INLINE_CAP] },

+ Heap(String),

+impl Default for SmolStrBuilderRepr {

+ #[inline]

+ fn default() -> Self {

+ SmolStrBuilderRepr::Inline {

+ buf: [0; INLINE_CAP],

+ len: 0,

+ }

+impl SmolStrBuilder {

+ /// Creates a new empty [`SmolStrBuilder`].

+ #[must_use]

+ pub const fn new() -> Self {

+ Self(SmolStrBuilderRepr::Inline {

+ buf: [0; INLINE_CAP],

+ len: 0,

+ })

+ }

+ /// Builds a [`SmolStr`] from `self`.

+ #[must_use]

+ pub fn finish(&self) -> SmolStr {

+ SmolStr(match &self.0 {

+ &SmolStrBuilderRepr::Inline { len, buf } => {

+ debug_assert!(len <= INLINE_CAP);

+ Repr::Inline {

+ // SAFETY: We know that `value.len` is less than or equal to the maximum value of `InlineSize`

+ len: unsafe { InlineSize::transmute_from_u8(len as u8) },

+ buf,

+ }

+ SmolStrBuilderRepr::Heap(heap) => Repr::new(heap),

+ })

+ }

+ /// Appends the given [`char`] to the end of `self`'s buffer.

+ pub fn push(&mut self, c: char) {

+ match &mut self.0 {

+ SmolStrBuilderRepr::Inline { len, buf } => {

+ let char_len = c.len_utf8();

+ let new_len = *len + char_len;

+ if new_len <= INLINE_CAP {

+ c.encode_utf8(&mut buf[*len..]);

+ *len += char_len;

+ } else {

+ let mut heap = String::with_capacity(new_len);

+ // copy existing inline bytes over to the heap

+ // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes

+ unsafe { heap.as_mut_vec().extend_from_slice(&buf[..*len]) };

+ heap.push(c);

+ self.0 = SmolStrBuilderRepr::Heap(heap);

+ }

+ SmolStrBuilderRepr::Heap(h) => h.push(c),

+ }

+ /// Appends a given string slice onto the end of `self`'s buffer.

+ pub fn push_str(&mut self, s: &str) {

+ match &mut self.0 {

+ SmolStrBuilderRepr::Inline { len, buf } => {

+ let old_len = *len;

+ *len += s.len();

+ // if the new length will fit on the stack (even if it fills it entirely)

+ if *len <= INLINE_CAP {

+ buf[old_len..*len].copy_from_slice(s.as_bytes());

+ return; // skip the heap push below

+ }

+ let mut heap = String::with_capacity(*len);

+ // copy existing inline bytes over to the heap

+ // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes

+ unsafe { heap.as_mut_vec().extend_from_slice(&buf[..old_len]) };

+ heap.push_str(s);

+ self.0 = SmolStrBuilderRepr::Heap(heap);

+ }

+ SmolStrBuilderRepr::Heap(heap) => heap.push_str(s),

+ }

+impl fmt::Write for SmolStrBuilder {

+ #[inline]

+ fn write_str(&mut self, s: &str) -> fmt::Result {

+ self.push_str(s);

+ Ok(())

+ }

+impl From<SmolStrBuilder> for SmolStr {

+ fn from(value: SmolStrBuilder) -> Self {

+ value.finish()

+ }

+#[cfg(feature = "arbitrary")]

+impl<'a> arbitrary::Arbitrary<'a> for SmolStr {

+ fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result<Self, arbitrary::Error> {

+ let s = <&str>::arbitrary(u)?;

+ Ok(SmolStr::new(s))

+ }

+#[cfg(feature = "borsh")]

+mod borsh;

+#[cfg(feature = "serde")]

+mod serde;

+#[test]

+fn from_buf_and_chars_size_hinted_heap() {

+ let str = from_buf_and_chars(

+ *b"abcdefghijklmnopqr00000",

+ 18,

+ "_0x1x2x3x4x5x6x7x8x9x10x11x12x13".chars(),

+ );

+ assert_eq!(str, "abcdefghijklmnopqr_0x1x2x3x4x5x6x7x8x9x10x11x12x13");

diff --git a/lib/smol_str/src/serde.rs b/lib/smol_str/src/serde.rs
new file mode 100644
index 0000000000..4f08b444c5
--- /dev/null
+++ b/lib/smol_str/src/serde.rs

@@ -0,0 +1,97 @@

+use alloc::{string::String, vec::Vec};

+use core::fmt;

+use serde::de::{Deserializer, Error, Unexpected, Visitor};

+use serde_core as serde;

+use crate::SmolStr;

+// https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125

+fn smol_str<'de: 'a, 'a, D>(deserializer: D) -> Result<SmolStr, D::Error>

+where

+ D: Deserializer<'de>,

+ struct SmolStrVisitor;

+ impl<'a> Visitor<'a> for SmolStrVisitor {

+ type Value = SmolStr;

+ fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {

+ formatter.write_str("a string")

+ }

+ fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>

+ where

+ E: Error,

+ {

+ Ok(SmolStr::from(v))

+ }

+ fn visit_borrowed_str<E>(self, v: &'a str) -> Result<Self::Value, E>

+ where

+ E: Error,

+ {

+ Ok(SmolStr::from(v))

+ }

+ fn visit_string<E>(self, v: String) -> Result<Self::Value, E>

+ where

+ E: Error,

+ {

+ Ok(SmolStr::from(v))

+ }

+ fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>

+ where

+ E: Error,

+ {

+ match core::str::from_utf8(v) {

+ Ok(s) => Ok(SmolStr::from(s)),

+ Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)),

+ }

+ fn visit_borrowed_bytes<E>(self, v: &'a [u8]) -> Result<Self::Value, E>

+ where

+ E: Error,

+ {

+ match core::str::from_utf8(v) {

+ Ok(s) => Ok(SmolStr::from(s)),

+ Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)),

+ }

+ fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>

+ where

+ E: Error,

+ {

+ match String::from_utf8(v) {

+ Ok(s) => Ok(SmolStr::from(s)),

+ Err(e) => Err(Error::invalid_value(

+ Unexpected::Bytes(&e.into_bytes()),

+ &self,

+ )),

+ }

+ deserializer.deserialize_str(SmolStrVisitor)

+impl serde::Serialize for SmolStr {

+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>

+ where

+ S: serde::Serializer,

+ {

+ self.as_str().serialize(serializer)

+ }

+impl<'de> serde::Deserialize<'de> for SmolStr {

+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>

+ where

+ D: serde::Deserializer<'de>,

+ {

+ smol_str(deserializer)

+ }

diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs
new file mode 100644
index 0000000000..8f7d9ec39a
--- /dev/null
+++ b/lib/smol_str/tests/test.rs

@@ -0,0 +1,451 @@

+use std::sync::Arc;

+#[cfg(not(miri))]

+use proptest::{prop_assert, prop_assert_eq, proptest};

+use smol_str::{SmolStr, SmolStrBuilder};

+#[test]

+#[cfg(target_pointer_width = "64")]

+fn smol_str_is_smol() {

+ assert_eq!(

+ ::std::mem::size_of::<SmolStr>(),

+ ::std::mem::size_of::<String>(),

+ );

+#[test]

+fn assert_traits() {

+ fn f<T: Send + Sync + ::std::fmt::Debug + Clone>() {}

+ f::<SmolStr>();

+#[test]

+fn conversions() {

+ let s: SmolStr = "Hello, World!".into();

+ let s: String = s.into();

+ assert_eq!(s, "Hello, World!");

+ let s: SmolStr = Arc::<str>::from("Hello, World!").into();

+ let s: Arc<str> = s.into();

+ assert_eq!(s.as_ref(), "Hello, World!");

+#[test]

+fn const_fn_ctor() {

+ const EMPTY: SmolStr = SmolStr::new_inline("");

+ const A: SmolStr = SmolStr::new_inline("A");

+ const HELLO: SmolStr = SmolStr::new_inline("HELLO");

+ const LONG: SmolStr = SmolStr::new_inline("ABCDEFGHIZKLMNOPQRSTUVW");

+ assert_eq!(EMPTY, SmolStr::from(""));

+ assert_eq!(A, SmolStr::from("A"));

+ assert_eq!(HELLO, SmolStr::from("HELLO"));

+ assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUVW"));

+#[cfg(not(miri))]

+fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner::TestCaseError> {

+ prop_assert_eq!(smol.as_str(), std_str);

+ prop_assert_eq!(smol.len(), std_str.len());

+ prop_assert_eq!(smol.is_empty(), std_str.is_empty());

+ if smol.len() <= 23 {

+ prop_assert!(!smol.is_heap_allocated());

+ }

+ Ok(())

+#[cfg(not(miri))]

+proptest! {

+ #[test]

+ fn roundtrip(s: String) {

+ check_props(s.as_str(), SmolStr::new(s.clone()))?;

+ }

+ #[test]

+ fn roundtrip_spaces(s in r"( )*") {

+ check_props(s.as_str(), SmolStr::new(s.clone()))?;

+ }

+ #[test]

+ fn roundtrip_newlines(s in r"\n*") {

+ check_props(s.as_str(), SmolStr::new(s.clone()))?;

+ }

+ #[test]

+ fn roundtrip_ws(s in r"( |\n)*") {

+ check_props(s.as_str(), SmolStr::new(s.clone()))?;

+ }

+ #[test]

+ fn from_string_iter(slices in proptest::collection::vec(".*", 1..100)) {

+ let string: String = slices.iter().map(|x| x.as_str()).collect();

+ let smol: SmolStr = slices.into_iter().collect();

+ check_props(string.as_str(), smol)?;

+ }

+ #[test]

+ fn from_str_iter(slices in proptest::collection::vec(".*", 1..100)) {

+ let string: String = slices.iter().map(|x| x.as_str()).collect();

+ let smol: SmolStr = slices.iter().collect();

+ check_props(string.as_str(), smol)?;

+ }

+#[cfg(feature = "serde")]

+mod serde_tests {

+ use super::*;

+ use serde::{Deserialize, Serialize};

+ use std::collections::HashMap;

+ #[derive(Serialize, Deserialize)]

+ struct SmolStrStruct {

+ pub(crate) s: SmolStr,

+ pub(crate) vec: Vec<SmolStr>,

+ pub(crate) map: HashMap<SmolStr, SmolStr>,

+ }

+ #[test]

+ fn test_serde() {

+ let s = SmolStr::new("Hello, World");

+ let s = serde_json::to_string(&s).unwrap();

+ assert_eq!(s, "\"Hello, World\"");

+ let s: SmolStr = serde_json::from_str(&s).unwrap();

+ assert_eq!(s, "Hello, World");

+ }

+ #[test]

+ fn test_serde_reader() {

+ let s = SmolStr::new("Hello, World");

+ let s = serde_json::to_string(&s).unwrap();

+ assert_eq!(s, "\"Hello, World\"");

+ let s: SmolStr = serde_json::from_reader(std::io::Cursor::new(s)).unwrap();

+ assert_eq!(s, "Hello, World");

+ }

+ #[test]

+ fn test_serde_struct() {

+ let mut map = HashMap::new();

+ map.insert(SmolStr::new("a"), SmolStr::new("ohno"));

+ let struct_ = SmolStrStruct {

+ s: SmolStr::new("Hello, World"),

+ vec: vec![SmolStr::new("Hello, World"), SmolStr::new("Hello, World")],

+ map,

+ };

+ let s = serde_json::to_string(&struct_).unwrap();

+ let _new_struct: SmolStrStruct = serde_json::from_str(&s).unwrap();

+ }

+ #[test]

+ fn test_serde_struct_reader() {

+ let mut map = HashMap::new();

+ map.insert(SmolStr::new("a"), SmolStr::new("ohno"));

+ let struct_ = SmolStrStruct {

+ s: SmolStr::new("Hello, World"),

+ vec: vec![SmolStr::new("Hello, World"), SmolStr::new("Hello, World")],

+ map,

+ };

+ let s = serde_json::to_string(&struct_).unwrap();

+ let _new_struct: SmolStrStruct = serde_json::from_reader(std::io::Cursor::new(s)).unwrap();

+ }

+ #[test]

+ fn test_serde_hashmap() {

+ let mut map = HashMap::new();

+ map.insert(SmolStr::new("a"), SmolStr::new("ohno"));

+ let s = serde_json::to_string(&map).unwrap();

+ let _s: HashMap<SmolStr, SmolStr> = serde_json::from_str(&s).unwrap();

+ }

+ #[test]

+ fn test_serde_hashmap_reader() {

+ let mut map = HashMap::new();

+ map.insert(SmolStr::new("a"), SmolStr::new("ohno"));

+ let s = serde_json::to_string(&map).unwrap();

+ let _s: HashMap<SmolStr, SmolStr> =

+ serde_json::from_reader(std::io::Cursor::new(s)).unwrap();

+ }

+ #[test]

+ fn test_serde_vec() {

+ let vec = vec![SmolStr::new(""), SmolStr::new("b")];

+ let s = serde_json::to_string(&vec).unwrap();

+ let _s: Vec<SmolStr> = serde_json::from_str(&s).unwrap();

+ }

+ #[test]

+ fn test_serde_vec_reader() {

+ let vec = vec![SmolStr::new(""), SmolStr::new("b")];

+ let s = serde_json::to_string(&vec).unwrap();

+ let _s: Vec<SmolStr> = serde_json::from_reader(std::io::Cursor::new(s)).unwrap();

+ }

+#[test]

+fn test_search_in_hashmap() {

+ let mut m = ::std::collections::HashMap::<SmolStr, i32>::new();

+ m.insert("aaa".into(), 17);

+ assert_eq!(17, *m.get("aaa").unwrap());

+#[test]

+fn test_from_char_iterator() {

+ let examples = [

+ // Simple keyword-like strings

+ ("if", false),

+ ("for", false),

+ ("impl", false),

+ // Strings containing two-byte characters

+ ("パーティーへ行かないか", true),

+ ("パーティーへ行か", true),

+ ("パーティーへ行_", false),

+ ("和製漢語", false),

+ ("部落格", false),

+ ("사회과학원 어학연구소", true),

+ // String containing diverse characters

+ ("表ポあA鷗ŒéＢ逍Üßªąñ丂㐀𠀀", true),

+ ];

+ for (raw, is_heap) in &examples {

+ let s: SmolStr = raw.chars().collect();

+ assert_eq!(s.as_str(), *raw);

+ assert_eq!(s.is_heap_allocated(), *is_heap);

+ }

+ // String which has too many characters to even consider inlining: Chars::size_hint uses

+ // (`len` + 3) / 4. With `len` = 89, this results in 23, so `from_iter` will immediately

+ // heap allocate

+ let raw = "a".repeat(23 * 4 + 1);

+ let s: SmolStr = raw.chars().collect();

+ assert_eq!(s.as_str(), raw);

+ assert!(s.is_heap_allocated());

+#[test]

+fn test_bad_size_hint_char_iter() {

+ struct BadSizeHint<I>(I);

+ impl<T, I: Iterator<Item = T>> Iterator for BadSizeHint<I> {

+ type Item = T;

+ fn next(&mut self) -> Option<Self::Item> {

+ self.0.next()

+ }

+ fn size_hint(&self) -> (usize, Option<usize>) {

+ (1024, None)

+ }

+ let data = "testing";

+ let collected: SmolStr = BadSizeHint(data.chars()).collect();

+ let new = SmolStr::new(data);

+ assert!(!collected.is_heap_allocated());

+ assert!(!new.is_heap_allocated());

+ assert_eq!(new, collected);

+#[test]

+fn test_to_smolstr() {

+ use smol_str::ToSmolStr;

+ for i in 0..26 {

+ let a = &"abcdefghijklmnopqrstuvwxyz"[i..];

+ assert_eq!(a, a.to_smolstr());

+ assert_eq!(a, smol_str::format_smolstr!("{}", a));

+ }

+#[test]

+fn test_builder_push_str() {

+ //empty

+ let builder = SmolStrBuilder::new();

+ assert_eq!("", builder.finish());

+ // inline push

+ let mut builder = SmolStrBuilder::new();

+ builder.push_str("a");

+ builder.push_str("b");

+ let s = builder.finish();

+ assert!(!s.is_heap_allocated());

+ assert_eq!("ab", s);

+ // inline max push

+ let mut builder = SmolStrBuilder::new();

+ builder.push_str(&"a".repeat(23));

+ let s = builder.finish();

+ assert!(!s.is_heap_allocated());

+ assert_eq!("a".repeat(23), s);

+ // heap push immediate

+ let mut builder = SmolStrBuilder::new();

+ builder.push_str(&"a".repeat(24));

+ let s = builder.finish();

+ assert!(s.is_heap_allocated());

+ assert_eq!("a".repeat(24), s);

+ // heap push succession

+ let mut builder = SmolStrBuilder::new();

+ builder.push_str(&"a".repeat(23));

+ let s = builder.finish();

+ assert!(s.is_heap_allocated());

+ assert_eq!("a".repeat(46), s);

+ // heap push on multibyte char

+ let mut builder = SmolStrBuilder::new();

+ builder.push_str("ohnonononononononono!");

+ builder.push('🤯');

+ let s = builder.finish();

+ assert!(s.is_heap_allocated());

+ assert_eq!("ohnonononononononono!🤯", s);

+#[test]

+fn test_builder_push() {

+ //empty

+ let builder = SmolStrBuilder::new();

+ assert_eq!("", builder.finish());

+ // inline push

+ let mut builder = SmolStrBuilder::new();

+ builder.push('a');

+ builder.push('b');

+ let s = builder.finish();

+ assert!(!s.is_heap_allocated());

+ assert_eq!("ab", s);

+ // inline max push

+ let mut builder = SmolStrBuilder::new();

+ for _ in 0..23 {

+ builder.push('a');

+ }

+ let s = builder.finish();

+ assert!(!s.is_heap_allocated());

+ assert_eq!("a".repeat(23), s);

+ // heap push

+ let mut builder = SmolStrBuilder::new();

+ for _ in 0..24 {

+ builder.push('a');

+ }

+ let s = builder.finish();

+ assert!(s.is_heap_allocated());

+ assert_eq!("a".repeat(24), s);

+#[cfg(test)]

+mod test_str_ext {

+ use smol_str::StrExt;

+ #[test]

+ fn large() {

+ let lowercase = "aaaaaaAAAAAaaaaaaaaaaaaaaaaaaaaaAAAAaaaaaaaaaaaaaa".to_lowercase_smolstr();

+ assert_eq!(

+ lowercase,

+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"

+ );

+ assert!(lowercase.is_heap_allocated());

+ }

+ #[test]

+ fn to_lowercase() {

+ let lowercase = "aßΔC".to_lowercase_smolstr();

+ assert_eq!(lowercase, "aßδc");

+ assert!(!lowercase.is_heap_allocated());

+ }

+ #[test]

+ fn to_uppercase() {

+ let uppercase = "aßΔC".to_uppercase_smolstr();

+ assert_eq!(uppercase, "ASSΔC");

+ assert!(!uppercase.is_heap_allocated());

+ }

+ #[test]

+ fn to_ascii_lowercase() {

+ let uppercase = "aßΔC".to_ascii_lowercase_smolstr();

+ assert_eq!(uppercase, "aßΔc");

+ assert!(!uppercase.is_heap_allocated());

+ }

+ #[test]

+ fn to_ascii_uppercase() {

+ let uppercase = "aßΔC".to_ascii_uppercase_smolstr();

+ assert_eq!(uppercase, "AßΔC");

+ assert!(!uppercase.is_heap_allocated());

+ }

+ #[test]

+ fn replace() {

+ let result = "foo_bar_baz".replace_smolstr("ba", "do");

+ assert_eq!(result, "foo_dor_doz");

+ assert!(!result.is_heap_allocated());

+ }

+ #[test]

+ fn replacen() {

+ let result = "foo_bar_baz".replacen_smolstr("ba", "do", 1);

+ assert_eq!(result, "foo_dor_baz");

+ assert!(!result.is_heap_allocated());

+ }

+ #[test]

+ fn replacen_1_ascii() {

+ let result = "foo_bar_baz".replacen_smolstr("o", "u", 1);

+ assert_eq!(result, "fuo_bar_baz");

+ assert!(!result.is_heap_allocated());

+ }

+#[cfg(feature = "borsh")]

+mod borsh_tests {

+ use borsh::BorshDeserialize;

+ use smol_str::{SmolStr, ToSmolStr};

+ use std::io::Cursor;

+ #[test]

+ fn borsh_serialize_stack() {

+ let smolstr_on_stack = "aßΔCaßδc".to_smolstr();

+ let mut buffer = Vec::new();

+ borsh::BorshSerialize::serialize(&smolstr_on_stack, &mut buffer).unwrap();

+ let mut cursor = Cursor::new(buffer);

+ let decoded: SmolStr = borsh::BorshDeserialize::deserialize_reader(&mut cursor).unwrap();

+ assert_eq!(smolstr_on_stack, decoded);

+ }

+ #[test]

+ fn borsh_serialize_heap() {

+ let smolstr_on_heap = "aßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδc".to_smolstr();

+ let mut buffer = Vec::new();

+ borsh::BorshSerialize::serialize(&smolstr_on_heap, &mut buffer).unwrap();

+ let mut cursor = Cursor::new(buffer);

+ let decoded: SmolStr = borsh::BorshDeserialize::deserialize_reader(&mut cursor).unwrap();

+ assert_eq!(smolstr_on_heap, decoded);

+ }

+ #[test]

+ fn borsh_non_utf8_stack() {

+ let invalid_utf8: Vec<u8> = vec![0xF0, 0x9F, 0x8F]; // Incomplete UTF-8 sequence

+ let wrong_utf8 = SmolStr::from(unsafe { String::from_utf8_unchecked(invalid_utf8) });

+ let mut buffer = Vec::new();

+ borsh::BorshSerialize::serialize(&wrong_utf8, &mut buffer).unwrap();

+ let mut cursor = Cursor::new(buffer);

+ let result = SmolStr::deserialize_reader(&mut cursor);

+ assert!(result.is_err());

+ }

+ #[test]

+ fn borsh_non_utf8_heap() {

+ let invalid_utf8: Vec<u8> = vec![

+ 0xC1, 0x8A, 0x5F, 0xE2, 0x3A, 0x9E, 0x3B, 0xAA, 0x01, 0x08, 0x6F, 0x2F, 0xC0, 0x32,

+ 0xAB, 0xE1, 0x9A, 0x2F, 0x4A, 0x3F, 0x25, 0x0D, 0x8A, 0x2A, 0x19, 0x11, 0xF0, 0x7F,

+ 0x0E, 0x80,

+ ];

+ let wrong_utf8 = SmolStr::from(unsafe { String::from_utf8_unchecked(invalid_utf8) });

+ let mut buffer = Vec::new();

+ borsh::BorshSerialize::serialize(&wrong_utf8, &mut buffer).unwrap();

+ let mut cursor = Cursor::new(buffer);

+ let result = SmolStr::deserialize_reader(&mut cursor);

+ assert!(result.is_err());

+ }

diff --git a/lib/smol_str/tests/tidy.rs b/lib/smol_str/tests/tidy.rs
new file mode 100644
index 0000000000..e2d809e40f
--- /dev/null
+++ b/lib/smol_str/tests/tidy.rs

@@ -0,0 +1,47 @@

+#![cfg(not(miri))]

+use std::{

+ env,

+ path::{Path, PathBuf},

+ process::{Command, Stdio},

+};

+fn project_root() -> PathBuf {

+ PathBuf::from(

+ env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| env!("CARGO_MANIFEST_DIR").to_owned()),

+ )

+fn run(cmd: &str, dir: impl AsRef<Path>) -> Result<(), ()> {

+ let mut args: Vec<_> = cmd.split_whitespace().collect();

+ let bin = args.remove(0);

+ println!("> {}", cmd);

+ let output = Command::new(bin)

+ .args(args)

+ .current_dir(dir)

+ .stdin(Stdio::null())

+ .stdout(Stdio::piped())

+ .stderr(Stdio::inherit())

+ .output()

+ .map_err(drop)?;

+ if output.status.success() {

+ Ok(())

+ } else {

+ let stdout = String::from_utf8(output.stdout).map_err(drop)?;

+ print!("{}", stdout);

+ Err(())

+ }

+#[test]

+fn check_code_formatting() {

+ let dir = project_root();

+ if run("rustfmt +stable --version", &dir).is_err() {

+ panic!(

+ "failed to run rustfmt from toolchain 'stable'; \

+ please run `rustup component add rustfmt --toolchain stable` to install it.",

+ );

+ }

+ if run("cargo +stable fmt -- --check", &dir).is_err() {

+ panic!("code is not properly formatted; please format the code by running `cargo fmt`")

+ }