Skip to content

Commit 686d7be

Browse files
committed
Use string interning for PkgNameBuf, etc.
Use the lasso crate to intern package name strings, so that any instances of the same name share the same backing memory. These are also very cheap to clone because it only clones the "Spur" (a u32). The tradeoff is that there is some contention from using `ThreadedRodeo` since we need to support accessing the backing store from multiple threads. Signed-off-by: J Robert Ray <jrray@jrray.org>
1 parent 0063dfc commit 686d7be

File tree

8 files changed

+120
-35
lines changed

8 files changed

+120
-35
lines changed

Cargo.lock

Lines changed: 28 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ expanduser = "1.2"
4040
futures = "0.3.24"
4141
fuser = "0.12"
4242
indicatif = "0.16.2"
43+
lasso = { version = "0.7", features = ["multi-threaded"] }
4344
lazy_static = "1.4"
4445
libc = "0.2.80"
4546
nix = "0.26.2"

crates/parsedbuf/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,6 @@ name = "parsedbuf"
55
version = "0.1.0"
66

77
[dependencies]
8+
once_cell = { workspace = true }
89
paste = "1.0"
10+
lasso = { workspace = true }

crates/parsedbuf/src/lib.rs

Lines changed: 70 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,14 @@
55
#![deny(unsafe_op_in_unsafe_fn)]
66
#![warn(clippy::fn_params_excessive_bools)]
77

8+
use std::sync::Arc;
9+
810
pub use paste;
911

12+
/// Global string interner for common strings.
13+
pub static RODEO: once_cell::sync::Lazy<Arc<lasso::ThreadedRodeo>> =
14+
once_cell::sync::Lazy::new(|| Arc::new(lasso::ThreadedRodeo::default()));
15+
1016
/// Generate a pair of types to represent a parsed string type.
1117
///
1218
/// A `$type_name::validate()` method must be manually implemented which
@@ -42,9 +48,34 @@ macro_rules! parsed {
4248
}
4349

4450
$crate::paste::paste! {
45-
#[derive(Debug, Clone, Hash, Eq, PartialEq, Ord, PartialOrd)]
51+
#[derive(Debug, Clone, Eq, PartialEq)]
4652
#[doc = "An owned, mutable, and validated " $what " string"]
47-
pub struct $owned_type_name(String);
53+
pub struct $owned_type_name(lasso::Spur);
54+
}
55+
56+
impl std::hash::Hash for $owned_type_name {
57+
#[inline]
58+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
59+
// Hash the interned string, not the Spur, for consistency
60+
// with hash() on the borrowed type.
61+
$crate::RODEO.resolve(&self.0).hash(state)
62+
}
63+
}
64+
65+
impl Ord for $owned_type_name {
66+
#[inline]
67+
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
68+
// Order on the interned string, not the Spur, for consistency
69+
// with Ord on the borrowed type (as required by BTreeMap).
70+
self.as_str().cmp(other.as_str())
71+
}
72+
}
73+
74+
impl PartialOrd for $owned_type_name {
75+
#[inline]
76+
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
77+
Some(self.cmp(other))
78+
}
4879
}
4980

5081
#[cfg(feature = "parsedbuf-serde")]
@@ -77,6 +108,7 @@ macro_rules! parsed {
77108
})
78109
}
79110

111+
#[inline]
80112
pub fn as_str(&self) -> &str {
81113
&self.0
82114
}
@@ -93,10 +125,12 @@ macro_rules! parsed {
93125
}
94126
}
95127

128+
#[inline]
96129
pub fn is_empty(&self) -> bool {
97130
self.0.is_empty()
98131
}
99132

133+
#[inline]
100134
pub fn len(&self) -> usize {
101135
self.0.len()
102136
}
@@ -112,127 +146,139 @@ macro_rules! parsed {
112146
#[doc = ""]
113147
#[doc = "No validation is performed on `name`."]
114148
pub unsafe fn from_string(name: String) -> Self {
115-
Self(name)
116-
}
117-
}
118-
119-
$crate::paste::paste! {
120-
#[doc = "Consume the [`" $owned_type_name "`], returning the inner [`String`]."]
121-
pub fn into_inner(self) -> String {
122-
self.0
149+
let key = $crate::RODEO.try_get_or_intern(name).expect("won't run out of intern slots");
150+
Self(key)
123151
}
124152
}
125153
}
126154

127155
impl std::borrow::Borrow<$type_name> for $owned_type_name {
156+
#[inline]
128157
fn borrow(&self) -> &$type_name {
129158
self.as_ref()
130159
}
131160
}
132161

133-
impl std::borrow::Borrow<String> for $owned_type_name {
134-
fn borrow(&self) -> &String {
135-
&self.0
162+
impl std::borrow::Borrow<str> for $owned_type_name {
163+
#[inline]
164+
fn borrow(&self) -> &str {
165+
$crate::RODEO.resolve(&self.0)
136166
}
137167
}
138168

139169
impl std::borrow::ToOwned for $type_name {
140170
type Owned = $owned_type_name;
141171

142172
fn to_owned(&self) -> Self::Owned {
143-
$owned_type_name(self.0.to_owned())
173+
let key = $crate::RODEO.try_get_or_intern(&self.0).expect("won't run out of intern slots");
174+
$owned_type_name(key)
144175
}
145176
}
146177

147178
impl std::cmp::PartialEq<$type_name> for $owned_type_name {
179+
#[inline]
148180
fn eq(&self, other: &$type_name) -> bool {
149-
&**self == other
181+
self.as_str() == &other.0
150182
}
151183
}
152184

153185
impl std::cmp::PartialEq<$owned_type_name> for $type_name {
186+
#[inline]
154187
fn eq(&self, other: &$owned_type_name) -> bool {
155188
&self.0 == other.as_str()
156189
}
157190
}
158191

159192
impl std::cmp::PartialEq<$owned_type_name> for &$type_name {
193+
#[inline]
160194
fn eq(&self, other: &$owned_type_name) -> bool {
161195
&self.0 == other.as_str()
162196
}
163197
}
164198

165199
impl std::cmp::PartialEq<str> for $type_name {
200+
#[inline]
166201
fn eq(&self, other: &str) -> bool {
167-
self.as_str() == other
202+
&self.0 == other
168203
}
169204
}
170205

171206
impl std::cmp::PartialEq<str> for $owned_type_name {
207+
#[inline]
172208
fn eq(&self, other: &str) -> bool {
173-
&**self == other
209+
self.as_str() == other
174210
}
175211
}
176212

177213
impl std::convert::AsRef<$type_name> for $type_name {
214+
#[inline]
178215
fn as_ref(&self) -> &$type_name {
179216
self
180217
}
181218
}
182219

183220
impl std::convert::AsRef<$type_name> for $owned_type_name {
221+
#[inline]
184222
fn as_ref(&self) -> &$type_name {
185223
// Safety: from_str bypasses validation but the contents
186224
// of owned instance must already be valid
187-
unsafe { $type_name::from_str(&self.0) }
225+
unsafe { $type_name::from_str($crate::RODEO.resolve(&self.0)) }
188226
}
189227
}
190228

191229
impl std::convert::AsRef<std::ffi::OsStr> for $type_name {
230+
#[inline]
192231
fn as_ref(&self) -> &std::ffi::OsStr {
193232
std::ffi::OsStr::new(&self.0)
194233
}
195234
}
196235

197236
impl std::convert::AsRef<std::path::Path> for $type_name {
237+
#[inline]
198238
fn as_ref(&self) -> &std::path::Path {
199239
std::path::Path::new(&self.0)
200240
}
201241
}
202242

203243
impl std::convert::AsRef<std::path::Path> for $owned_type_name {
244+
#[inline]
204245
fn as_ref(&self) -> &std::path::Path {
205-
std::path::Path::new(&self.0)
246+
std::path::Path::new($crate::RODEO.resolve(&self.0))
206247
}
207248
}
208249

209250
impl std::convert::AsRef<str> for $owned_type_name {
251+
#[inline]
210252
fn as_ref(&self) -> &str {
211-
&self.0
253+
$crate::RODEO.resolve(&self.0)
212254
}
213255
}
214256

215257
impl std::cmp::PartialEq<&str> for $type_name {
258+
#[inline]
216259
fn eq(&self, other: &&str) -> bool {
217-
self.as_str() == *other
260+
&self.0 == *other
218261
}
219262
}
220263

221264
impl std::cmp::PartialEq<&str> for $owned_type_name {
265+
#[inline]
222266
fn eq(&self, other: &&str) -> bool {
223-
&**self == other
267+
self.as_str() == *other
224268
}
225269
}
226270

227271
impl std::convert::From<&$type_name> for $owned_type_name {
272+
#[inline]
228273
fn from(name: &$type_name) -> Self {
229274
name.to_owned()
230275
}
231276
}
232277

233278
impl std::convert::From<$owned_type_name> for String {
279+
#[inline]
234280
fn from(val: $owned_type_name) -> Self {
235-
val.0
281+
$crate::RODEO.resolve(&val.0).to_string()
236282
}
237283
}
238284

@@ -276,7 +322,7 @@ macro_rules! parsed {
276322

277323
impl std::fmt::Display for $owned_type_name {
278324
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
279-
self.0.fmt(f)
325+
$crate::RODEO.resolve(&self.0).fmt(f)
280326
}
281327
}
282328

crates/spk-schema/crates/foundation/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ enum_dispatch = "0.3.8"
1717
format_serde_error = {version = "0.3", default_features = false, features = ["serde_yaml", "colored"]}
1818
ignore = "0.4.18"
1919
itertools = "0.10"
20+
lasso = { workspace = true }
2021
nom = { workspace = true }
2122
nom-supreme = { workspace = true }
2223
once_cell = { workspace = true }

crates/spk-schema/crates/foundation/src/name/error.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,6 @@ pub type Result<T> = std::result::Result<T, Error>;
1010
pub enum Error {
1111
#[error(transparent)]
1212
InvalidNameError(#[from] super::InvalidNameError),
13+
#[error(transparent)]
14+
LassoError(#[from] lasso::LassoError),
1315
}

crates/spk-schema/crates/foundation/src/name/mod.rs

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -208,17 +208,23 @@ impl OptName {
208208

209209
/// Return a copy of this option, adding the provided namespace if there isn't already one set
210210
pub fn with_default_namespace<N: AsRef<PkgName>>(&self, ns: N) -> OptNameBuf {
211-
OptNameBuf(format!(
212-
"{}{}{}",
213-
self.namespace().unwrap_or_else(|| ns.as_ref()),
214-
Self::SEP,
215-
self.base_name()
216-
))
211+
// Safety: the individual components are already validated.
212+
unsafe {
213+
OptNameBuf::from_string(format!(
214+
"{}{}{}",
215+
self.namespace().unwrap_or_else(|| ns.as_ref()),
216+
Self::SEP,
217+
self.base_name()
218+
))
219+
}
217220
}
218221

219222
/// Return a copy of this option, replacing any namespace with the provided one
220223
pub fn with_namespace<N: AsRef<PkgName>>(&self, ns: N) -> OptNameBuf {
221-
OptNameBuf(format!("{}{}{}", ns.as_ref(), Self::SEP, self.base_name()))
224+
// Safety: the individual components are already validated.
225+
unsafe {
226+
OptNameBuf::from_string(format!("{}{}{}", ns.as_ref(), Self::SEP, self.base_name()))
227+
}
222228
}
223229

224230
/// Return an option with the same name but no associated namespace
@@ -336,6 +342,6 @@ impl RepositoryName {
336342
impl RepositoryNameBuf {
337343
/// Return if this RepositoryName names the "local" repository
338344
pub fn is_local(&self) -> bool {
339-
self.0 == "local"
345+
self.as_str() == "local"
340346
}
341347
}

0 commit comments

Comments
 (0)