From 215feb9313bb91666da71e1ae73baaefb8f438ec Mon Sep 17 00:00:00 2001 From: Josh McKinney Date: Thu, 26 Sep 2024 13:54:28 -0700 Subject: [PATCH] Make AsciiSet consts and fields values Refs take 8 bytes, whereas the values are only 16 bytes, so there is not a huge benefit to using references rather than values. PercentEncode is changed to store the AsciiSet as a value, and the functions that take AsciiSet now take Into<AsciiSet> instead of &'static AsciiSet. This allows existing code to continue to work without modification. The AsciiSet consts (CONTROLS and NON_ALPHANUMERIC) are also changed to be values, which is a breaking change, but will only affect code that attempts to dereference them. Discussion about the rationale for this change is at --- percent_encoding/src/ascii_set.rs | 12 +++++++++--- percent_encoding/src/lib.rs | 25 ++++++++++++++++++++----- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/percent_encoding/src/ascii_set.rs b/percent_encoding/src/ascii_set.rs index 41cd235e..ad70a210 100644 --- a/percent_encoding/src/ascii_set.rs +++ b/percent_encoding/src/ascii_set.rs @@ -24,7 +24,7 @@ use core::{mem, ops}; /// /// https://url.spec.whatwg.org/#fragment-percent-encode-set /// const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`'); /// ``` -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct AsciiSet { mask: [Chunk; ASCII_RANGE_LEN / BITS_PER_CHUNK], } @@ -83,6 +83,12 @@ impl AsciiSet { } } +impl From<&AsciiSet> for AsciiSet { + fn from(set: &AsciiSet) -> Self { + *set + } +} + impl ops::Add for AsciiSet { type Output = Self; @@ -104,7 +110,7 @@ impl ops::Not for AsciiSet { /// Note that this includes the newline and tab characters, but not the space 0x20. /// /// -pub const CONTROLS: &AsciiSet = &AsciiSet { +pub const CONTROLS: AsciiSet = AsciiSet { mask: [ !0_u32, // C0: 0x00 to 0x1F (32 bits set) 0, @@ -134,7 +140,7 @@ static_assert! 
{ /// Everything that is not an ASCII letter or digit. /// /// This is probably more eager than necessary in any context. -pub const NON_ALPHANUMERIC: &AsciiSet = &CONTROLS +pub const NON_ALPHANUMERIC: AsciiSet = CONTROLS .add(b' ') .add(b'!') .add(b'"') diff --git a/percent_encoding/src/lib.rs b/percent_encoding/src/lib.rs index 6ea05237..b2bf033e 100644 --- a/percent_encoding/src/lib.rs +++ b/percent_encoding/src/lib.rs @@ -32,7 +32,7 @@ //! use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS}; //! //! /// https://url.spec.whatwg.org/#fragment-percent-encode-set -//! const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`'); +//! const FRAGMENT: AsciiSet = CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`'); //! //! assert_eq!(utf8_percent_encode("foo ", FRAGMENT).to_string(), "foo%20%3Cbar%3E"); //! ``` @@ -114,10 +114,10 @@ pub fn percent_encode_byte(byte: u8) -> &'static str { /// assert_eq!(percent_encode(b"foo bar?", NON_ALPHANUMERIC).to_string(), "foo%20bar%3F"); /// ``` #[inline] -pub fn percent_encode<'a>(input: &'a [u8], ascii_set: &'static AsciiSet) -> PercentEncode<'a> { +pub fn percent_encode<'a, T: Into>(input: &'a [u8], ascii_set: T) -> PercentEncode<'a> { PercentEncode { bytes: input, - ascii_set, + ascii_set: ascii_set.into(), } } @@ -133,7 +133,10 @@ pub fn percent_encode<'a>(input: &'a [u8], ascii_set: &'static AsciiSet) -> Perc /// assert_eq!(utf8_percent_encode("foo bar?", NON_ALPHANUMERIC).to_string(), "foo%20bar%3F"); /// ``` #[inline] -pub fn utf8_percent_encode<'a>(input: &'a str, ascii_set: &'static AsciiSet) -> PercentEncode<'a> { +pub fn utf8_percent_encode<'a, T: Into>( + input: &'a str, + ascii_set: T, +) -> PercentEncode<'a> { percent_encode(input.as_bytes(), ascii_set) } @@ -141,7 +144,7 @@ pub fn utf8_percent_encode<'a>(input: &'a str, ascii_set: &'static AsciiSet) -> #[derive(Debug, Clone, PartialEq, Eq)] pub struct PercentEncode<'a> { bytes: &'a [u8], - ascii_set: &'static AsciiSet, 
+ ascii_set: AsciiSet, } impl<'a> Iterator for PercentEncode<'a> { @@ -383,6 +386,12 @@ mod tests { assert_eq!(encoded.collect::(), "foo bar?"); } + #[test] + fn percent_encode_accepts_value() { + let encoded = percent_encode(b"foo bar?", AsciiSet::EMPTY); + assert_eq!(encoded.collect::(), "foo bar?"); + } + #[test] fn percent_encode_collect() { let encoded = percent_encode(b"foo bar?", NON_ALPHANUMERIC); @@ -410,6 +419,12 @@ mod tests { assert_eq!(encoded.collect::(), "foo bar?"); } + #[test] + fn utf8_percent_encode_accepts_value() { + let encoded = super::utf8_percent_encode("foo bar?", AsciiSet::EMPTY); + assert_eq!(encoded.collect::(), "foo bar?"); + } + #[test] fn utf8_percent_encode() { assert_eq!(