Skip to content

Commit

Permalink
Make AsciiSet consts and fields values
Browse files Browse the repository at this point in the history
Refs take 8 bytes, whereas the values are only 16 bytes, so there is not
a huge benefit to using references rather than values. PercentEncoding
is changed to store the AsciiSet as a value, and the functions that take
AsciiSet now take Into<AsciiSet> instead of &'static AsciiSet. This
allows existing code to continue to work without modification. The
AsciiSet consts (CONTROLS and NON_ALPHANUMERIC) are also changed to be
values, which is a breaking change, but will only affect code that
attempts to dereference them.

Discussion about the rationale for this is change is at
<servo#970 (comment)>
  • Loading branch information
joshka committed Sep 26, 2024
1 parent eb354bd commit 23bd642
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 8 deletions.
12 changes: 9 additions & 3 deletions percent_encoding/src/ascii_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use core::{mem, ops};
/// /// https://url.spec.whatwg.org/#fragment-percent-encode-set
/// const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
/// ```
#[derive(Debug, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AsciiSet {
mask: [Chunk; ASCII_RANGE_LEN / BITS_PER_CHUNK],
}
Expand Down Expand Up @@ -83,6 +83,12 @@ impl AsciiSet {
}
}

impl From<&AsciiSet> for AsciiSet {
fn from(set: &AsciiSet) -> Self {
*set
}
}

impl ops::Add for AsciiSet {
type Output = Self;

Expand All @@ -104,7 +110,7 @@ impl ops::Not for AsciiSet {
/// Note that this includes the newline and tab characters, but not the space 0x20.
///
/// <https://url.spec.whatwg.org/#c0-control-percent-encode-set>
pub const CONTROLS: &AsciiSet = &AsciiSet {
pub const CONTROLS: AsciiSet = AsciiSet {
mask: [
!0_u32, // C0: 0x00 to 0x1F (32 bits set)
0,
Expand Down Expand Up @@ -134,7 +140,7 @@ static_assert! {
/// Everything that is not an ASCII letter or digit.
///
/// This is probably more eager than necessary in any context.
pub const NON_ALPHANUMERIC: &AsciiSet = &CONTROLS
pub const NON_ALPHANUMERIC: AsciiSet = CONTROLS
.add(b' ')
.add(b'!')
.add(b'"')
Expand Down
25 changes: 20 additions & 5 deletions percent_encoding/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
//! use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
//!
//! /// https://url.spec.whatwg.org/#fragment-percent-encode-set
//! const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
//! const FRAGMENT: AsciiSet = CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
//!
//! assert_eq!(utf8_percent_encode("foo <bar>", FRAGMENT).to_string(), "foo%20%3Cbar%3E");
//! ```
Expand Down Expand Up @@ -114,10 +114,10 @@ pub fn percent_encode_byte(byte: u8) -> &'static str {
/// assert_eq!(percent_encode(b"foo bar?", NON_ALPHANUMERIC).to_string(), "foo%20bar%3F");
/// ```
#[inline]
pub fn percent_encode<'a>(input: &'a [u8], ascii_set: &'static AsciiSet) -> PercentEncode<'a> {
pub fn percent_encode<'a, T: Into<AsciiSet>>(input: &'a [u8], ascii_set: T) -> PercentEncode<'a> {
PercentEncode {
bytes: input,
ascii_set,
ascii_set: ascii_set.into(),
}
}

Expand All @@ -133,15 +133,18 @@ pub fn percent_encode<'a>(input: &'a [u8], ascii_set: &'static AsciiSet) -> Perc
/// assert_eq!(utf8_percent_encode("foo bar?", NON_ALPHANUMERIC).to_string(), "foo%20bar%3F");
/// ```
#[inline]
pub fn utf8_percent_encode<'a>(input: &'a str, ascii_set: &'static AsciiSet) -> PercentEncode<'a> {
pub fn utf8_percent_encode<'a, T: Into<AsciiSet>>(
input: &'a str,
ascii_set: T,
) -> PercentEncode<'a> {
percent_encode(input.as_bytes(), ascii_set)
}

/// The return type of [`percent_encode`] and [`utf8_percent_encode`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PercentEncode<'a> {
bytes: &'a [u8],
ascii_set: &'static AsciiSet,
ascii_set: AsciiSet,
}

impl<'a> Iterator for PercentEncode<'a> {
Expand Down Expand Up @@ -383,6 +386,12 @@ mod tests {
assert_eq!(encoded.collect::<String>(), "foo bar?");
}

#[test]
fn percent_encode_accepts_value() {
let encoded = percent_encode(b"foo bar?", AsciiSet::EMPTY);
assert_eq!(encoded.collect::<String>(), "foo bar?");
}

#[test]
fn percent_encode_collect() {
let encoded = percent_encode(b"foo bar?", NON_ALPHANUMERIC);
Expand Down Expand Up @@ -410,6 +419,12 @@ mod tests {
assert_eq!(encoded.collect::<String>(), "foo bar?");
}

#[test]
fn utf8_percent_encode_accepts_value() {
let encoded = super::utf8_percent_encode("foo bar?", AsciiSet::EMPTY);
assert_eq!(encoded.collect::<String>(), "foo bar?");
}

#[test]
fn utf8_percent_encode() {
assert_eq!(
Expand Down

0 comments on commit 23bd642

Please sign in to comment.