Skip to content

Commit

Permalink
Make AsciiSet consts and fields values
Browse files Browse the repository at this point in the history
Refs take 8 bytes, whereas the values are only 16 bytes, so there is not
a huge benefit to using references rather than values. PercentEncode
is changed to store the AsciiSet as a value, and the functions that
previously accepted a reference now accept a value. This is a breaking
change for users who were passing a reference to AsciiSet to the
functions in the public API.

The AsciiSet consts (CONTROLS, NON_ALPHANUMERIC, etc.) are also changed
to be values.

This is an alternative to the non-breaking change in
<#976>

Discussion about the rationale for this change is at
<#970 (comment)>
  • Loading branch information
joshka committed Sep 27, 2024
1 parent 710e1e7 commit 2fc9dfb
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 19 deletions.
8 changes: 4 additions & 4 deletions percent_encoding/src/ascii_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ use core::{mem, ops};
/// use percent_encoding::{AsciiSet, CONTROLS};
///
/// /// https://url.spec.whatwg.org/#fragment-percent-encode-set
/// const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
/// const FRAGMENT: AsciiSet = CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
/// ```
#[derive(Debug, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AsciiSet {
mask: [Chunk; ASCII_RANGE_LEN / BITS_PER_CHUNK],
}
Expand Down Expand Up @@ -104,7 +104,7 @@ impl ops::Not for AsciiSet {
/// Note that this includes the newline and tab characters, but not the space 0x20.
///
/// <https://url.spec.whatwg.org/#c0-control-percent-encode-set>
pub const CONTROLS: &AsciiSet = &AsciiSet {
pub const CONTROLS: AsciiSet = AsciiSet {
mask: [
!0_u32, // C0: 0x00 to 0x1F (32 bits set)
0,
Expand Down Expand Up @@ -134,7 +134,7 @@ static_assert! {
/// Everything that is not an ASCII letter or digit.
///
/// This is probably more eager than necessary in any context.
pub const NON_ALPHANUMERIC: &AsciiSet = &CONTROLS
pub const NON_ALPHANUMERIC: AsciiSet = CONTROLS
.add(b' ')
.add(b'!')
.add(b'"')
Expand Down
16 changes: 8 additions & 8 deletions percent_encoding/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
//! use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
//!
//! /// https://url.spec.whatwg.org/#fragment-percent-encode-set
//! const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
//! const FRAGMENT: AsciiSet = CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
//!
//! assert_eq!(utf8_percent_encode("foo <bar>", FRAGMENT).to_string(), "foo%20%3Cbar%3E");
//! ```
Expand Down Expand Up @@ -114,7 +114,7 @@ pub fn percent_encode_byte(byte: u8) -> &'static str {
/// assert_eq!(percent_encode(b"foo bar?", NON_ALPHANUMERIC).to_string(), "foo%20bar%3F");
/// ```
#[inline]
pub fn percent_encode<'a>(input: &'a [u8], ascii_set: &'static AsciiSet) -> PercentEncode<'a> {
pub fn percent_encode(input: &[u8], ascii_set: AsciiSet) -> PercentEncode<'_> {
PercentEncode {
bytes: input,
ascii_set,
Expand All @@ -133,15 +133,15 @@ pub fn percent_encode<'a>(input: &'a [u8], ascii_set: &'static AsciiSet) -> Perc
/// assert_eq!(utf8_percent_encode("foo bar?", NON_ALPHANUMERIC).to_string(), "foo%20bar%3F");
/// ```
#[inline]
pub fn utf8_percent_encode<'a>(input: &'a str, ascii_set: &'static AsciiSet) -> PercentEncode<'a> {
pub fn utf8_percent_encode(input: &str, ascii_set: AsciiSet) -> PercentEncode<'_> {
percent_encode(input.as_bytes(), ascii_set)
}

/// The return type of [`percent_encode`] and [`utf8_percent_encode`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PercentEncode<'a> {
bytes: &'a [u8],
ascii_set: &'static AsciiSet,
ascii_set: AsciiSet,
}

impl<'a> Iterator for PercentEncode<'a> {
Expand Down Expand Up @@ -378,8 +378,8 @@ mod tests {
}

#[test]
fn percent_encode_accepts_ascii_set_ref() {
let encoded = percent_encode(b"foo bar?", &AsciiSet::EMPTY);
fn percent_encode_accepts_value() {
let encoded = percent_encode(b"foo bar?", AsciiSet::EMPTY);
assert_eq!(encoded.collect::<String>(), "foo bar?");
}

Expand All @@ -405,8 +405,8 @@ mod tests {
}

#[test]
fn utf8_percent_encode_accepts_ascii_set_ref() {
let encoded = super::utf8_percent_encode("foo bar?", &AsciiSet::EMPTY);
fn utf8_percent_encode_accepts_value() {
let encoded = super::utf8_percent_encode("foo bar?", AsciiSet::EMPTY);
assert_eq!(encoded.collect::<String>(), "foo bar?");
}

Expand Down
14 changes: 7 additions & 7 deletions url/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ use form_urlencoded::EncodingOverride;
use percent_encoding::{percent_encode, utf8_percent_encode, AsciiSet, CONTROLS};

/// https://url.spec.whatwg.org/#fragment-percent-encode-set
const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
const FRAGMENT: AsciiSet = CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');

/// https://url.spec.whatwg.org/#path-percent-encode-set
const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}');
const PATH: AsciiSet = FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}');

/// https://url.spec.whatwg.org/#userinfo-percent-encode-set
pub(crate) const USERINFO: &AsciiSet = &PATH
pub(crate) const USERINFO: AsciiSet = PATH
.add(b'/')
.add(b':')
.add(b';')
Expand All @@ -34,15 +34,15 @@ pub(crate) const USERINFO: &AsciiSet = &PATH
.add(b'^')
.add(b'|');

pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(b'%');
pub(crate) const PATH_SEGMENT: AsciiSet = PATH.add(b'/').add(b'%');

// The backslash (\) character is treated as a path separator in special URLs
// so it needs to be additionally escaped in that case.
pub(crate) const SPECIAL_PATH_SEGMENT: &AsciiSet = &PATH_SEGMENT.add(b'\\');
pub(crate) const SPECIAL_PATH_SEGMENT: AsciiSet = PATH_SEGMENT.add(b'\\');

// https://url.spec.whatwg.org/#query-state
const QUERY: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>');
const SPECIAL_QUERY: &AsciiSet = &QUERY.add(b'\'');
const QUERY: AsciiSet = CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>');
const SPECIAL_QUERY: AsciiSet = QUERY.add(b'\'');

pub type ParseResult<T> = Result<T, ParseError>;

Expand Down

0 comments on commit 2fc9dfb

Please sign in to comment.