-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement OsStr::slice_encoded_bytes() proof of concept
- Loading branch information
Showing
2 changed files
with
98 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#![allow(unsafe_code)] | ||
use std::ffi::OsStr; | ||
use std::ops::RangeBounds; | ||
|
||
/// Extension trait that adds byte-range slicing to [`OsStr`].
pub(crate) trait OsStrSlice {
    /// Takes a substring based on a range that corresponds to the return value of
    /// [`OsStr::as_encoded_bytes`].
    ///
    /// The range's start and end must lie on valid `OsStr` boundaries, meaning one of:
    /// - The start of the string
    /// - The end of the string
    /// - Immediately before a valid non-empty UTF-8 substring
    /// - Immediately after a valid non-empty UTF-8 substring
    ///
    /// This requirement holds even on platforms where the underlying encoding is more
    /// permissive.
    ///
    /// # Panics
    ///
    /// Panics if the range does not lie on valid `OsStr` boundaries, if its start is
    /// greater than its end, if its end is past the end of the encoded bytes, or if
    /// resolving an inclusive/exclusive bound overflows `usize`.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use std::ffi::OsStr;
    ///
    /// let os_str = OsStr::new("foo=bar");
    /// let bytes = os_str.as_encoded_bytes();
    /// if let Some(index) = bytes.iter().position(|b| *b == b'=') {
    ///     let key = os_str.slice_encoded_bytes(..index);
    ///     let value = os_str.slice_encoded_bytes(index + 1..);
    ///     assert_eq!(key, "foo");
    ///     assert_eq!(value, "bar");
    /// }
    /// ```
    fn slice_encoded_bytes<R: RangeBounds<usize>>(&self, range: R) -> &Self;
}

impl OsStrSlice for OsStr {
    fn slice_encoded_bytes<R: RangeBounds<usize>>(&self, range: R) -> &Self {
        use std::ops::Bound;

        /// Reports whether `bytes` may be split at `index` without cutting through
        /// anything other than valid UTF-8.
        ///
        /// The caller must have checked `index <= bytes.len()` already.
        fn is_valid_boundary(bytes: &[u8], index: usize) -> bool {
            if index == 0 || index == bytes.len() {
                return true;
            }

            // Fast path: an ASCII byte on either side is by itself a valid
            // non-empty UTF-8 substring adjacent to the split point.
            if bytes[index - 1].is_ascii() || bytes[index].is_ascii() {
                return true;
            }

            let (before, after) = bytes.split_at(index);

            // UTF-8 takes at most 4 bytes per codepoint, so we don't
            // need to check more than that.
            let head = after.get(..4).unwrap_or(after);
            let utf8_follows = match std::str::from_utf8(head) {
                Ok(_) => true,
                // A non-zero valid prefix means at least one whole code point
                // starts at `index`; the error is further along (possibly only
                // because of the 4-byte truncation above).
                Err(err) => err.valid_up_to() != 0,
            };
            if utf8_follows {
                return true;
            }

            // Otherwise look for a code point that ends exactly at `index`.
            // Length 1 is the ASCII case already handled by the fast path.
            (2..=index.min(4)).any(|len| std::str::from_utf8(&before[index - len..]).is_ok())
        }

        let bytes = self.as_encoded_bytes();
        let len = bytes.len();

        // Resolve the generic range into a half-open `start..end` using only
        // stable APIs (`std::slice::range` is still feature-gated).
        let start = match range.start_bound() {
            Bound::Included(&start) => start,
            Bound::Excluded(&start) => start
                .checked_add(1)
                .expect("attempted to index slice from after maximum usize"),
            Bound::Unbounded => 0,
        };
        let end = match range.end_bound() {
            Bound::Included(&end) => end
                .checked_add(1)
                .expect("attempted to index slice up to maximum usize"),
            Bound::Excluded(&end) => end,
            Bound::Unbounded => len,
        };
        assert!(
            start <= end,
            "slice index starts at {} but ends at {}",
            start,
            end
        );
        assert!(
            end <= len,
            "range end index {} out of range for encoded bytes of length {}",
            end,
            len
        );

        assert!(
            is_valid_boundary(bytes, start),
            "byte index {} is not an OsStr boundary",
            start
        );
        assert!(
            is_valid_boundary(bytes, end),
            "byte index {} is not an OsStr boundary",
            end
        );

        // SAFETY: bytes was obtained from an OsStr just now, and we validated
        // that we only slice immediately before or after a valid non-empty
        // UTF-8 substring (or at the very start/end of the string).
        unsafe { Self::from_encoded_bytes_unchecked(&bytes[start..end]) }
    }
}