Skip to content

Commit

Permalink
transmuting bytes to str when safe (#20)
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin authored Sep 20, 2023
1 parent 256e0b7 commit 95c7ac9
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 10 deletions.
2 changes: 1 addition & 1 deletion fuzz/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 20 additions & 9 deletions src/string_decoder.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::marker::PhantomData;
use std::ops::Range;

use crate::errors::{json_err, JsonResult};
use crate::errors::{json_err, json_error, JsonResult};

/// Growable byte buffer; the string decoder appends string bytes to it when a string
/// contains escape sequences, then reads the result back out as a `&str`.
pub type Tape = Vec<u8>;

Expand Down Expand Up @@ -29,24 +29,23 @@ impl<'t> AbstractStringDecoder<'t> for StringDecoder<'t> {
let start = index;
let mut last_escape = start;
let mut found_escape = false;
let mut ascii_only = true;

while let Some(next) = data.get(index) {
match next {
b'"' => {
// in theory we could use `std::str::from_utf8_unchecked` here;
// it leads to big performance gains, but it results in some cases (e.g. good_high_order_string)
// passing when they should error here. serde uses `std::str::from_utf8`, while python's `json.loads`
// allows these higher order strings
let result = if found_escape {
let s = if found_escape {
tape.extend_from_slice(&data[last_escape..index]);
std::str::from_utf8(tape)
to_str(tape, ascii_only, start)?
} else {
std::str::from_utf8(&data[start..index])
to_str(&data[start..index], ascii_only, start)?
};
index += 1;
return match result {
Ok(s) => Ok((s, index)),
Err(err) => json_err!(InvalidString, err.valid_up_to(), start - 1),
};
return Ok((s, index));
}
b'\\' => {
found_escape = true;
Expand Down Expand Up @@ -74,7 +73,9 @@ impl<'t> AbstractStringDecoder<'t> for StringDecoder<'t> {
}
// all values below 32 are invalid
next if *next < 32u8 => return json_err!(InvalidString, index - start, start - 1),
// do nothing, we ex
next if *next >= 128u8 && ascii_only => {
ascii_only = false;
}
_ => (),
}
index += 1;
Expand All @@ -83,6 +84,16 @@ impl<'t> AbstractStringDecoder<'t> for StringDecoder<'t> {
}
}

/// Interpret `bytes` as a `&str`, skipping UTF-8 validation when `ascii_only` is set.
///
/// * `bytes` - the raw bytes of the string contents.
/// * `ascii_only` - flag from the caller's scan; when `true` the bytes are converted
///   without any validation, otherwise they go through `std::str::from_utf8`.
/// * `start` - caller-supplied index of the string contents; `start - 1` is passed to
///   the error macro together with the number of valid leading bytes.
///
/// # Errors
///
/// Returns an `InvalidString` error when `ascii_only` is `false` and `bytes` is not
/// valid UTF-8.
fn to_str(bytes: &[u8], ascii_only: bool, start: usize) -> JsonResult<&str> {
    if !ascii_only {
        // Slow path: bytes >= 128 were seen, so full UTF-8 validation is required.
        return match std::str::from_utf8(bytes) {
            Ok(valid) => Ok(valid),
            Err(utf8_err) => Err(json_error!(InvalidString, utf8_err.valid_up_to(), start - 1)),
        };
    }

    // SAFETY: the caller sets `ascii_only` only after confirming that every character is
    // ascii, so the bytes can be reinterpreted as a `str` without validation.
    let unchecked = unsafe { std::str::from_utf8_unchecked(bytes) };
    Ok(unchecked)
}

/// Taken from https://github.com/serde-rs/json/blob/45f10ec816e3f2765ac08f7ca73752326b0475d7/src/read.rs#L873-L928
fn parse_escape(data: &[u8], index: usize, start: usize) -> JsonResult<(char, usize)> {
let (n, index) = parse_u4(data, index, start)?;
Expand Down

0 comments on commit 95c7ac9

Please sign in to comment.