Skip to content

Commit

Permalink
fixing error incompatibility
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin committed Oct 31, 2023
1 parent 0fe7466 commit 6657ec7
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 20 deletions.
9 changes: 5 additions & 4 deletions fuzz/fuzz_targets/compare_to_serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,11 @@ fn remove_suffix(s: &str) -> &str {
fn errors_equal(jiter_error: &JiterError, serde_error: &SerdeError) -> bool {
let jiter_error_str = jiter_error.to_string();
let serde_error_str = serde_error.to_string();
if jiter_error_str.starts_with("invalid escape at") {
// strings like `"\"\\u\\"` give a EOF error for serde and invalid escape for jiter
true
} else if serde_error_str.starts_with("number out of range") {
// if jiter_error_str.starts_with("invalid escape at") {
// // strings like `"\"\\u\\"` give a EOF error for serde and invalid escape for jiter
// true
// } else if serde_error_str.starts_with("number out of range") {
if serde_error_str.starts_with("number out of range") {
// ignore this case as serde is stricter so fails on this before jiter does
true
} else if serde_error_str.starts_with("recursion limit exceeded") {
Expand Down
8 changes: 4 additions & 4 deletions src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ pub enum JsonErrorType {
FloatKeyMustBeFinite,

/// Lone leading surrogate in hex escape.
LoneLeadingSurrogateInHexEscape,
LoneLeadingSurrogateInHexEscape(usize),

/// JSON has a comma after the last value in an array or map.
TrailingComma,
Expand All @@ -77,7 +77,7 @@ pub enum JsonErrorType {
TrailingCharacters,

/// Unexpected end of hex escape.
UnexpectedEndOfHexEscape,
UnexpectedEndOfHexEscape(usize),

/// Encountered nesting of JSON maps and arrays more than 128 layers deep.
RecursionLimitExceeded,
Expand Down Expand Up @@ -109,10 +109,10 @@ impl std::fmt::Display for JsonErrorType {
Self::KeyMustBeAString => f.write_str("key must be a string"),
Self::ExpectedNumericKey => f.write_str("invalid value: expected key to be a number in quotes"),
Self::FloatKeyMustBeFinite => f.write_str("float key must be finite (got NaN or +/-inf)"),
Self::LoneLeadingSurrogateInHexEscape => f.write_str("lone leading surrogate in hex escape"),
Self::LoneLeadingSurrogateInHexEscape(_) => f.write_str("lone leading surrogate in hex escape"),
Self::TrailingComma => f.write_str("trailing comma"),
Self::TrailingCharacters => f.write_str("trailing characters"),
Self::UnexpectedEndOfHexEscape => f.write_str("unexpected end of hex escape"),
Self::UnexpectedEndOfHexEscape(_) => f.write_str("unexpected end of hex escape"),
Self::RecursionLimitExceeded => f.write_str("recursion limit exceeded"),
}
}
Expand Down
28 changes: 16 additions & 12 deletions src/string_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,16 @@ fn to_str(bytes: &[u8], ascii_only: bool, start: usize) -> JsonResult<&str> {
}
}

/// Taken from https://github.com/serde-rs/json/blob/45f10ec816e3f2765ac08f7ca73752326b0475d7/src/read.rs#L873-L928
/// Taken approximately from https://github.com/serde-rs/json/blob/v1.0.107/src/read.rs#L872-L945
fn parse_escape(data: &[u8], index: usize, start: usize) -> JsonResult<(char, usize)> {
let (n, index) = parse_u4(data, index, start)?;
match n {
0xDC00..=0xDFFF => json_err!(InvalidEscape, index - start, start - 1),
0xD800..=0xDBFF => match (data.get(index + 1), data.get(index + 2)) {
(Some(b'\\'), Some(b'u')) => {
0xDC00..=0xDFFF => json_err!(LoneLeadingSurrogateInHexEscape, index - start, start - 1),
0xD800..=0xDBFF => match data.get(index + 1..index + 3) {
Some(slice) if slice == b"\\u" => {
let (n2, index) = parse_u4(data, index + 2, start)?;
if !(0xDC00..=0xDFFF).contains(&n2) {
return json_err!(InvalidEscape, index - start, start - 1);
return json_err!(LoneLeadingSurrogateInHexEscape, index - start, start - 1);
}
let n2 = (((n - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;

Expand All @@ -112,23 +112,27 @@ fn parse_escape(data: &[u8], index: usize, start: usize) -> JsonResult<(char, us
None => json_err!(EofWhileParsingString, index),
}
}
_ => json_err!(InvalidEscape, index - start, start - 1),
Some(_) => json_err!(UnexpectedEndOfHexEscape, index - start, start - 1),
None => match data.get(index + 1) {
Some(b'\\') | None => json_err!(EofWhileParsingString, data.len()),
Some(_) => json_err!(UnexpectedEndOfHexEscape, index - start, start - 1),
},
},
_ => match char::from_u32(n as u32) {
Some(c) => Ok((c, index)),
None => json_err!(EofWhileParsingString, index),
None => json_err!(InvalidEscape, index - start, index),
},
}
}

fn parse_u4(data: &[u8], mut index: usize, start: usize) -> JsonResult<(u16, usize)> {
let mut n = 0;
for _ in 0..4 {
let u4 = data
.get(index + 1..index + 5)
.ok_or_else(|| json_error!(EofWhileParsingString, data.len()))?;

for c in u4.iter() {
index += 1;
let c = match data.get(index) {
Some(c) => *c,
None => return json_err!(EofWhileParsingString, index),
};
let hex = match c {
b'0'..=b'9' => (c & 0x0f) as u16,
b'a'..=b'f' => (c - b'a' + 10) as u16,
Expand Down
32 changes: 32 additions & 0 deletions tests/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,38 @@ string_tests! {
controls_python: "\"\\b\\f\\n\\r\\t\"" => "\x08\x0c\n\r\t"; // python notation for the same thing
}

/// Generates one `#[test]` per entry, each checking that decoding a JSON string fails with the
/// expected error.
///
/// Entries have the form `name: json_literal => expected_literal;`. The generated test is named
/// `string_parsing_errors_<name>`. It asserts the parser peeks a string, decodes it with
/// `StringDecoder`, and compares `"{error_type:?} @ {index}"` against the expected literal.
/// A successful decode makes the test panic.
macro_rules! string_test_errors {
    ($($name:ident: $json:literal => $expected_error:literal;)*) => {
        $(
            paste::item! {
                #[test]
                fn [< string_parsing_errors_ $name >]() {
                    let mut parser = Parser::new($json.as_bytes());
                    assert_eq!(parser.peak().unwrap(), Peak::String);

                    let mut tape: Vec<u8> = Vec::new();
                    // Decoding must fail; pull the error out so it can be formatted below.
                    let error = match parser.consume_string::<StringDecoder>(&mut tape) {
                        Ok(t) => panic!("unexpectedly valid: {:?} -> {:?}", $json, t),
                        Err(e) => e,
                    };
                    let actual_error = format!("{:?} @ {}", error.error_type, error.index);
                    assert_eq!(actual_error, $expected_error);
                }
            }
        )*
    }
}

// Malformed `\u` escapes and the error each one should produce, written as
// `"<error_type Debug> @ <index>"` to match what `string_test_errors!` formats.
string_test_errors! {
    // input ends before four hex digits are available after `\u`
    u4_unclosed: r#""\uxx"# => "EofWhileParsingString @ 5";
    // 0xDBDD is a leading surrogate and the input ends immediately after it
    u4_unclosed2: r#""\udBdd"# => "EofWhileParsingString @ 7";
    // 0xDDBD lies in the 0xDC00..=0xDFFF range and is not preceded by a leading surrogate
    lone_leading_surrogate: r#""\uddBd""# => "LoneLeadingSurrogateInHexEscape(5) @ 0";
    // a leading surrogate followed by a single byte that does not start an escape
    unexpected_hex_escape1: r#""\udBd8x"# => "UnexpectedEndOfHexEscape(5) @ 0";
    // a leading surrogate followed by two bytes that are not `\u`
    unexpected_hex_escape2: r#""\udBd8xx"# => "UnexpectedEndOfHexEscape(5) @ 0";
}

#[test]
fn test_key_str() {
let json = r#"{"foo": "bar"}"#;
Expand Down

0 comments on commit 6657ec7

Please sign in to comment.