Skip to content

Commit de97692

Browse files
authored
Merge pull request #27 from richkadel/more-fuzz-fixes
More fuzz fixes
2 parents 1779f54 + e86fecb commit de97692

9 files changed

+238
-19
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "json5format"
3-
version = "0.2.1"
3+
version = "0.2.2"
44
authors = [
55
"Rich Kadel <richkadel@google.com>",
66
"David Tamas-Parris <davidatp@google.com>",
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
]}
Binary file not shown.
Binary file not shown.

src/content.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,19 @@ impl ParsedDocument {
3030
/// If a filename is also provided, any parsing errors will include the filename with the line
3131
/// number and column where the error was encountered.
3232
pub fn from_str(buffer: &str, filename: Option<String>) -> Result<Self, Error> {
33+
Self::from_str_with_nesting_limit(buffer, filename, Parser::DEFAULT_NESTING_LIMIT)
34+
}
35+
36+
/// Like `from_str()` but also overrides the default nesting limit, used to
37+
/// catch deeply nested JSON5 documents before overflowing the program
38+
/// stack.
39+
pub fn from_str_with_nesting_limit(
40+
buffer: &str,
41+
filename: Option<String>,
42+
nesting_limit: usize,
43+
) -> Result<Self, Error> {
3344
let mut parser = Parser::new(&filename);
45+
parser.set_nesting_limit(nesting_limit);
3446
let content = parser.parse(&buffer)?;
3547

3648
Ok(Self { owned_buffer: None, filename, content })

src/parser.rs

Lines changed: 135 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -227,11 +227,18 @@ pub(crate) struct Parser<'parser> {
227227
/// and so on.
228228
scope_stack: Vec<Rc<RefCell<Value>>>,
229229

230+
/// To avoid accidentally overflowing the program stack, limit the number of
231+
/// nested scopes and generate an error if it is exceeded.
232+
nesting_limit: usize,
233+
230234
/// Captures a colon token when expected.
231235
colon_capturer: Capturer,
232236
}
233237

234238
impl<'parser> Parser<'parser> {
239+
/// The default limit of nested scopes when parsing a JSON5 document.
240+
pub const DEFAULT_NESTING_LIMIT: usize = 1000;
241+
235242
pub fn new(filename: &'parser Option<String>) -> Self {
236243
let remaining = "";
237244
let current_line = &remaining;
@@ -244,11 +251,19 @@ impl<'parser> Parser<'parser> {
244251
column_number: 1,
245252
next_line_number: 1,
246253
next_column_number: 1,
247-
scope_stack: vec![Rc::new(RefCell::new(Array::new(vec![])))],
254+
scope_stack: Vec::default(),
255+
nesting_limit: Self::DEFAULT_NESTING_LIMIT,
248256
colon_capturer: Capturer::new(&COLON),
249257
}
250258
}
251259

260+
/// To avoid accidentally overflowing the program stack, there is a mutable
261+
/// limit on the number of nested scopes allowed. If this limit is exceeded
262+
/// while parsing a document, a parser error is generated.
263+
pub fn set_nesting_limit(&mut self, new_limit: usize) {
264+
self.nesting_limit = new_limit;
265+
}
266+
252267
fn current_scope(&self) -> Rc<RefCell<Value>> {
253268
assert!(self.scope_stack.len() > 0);
254269
self.scope_stack.last().unwrap().clone()
@@ -311,6 +326,12 @@ impl<'parser> Parser<'parser> {
311326
self.with_container(|container| container.add_value(value_ref.clone(), self))?;
312327
if is_container {
313328
self.scope_stack.push(value_ref.clone());
329+
if self.scope_stack.len() > self.nesting_limit {
330+
return Err(self.error(format!(
331+
"The given JSON5 document exceeds the parser's nesting limit of {}",
332+
self.nesting_limit
333+
)));
334+
}
314335
}
315336
Ok(())
316337
}
@@ -483,7 +504,11 @@ impl<'parser> Parser<'parser> {
483504

484505
fn exit_scope(&mut self) -> Result<(), Error> {
485506
self.scope_stack.pop();
486-
Ok(())
507+
if self.scope_stack.is_empty() {
508+
Err(self.error("Closing brace without a matching opening brace"))
509+
} else {
510+
Ok(())
511+
}
487512
}
488513

489514
fn close_object(&mut self) -> Result<(), Error> {
@@ -515,7 +540,7 @@ impl<'parser> Parser<'parser> {
515540
indicator += &"~".repeat(if self.line_number == self.next_line_number {
516541
self.next_column_number - self.column_number - 1
517542
} else {
518-
self.current_line.len() - self.column_number
543+
0
519544
});
520545
}
521546
Error::parse(self.location(), format!("{}:\n{}\n{}", err, self.current_line, indicator))
@@ -563,8 +588,34 @@ impl<'parser> Parser<'parser> {
563588
}
564589
}
565590

591+
/// Parse the given document string as a JSON5 document containing Array
592+
/// elements (with implicit outer braces). Document locations (use in, for
593+
/// example, error messages), are 1-based and start at line 1, column 1.
566594
pub fn parse(&mut self, buffer: &'parser str) -> Result<Array, Error> {
595+
self.parse_from_location(buffer, 1, 1)
596+
}
597+
598+
/// Parse the given document string as a JSON5 document containing Array
599+
/// elements (with implicit outer braces), and use the given 1-based line
600+
/// and column numbers when referring to document locations.
601+
pub fn parse_from_location(
602+
&mut self,
603+
buffer: &'parser str,
604+
starting_line_number: usize,
605+
starting_column_number: usize,
606+
) -> Result<Array, Error> {
567607
self.remaining = buffer;
608+
self.current_line = &self.remaining;
609+
610+
assert!(starting_line_number > 0, "document line numbers are 1-based");
611+
self.next_line_number = starting_line_number;
612+
self.next_column_number = starting_column_number;
613+
614+
self.next_line = self.current_line;
615+
self.line_number = self.next_line_number - 1;
616+
self.column_number = self.next_column_number - 1;
617+
self.scope_stack = vec![Rc::new(RefCell::new(Array::new(vec![])))];
618+
568619
let mut next_token = Capturer::new(&NEXT_TOKEN);
569620
let mut single_quoted = Capturer::new(&SINGLE_QUOTED);
570621
let mut double_quoted = Capturer::new(&DOUBLE_QUOTED);
@@ -1680,4 +1731,85 @@ mod tests {
16801731
let object_value = Object::new(vec![]);
16811732
assert!(object_value.is_object());
16821733
}
1734+
1735+
#[test]
1736+
fn test_document_exceeds_nesting_limit() {
1737+
let mut parser = Parser::new(&None);
1738+
parser.set_nesting_limit(5);
1739+
let good_buffer = r##"{
1740+
list_of_lists_of_lists: [[[]]]
1741+
}"##;
1742+
parser.parse_from_location(&good_buffer, 8, 15).expect("should NOT exceed nesting limit");
1743+
1744+
let bad_buffer = r##"{
1745+
list_of_lists_of_lists: [[[[]]]]
1746+
}"##;
1747+
let err = parser
1748+
.parse_from_location(&bad_buffer, 8, 15)
1749+
.expect_err("should exceed nesting limit");
1750+
match err {
1751+
Error::Parse(_, message) => {
1752+
assert_eq!(
1753+
message,
1754+
r##"The given JSON5 document exceeds the parser's nesting limit of 5:
1755+
list_of_lists_of_lists: [[[[]]]]
1756+
^"##
1757+
)
1758+
}
1759+
_ => panic!("expected a parser error"),
1760+
}
1761+
}
1762+
1763+
#[test]
1764+
fn test_parse_from_location_error_location() {
1765+
let filename = Some("mixed_content.md".to_string());
1766+
let mixed_document = r##"
1767+
Mixed Content Doc
1768+
=================
1769+
1770+
This is a document with embedded JSON5 content.
1771+
1772+
```json5
1773+
json5_value = {
1774+
// The next line should generate a parser error
1775+
999,
1776+
}
1777+
```
1778+
1779+
End of mixed content document.
1780+
"##;
1781+
let json5_slice =
1782+
&mixed_document[mixed_document.find("{").unwrap()..mixed_document.find("}").unwrap()];
1783+
let mut parser = Parser::new(&filename);
1784+
let err = parser
1785+
.parse_from_location(json5_slice, 8, 15)
1786+
.expect_err("check error message for location");
1787+
match err {
1788+
Error::Parse(Some(loc), message) => {
1789+
assert_eq!(loc.file, Some("mixed_content.md".to_owned()));
1790+
assert_eq!(loc.line, 10);
1791+
assert_eq!(loc.col, 5);
1792+
assert_eq!(
1793+
message,
1794+
r##"Object values require property names:
1795+
999,
1796+
^~~"##
1797+
)
1798+
}
1799+
_ => panic!("expected a parser error"),
1800+
}
1801+
}
1802+
1803+
#[test]
1804+
fn test_doc_with_nulls() {
1805+
let mut parser = Parser::new(&None);
1806+
let buffer = "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[////[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]";
1807+
let err = parser.parse(&buffer).expect_err("should fail");
1808+
match err {
1809+
Error::Parse(_, message) => {
1810+
assert!(message.starts_with("Mismatched braces in the document:"));
1811+
}
1812+
_ => panic!("expected a parser error"),
1813+
}
1814+
}
16831815
}

tests/lib.rs

Lines changed: 88 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1688,6 +1688,45 @@ fn test_parse_error_block_comment_not_closed() {
16881688
.unwrap();
16891689
}
16901690

1691+
#[test]
1692+
fn test_parse_error_closing_brace_without_opening_brace() {
1693+
test_format(FormatTest {
1694+
input: r##"]"##,
1695+
error: Some(
1696+
r#"Parse error: 1:1: Closing brace without a matching opening brace:
1697+
]
1698+
^"#,
1699+
),
1700+
..Default::default()
1701+
})
1702+
.unwrap();
1703+
1704+
test_format(FormatTest {
1705+
input: r##"
1706+
1707+
]"##,
1708+
error: Some(
1709+
r#"Parse error: 3:3: Closing brace without a matching opening brace:
1710+
]
1711+
^"#,
1712+
),
1713+
..Default::default()
1714+
})
1715+
.unwrap();
1716+
1717+
test_format(FormatTest {
1718+
input: r##"
1719+
}"##,
1720+
error: Some(
1721+
r#"Parse error: 2:5: Invalid Object token found while parsing an Array of 0 items (mismatched braces?):
1722+
}
1723+
^"#,
1724+
),
1725+
..Default::default()
1726+
})
1727+
.unwrap();
1728+
}
1729+
16911730
#[test]
16921731
fn test_multibyte_unicode_chars() {
16931732
test_format(FormatTest {
@@ -1725,12 +1764,21 @@ fn test_multibyte_unicode_chars() {
17251764
.unwrap();
17261765
}
17271766

1728-
fn visit_dir<F>(dir: &Path, cb: F) -> io::Result<()>
1767+
#[test]
1768+
fn test_empty_document() {
1769+
test_format(FormatTest { options: None, input: "", expected: "", ..Default::default() })
1770+
.unwrap();
1771+
}
1772+
1773+
fn visit_dir<F>(dir: &Path, cb: &mut F) -> io::Result<()>
17291774
where
1730-
F: Fn(&DirEntry) -> Result<(), std::io::Error> + Copy,
1775+
F: FnMut(&DirEntry) -> Result<(), std::io::Error>,
17311776
{
17321777
if !dir.is_dir() {
1733-
Err(io::Error::new(io::ErrorKind::Other, "visit_dir called with an invalid path"))
1778+
Err(io::Error::new(
1779+
io::ErrorKind::Other,
1780+
format!("visit_dir called with an invalid path: {:?}", dir),
1781+
))
17341782
} else {
17351783
for entry in fs::read_dir(dir)? {
17361784
let entry = entry?;
@@ -1751,26 +1799,52 @@ where
17511799
///
17521800
/// To manually verify test samples, use:
17531801
/// cargo test test_parsing_samples_does_not_crash -- --nocapture
1802+
///
1803+
/// To print the full error message (including the line and pointer to the
1804+
/// column), use:
1805+
/// JSON5FORMAT_TEST_FULL_ERRORS=1 cargo test test_parsing_samples_does_not_crash -- --nocapture
1806+
/// To point to a different samples directory:
1807+
/// JSON5FORMAT_TEST_SAMPLES_DIR="/tmp/fuzz_corpus" cargo test test_parsing_samples_does_not_crash
17541808
#[test]
17551809
fn test_parsing_samples_does_not_crash() -> Result<(), std::io::Error> {
1756-
let mut pathbuf = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
1757-
pathbuf.push("samples");
1758-
visit_dir(pathbuf.as_path(), |entry| {
1810+
let mut count = 0;
1811+
let pathbuf = if let Some(samples_dir) = option_env!("JSON5FORMAT_TEST_SAMPLES_DIR") {
1812+
PathBuf::from(samples_dir)
1813+
} else {
1814+
let mut manifest_samples = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
1815+
manifest_samples.push("samples");
1816+
manifest_samples
1817+
};
1818+
visit_dir(pathbuf.as_path(), &mut |entry| {
1819+
count += 1;
17591820
let filename = entry.path().into_os_string().to_string_lossy().to_string();
17601821
let mut buffer = String::new();
1761-
fs::File::open(&entry.path())?.read_to_string(&mut buffer)?;
1822+
println!("{}. Parsing: {} ...", count, filename);
1823+
if let Err(err) = fs::File::open(&entry.path())?.read_to_string(&mut buffer) {
1824+
println!("Ignoring failure to read the file into a string: {:?}", err);
1825+
return Ok(());
1826+
}
17621827
let result = ParsedDocument::from_string(buffer, Some(filename.clone()));
17631828
match result {
17641829
Ok(_parsed_document) => {
1765-
println!("Successfully parsed: {}", filename);
1830+
println!(" ... Success");
17661831
Ok(())
17671832
}
1768-
Err(Error::Parse(_, message)) => {
1769-
println!(
1770-
"Caught input error: {}\n {}",
1771-
filename,
1772-
message.lines().next().unwrap()
1773-
);
1833+
Err(err @ Error::Parse(..)) => {
1834+
if option_env!("JSON5FORMAT_TEST_FULL_ERRORS") == Some("1") {
1835+
println!(" ... Handled input error:\n{}", err);
1836+
} else if let Error::Parse(some_loc, message) = err {
1837+
let loc_string = if let Some(loc) = some_loc {
1838+
format!(" at {}:{}", loc.line, loc.col)
1839+
} else {
1840+
"".to_owned()
1841+
};
1842+
let mut first_line = message.lines().next().unwrap();
1843+
// strip the colon off the end of the first line of a parser error message
1844+
first_line = &first_line[0..first_line.len() - 1];
1845+
println!(" ... Handled input error{}: {}", loc_string, first_line);
1846+
}
1847+
17741848
// It's OK if the input file is bad, as long as the parser fails
17751849
// gracefully.
17761850
Ok(())

0 commit comments

Comments
 (0)