From 64b147fc3610ce1dd11cffe319b16effa7644891 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Fri, 26 Jul 2024 14:43:34 -0700 Subject: [PATCH] Improve Breakpad file parsing performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our breakpad parser supports various CFI directives and other constructs, despite never actually using them. That is partly necessary: we need to recognize them in order to ignore them. However, we don't have to go all the way and parse them correctly. Rather, we can just skip over them more quickly. Adjust the code to do just that. As a result, we are a bit more lenient in what we accept (except for spurious empty lines that we now no longer accept...), but nobody should be using our parser to judge validity of a file, so being more accepting is fine. As a result, we speed up parsing of breakpad files that contain quite significantly: > main/symbolize_breakpad > time: [245.08 ms 245.62 ms 246.20 ms] > change: [−12.182% −11.608% −11.143%] (p = 0.00 < 0.02) > Performance has improved. Signed-off-by: Daniel Müller --- CHANGELOG.md | 1 + src/breakpad/parser.rs | 76 ++++-------------------------------------- 2 files changed, 8 insertions(+), 69 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f27a325..4f9d0c61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ Unreleased read failure - Added support for file based symbolization support on the Windows operating system +- Improved performance for parsing Breakpad files 0.2.0-rc.0 diff --git a/src/breakpad/parser.rs b/src/breakpad/parser.rs index 2707484b..4ee70f8d 100644 --- a/src/breakpad/parser.rs +++ b/src/breakpad/parser.rs @@ -37,11 +37,8 @@ use std::str; use nom::branch::alt; use nom::bytes::complete::tag; use nom::bytes::complete::take_while; -use nom::character::complete::hex_digit1; use nom::character::complete::multispace0; use nom::character::complete::space1; -use nom::character::is_digit; -use nom::character::is_hex_digit; use nom::combinator::cut; use nom::combinator::map; use nom::combinator::map_res; @@ -288,11 +285,6 @@ fn decimal_u32(input: &[u8]) -> IResult<&[u8], u32, VerboseError<&[u8]>> { Ok((remaining, res)) } -/// Take 0 or more non-space bytes. -fn non_space(input: &[u8]) -> IResult<&[u8], &[u8], VerboseError<&[u8]>> { - take_while(|c: u8| c != b' ')(input) -} - /// Accept `\n` with an arbitrary number of preceding `\r` bytes. /// /// This is different from `line_ending` which doesn't accept `\r` if it isn't @@ -309,37 +301,18 @@ fn not_my_eol(input: &[u8]) -> IResult<&[u8], &[u8], VerboseError<&[u8]>> { take_while(|b| b != b'\r' && b != b'\n')(input) } -/// Parse a single byte if it matches the predicate. -/// -/// nom has `satisfy`, which is similar. It differs in the argument type of the -/// predicate: `satisfy`'s predicate takes `char`, whereas `single`'s predicate -/// takes `u8`. -fn single(predicate: fn(u8) -> bool) -> impl Fn(&[u8]) -> IResult<&[u8], u8, VerboseError<&[u8]>> { - move |i: &[u8]| match i.split_first() { - Some((b, rest)) if predicate(*b) => Ok((rest, *b)), - _ => Err(Err::Error(VerboseError::from_error_kind( - i, - ErrorKind::Satisfy, - ))), - } -} - /// Matches a MODULE record. fn module_line(input: &[u8]) -> IResult<&[u8], (), VerboseError<&[u8]>> { let (input, _) = terminated(tag("MODULE"), space1)(input)?; - let (input, _) = cut(tuple(( - terminated(non_space, space1), // os - terminated(non_space, space1), // cpu - terminated(hex_digit1, space1), // debug id - terminated(not_my_eol, my_eol), // filename - )))(input)?; + let (input, _) = cut(terminated(not_my_eol, my_eol))(input)?; + Ok((input, ())) } /// Matches an INFO URL record. fn info_url(input: &[u8]) -> IResult<&[u8], (), VerboseError<&[u8]>> { let (input, _) = terminated(tag("INFO URL"), space1)(input)?; - let (input, _url) = cut(terminated(map_res(not_my_eol, str::from_utf8), my_eol))(input)?; + let (input, _) = cut(terminated(not_my_eol, my_eol))(input)?; Ok((input, ())) } @@ -468,34 +441,7 @@ fn inline_line(input: &[u8]) -> IResult<&[u8], impl Iterator, Ve /// Matches a STACK WIN record. fn stack_win_line(input: &[u8]) -> IResult<&[u8], (), VerboseError<&[u8]>> { let (input, _) = terminated(tag("STACK WIN"), space1)(input)?; - let ( - input, - ( - _ty, - _address, - _code_size, - _prologue_size, - _epilogue_size, - _parameter_size, - _saved_register_size, - _local_size, - _max_stack_size, - _has_program_string, - _rest, - ), - ) = cut(tuple(( - terminated(single(is_hex_digit), space1), // ty - terminated(hex_str::, space1), // addr - terminated(hex_str::, space1), // code_size - terminated(hex_str::, space1), // prologue_size - terminated(hex_str::, space1), // epilogue_size - terminated(hex_str::, space1), // parameter_size - terminated(hex_str::, space1), // saved_register_size - terminated(hex_str::, space1), // local_size - terminated(hex_str::, space1), // max_stack_size - terminated(map(single(is_digit), |b| b == b'1'), space1), // has_program_string - terminated(map_res(not_my_eol, str::from_utf8), my_eol), - )))(input)?; + let (input, _) = cut(terminated(not_my_eol, my_eol))(input)?; Ok((input, ())) } @@ -503,10 +449,7 @@ fn stack_win_line(input: &[u8]) -> IResult<&[u8], (), VerboseError<&[u8]>> { /// Matches a STACK CFI record. fn stack_cfi(input: &[u8]) -> IResult<&[u8], (), VerboseError<&[u8]>> { let (input, _) = terminated(tag("STACK CFI"), space1)(input)?; - let (input, (_address, _rules)) = cut(tuple(( - terminated(hex_str::, space1), - terminated(map_res(not_my_eol, str::from_utf8), my_eol), - )))(input)?; + let (input, _) = cut(terminated(not_my_eol, my_eol))(input)?; Ok((input, ())) } @@ -514,11 +457,7 @@ fn stack_cfi(input: &[u8]) -> IResult<&[u8], (), VerboseError<&[u8]>> { /// Matches a STACK CFI INIT record. fn stack_cfi_init(input: &[u8]) -> IResult<&[u8], (), VerboseError<&[u8]>> { let (input, _) = terminated(tag("STACK CFI INIT"), space1)(input)?; - let (input, (_address, _size, _rules)) = cut(tuple(( - terminated(hex_str::, space1), - terminated(hex_str::, space1), - terminated(map_res(not_my_eol, str::from_utf8), my_eol), - )))(input)?; + let (input, _) = cut(terminated(not_my_eol, my_eol))(input)?; Ok((input, ())) } @@ -1166,7 +1105,6 @@ STACK CFI INIT badf00d abc init rules STACK CFI deadf00d some rules STACK CFI deadbeef more rules STACK CFI INIT f00f f0 more init rules - "[..]; let sym = SymbolFile::from_bytes(bytes).unwrap(); assert_eq!(sym.files.len(), 2); @@ -1253,7 +1191,7 @@ FUNC 1001 10 10 some func overlap contained FILE 0 foo.c "[..] ) - .is_err(),); + .is_ok(),); assert!(SymbolFile::from_bytes( &b"MODULE Linux x86 abcd1234 foo