Skip to content

Commit

Permalink
Improve Breakpad file parsing performance
Browse files Browse the repository at this point in the history
Our breakpad parser supports various CFI directives and other
constructs, despite never actually using them. That is partly necessary:
we need to recognize them in order to ignore them. However, we don't
have to go all the way and parse them correctly. Rather, we can just
skip over them more quickly. Adjust the code to do just that. As a
result, we are a bit more lenient in what we accept (except for spurious
empty lines that we now no longer accept...), but nobody should be using
our parser to judge validity of a file, so being more accepting is fine.
As a result, we speed up parsing of breakpad files that contain quite
significantly:

  > main/symbolize_breakpad
  >   time:   [245.08 ms 245.62 ms 246.20 ms]
  >   change: [−12.182% −11.608% −11.143%] (p = 0.00 < 0.02)
  >   Performance has improved.

Signed-off-by: Daniel Müller <deso@posteo.net>
  • Loading branch information
d-e-s-o committed Jul 26, 2024
1 parent d2071ad commit 64b147f
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 69 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Unreleased
read failure
- Added support for file based symbolization support on the Windows operating
system
- Improved performance for parsing Breakpad files


0.2.0-rc.0
Expand Down
76 changes: 7 additions & 69 deletions src/breakpad/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,8 @@ use std::str;
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::bytes::complete::take_while;
use nom::character::complete::hex_digit1;
use nom::character::complete::multispace0;
use nom::character::complete::space1;
use nom::character::is_digit;
use nom::character::is_hex_digit;
use nom::combinator::cut;
use nom::combinator::map;
use nom::combinator::map_res;
Expand Down Expand Up @@ -288,11 +285,6 @@ fn decimal_u32(input: &[u8]) -> IResult<&[u8], u32, VerboseError<&[u8]>> {
Ok((remaining, res))
}

/// Take 0 or more non-space bytes.
fn non_space(input: &[u8]) -> IResult<&[u8], &[u8], VerboseError<&[u8]>> {
take_while(|c: u8| c != b' ')(input)
}

/// Accept `\n` with an arbitrary number of preceding `\r` bytes.
///
/// This is different from `line_ending` which doesn't accept `\r` if it isn't
Expand All @@ -309,37 +301,18 @@ fn not_my_eol(input: &[u8]) -> IResult<&[u8], &[u8], VerboseError<&[u8]>> {
take_while(|b| b != b'\r' && b != b'\n')(input)
}

/// Parse a single byte if it matches the predicate.
///
/// nom has `satisfy`, which is similar. It differs in the argument type of the
/// predicate: `satisfy`'s predicate takes `char`, whereas `single`'s predicate
/// takes `u8`.
fn single(predicate: fn(u8) -> bool) -> impl Fn(&[u8]) -> IResult<&[u8], u8, VerboseError<&[u8]>> {
move |i: &[u8]| match i.split_first() {
Some((b, rest)) if predicate(*b) => Ok((rest, *b)),
_ => Err(Err::Error(VerboseError::from_error_kind(
i,
ErrorKind::Satisfy,
))),
}
}

/// Matches a MODULE record.
fn module_line(input: &[u8]) -> IResult<&[u8], (), VerboseError<&[u8]>> {
let (input, _) = terminated(tag("MODULE"), space1)(input)?;
let (input, _) = cut(tuple((
terminated(non_space, space1), // os
terminated(non_space, space1), // cpu
terminated(hex_digit1, space1), // debug id
terminated(not_my_eol, my_eol), // filename
)))(input)?;
let (input, _) = cut(terminated(not_my_eol, my_eol))(input)?;

Ok((input, ()))
}

/// Matches an INFO URL record.
fn info_url(input: &[u8]) -> IResult<&[u8], (), VerboseError<&[u8]>> {
let (input, _) = terminated(tag("INFO URL"), space1)(input)?;
let (input, _url) = cut(terminated(map_res(not_my_eol, str::from_utf8), my_eol))(input)?;
let (input, _) = cut(terminated(not_my_eol, my_eol))(input)?;
Ok((input, ()))
}

Expand Down Expand Up @@ -468,57 +441,23 @@ fn inline_line(input: &[u8]) -> IResult<&[u8], impl Iterator<Item = Inlinee>, Ve
/// Matches a STACK WIN record.
fn stack_win_line(input: &[u8]) -> IResult<&[u8], (), VerboseError<&[u8]>> {
let (input, _) = terminated(tag("STACK WIN"), space1)(input)?;
let (
input,
(
_ty,
_address,
_code_size,
_prologue_size,
_epilogue_size,
_parameter_size,
_saved_register_size,
_local_size,
_max_stack_size,
_has_program_string,
_rest,
),
) = cut(tuple((
terminated(single(is_hex_digit), space1), // ty
terminated(hex_str::<u64>, space1), // addr
terminated(hex_str::<u32>, space1), // code_size
terminated(hex_str::<u32>, space1), // prologue_size
terminated(hex_str::<u32>, space1), // epilogue_size
terminated(hex_str::<u32>, space1), // parameter_size
terminated(hex_str::<u32>, space1), // saved_register_size
terminated(hex_str::<u32>, space1), // local_size
terminated(hex_str::<u32>, space1), // max_stack_size
terminated(map(single(is_digit), |b| b == b'1'), space1), // has_program_string
terminated(map_res(not_my_eol, str::from_utf8), my_eol),
)))(input)?;
let (input, _) = cut(terminated(not_my_eol, my_eol))(input)?;

Ok((input, ()))
}

/// Matches a STACK CFI record.
fn stack_cfi(input: &[u8]) -> IResult<&[u8], (), VerboseError<&[u8]>> {
let (input, _) = terminated(tag("STACK CFI"), space1)(input)?;
let (input, (_address, _rules)) = cut(tuple((
terminated(hex_str::<u64>, space1),
terminated(map_res(not_my_eol, str::from_utf8), my_eol),
)))(input)?;
let (input, _) = cut(terminated(not_my_eol, my_eol))(input)?;

Ok((input, ()))
}

/// Matches a STACK CFI INIT record.
fn stack_cfi_init(input: &[u8]) -> IResult<&[u8], (), VerboseError<&[u8]>> {
let (input, _) = terminated(tag("STACK CFI INIT"), space1)(input)?;
let (input, (_address, _size, _rules)) = cut(tuple((
terminated(hex_str::<u64>, space1),
terminated(hex_str::<u32>, space1),
terminated(map_res(not_my_eol, str::from_utf8), my_eol),
)))(input)?;
let (input, _) = cut(terminated(not_my_eol, my_eol))(input)?;

Ok((input, ()))
}
Expand Down Expand Up @@ -1166,7 +1105,6 @@ STACK CFI INIT badf00d abc init rules
STACK CFI deadf00d some rules
STACK CFI deadbeef more rules
STACK CFI INIT f00f f0 more init rules
"[..];
let sym = SymbolFile::from_bytes(bytes).unwrap();
assert_eq!(sym.files.len(), 2);
Expand Down Expand Up @@ -1253,7 +1191,7 @@ FUNC 1001 10 10 some func overlap contained
FILE 0 foo.c
"[..]
)
.is_err(),);
.is_ok(),);

assert!(SymbolFile::from_bytes(
&b"MODULE Linux x86 abcd1234 foo
Expand Down

0 comments on commit 64b147f

Please sign in to comment.