Skip to content

Commit

Permalink
Add optimistic text parsing for 20% improvement
Browse files Browse the repository at this point in the history
In the same vein of data driven optimizations: #111, #112

This commit introduces a fast, happy path parsing routine that takes
advantage of how typical save files are laid out and how the text
deserializer essentially only calls `read` when it either needs a key or
a value.

Keys will have leading whitespace (newline followed by tabs), so we
consume up to 8 of them at once. For EU4 this covers 100% of keys
in about 95% of saves (i.e., it is extremely rare for there to be more
than 8 whitespace characters in a row).

This happy path hoists the common keys and values (`{`, `}`, `"`,
and alphanumeric+dash) so it's more obvious to the compiler and CPU what
we're looking for.

After all these years, I still can't derive a good function to identify
a boundary character within 8 bytes, but I think I found the next best
thing: loop unrolling. Speaking of boundary characters, I removed the
notion of character classes as they were unused.

The happy path for parsing quoted data will now process 8 bytes at once
and will ask for forgiveness if an escape character is encountered.

The 20% improvement comes from measuring eu4 save deserialization, so
the improvement to this individual function is much greater.
  • Loading branch information
nickbabcock committed Jan 8, 2024
1 parent ebe4999 commit 2223ea3
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 38 deletions.
41 changes: 21 additions & 20 deletions src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,33 +44,34 @@ const fn create_windows_1252_table() -> [char; 256] {
}

// 256-entry `char` table computed by `create_windows_1252_table`.
pub(crate) static WINDOWS_1252: [char; 256] = create_windows_1252_table();
// Character class tags; each one is a distinct single bit of a `u8`.
pub(crate) const BOUNDARY: u8 = 1;
pub(crate) const WHITESPACE: u8 = 2;
pub(crate) const OPERATOR: u8 = 4;
pub(crate) const COMMENT: u8 = 8;

#[inline]
pub(crate) fn is_boundary(b: u8) -> bool {
CHARACTER_CLASS[usize::from(b)] != 0
boundary(b) != 0
}

/// Returns the `CHARACTER_CLASS` table entry for the byte `b`.
///
/// Converting a `u8` to `usize` always yields an index below 256.
#[inline]
pub(crate) fn boundary(b: u8) -> u8 {
    let index = usize::from(b);
    CHARACTER_CLASS[index]
}

/// Builds the 256-entry lookup table that marks boundary bytes.
///
/// An entry is `1` when the byte terminates an unquoted scalar and `0`
/// otherwise. Each boundary byte is written exactly once; the table does not
/// distinguish between kinds of boundary.
const fn create_character_class_table() -> [u8; 256] {
    // Whitespace (\t \n \v \f \r and space), operators (! < = >),
    // the comment marker (#) and brackets ([ ] } {).
    const BOUNDARY_BYTES: &[u8] = b"\t\n\x0b\x0c\r !#<=>[]}{";

    let mut table = [0u8; 256];

    // `for` loops are not available in `const fn`, hence the manual index.
    let mut i = 0;
    while i < BOUNDARY_BYTES.len() {
        table[BOUNDARY_BYTES[i] as usize] = 1;
        i += 1;
    }

    table
}

Expand Down
106 changes: 88 additions & 18 deletions src/text/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use super::Operator;
use crate::{
buffer::{BufferError, BufferWindow, BufferWindowBuilder},
data::is_boundary,
util::{contains_zero_byte, count_chunk, repeat_byte},
util::{contains_zero_byte, count_chunk, leading_whitespace, repeat_byte},
Scalar,
};
use std::io::Read;
Expand Down Expand Up @@ -134,8 +134,7 @@ where
self.buf.position()
}

#[inline]
unsafe fn next_opt(&mut self) -> (Option<Token>, Option<ReaderError>) {
unsafe fn next_opt_fallback(&mut self) -> (Option<Token>, Option<ReaderError>) {
#[derive(Debug)]
enum ParseState {
None,
Expand All @@ -155,21 +154,7 @@ where

'inner: loop {
match *ptr {
c @ b' ' | c @ b'\t' => {
ptr = ptr.add(1);
loop {
if ptr == end {
break 'eof (0, 0);
}

if *ptr != c {
break;
}

ptr = ptr.add(1)
}
}
b'\n' | b'\r' | b';' => {
b' ' | b'\t' | b'\n' | b'\r' | b';' => {
ptr = ptr.add(1);
break 'inner;
}
Expand Down Expand Up @@ -425,6 +410,91 @@ where
}
}

/// Fast path for reading the next token.
///
/// Skips the run of leading whitespace reported by `leading_whitespace` (at
/// most eight bytes) and then eagerly handles the most common tokens: `{`,
/// `}`, unquoted scalars that start with an ASCII alphanumeric or `-`, and
/// quoted scalars for which no backslash was seen while scanning. Whenever the
/// fast path cannot produce a token it delegates to `next_opt_fallback`;
/// `self.buf` is only advanced on the successful returns, so the fallback
/// restarts from the original position.
///
/// # Safety
///
/// NOTE(review): presumably `self.buf.start..self.buf.end` must delimit
/// readable bytes of a single allocation — confirm against `BufferWindow`.
#[inline]
unsafe fn next_opt(&mut self) -> (Option<Token>, Option<ReaderError>) {
let mut ptr = self.buf.start;
let end = self.buf.end;

// Need 8 bytes for the unaligned u64 load plus one more so that `ptr` can
// still be dereferenced after skipping up to 8 whitespace bytes.
if end.offset_from(ptr) < 9 {
return self.next_opt_fallback();
}

// 3.4 million newlines followed by an average of 3.3 tabs
// `to_le` puts the first byte in memory into the least significant byte.
let data = ptr.cast::<u64>().read_unaligned().to_le();
ptr = ptr.add(leading_whitespace(data) as usize);

// Eagerly check for brackets, there'll be millions of them
if *ptr == b'{' {
self.buf.advance_to(ptr.add(1));
return (Some(Token::Open), None);
} else if *ptr == b'}' {
self.buf.advance_to(ptr.add(1));
return (Some(Token::Close), None);
}
// Unquoted values are the most frequent type of values in
// text so if we see something that is alphanumeric or a
// dash (for negative numbers) we eagerly attempt to match
// against it. Loop unrolling is used to minimize the number
// of accesses to the boundary lookup table.
else if matches!(*ptr, b'a'..=b'z' | b'0'..=b'9' | b'A'..=b'Z' | b'-') {
let start_ptr = ptr;
let mut opt_ptr = start_ptr.add(1);
// `> 8` keeps all eight dereferences of the inner loop before `end`.
while end.offset_from(opt_ptr) > 8 {
for _ in 0..8 {
if is_boundary(*opt_ptr) {
self.buf.advance_to(opt_ptr);

// for space delimited arrays, advance one
if *opt_ptr == b' ' {
self.buf.advance(1);
}

let scalar = self.buf.get(start_ptr..opt_ptr);
return (Some(Token::Unquoted(scalar)), None);
}
opt_ptr = opt_ptr.add(1);
}
}

// optimization failed, fallback to inner parsing loop
} else if *ptr == b'\"' {
let start_ptr = ptr.add(1);
let mut opt_ptr = start_ptr;
// Sticky: set once any scanned 8-byte chunk contains a backslash.
let mut escaped = false;
while end.offset_from(opt_ptr) > 8 {
let data = opt_ptr.cast::<u64>().read_unaligned().to_le();
escaped |= contains_zero_byte(data ^ repeat_byte(b'\\'));

// http://0x80.pl/notesen/2023-03-06-swar-find-any.html#faster-swar-procedure
// After these steps `t2` has the high bit set in exactly those byte
// lanes of `data` that equal `"`.
let mask = repeat_byte(0x7f);
let lobits = data & mask;
let x0 = (lobits ^ repeat_byte(b'\"')) + mask;
let t0 = x0 | data;
let t1 = t0 & repeat_byte(0x80);
let t2 = t1 ^ repeat_byte(0x80);

if t2 != 0 {
// Byte offset of the first quote within this chunk.
let quote_ind = t2.trailing_zeros() >> 3;

if !escaped {
opt_ptr = opt_ptr.add(quote_ind as usize);
self.buf.advance_to(opt_ptr.add(1));
let scalar = self.buf.get(start_ptr..opt_ptr);
return (Some(Token::Quoted(scalar)), None);
} else {
// A backslash was seen, so this quote may be escaped; let the
// fallback handle it.
break;
}
} else {
opt_ptr = opt_ptr.add(8);
}
}

// optimization failed, fallback to inner parsing loop
}

self.next_opt_fallback()
}

/// Advance a given number of bytes and return them.
///
/// The internal buffer must be large enough to accommodate all bytes.
Expand Down
20 changes: 20 additions & 0 deletions src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,31 @@ pub(crate) const fn count_chunk(value: u64, byte: u8) -> u64 {
sum_usize(bytewise_equal(value, repeat_byte(byte)))
}

/// Counts how many of the lowest-order bytes of `value` are a tab (`\t`) or a
/// newline (`\n`), stopping at the first byte that is neither.
///
/// `value` is expected to hold eight input bytes with the first byte in the
/// least significant position (i.e. a little-endian load). The result is in
/// `0..=8`.
///
/// Only `\t` and `\n` are counted. Simply AND-ing `value ^ tabs` with
/// `value ^ newlines` is zero for every byte in `0x08..=0x0b`, so it would also
/// count backspace and vertical tab; each lane is therefore tested exactly.
#[inline]
pub(crate) fn leading_whitespace(value: u64) -> u32 {
    const LOW7: u64 = u64::from_ne_bytes([0x7f; 8]);
    const HIGH: u64 = u64::from_ne_bytes([0x80; 8]);
    const TABS: u64 = u64::from_ne_bytes([b'\t'; 8]);
    const NEWLINES: u64 = u64::from_ne_bytes([b'\n'; 8]);

    // Sets the high bit of every lane of `x` that is non-zero. The addition
    // cannot carry between lanes because each lane is at most 0x7f + 0x7f.
    let nonzero_lanes = |x: u64| ((x & LOW7) + LOW7) | x;

    // High bit set in each lane that is neither a tab nor a newline.
    let other = nonzero_lanes(value ^ TABS) & nonzero_lanes(value ^ NEWLINES) & HIGH;

    // The first such lane `i` has its lowest set bit at `8 * i + 7`; when no
    // lane qualifies, `trailing_zeros` is 64 and the result is 8.
    other.trailing_zeros() >> 3
}

#[cfg(test)]
mod tests {
use super::*;
use rstest::*;

// Every case is exactly eight bytes so it fits the `[u8; 8]` parameter; short
// inputs are padded with spaces, which are not counted as leading whitespace.
#[rstest]
#[case(*b"\t\t\t\t\t\t\t\t", 8)]
#[case(*b"a\t\t\t\t\t\t\t", 0)]
#[case(*b"\t       ", 1)]
#[case(*b"\n\na     ", 2)]
#[case(*b"\n\ta     ", 2)]
fn test_leading_whitespace(#[case] input: [u8; 8], #[case] expected: u32) {
    // Little-endian so that the first input byte is the lowest-order byte.
    let lhs = u64::from_le_bytes(input);
    assert_eq!(leading_whitespace(lhs), expected);
}

#[rstest]
#[case(*b" ", 0)]
#[case(*b" { ", 1)]
Expand Down

0 comments on commit 2223ea3

Please sign in to comment.