Skip to content

Commit

Permalink
optimize float_parse_bytes (#82)
Browse files Browse the repository at this point in the history
* optimize `float_parse_bytes`

* fix chrono deprecation warnings

* lint

* also optimize `int_parse_bytes`

* simplify
  • Loading branch information
davidhewitt authored Feb 11, 2025
1 parent 8b41c77 commit dc08750
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 72 deletions.
2 changes: 1 addition & 1 deletion benches/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ fn compare_timestamp_ok_chrono(bench: &mut Bencher) {
use chrono::{Datelike, Timelike};
let ts = black_box(1654617803);
bench.iter(|| {
let dt = chrono::NaiveDateTime::from_timestamp_opt(ts, 0).unwrap();
let dt = chrono::DateTime::from_timestamp(ts, 0).unwrap();
black_box((
dt.year(),
dt.month(),
Expand Down
132 changes: 68 additions & 64 deletions src/numbers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,7 @@ pub fn int_parse_str(s: &str) -> Option<i64> {

/// Parse bytes as an int.
pub fn int_parse_bytes(s: &[u8]) -> Option<i64> {
let (neg, first_digit, digits) = match s {
[b'-', first, digits @ ..] => (true, first, digits),
[b'+', first, digits @ ..] | [first, digits @ ..] => (false, first, digits),
_ => return None,
};

let mut result = match first_digit {
b'0' => 0,
b'1'..=b'9' => (first_digit & 0x0f) as i64,
_ => return None,
};

for digit in digits {
result = result.checked_mul(10)?;
match digit {
b'0' => {}
b'1'..=b'9' => result = result.checked_add((digit & 0x0f) as i64)?,
_ => return None,
}
}
if neg {
Some(-result)
} else {
Some(result)
}
int_parse_bytes_internal(s).ok()
}

#[derive(Debug)]
Expand All @@ -58,53 +34,81 @@ pub fn float_parse_str(s: &str) -> IntFloat {

/// Parse bytes as a float.
///
/// The input is first run through the integer parser. If that succeeds the
/// value is returned as `IntFloat::Int`. If it stopped on a `.` byte, the
/// whole input is handed to the lexical float parser instead. Every other
/// integer failure is reported as `IntFloat::Err`.
pub fn float_parse_bytes(s: &[u8]) -> IntFloat {
    static OPTIONS: ParseFloatOptions = ParseFloatOptions::new();

    match int_parse_bytes_internal(s) {
        // the fast path: the input was a plain integer
        Ok(int) => IntFloat::Int(int),
        // the integer parser tripped over a '.', so retry the full input as a float
        Err(Some(b'.')) => f64::from_lexical_with_options::<{ lexical_format::STANDARD }>(s, &OPTIONS)
            .map_or(IntFloat::Err, IntFloat::Float),
        // anything else that breaks integer parsing also breaks float parsing
        Err(_) => IntFloat::Err,
    }
}

/// Sentinel stored in the digit lookup table for every byte that is not an ASCII digit.
const ERR: u8 = 0xff;

/// Optimized routine to either parse an integer or return the byte which triggered the error.
///
/// Accepts an optional leading `+` or `-` followed by one or more ASCII digits.
/// The magnitude is accumulated as a non-negative `i64` and negated at the end,
/// so the magnitude itself must not exceed `i64::MAX`.
///
/// # Errors
///
/// * `Err(None)` if `s` is empty.
/// * `Err(Some(byte))` carrying the first byte that is not an ASCII digit, or
///   the digit whose accumulation would push the magnitude past `i64::MAX`.
fn int_parse_bytes_internal(s: &[u8]) -> Result<i64, Option<u8>> {
    // Largest accumulator value that can still be multiplied by 10 without overflowing.
    const MAX_BEFORE_MUL: i64 = i64::MAX / 10;

    // A lone sign byte falls through to the last non-empty pattern and is then
    // rejected by the digit decoder below.
    let (neg, first_digit, digits) = match s {
        [b'-', first, digits @ ..] => (true, *first, digits),
        [b'+', first, digits @ ..] | [first, digits @ ..] => (false, *first, digits),
        [] => return Err(None),
    };

    let mut int_part = decoded_i64_value(first_digit)?;

    for &digit in digits {
        // Decode first so that a non-digit byte (e.g. '.') is always reported
        // in preference to an overflow.
        let value = decoded_i64_value(digit)?;

        // `wrapping_mul(10)` can wrap all the way back into the non-negative
        // range (e.g. 2_000_000_000_000_000_000 * 10), so a sign test on the
        // result alone misses some overflows. The multiplication overflows
        // exactly when the accumulator exceeds `i64::MAX / 10`.
        let mul_overflows = int_part > MAX_BEFORE_MUL;

        int_part = int_part.wrapping_mul(10).wrapping_add(value);

        // When the multiplication did not overflow, the addition of a value in
        // 0..=9 overflows exactly when the result wraps to a negative number.
        // One combined overflow check per iteration.
        if mul_overflows || int_part < 0 {
            return Err(Some(digit));
        }
    }

    Ok(if neg { -int_part } else { int_part })
}

/// Helper to parse a single ascii digit as an i64.
///
/// Returns `Err(digit)` (the offending byte itself) when `digit` is not in
/// `b'0'..=b'9'`.
fn decoded_i64_value(digit: u8) -> Result<i64, u8> {
    // 256-entry lookup table built at compile time: indices b'0'..=b'9' hold
    // 0..=9, every other index holds `ERR`.
    static DECODE_MAP: [u8; 256] = {
        let mut map = [ERR; 256];
        let mut d = 0u8;
        while d < 10 {
            map[(b'0' + d) as usize] = d;
            d += 1;
        }
        map
    };

    // A `u8` index can never be out of bounds for a 256-entry table.
    let value = DECODE_MAP[digit as usize];
    debug_assert!(value <= 9 || value == ERR);

    if value == ERR {
        return Err(digit);
    }

    Ok(value as i64)
}

/// Count the number of decimal places in a byte slice.
Expand Down
14 changes: 7 additions & 7 deletions tests/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::fs::File;
use std::io::Read;
use std::str::FromStr;

use chrono::{Datelike, FixedOffset as ChronoFixedOffset, NaiveDate, NaiveDateTime, Timelike, Utc as ChronoUtc};
use chrono::{Datelike, FixedOffset as ChronoFixedOffset, NaiveDate, Timelike, Utc as ChronoUtc};
use strum::EnumMessage;

use speedate::{
Expand Down Expand Up @@ -215,7 +215,7 @@ fn date_from_timestamp_milliseconds() {
}

fn try_date_timestamp(ts: i64, check_timestamp: bool) {
let chrono_date = NaiveDateTime::from_timestamp_opt(ts, 0).unwrap().date();
let chrono_date = chrono::DateTime::from_timestamp(ts, 0).unwrap().date_naive();
let d = Date::from_timestamp(ts, false).unwrap();
// println!("{} => {:?}", ts, d);
assert_eq!(
Expand Down Expand Up @@ -283,7 +283,7 @@ macro_rules! date_from_timestamp {
#[test]
fn [< date_from_timestamp_ $year _ $month _ $day >]() {
let chrono_date = NaiveDate::from_ymd_opt($year, $month, $day).unwrap();
let ts = chrono_date.and_hms_opt(0, 0, 0).unwrap().timestamp();
let ts = chrono_date.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
let d = Date::from_timestamp(ts, false).unwrap();
assert_eq!(
d,
Expand Down Expand Up @@ -404,7 +404,7 @@ fn time_from_timestamp_error() {
}
}

fn try_datetime_timestamp(chrono_dt: NaiveDateTime) {
fn try_datetime_timestamp(chrono_dt: chrono::DateTime<ChronoUtc>) {
let ts = chrono_dt.timestamp();
let dt = DateTime::from_timestamp(ts, chrono_dt.nanosecond() / 1_000).unwrap();
// println!("{} ({}) => {}", ts, chrono_dt, dt);
Expand Down Expand Up @@ -435,7 +435,7 @@ macro_rules! datetime_from_timestamp {
paste::item! {
#[test]
fn [< datetime_from_timestamp_ $year _ $month _ $day _t_ $hour _ $minute _ $second _ $microsecond >]() {
let chrono_dt = NaiveDate::from_ymd_opt($year, $month, $day).unwrap().and_hms_nano_opt($hour, $minute, $second, $microsecond * 1_000).unwrap();
let chrono_dt = NaiveDate::from_ymd_opt($year, $month, $day).unwrap().and_hms_nano_opt($hour, $minute, $second, $microsecond * 1_000).unwrap().and_utc();
try_datetime_timestamp(chrono_dt);
}
}
Expand All @@ -455,8 +455,8 @@ datetime_from_timestamp! {
/// Sweeps second-resolution timestamps in steps of 757 across
/// `0..157_766_400`, checking both each value and its negation through
/// `try_datetime_timestamp`.
#[test]
fn datetime_from_timestamp_range() {
    for ts in (0..157_766_400).step_by(757) {
        // `try_datetime_timestamp` takes a UTC `chrono::DateTime`, so build the
        // reference value with `DateTime::from_timestamp`.
        try_datetime_timestamp(chrono::DateTime::from_timestamp(ts, 0).unwrap());
        try_datetime_timestamp(chrono::DateTime::from_timestamp(-ts, 0).unwrap());
    }
}

Expand Down

0 comments on commit dc08750

Please sign in to comment.