Skip to content

Commit

Permalink
Re-organize and add iai-callgrind benchmarks (#36)
Browse files Browse the repository at this point in the history
* Modify fixed length key generator to return array

**Description**
 - Remove `rustc_hash` dependency
 - Have the fixed length key generator return an array instead of a
   boxed slice

**Motivation**
 - Remove little-used dependency
 - Less memory usage by removing indirection of boxed slice, it shows
   up in the tests

**Testing Done**
`cargo test`

* Redo fixed-length key generator

**Description**
Simplify the fixed-length key generator so it returns the expected
number of keys, and no longer spreads the digits over the entire
[0, 255] range.

**Motivation**
This fix was done so that I could generate similarly sized inputs for
the benchmarks.

**Testing Done**
`cargo test`

* Re-organize the criterion benchmarks

**Description**
 - Re-organize the criterion benchmarks into a nested structure with a
   single main entrypoint.
 - Convert a couple `debug_assert`s to `assert`s so that there is a
   consistent error message even in release mode

**Motivation**
I wanted to try and share benchmark definitions between criterion and
iai-callgrind, but that didn't really pan out. This change is
in-between and I like that there is only a single benchmark binary
instead of one per-group.

**Testing Done**
`./scripts/full-test.sh nightly`

* Add `iai-callgrind` benchmarks

**Description**
Add `iai-callgrind` benchmarks over a similar set of functions
currently benchmarked by criterion.

**Motivation**
I like the `iai-callgrind` benchmarks because they are very precise
and subsequent runs with no code change return mostly the same
result.

They also run fewer iterations than criterion, and generally take
less time to run. This makes it easier to run them as part of the
development loop.

**Testing Done**
`./scripts/full-test.sh nightly`

* Switch to `OnceLock` to maintain 1.78 MSRV

* Attempt 1/? to update CI to install iai-callgrind-runner

* Attempt 2/? to update CI to install valgrind

* Address some clippy lints
  • Loading branch information
declanvk authored Sep 15, 2024
1 parent f066f0f commit 808c38b
Show file tree
Hide file tree
Showing 37 changed files with 33,499 additions and 525 deletions.
11 changes: 11 additions & 0 deletions .github/workflows/rust-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,17 @@ jobs:
with:
toolchain: ${{matrix.rust}}
components: rustfmt clippy

- uses: taiki-e/install-action@cargo-binstall
- name: Install iai-callgrind-runner and valgrind
run: |
version=$(cargo metadata --format-version=1 |\
jq '.packages[] | select(.name == "iai-callgrind").version' |\
tr -d '"'
)
cargo binstall --no-confirm iai-callgrind-runner --version $version
sudo apt install -y valgrind
- name: Run full test script
run: ./scripts/full-test.sh "${{matrix.rust}}"
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ default.profraw
.vscode/

# profiling workload data
data/
/data/
profile.json

# coverage info
Expand Down
46 changes: 9 additions & 37 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ exclude = [
".github/",
]

# All benchmarks must be registered manually
autobenches = false

[dependencies]
bytemuck = { version = "1.16.1", features = ["min_const_generics"] }
paste = "1.0.15"
Expand All @@ -33,48 +36,17 @@ bench-perf-events = []
argh = "0.1.12"
criterion = "0.5.1"
dhat = "0.3.3"
paste = "1.0.15"
# criterion-perf-events = "0.4.0"
# perfcnt = "0.8.0"
rand = "0.8.5"
rustc-hash = "2.0.0"
iai-callgrind = "0.13.4"
zipf = "7.0.1"

[[bench]]
name = "generated_get"
name = "criterion"
harness = false

[[bench]]
name = "generated_insert"
name = "iai_callgrind"
harness = false

[[bench]]
name = "dict_insert"
harness = false

[[bench]]
name = "fuzzy"
harness = false

[[bench]]
name = "dict_get"
harness = false

[[bench]]
name = "min_max"
harness = false

[[bench]]
name = "match_prefix"
harness = false

[[bench]]
name = "entry"
harness = false

[[bench]]
name = "iter"
harness = false

[[bench]]
name = "clone"
harness = false
[profile.bench]
debug = true
72 changes: 0 additions & 72 deletions benches/clone.rs

This file was deleted.

185 changes: 149 additions & 36 deletions benches/common.rs
Original file line number Diff line number Diff line change
@@ -1,42 +1,155 @@
#[allow(unused_macros)]
macro_rules! gen_benches {
($bench:ident, $(($target:ident, $event:path)),+) => {
#[cfg(all(feature = "bench-perf-events", target_arch = "x86_64"))]
paste::paste! {
$(
fn $target(c: &mut Criterion<criterion_perf_events::Perf>) {
$bench(c, stringify!($target));
}


criterion::criterion_group! {
name = [<group_ $target>];
config = Criterion::default()
.with_measurement(
criterion_perf_events::Perf::new(
perfcnt::linux::PerfCounterBuilderLinux::from_hardware_event($event),
)
);
targets = $target
}
)+

criterion::criterion_main!($([<group_ $target>]),+);
use std::{ffi::CString, sync::OnceLock};

use blart::{
tests_common::{
generate_key_fixed_length, generate_key_with_prefix, generate_keys_skewed, PrefixExpansion,
},
AsBytes, TreeMap,
};
use rand::{prelude::Distribution, SeedableRng};

static DICTIONARY: &str = include_str!("data/medium-dict.txt");

/// Builds a [`TreeMap`] from `keys`, storing each key's position in the input
/// sequence as its value.
///
/// # Panics
///
/// Panics if `try_insert` reports an error for any key.
fn tree_from_keys<K: AsBytes>(keys: impl IntoIterator<Item = K>) -> TreeMap<K, usize> {
    let mut map = TreeMap::new();

    keys.into_iter().enumerate().for_each(|(position, key)| {
        map.try_insert(key, position).unwrap();
    });

    map
}

/// Returns a reference to the key of the entry reported by
/// `first_key_value` on `tree`.
///
/// # Panics
///
/// Panics if `first_key_value` returns `None`.
#[allow(dead_code)]
pub fn get_first_key<K: AsBytes + Clone, V, const PREFIX_LEN: usize>(
    tree: &TreeMap<K, V, PREFIX_LEN>,
) -> &K {
    let (first_key, _value) = tree.first_key_value().unwrap();
    first_key
}

/// Walks the key iterator of `tree` from both ends and returns the last key
/// it yielded before the iterator ran out.
///
/// Each round takes up to `forward_step_size` keys from the front (`next`)
/// and then up to `backward_step_size` keys from the back (`next_back`). The
/// walk stops as soon as either call returns `None`.
///
/// # Panics
///
/// Panics if `tree` is empty.
///
/// NOTE(review): if both step sizes are `0`, neither inner loop body runs and
/// the outer loop has no other exit, so the call never returns. Callers
/// should pass at least one non-zero step size.
#[allow(dead_code)]
pub fn get_middle_key<K: AsBytes + Clone, V, const PREFIX_LEN: usize>(
tree: &TreeMap<K, V, PREFIX_LEN>,
forward_step_size: usize,
backward_step_size: usize,
) -> &K {
// Most recent key produced by either end of the iterator.
let mut last_key = None;
let mut iter = tree.keys();
assert!(!tree.is_empty());

'outer: loop {
// Advance from the front; stop the whole walk once the iterator is exhausted.
for _ in 0..forward_step_size {
let current = iter.next();
if current.is_none() {
break 'outer;
}
last_key = current;
}

// NOTE(review): the next three lines look like leftover diff context from the
// removed `gen_benches!` macro rather than part of this function — confirm.
#[cfg(not(all(feature = "bench-perf-events", target_arch = "x86_64")))]
fn default_run(c: &mut Criterion<criterion::measurement::WallTime>) {
$bench(c, "default");
// Advance from the back; stop the whole walk once the iterator is exhausted.
for _ in 0..backward_step_size {
let current = iter.next_back();
if current.is_none() {
break 'outer;
}
last_key = current;
}
}

last_key.expect("tree is non-empty")
}

/// Returns a reference to the key of the entry reported by
/// `last_key_value` on `tree`.
///
/// # Panics
///
/// Panics if `last_key_value` returns `None`.
#[allow(dead_code)]
pub fn get_last_key<K: AsBytes + Clone, V, const PREFIX_LEN: usize>(
    tree: &TreeMap<K, V, PREFIX_LEN>,
) -> &K {
    let (last_key, _value) = tree.last_key_value().unwrap();
    last_key
}

/// Selects `num_elements` keys from `tree` using a Zipf distribution
/// (exponent `1.78`) over the keys in the order yielded by `tree.keys()`.
///
/// The random number generator is seeded with a fixed value, so repeated
/// calls on the same tree return the same selection. Keys may repeat in the
/// output.
///
/// # Panics
///
/// Panics if the Zipf distribution cannot be constructed, which happens when
/// `tree` is empty.
#[allow(dead_code)]
pub fn select_zipfian_keys<K: AsBytes + Clone, V, const PREFIX_LEN: usize>(
    tree: &TreeMap<K, V, PREFIX_LEN>,
    num_elements: usize,
) -> Vec<&K> {
    let keys = tree.keys().collect::<Vec<_>>();
    let distr = zipf::ZipfDistribution::new(tree.len(), 1.78)
        .expect("Zipf distribution requires a non-empty tree");
    let mut rng = rand::rngs::StdRng::from_seed([128; 32]);

    distr
        // `ZipfDistribution` samples ranks in `1..=len`; shift to a 0-based
        // index so the first key is reachable and rank `len` stays in bounds.
        .map(move |rank| keys[rank - 1])
        .sample_iter(&mut rng)
        .take(num_elements)
        .collect()
}

/// Removes every key in `keys` from `tree` and returns the removed entries.
///
/// Entries appear in the output in the same order as their keys appear in
/// `keys`. A key that is not present in the tree (including a repeated key
/// that was already removed) contributes nothing to the output.
#[allow(dead_code)]
pub fn remove_keys<K: AsBytes + Clone, V, const PREFIX_LEN: usize>(
    tree: &mut TreeMap<K, V, PREFIX_LEN>,
    keys: Vec<&K>,
) -> Vec<(K, V)> {
    // Reserve for the common case where every key is present.
    let mut removed = Vec::with_capacity(keys.len());

    removed.extend(keys.into_iter().filter_map(|key| tree.remove_entry(key)));

    removed
}

/// Returns a shared tree populated from `generate_keys_skewed(256 * 128)`.
///
/// The tree is built on the first call and cached in a `OnceLock`; later
/// calls return the same reference.
#[allow(dead_code)]
pub fn skewed_tree() -> &'static TreeMap<Box<[u8]>, usize> {
    type SkewedTree = TreeMap<Box<[u8]>, usize>;

    static CACHE: OnceLock<SkewedTree> = OnceLock::new();

    fn build() -> SkewedTree {
        tree_from_keys(generate_keys_skewed(256 * 128))
    }

    CACHE.get_or_init(build)
}

/// Returns a shared tree of two-byte keys populated from
/// `generate_key_fixed_length([u8::MAX, 127])`.
///
/// The tree is built on the first call and cached in a `OnceLock`; later
/// calls return the same reference.
#[allow(dead_code)]
pub fn dense_fixed_length_key_tree() -> &'static TreeMap<[u8; 2], usize> {
    type DenseTree = TreeMap<[u8; 2], usize>;

    static CACHE: OnceLock<DenseTree> = OnceLock::new();

    fn build() -> DenseTree {
        tree_from_keys(generate_key_fixed_length([u8::MAX, 127]))
    }

    CACHE.get_or_init(build)
}

// pub fn medium_sparse_fixed_length_key_tree() -> TreeMap<[u8; 3], usize> {
// tree_from_keys(generate_key_fixed_length([63; 3]))
// }

// pub fn sparse_fixed_length_key_tree() -> TreeMap<[u8; 16], usize> {
// tree_from_keys(generate_key_fixed_length([1; 16]))
// }

/// Returns a shared tree populated from `generate_key_with_prefix`, called
/// with the digits `[7; 5]` and two prefix expansions (base index 1 expanded
/// to length 12, base index 4 expanded to length 8).
///
/// The tree is built on the first call and cached in a `OnceLock`; later
/// calls return the same reference.
pub fn with_prefixes_tree() -> &'static TreeMap<Box<[u8]>, usize> {
    static CACHE: OnceLock<TreeMap<Box<[u8]>, usize>> = OnceLock::new();

    fn build() -> TreeMap<Box<[u8]>, usize> {
        let expansions = [
            PrefixExpansion {
                base_index: 1,
                expanded_length: 12,
            },
            PrefixExpansion {
                base_index: 4,
                expanded_length: 8,
            },
        ];

        tree_from_keys(generate_key_with_prefix([7; 5], expansions))
    }

    CACHE.get_or_init(build)
}

/// Returns a shared tree built from the embedded `DICTIONARY` text.
///
/// `DICTIONARY` is split on `'\n'`, empty pieces are dropped, and every
/// remaining piece becomes a `CString` key whose value is its index among the
/// non-empty pieces. The tree is built on the first call and cached in a
/// `OnceLock`; later calls return the same reference.
///
/// # Panics
///
/// Panics on first use if `CString::new` rejects a piece.
pub fn dictionary_tree() -> &'static TreeMap<CString, usize> {
// Flips a pair so `(index, key)` from `enumerate` becomes `(key, index)`.
fn swap<A, B>((a, b): (A, B)) -> (B, A) {
(b, a)
}

// NOTE(review): the `criterion_group!` lines below look like leftover diff
// context from the removed `gen_benches!` macro rather than part of this
// function — confirm.
#[cfg(not(all(feature = "bench-perf-events", target_arch = "x86_64")))]
criterion::criterion_group!(
name = default_bench;
config = Criterion::default();
targets = default_run
);
static TREE: OnceLock<TreeMap<CString, usize>> = OnceLock::new();

// NOTE(review): the next three lines also look like leftover diff context
// from the removed macro — confirm.
#[cfg(not(all(feature = "bench-perf-events", target_arch = "x86_64")))]
criterion::criterion_main!(default_bench);
};
TREE.get_or_init(|| {
DICTIONARY
.split('\n')
.filter(|s| !s.is_empty())
.map(|s| CString::new(s).unwrap())
.enumerate()
.map(swap)
.collect()
})
}
20 changes: 20 additions & 0 deletions benches/criterion.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
use criterion::criterion_main;

#[macro_use]
mod common;

mod node;
mod tree;

// Single entry point for the criterion benchmark binary: every benchmark
// group from the `node` and `tree` modules is registered here.
criterion_main!(
node::match_prefix::bench_match_prefix_group,
node::min_max::bench_min_max_group,
tree::clone::bench_clone_group,
tree::dict_get::bench_dict_get_group,
tree::dict_insert::bench_dict_insert_group,
tree::entry::bench_entry_group,
tree::fuzzy::bench_fuzzy_group,
tree::generated_get::bench_generated_get_group,
tree::generated_insert::bench_generated_insert_group,
tree::iter::bench_iter_group,
);
File renamed without changes.
Loading

0 comments on commit 808c38b

Please sign in to comment.