Re-organize and add iai-callgrind benchmarks (#36)

* Modify fixed length key generator to return array **Description** - Remove `rustc_hash` dependency - Have the fixed length key generator return an array instead of a boxed slice **Motivation** - Remove little-used dependency - Less memory usage by removing the indirection of a boxed slice; this shows up in the tests **Testing Done** `cargo test` * Redo fixed-length key generator **Description** Simplify the fixed-length key generator so it returns the expected number of keys, and no longer spreads the digits over the entire [0, 255] range. **Motivation** This fix was done so that I could generate similarly sized inputs for the benchmarks. **Testing Done** `cargo test` * Re-organize the criterion benchmarks **Description** - Re-organize the criterion benchmarks into a nested structure with a single main entrypoint. - Convert a couple of `debug_assert`s to `assert`s so that there is a consistent error message even in release mode **Motivation** I wanted to try and share benchmark definitions between criterion and iai-callgrind, but that didn't really pan out. This change is in-between and I like that there is only a single benchmark binary instead of one per-group. **Testing Done** `./scripts/full-test.sh nightly` * Add `iai-callgrind` benchmarks **Description** Add `iai-callgrind` benchmarks over a similar set of functions to those currently benchmarked by criterion. **Motivation** I like the `iai-callgrind` benchmarks because they are very precise and subsequent runs with no code change return mostly the same result. They also run fewer iterations than criterion, and generally take less time to run. This makes it easier to run them as part of the development loop. **Testing Done** `./scripts/full-test.sh nightly` * Switch to `OnceLock` to maintain 1.78 MSRV * Attempt 1/? to update CI to install iai-callgrind-runner * Attempt 2/? to update CI to install valgrind * Address some clippy lints
declanvk · Sep 15, 2024 · 808c38b · 808c38b
1 parent f066f0f
commit 808c38b
Show file tree

Hide file tree

Showing 37 changed files with 33,499 additions and 525 deletions.
diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml
@@ -37,6 +37,17 @@ jobs:
         with:
           toolchain: ${{matrix.rust}}
           components: rustfmt clippy
+
+      - uses: taiki-e/install-action@cargo-binstall
+      - name: Install iai-callgrind-runner and valgrind
+        run: |
+          version=$(cargo metadata --format-version=1 |\
+            jq '.packages[] | select(.name == "iai-callgrind").version' |\
+            tr -d '"'
+          )
+          cargo binstall --no-confirm iai-callgrind-runner --version $version
+          sudo apt install -y valgrind
+
 
       - name: Run full test script
         run: ./scripts/full-test.sh "${{matrix.rust}}"
diff --git a/.gitignore b/.gitignore
@@ -11,7 +11,7 @@ default.profraw
 .vscode/
 
 # profiling workload data
-data/
+/data/
 profile.json
 
 # coverage info

diff --git a/Cargo.toml b/Cargo.toml
@@ -20,6 +20,9 @@ exclude = [
     ".github/",
 ]
 
+# All benchmarks must be registered manually
+autobenches = false
+
 [dependencies]
 bytemuck = { version = "1.16.1", features = ["min_const_generics"] }
 paste = "1.0.15"
@@ -33,48 +36,17 @@ bench-perf-events = []
 argh = "0.1.12"
 criterion = "0.5.1"
 dhat = "0.3.3"
-paste = "1.0.15"
-# criterion-perf-events = "0.4.0"
-# perfcnt = "0.8.0"
 rand = "0.8.5"
-rustc-hash = "2.0.0"
+iai-callgrind = "0.13.4"
+zipf = "7.0.1"
 
 [[bench]]
-name = "generated_get"
+name = "criterion"
 harness = false
 
 [[bench]]
-name = "generated_insert"
+name = "iai_callgrind"
 harness = false
 
-[[bench]]
-name = "dict_insert"
-harness = false
-
-[[bench]]
-name = "fuzzy"
-harness = false
-
-[[bench]]
-name = "dict_get"
-harness = false
-
-[[bench]]
-name = "min_max"
-harness = false
-
-[[bench]]
-name = "match_prefix"
-harness = false
-
-[[bench]]
-name = "entry"
-harness = false
-
-[[bench]]
-name = "iter"
-harness = false
-
-[[bench]]
-name = "clone"
-harness = false
+[profile.bench]
+debug = true
diff --git a/benches/clone.rs b/benches/clone.rs
diff --git a/benches/common.rs b/benches/common.rs
@@ -1,42 +1,155 @@
-#[allow(unused_macros)]
-macro_rules! gen_benches {
-    ($bench:ident, $(($target:ident, $event:path)),+) => {
-        #[cfg(all(feature = "bench-perf-events", target_arch = "x86_64"))]
-        paste::paste! {
-            $(
-                fn $target(c: &mut Criterion<criterion_perf_events::Perf>) {
-                    $bench(c, stringify!($target));
-                }
-
-
-                criterion::criterion_group! {
-                    name = [<group_ $target>];
-                    config = Criterion::default()
-                    .with_measurement(
-                        criterion_perf_events::Perf::new(
-                            perfcnt::linux::PerfCounterBuilderLinux::from_hardware_event($event),
-                        )
-                    );
-                    targets = $target
-                }
-            )+
-
-            criterion::criterion_main!($([<group_ $target>]),+);
+use std::{ffi::CString, sync::OnceLock};
+
+use blart::{
+    tests_common::{
+        generate_key_fixed_length, generate_key_with_prefix, generate_keys_skewed, PrefixExpansion,
+    },
+    AsBytes, TreeMap,
+};
+use rand::{prelude::Distribution, SeedableRng};
+
+static DICTIONARY: &str = include_str!("data/medium-dict.txt");
+
+fn tree_from_keys<K: AsBytes>(keys: impl IntoIterator<Item = K>) -> TreeMap<K, usize> {
+    let mut tree = TreeMap::new();
+    for (v, k) in keys.into_iter().enumerate() {
+        tree.try_insert(k, v).unwrap();
+    }
+
+    tree
+}
+
+#[allow(dead_code)]
+pub fn get_first_key<K: AsBytes + Clone, V, const PREFIX_LEN: usize>(
+    tree: &TreeMap<K, V, PREFIX_LEN>,
+) -> &K {
+    tree.first_key_value().unwrap().0
+}
+
+#[allow(dead_code)]
+pub fn get_middle_key<K: AsBytes + Clone, V, const PREFIX_LEN: usize>(
+    tree: &TreeMap<K, V, PREFIX_LEN>,
+    forward_step_size: usize,
+    backward_step_size: usize,
+) -> &K {
+    let mut last_key = None;
+    let mut iter = tree.keys();
+    assert!(!tree.is_empty());
+
+    'outer: loop {
+        for _ in 0..forward_step_size {
+            let current = iter.next();
+            if current.is_none() {
+                break 'outer;
+            }
+            last_key = current;
         }
 
-        #[cfg(not(all(feature = "bench-perf-events", target_arch = "x86_64")))]
-        fn default_run(c: &mut Criterion<criterion::measurement::WallTime>) {
-            $bench(c, "default");
+        for _ in 0..backward_step_size {
+            let current = iter.next_back();
+            if current.is_none() {
+                break 'outer;
+            }
+            last_key = current;
         }
+    }
+
+    last_key.expect("tree is non-empty")
+}
+
+#[allow(dead_code)]
+pub fn get_last_key<K: AsBytes + Clone, V, const PREFIX_LEN: usize>(
+    tree: &TreeMap<K, V, PREFIX_LEN>,
+) -> &K {
+    tree.last_key_value().unwrap().0
+}
+
+/// Sample `num_elements` keys from `tree`, with key popularity following a
+/// Zipfian distribution over the keys in iteration order (the first key is
+/// the most frequently selected).
+///
+/// The RNG is created from a fixed seed, so repeated calls on the same tree
+/// yield the same sequence of keys.
+///
+/// # Panics
+///
+/// Panics if `zipf::ZipfDistribution::new` rejects its arguments (the
+/// `unwrap` below).
+#[allow(dead_code)]
+pub fn select_zipfian_keys<K: AsBytes + Clone, V, const PREFIX_LEN: usize>(
+    tree: &TreeMap<K, V, PREFIX_LEN>,
+    num_elements: usize,
+) -> Vec<&K> {
+    let keys = tree.keys().collect::<Vec<_>>();
+    let distr = zipf::ZipfDistribution::new(tree.len(), 1.78).unwrap();
+    let mut rng = rand::rngs::StdRng::from_seed([128; 32]);
+
+    distr
+        // `ZipfDistribution` produces 1-based ranks in `1..=tree.len()`.
+        // Shift to a zero-based index so the first key is reachable and a
+        // sample equal to `tree.len()` does not index out of bounds.
+        .map(move |rank| keys[rank - 1])
+        .sample_iter(&mut rng)
+        .take(num_elements)
+        .collect()
+}
+
+/// Remove every key in `keys` from `tree` and return the entries that were
+/// actually removed, in the order the keys were given.
+///
+/// A key that is not present in the tree (including a repeat of a key that
+/// was already removed earlier in `keys`) contributes nothing to the output.
+#[allow(dead_code)]
+pub fn remove_keys<K: AsBytes + Clone, V, const PREFIX_LEN: usize>(
+    tree: &mut TreeMap<K, V, PREFIX_LEN>,
+    keys: Vec<&K>,
+) -> Vec<(K, V)> {
+    // At most one entry comes back per requested key, so this capacity is an
+    // upper bound and the vector never needs to grow.
+    let mut output = Vec::with_capacity(keys.len());
+
+    // Keep the removed entries instead of discarding them; previously the
+    // returned vector was always empty.
+    output.extend(keys.into_iter().filter_map(|key| tree.remove_entry(key)));
+
+    output
+}
+
+#[allow(dead_code)]
+pub fn skewed_tree() -> &'static TreeMap<Box<[u8]>, usize> {
+    static TREE: OnceLock<TreeMap<Box<[u8]>, usize>> = OnceLock::new();
+
+    TREE.get_or_init(|| tree_from_keys(generate_keys_skewed(256 * 128)))
+}
+
+#[allow(dead_code)]
+pub fn dense_fixed_length_key_tree() -> &'static TreeMap<[u8; 2], usize> {
+    static TREE: OnceLock<TreeMap<[u8; 2], usize>> = OnceLock::new();
+
+    TREE.get_or_init(|| tree_from_keys(generate_key_fixed_length([u8::MAX, 127])))
+}
+
+// pub fn medium_sparse_fixed_length_key_tree() -> TreeMap<[u8; 3], usize> {
+//     tree_from_keys(generate_key_fixed_length([63; 3]))
+// }
+
+// pub fn sparse_fixed_length_key_tree() -> TreeMap<[u8; 16], usize> {
+//     tree_from_keys(generate_key_fixed_length([1; 16]))
+// }
+
+pub fn with_prefixes_tree() -> &'static TreeMap<Box<[u8]>, usize> {
+    static TREE: OnceLock<TreeMap<Box<[u8]>, usize>> = OnceLock::new();
+
+    TREE.get_or_init(|| {
+        tree_from_keys(generate_key_with_prefix(
+            [7; 5],
+            [
+                PrefixExpansion {
+                    base_index: 1,
+                    expanded_length: 12,
+                },
+                PrefixExpansion {
+                    base_index: 4,
+                    expanded_length: 8,
+                },
+            ],
+        ))
+    })
+}
+
+pub fn dictionary_tree() -> &'static TreeMap<CString, usize> {
+    fn swap<A, B>((a, b): (A, B)) -> (B, A) {
+        (b, a)
+    }
 
-        #[cfg(not(all(feature = "bench-perf-events", target_arch = "x86_64")))]
-        criterion::criterion_group!(
-            name = default_bench;
-            config = Criterion::default();
-            targets = default_run
-        );
+    static TREE: OnceLock<TreeMap<CString, usize>> = OnceLock::new();
 
-        #[cfg(not(all(feature = "bench-perf-events", target_arch = "x86_64")))]
-        criterion::criterion_main!(default_bench);
-    };
+    TREE.get_or_init(|| {
+        DICTIONARY
+            .split('\n')
+            .filter(|s| !s.is_empty())
+            .map(|s| CString::new(s).unwrap())
+            .enumerate()
+            .map(swap)
+            .collect()
+    })
 }
diff --git a/benches/criterion.rs b/benches/criterion.rs
@@ -0,0 +1,20 @@
+use criterion::criterion_main;
+
+#[macro_use]
+mod common;
+
+mod node;
+mod tree;
+
+criterion_main!(
+    node::match_prefix::bench_match_prefix_group,
+    node::min_max::bench_min_max_group,
+    tree::clone::bench_clone_group,
+    tree::dict_get::bench_dict_get_group,
+    tree::dict_insert::bench_dict_insert_group,
+    tree::entry::bench_entry_group,
+    tree::fuzzy::bench_fuzzy_group,
+    tree::generated_get::bench_generated_get_group,
+    tree::generated_insert::bench_generated_insert_group,
+    tree::iter::bench_iter_group,
+);
diff --git a/benches/dict.txt → benches/data/dict.txt b/benches/dict.txt → benches/data/dict.txt