-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 39dcad3
Showing
8 changed files
with
360 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
name: CI | ||
|
||
on: | ||
workflow_dispatch: | ||
push: | ||
branches: | ||
- main | ||
paths-ignore: | ||
- '**.md' | ||
pull_request: | ||
branches: | ||
- main | ||
paths-ignore: | ||
- '**.md' | ||
|
||
env: | ||
RUSTFLAGS: "-C debuginfo=1" | ||
CARGO_TERM_COLOR: always | ||
RUST_BACKTRACE: "1" | ||
|
||
jobs: | ||
sqlness: | ||
runs-on: ubuntu-latest | ||
timeout-minutes: 30 | ||
strategy: | ||
matrix: | ||
rust: [stable] | ||
steps: | ||
- uses: actions/checkout@v3 | ||
with: | ||
submodules: true | ||
- run: | | ||
rustup set auto-self-update disable | ||
rustup toolchain install ${{ matrix.rust }} --profile minimal | ||
- name: Run Style Check | ||
run: | | ||
make clippy | ||
make fmt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
/target |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
[package] | ||
name = "hash-benchmark-rs" | ||
description = "Benchmark different hash in Rust" | ||
version = "0.1.0" | ||
edition = "2021" | ||
authors = [ | ||
"CeresDB Authors <ceresdbservice@gmail.com>", | ||
] | ||
license = "Apache-2.0" | ||
repository = "https://github.com/CeresDB/hash-benchmark-rs" | ||
|
||
[dependencies] | ||
ahash = "0.8.3" | ||
byteorder = "1.4.3" | ||
murmur3 = "0.4.1" | ||
rand = "0.8.5" | ||
seahash = "4.1.0" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
|
||
run: | ||
cargo run --release | ||
|
||
fmt: | ||
cargo fmt -- --check | ||
|
||
clippy: | ||
cargo clippy --all-targets --all-features --workspace -- -D warnings |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Hash Benchmark | ||
|
||
# How to run | ||
```bash | ||
make run | ||
``` | ||
|
||
# Result | ||
|
||
## Random string, key_num(10000000), key_len(100) | ||
| Op | Default | AHash | Murmur | SeaHasher | | ||
| --- | --- | --- | --- | --- | | ||
| build time | 441.487 | 12.979 | 462.289 | 141.508 | | ||
| std dev | 283.269 | 259.322 | 274.551 | 294.800 | | ||
| collision | 0 | 0 | 0 | 0 | | ||
|
||
|
||
## Increasing number | ||
| Op | Default | AHash | Murmur | SeaHasher | | ||
| --- | --- | --- | --- | --- | | ||
| build time | 46.265 | 12.492 | 174.967 | 82.611 | | ||
| std dev | 263.976 | 266.246 | 275.889 | 272.080 | | ||
| collision | 0 | 0 | 0 | 0 | | ||
|
||
|
||
# Conclusion | ||
|
||
- [AHash](https://github.com/tkaitchuck/aHash) is the fastest, but it doesn't guarantee a fixed hash code, so it's only recommended for in-memory structures.
- The hash codes generated by these hashers are almost equally well distributed; there is no big difference between them.
- [SeaHash](https://docs.rs/seahash/latest/seahash/) generates a fixed hash code and its speed is reasonable, so it's suitable for on-disk/permanent storage.
- [DefaultHasher](https://doc.rust-lang.org/std/collections/hash_map/struct.DefaultHasher.html) in std is pretty good, but the hash codes it generates may change between Rust releases.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
mod util; | ||
|
||
use ahash::AHasher; | ||
use seahash::SeaHasher; | ||
use std::collections::hash_map::DefaultHasher; | ||
use std::collections::HashSet; | ||
use std::hash::Hasher; | ||
use std::time::{Duration, Instant}; | ||
use util::MurmurHasher; | ||
|
||
use crate::util::gen_random_string; | ||
|
||
const KEY_NUM: usize = 10_000_000; | ||
const KEY_LEN: usize = 100; | ||
const BUCKET_LEN: usize = 128; | ||
|
||
/// Measures how long it takes to hash every key in `keys` with hasher `H`.
///
/// For each key a fresh `H` is default-constructed, fed the key's bytes with a
/// single `write` call, and finalized with `finish`.
///
/// Returns the wall-clock time spent in the whole loop.
fn test_speed<H: Hasher + Default>(keys: &[String]) -> Duration {
    let start_time = Instant::now();
    for key in keys {
        let mut hasher = H::default();
        hasher.write(std::hint::black_box(key.as_bytes()));
        // The digest is not used for anything else, so without `black_box` the
        // optimizer is free to remove the hashing work this loop is timing.
        std::hint::black_box(hasher.finish());
    }

    start_time.elapsed()
}
|
||
/// Counts how many keys produced a hash code that an earlier key already
/// produced.
///
/// Every key is hashed with a fresh, default-constructed `H`; the result is
/// the number of keys minus the number of distinct 64-bit hash codes. Note
/// that repeated keys in `keys` hash identically and are therefore counted
/// too.
fn test_collisions<H: Hasher + Default>(keys: &[String]) -> usize {
    let distinct: HashSet<u64> = keys
        .iter()
        .map(|key| {
            let mut state = H::default();
            state.write(key.as_bytes());
            state.finish()
        })
        .collect();

    keys.len() - distinct.len()
}
|
||
fn test_distribution<H: Hasher + Default>(keys: &[String]) -> f64 { | ||
let mut buckets = vec![0; BUCKET_LEN]; | ||
for key in keys { | ||
let mut hasher = H::default(); | ||
hasher.write(key.as_bytes()); | ||
let idx = hasher.finish() as usize % BUCKET_LEN; | ||
buckets[idx] += 1; | ||
} | ||
|
||
let mean = buckets.iter().sum::<usize>() as f64 / BUCKET_LEN as f64; | ||
let variance = buckets | ||
.iter() | ||
.map(|n| { | ||
let diff = *n as f64 - mean; | ||
diff * diff | ||
}) | ||
.sum::<f64>() | ||
/ BUCKET_LEN as f64; | ||
|
||
// std_dev | ||
variance.sqrt() | ||
} | ||
|
||
fn main() { | ||
let keys: Vec<_> = (0..KEY_NUM).map(|_| gen_random_string(KEY_LEN)).collect(); | ||
println!( | ||
"## Random string, key_num({}), key_len({})", | ||
KEY_NUM, KEY_LEN | ||
); | ||
run(&keys); | ||
|
||
let keys: Vec<_> = (0..KEY_NUM).map(|i| i.to_string()).collect(); | ||
println!("\n\n ## Increasing number"); | ||
run(&keys) | ||
} | ||
|
||
fn run(keys: &[String]) { | ||
// Current print as markdown table, maybe we can add more format | ||
// https://github.com/phsym/prettytable-rs/ | ||
|
||
let as_ms = |v| -> f64 { v as f64 / 1000_f64 }; | ||
|
||
println!("| Op | Default | AHash | Murmur | SeaHasher |"); | ||
println!("| --- | --- | --- | --- | --- |"); | ||
println!( | ||
"| build time | {:.3} | {:.3} | {:.3} | {:.3} |", | ||
as_ms(test_speed::<DefaultHasher>(keys).as_micros()), | ||
as_ms(test_speed::<AHasher>(keys).as_micros()), | ||
as_ms(test_speed::<MurmurHasher>(keys).as_micros()), | ||
as_ms(test_speed::<SeaHasher>(keys).as_micros()), | ||
); | ||
println!( | ||
"| std dev | {:.3} | {:.3} | {:.3} | {:.3} |", | ||
test_distribution::<DefaultHasher>(keys), | ||
test_distribution::<AHasher>(keys), | ||
test_distribution::<MurmurHasher>(keys), | ||
test_distribution::<SeaHasher>(keys), | ||
); | ||
|
||
println!( | ||
"| collision | {} | {} | {} | {} |", | ||
test_collisions::<DefaultHasher>(keys), | ||
test_collisions::<DefaultHasher>(keys), | ||
test_collisions::<DefaultHasher>(keys), | ||
test_collisions::<DefaultHasher>(keys), | ||
); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
use std::hash::Hasher; | ||
|
||
use byteorder::{ByteOrder, LittleEndian}; | ||
use rand::Rng; | ||
|
||
pub fn hash64(mut bytes: &[u8]) -> u64 { | ||
use murmur3::murmur3_x64_128; | ||
|
||
let mut out = [0; 16]; | ||
murmur3_x64_128(&mut bytes, 0, &mut out); | ||
// in most cases we run on little endian target | ||
LittleEndian::read_u64(&out[0..8]) | ||
} | ||
|
||
#[derive(Debug, Default)] | ||
pub struct MurmurHasher(u64); | ||
|
||
impl Hasher for MurmurHasher { | ||
fn finish(&self) -> u64 { | ||
self.0 | ||
} | ||
|
||
fn write(&mut self, bytes: &[u8]) { | ||
self.0 ^= hash64(bytes); | ||
} | ||
} | ||
|
||
pub fn gen_random_string(length: usize) -> String { | ||
let mut rng = rand::thread_rng(); | ||
let chars: Vec<char> = (0..length) | ||
.map(|_| rng.gen_range(0..36)) | ||
.map(|n| if n < 26 { (n + 97) as u8 } else { (n - 26 + 48) as u8 } as char) | ||
.collect(); | ||
chars.iter().collect() | ||
} |