From 8d617d72e747e467a30961bb8118405c513be801 Mon Sep 17 00:00:00 2001 From: "Sergey \"Shnatsel\" Davidoff" Date: Tue, 14 Jan 2020 20:48:52 +0100 Subject: [PATCH 1/4] Drop byteorder dependency --- Cargo.toml | 3 +-- lib.rs | 14 ++++++-------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3977102..141f6ba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,8 +19,7 @@ name = "fxhash" path = "bench.rs" [dependencies] -byteorder = "1.0.0" [dev-dependencies] seahash = "3.0.5" -fnv = "1.0.5" \ No newline at end of file +fnv = "1.0.5" diff --git a/lib.rs b/lib.rs index 1abff27..b4b4253 100644 --- a/lib.rs +++ b/lib.rs @@ -29,9 +29,7 @@ use std::collections::{HashMap, HashSet}; use std::default::Default; use std::hash::{BuildHasherDefault, Hash, Hasher}; use std::ops::BitXor; - -extern crate byteorder; -use byteorder::{ByteOrder, NativeEndian}; +use std::convert::TryInto; /// A builder for default Fx hashers. pub type FxBuildHasher = BuildHasherDefault; @@ -79,12 +77,12 @@ impl_hash_word!(usize = SEED, u32 = SEED32, u64 = SEED64); #[inline] fn write32(mut hash: u32, mut bytes: &[u8]) -> u32 { while bytes.len() >= 4 { - hash.hash_word(NativeEndian::read_u32(bytes)); + hash.hash_word(u32::from_ne_bytes(bytes[..4].try_into().unwrap())); bytes = &bytes[4..]; } if bytes.len() >= 2 { - hash.hash_word(u32::from(NativeEndian::read_u16(bytes))); + hash.hash_word(u32::from(u16::from_ne_bytes(bytes[..2].try_into().unwrap()))); bytes = &bytes[2..]; } @@ -98,17 +96,17 @@ fn write32(mut hash: u32, mut bytes: &[u8]) -> u32 { #[inline] fn write64(mut hash: u64, mut bytes: &[u8]) -> u64 { while bytes.len() >= 8 { - hash.hash_word(NativeEndian::read_u64(bytes)); + hash.hash_word(u64::from_ne_bytes(bytes[..8].try_into().unwrap())); bytes = &bytes[8..]; } if bytes.len() >= 4 { - hash.hash_word(u64::from(NativeEndian::read_u32(bytes))); + hash.hash_word(u64::from(u32::from_ne_bytes(bytes[..4].try_into().unwrap()))); bytes = &bytes[4..]; } if bytes.len() >= 2 { - hash.hash_word(u64::from(NativeEndian::read_u16(bytes))); + hash.hash_word(u64::from(u16::from_ne_bytes(bytes[..2].try_into().unwrap()))); bytes = &bytes[2..]; } From 577f7c41d5d800df3eb2bedd7c85a6c8c08c80a6 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Thu, 13 Jan 2022 00:13:29 -0800 Subject: [PATCH 2/4] Make core hashing functions const --- lib.rs | 140 +++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 101 insertions(+), 39 deletions(-) diff --git a/lib.rs b/lib.rs index b4b4253..febbd89 100644 --- a/lib.rs +++ b/lib.rs @@ -28,8 +28,6 @@ use std::collections::{HashMap, HashSet}; use std::default::Default; use std::hash::{BuildHasherDefault, Hash, Hasher}; -use std::ops::BitXor; -use std::convert::TryInto; /// A builder for default Fx hashers. pub type FxBuildHasher = BuildHasherDefault; @@ -59,73 +57,137 @@ trait HashWord { fn hash_word(&mut self, Self); } -macro_rules! impl_hash_word { - ($($ty:ty = $key:ident),* $(,)*) => ( - $( - impl HashWord for $ty { - #[inline] - fn hash_word(&mut self, word: Self) { - *self = self.rotate_left(ROTATE).bitxor(word).wrapping_mul($key); - } - } - )* - ) +#[inline] +const fn hash_word_32(mut hash: u32, word: u32) -> u32 { + hash = hash.rotate_left(ROTATE); + hash = hash ^ word; + hash = hash.wrapping_mul(SEED32); + hash } -impl_hash_word!(usize = SEED, u32 = SEED32, u64 = SEED64); +impl HashWord for u32 { + #[inline] + fn hash_word(&mut self, word: Self) { + *self = hash_word_32(*self, word); + } +} #[inline] -fn write32(mut hash: u32, mut bytes: &[u8]) -> u32 { - while bytes.len() >= 4 { - hash.hash_word(u32::from_ne_bytes(bytes[..4].try_into().unwrap())); - bytes = &bytes[4..]; +const fn write32(mut hash: u32, bytes: &[u8]) -> u32 { + let mut cursor = 0; + + while bytes.len() - cursor >= 4 { + let word = u32::from_ne_bytes([ + bytes[cursor], + bytes[cursor+1], + bytes[cursor+2], + bytes[cursor+3] + ]); + hash = hash_word_32(hash, word); + cursor += 4; } - if bytes.len() >= 2 { - hash.hash_word(u32::from(u16::from_ne_bytes(bytes[..2].try_into().unwrap()))); - bytes = &bytes[2..]; + if bytes.len() - cursor >= 2 { + let word = u16::from_ne_bytes([ + bytes[cursor], + bytes[cursor+1] + ]); + hash = hash_word_32(hash, word as u32); + cursor += 2; } - if let Some(&byte) = bytes.first() { - hash.hash_word(u32::from(byte)); + if bytes.len() - cursor >= 1 { + hash = hash_word_32(hash, bytes[cursor] as u32); } hash } #[inline] -fn write64(mut hash: u64, mut bytes: &[u8]) -> u64 { - while bytes.len() >= 8 { - hash.hash_word(u64::from_ne_bytes(bytes[..8].try_into().unwrap())); - bytes = &bytes[8..]; - } +const fn hash_word_64(mut hash: u64, word: u64) -> u64 { + hash = hash.rotate_left(ROTATE); + hash = hash ^ word; + hash = hash.wrapping_mul(SEED64); + hash +} - if bytes.len() >= 4 { - hash.hash_word(u64::from(u32::from_ne_bytes(bytes[..4].try_into().unwrap()))); - bytes = &bytes[4..]; +impl HashWord for u64 { + #[inline] + fn hash_word(&mut self, word: Self) { + *self = hash_word_64(*self, word); } +} - if bytes.len() >= 2 { - hash.hash_word(u64::from(u16::from_ne_bytes(bytes[..2].try_into().unwrap()))); - bytes = &bytes[2..]; +#[inline] +const fn write64(mut hash: u64, bytes: &[u8]) -> u64 { + let mut cursor = 0; + + while bytes.len() - cursor >= 8 { + let word = u64::from_ne_bytes([ + bytes[cursor], + bytes[cursor+1], + bytes[cursor+2], + bytes[cursor+3], + bytes[cursor+4], + bytes[cursor+5], + bytes[cursor+6], + bytes[cursor+7], + ]); + hash = hash_word_64(hash, word); + cursor += 8; + } + + while bytes.len() - cursor >= 4 { + let word = u32::from_ne_bytes([ + bytes[cursor], + bytes[cursor+1], + bytes[cursor+2], + bytes[cursor+3] + ]); + hash = hash_word_64(hash, word as u64); + cursor += 4; + } + + if bytes.len() - cursor >= 2 { + let word = u16::from_ne_bytes([ + bytes[cursor], + bytes[cursor+1] + ]); + hash = hash_word_64(hash, word as u64); + cursor += 2; + } + + if bytes.len() - cursor >= 1 { + hash = hash_word_64(hash, bytes[cursor] as u64); } - if let Some(&byte) = bytes.first() { - hash.hash_word(u64::from(byte)); - } + hash +} +#[inline] +const fn hash_word(mut hash: usize, word: usize) -> usize { + hash = hash.rotate_left(ROTATE); + hash = hash ^ word; + hash = hash.wrapping_mul(SEED); hash } +impl HashWord for usize { + #[inline] + fn hash_word(&mut self, word: Self) { + *self = hash_word(*self, word); + } +} + #[inline] #[cfg(target_pointer_width = "32")] -fn write(hash: usize, bytes: &[u8]) -> usize { +const fn write(hash: usize, bytes: &[u8]) -> usize { write32(hash as u32, bytes) as usize } #[inline] #[cfg(target_pointer_width = "64")] -fn write(hash: usize, bytes: &[u8]) -> usize { +const fn write(hash: usize, bytes: &[u8]) -> usize { write64(hash as u64, bytes) as usize } From 5181ad6ca2e2d7644bd3f0efa7fe8b36871ea0af Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Thu, 13 Jan 2022 00:17:24 -0800 Subject: [PATCH 3/4] Add public const APIs --- lib.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib.rs b/lib.rs index febbd89..9fdcc3a 100644 --- a/lib.rs +++ b/lib.rs @@ -397,3 +397,13 @@ pub fn hash(v: &T) -> usize { v.hash(&mut state); state.finish() as usize } + +/// A const function for when you need a 32-bit hash of a byte array. +pub const fn hash32_bytes(v: &[u8]) -> u32 { + write32(0, v) +} + +/// A const function for when you need a 64-bit hash of a byte array. +pub const fn hash64_bytes(v: &[u8]) -> u32 { + write32(0, v) +} From 48091552c771aea99cdc0b39aa74f0499f8b9eea Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Wed, 3 Jan 2024 12:33:44 -0800 Subject: [PATCH 4/4] Update lib.rs --- lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib.rs b/lib.rs index 9fdcc3a..cd3b2ff 100644 --- a/lib.rs +++ b/lib.rs @@ -404,6 +404,6 @@ pub const fn hash32_bytes(v: &[u8]) -> u32 { } /// A const function for when you need a 64-bit hash of a byte array. -pub const fn hash64_bytes(v: &[u8]) -> u32 { - write32(0, v) +pub const fn hash64_bytes(v: &[u8]) -> u64 { + write64(0, v) }