Skip to content

Commit

Permalink
Add mum-hash version 3
Browse files Browse the repository at this point in the history
  • Loading branch information
MaskRay committed Jun 23, 2024
1 parent 5efdfb7 commit 54c873b
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 57 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,7 @@ add_library(
${T1HA_SRC}
${SHA_SRC}
mum.cc
mum_v3.cc
jody_hash32.c
jody_hash64.c
${TSIP_SRC}
Expand Down
1 change: 1 addition & 0 deletions Hashes.h
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,7 @@ inline void mum_low_test ( const void * key, int len, uint32_t seed, void * out
*(uint32_t*)out = (uint32_t)result;
}

void mum_v3_hash_test(const void * key, int len, uint32_t seed, void * out);

//-----------------------------------------------------------------------------

Expand Down
2 changes: 2 additions & 0 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,8 @@ HashInfo g_hashes[] =
{0x0} /* !! and many more. too many */ },
{ mum_low_test, 32, MUMLOW_VERIF,"MUMlow", "github.com/vnmakarov/mum-hash", GOOD,
{0x11fb062a, 0x3ca9411b, 0x3edd9a7d, 0x41f18860, 0x691457ba} /* !! */ },
{ mum_v3_hash_test, 64, MUM_VERIF, "MUMv3", "github.com/vnmakarov/mum-hash", POOR,
{0x0, 0x8bd72b8c}},
{ xmsx32_test, 32, 0x6B54E1D4, "xmsx32", "XMSX-32", GOOD, { 0x1505929f, 0xf0a6a74a } },
#if defined(__GNUC__) && UINT_MAX != ULONG_MAX
#define MIR_VERIF 0x00A393C8
Expand Down
1 change: 1 addition & 0 deletions mum.cc
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#define MUM_V1
#include "mum.h"

void mum_hash_test(const void *key, int len, uint32_t seed, void *out) {
Expand Down
116 changes: 59 additions & 57 deletions mum.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/* Copyright (c) 2016 Vladimir Makarov <vmakarov@gcc.gnu.org>
/* Copyright (c) 2016, 2017, 2018
Vladimir Makarov <vmakarov@gcc.gnu.org>
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
Expand Down Expand Up @@ -58,11 +59,7 @@ typedef unsigned __int64 uint64_t;

#ifdef __GNUC__
#define _MUM_ATTRIBUTE_UNUSED __attribute__((unused))
# ifdef __clang__
# define _MUM_OPTIMIZE(opts)
# else
# define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
# endif
#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
#define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
#else
#define _MUM_ATTRIBUTE_UNUSED
Expand All @@ -83,10 +80,6 @@ typedef unsigned __int64 uint64_t;
#endif
#endif

#if defined(__GNUC__) && ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 9) || (__GNUC__ > 4))
#define _MUM_FRESH_GCC
#endif

/* Here are different primes randomly generated with the equal
probability of their bit values. They are used to randomize input
values. */
Expand All @@ -97,7 +90,7 @@ static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL;
static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL;
static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL;
static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL;

static uint64_t _mum_primes [] = {
0X9ebdcae10d981691, 0X32b9b9b97a27ac7d, 0X29b5584d83d35bbd, 0X4b04e0e61401255f,
0X25e8f7b1f1c9d027, 0X80d4c8c000f3e881, 0Xbd1255431904b9dd, 0X8a3bd4485eee6d81,
Expand All @@ -116,7 +109,7 @@ _mum (uint64_t v, uint64_t p) {
multiplication. If we use a generic code we actually call a
function doing 128x128->128 bit multiplication. The function is
very slow. */
lo = v * p, hi;
lo = v * p;
asm ("umulh %0, %1, %2" : "=r" (hi) : "r" (v), "r" (p));
#else
__uint128_t r = (__uint128_t) v * (__uint128_t) p;
Expand All @@ -133,7 +126,7 @@ _mum (uint64_t v, uint64_t p) {
uint64_t rm_1 = hp * lv;
uint64_t rl = lv * lp;
uint64_t t, carry = 0;

/* We could ignore a carry bit here if we did not care about the
same hash for 32-bit and 64-bit targets. */
t = rl + (rm_0 << 32);
Expand Down Expand Up @@ -190,6 +183,17 @@ _mum_le32 (uint32_t v) {
#endif
}

/* Widen the 16-bit value V to 64 bits for hashing.  V is returned
   unchanged on little-endian targets and whenever
   MUM_TARGET_INDEPENDENT_HASH is not defined; on big-endian targets
   with MUM_TARGET_INDEPENDENT_HASH defined its two bytes are
   swapped.  */
static inline uint64_t
_mum_le16 (uint16_t v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
  return v;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  /* Exchange the high and the low byte.  */
  const uint64_t hi_byte = v >> 8;
  const uint64_t lo_byte = v & 0xff;

  return hi_byte | (lo_byte << 8);
#else
#error "Unknown endianess"
#endif
}

/* Macro defining how many times the most nested loop in
_mum_hash_aligned will be unrolled by the compiler (although it can
make an own decision:). Use only a constant here to help a
Expand All @@ -203,8 +207,10 @@ _mum_le32 (uint32_t v) {
#define _MUM_UNROLL_FACTOR_POWER 3
#elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
#define _MUM_UNROLL_FACTOR_POWER 4
#else
#elif defined (MUM_V1) || defined (MUM_V2)
#define _MUM_UNROLL_FACTOR_POWER 2
#else
#define _MUM_UNROLL_FACTOR_POWER 3
#endif
#endif

Expand All @@ -216,21 +222,35 @@ _mum_le32 (uint32_t v) {

#define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER)

/* Rotate V left by SH bits and return the result.

   SH is reduced modulo 64 before use, so a count of 0 (or any
   multiple of 64) returns V unchanged.  The naive form
   `v << sh | v >> (64 - sh)' shifts a 64-bit value by 64 when SH is
   0, which is undefined behavior in C; masking both counts keeps
   every shift in the range 0..63 while giving the same result for
   counts 1..63.  */
static inline uint64_t _mum_rotl (uint64_t v, int sh) {
  unsigned int left = (unsigned int) sh & 63u;
  /* (64 - left) & 63 maps a left count of 0 to a right count of 0.  */
  unsigned int right = (64u - left) & 63u;

  return (v << left) | (v >> right);
}

static inline uint64_t _MUM_OPTIMIZE("unroll-loops")
_mum_hash_aligned (uint64_t start, const void* key, size_t len) {
_mum_hash_aligned (uint64_t start, const void *key, size_t len) {
uint64_t result = start;
const unsigned char* str = (const unsigned char*) key;
const unsigned char *str = (const unsigned char *) key;
uint64_t u64;
size_t i;
size_t n;


#ifndef MUM_V2
result = _mum (result, _mum_block_start_prime);
#endif
while (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) {
/* This loop could be vectorized when we have vector insns for
64x64->128-bit multiplication. AVX2 currently only have a
vector insn for 4 32x32->64-bit multiplication. */
64x64->128-bit multiplication. AVX2 currently only have vector
insns for 4 32x32->64-bit multiplication and for 1
64x64->128-bit multiplication (pclmulqdq). */
#if defined (MUM_V1) || defined (MUM_V2)
for (i = 0; i < _MUM_UNROLL_FACTOR; i++)
result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
#else
for (i = 0; i < _MUM_UNROLL_FACTOR; i += 2)
result ^= _mum (_mum_le (((uint64_t *) str)[i]) ^ _mum_primes[i],
_mum_le (((uint64_t *) str)[i + 1]) ^ _mum_primes[i + 1]);
#endif
len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t);
str += _MUM_UNROLL_FACTOR * sizeof (uint64_t);
/* We will use the same prime numbers on the next iterations --
Expand All @@ -244,14 +264,12 @@ _mum_hash_aligned (uint64_t start, const void* key, size_t len) {
switch (len) {
case 7:
u64 = _mum_le32 (*(uint32_t *) str);
u64 |= (uint64_t) str[4] << 32;
u64 |= (uint64_t) str[5] << 40;
u64 |= _mum_le16 (*(uint16_t *) (str + 4)) << 32;
u64 |= (uint64_t) str[6] << 48;
return result ^ _mum (u64, _mum_tail_prime);
case 6:
u64 = _mum_le32 (*(uint32_t *) str);
u64 |= (uint64_t) str[4] << 32;
u64 |= (uint64_t) str[5] << 40;
u64 |= _mum_le16 (*(uint16_t *) (str + 4)) << 32;
return result ^ _mum (u64, _mum_tail_prime);
case 5:
u64 = _mum_le32 (*(uint32_t *) str);
Expand All @@ -261,13 +279,11 @@ _mum_hash_aligned (uint64_t start, const void* key, size_t len) {
u64 = _mum_le32 (*(uint32_t *) str);
return result ^ _mum (u64, _mum_tail_prime);
case 3:
u64 = str[0];
u64 |= (uint64_t) str[1] << 8;
u64 = _mum_le16 (*(uint16_t *) str);
u64 |= (uint64_t) str[2] << 16;
return result ^ _mum (u64, _mum_tail_prime);
case 2:
u64 = str[0];
u64 |= (uint64_t) str[1] << 8;
u64 = _mum_le16 (*(uint16_t *) str);
return result ^ _mum (u64, _mum_tail_prime);
case 1:
u64 = str[0];
Expand All @@ -279,23 +295,18 @@ _mum_hash_aligned (uint64_t start, const void* key, size_t len) {
/* Final randomization of H.  The mixing depends on the selected
   algorithm version:
     MUM_V1  - two xor-folds of _mum with _mum_finish_prime1 and then
               _mum_finish_prime2;
     MUM_V2  - xor with H rotated left by 33, then one xor-fold of
               _mum with _mum_finish_prime1;
     default - a single _mum of H with itself.  */
static inline uint64_t
_mum_final (uint64_t h) {
#if defined (MUM_V1)
  uint64_t mixed = h ^ _mum (h, _mum_finish_prime1);

  return mixed ^ _mum (mixed, _mum_finish_prime2);
#elif defined (MUM_V2)
  uint64_t mixed = h ^ _mum_rotl (h, 33);

  return mixed ^ _mum (mixed, _mum_finish_prime1);
#else
  return _mum (h, h);
#endif
}

#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)

/* We want to use AVX2 insn MULX instead of generic x86-64 MULQ where
it is possible. Although on modern Intel processors MULQ takes
3-cycles vs. 4 for MULX, MULX permits more freedom in insn
scheduling as it uses less fixed registers.

NOTE(review): MULX is formally part of the BMI2 extension rather than
AVX2; the caller's run-time check tests "avx2" -- confirm that this is
the intended proxy for MULX availability.

This variant hashes LEN bytes at KEY directly with _mum_hash_aligned,
starting from SEED + LEN, and finishes with _mum_final.  It is
compiled for the "arch=haswell" target via _MUM_TARGET.  */
static inline uint64_t _MUM_TARGET("arch=haswell")
_mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
return _mum_final (_mum_hash_aligned (seed + len, key, len));
}
}
#endif

#ifndef _MUM_UNALIGNED_ACCESS
#if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \
|| defined(__s390__) || defined(__m32c__) || defined(cris) \
Expand All @@ -320,16 +331,16 @@ _mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {

static inline uint64_t
#if defined(__x86_64__)
//_MUM_TARGET("inline-all-stringops")
_MUM_TARGET("inline-all-stringops")
#endif
_mum_hash_default (const void *key, size_t len, uint64_t seed) {
uint64_t result;
const unsigned char *str = (const unsigned char *) key;
size_t block_len;
uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)];

result = seed + len;
if (_MUM_UNALIGNED_ACCESS || ((size_t) str & 0x7) == 0)
if (((size_t) str & 0x7) == 0)
result = _mum_hash_aligned (result, key, len);
else {
while (len != 0) {
Expand All @@ -347,7 +358,7 @@ static inline uint64_t
_mum_next_factor (void) {
uint64_t start = 0;
int i;

for (i = 0; i < 8; i++)
start = (start << 8) | rand() % 256;
return start;
Expand Down Expand Up @@ -380,8 +391,7 @@ mum_hash_init (uint64_t seed) {

/* Process data KEY with the state H and return the updated state. */
static inline uint64_t
mum_hash_step (uint64_t h, uint64_t key)
{
mum_hash_step (uint64_t h, uint64_t key) {
return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime);
}

Expand All @@ -402,19 +412,11 @@ mum_hash64 (uint64_t key, uint64_t seed) {
target endianness and the unroll factor. */
static inline uint64_t
mum_hash (const void *key, size_t len, uint64_t seed) {
/* NOTE(review): this text appears to come from a diff view, so the
   _MUM_FRESH_GCC section and the _MUM_UNALIGNED_ACCESS section below
   may belong to different revisions -- confirm against the real
   mum.h before relying on both being present.  */
#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
/* Cached result of the CPU probe: 0 = not probed yet, 1 = "avx2"
   reported as supported, -1 = not supported.  */
static int avx2_support = 0;

if (avx2_support > 0)
return _mum_hash_avx2 (key, len, seed);
else if (! avx2_support) {
/* First call: probe the CPU once and remember the answer.  */
__builtin_cpu_init ();
avx2_support = __builtin_cpu_supports ("avx2") ? 1 : -1;
if (avx2_support > 0)
return _mum_hash_avx2 (key, len, seed);
}
#endif
#if _MUM_UNALIGNED_ACCESS
/* Hash the key in place, starting from SEED + LEN.  */
return _mum_final (_mum_hash_aligned (seed + len, key, len));
#else
/* Otherwise delegate to _mum_hash_default.  */
return _mum_hash_default (key, len, seed);
#endif
}

#endif
5 changes: 5 additions & 0 deletions mum_v3.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#include "mum.h"

// Test-harness adapter for MUM hash: hashes the `len` bytes at `key`
// with mum_hash() using `seed`, and stores the 64-bit result at `out`.
// `out` is written through a uint64_t pointer, so it must refer to
// storage that can hold a uint64_t.
void mum_v3_hash_test(const void *key, int len, uint32_t seed, void *out) {
  uint64_t *result = (uint64_t *)out;
  *result = mum_hash(key, len, seed);
}

0 comments on commit 54c873b

Please sign in to comment.