diff --git a/README.md b/README.md index b895f29..23a7b08 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,37 @@ func main() { } ``` +Includes a helper function for using a `string` as the key instead of a `uint64`. This requires a hasher that converts the string into a key accepted by `Hash()`. One such hasher, which uses [CRC-64 (ECMA)](https://en.wikipedia.org/wiki/Cyclic_redundancy_check), is also included for convenience. + +```go +h := jump.HashString("127.0.0.1", 8, jump.CRC64) // h = 7 +``` + +If you want to use your own algorithm, you must implement the `Hasher` interface, which is a subset of the `hash.Hash64` interface available in the standard library. + +Here's an example of a custom `Hasher` that uses Google's [FarmHash](https://github.com/google/farmhash) algorithm (the successor of CityHash) to compute the final key. + +```go +type FarmHash struct { + buf bytes.Buffer +} + +func (f *FarmHash) Write(p []byte) (n int, err error) { + return f.buf.Write(p) +} + +func (f *FarmHash) Reset() { + f.buf.Reset() +} + +func (f *FarmHash) Sum64() uint64 { + // https://github.com/dgryski/go-farm + return farm.Hash64(f.buf.Bytes()) +} + +h := jump.HashString("127.0.0.1", 8, &FarmHash{}) // h = 5 +``` + ## License MIT diff --git a/jump.go b/jump.go index c379c4c..e63b579 100644 --- a/jump.go +++ b/jump.go @@ -1,5 +1,12 @@ package jump +import ( + "hash" + "hash/crc64" + "hash/fnv" + "io" +) + // Hash takes a 64 bit key and the number of buckets. It outputs a bucket // number in the range [0, buckets). func Hash(key uint64, buckets int32) int32 { @@ -17,3 +24,40 @@ func Hash(key uint64, buckets int32) int32 { return int32(b) } + +// Hasher is a subset of hash.Hash64 in the standard library. +type Hasher interface { + // Write (via the embedded io.Writer interface) adds more data to the + // running hash. + // It never returns an error. + io.Writer + + // Reset resets the Hasher to its initial state. 
+ Reset() + + // Sum64 returns the 64-bit hash of the bytes added so far (via io.Writer). + Sum64() uint64 +} + +// HashString takes a string as key instead of a uint64 and uses h to +// generate a key compatible with Hash(). It panics if writing to h fails. +func HashString(key string, buckets int32, h Hasher) int32 { + h.Reset() + _, err := io.WriteString(h, key) + if err != nil { + panic(err) + } + return Hash(h.Sum64(), buckets) +} + +// Hashers built from the standard library, ready for use with +// HashString(). Each is a single shared instance. +var ( + // CRC64 uses the 64-bit Cyclic Redundancy Check (CRC-64) with ECMA + // polynomial. + CRC64 hash.Hash64 = crc64.New(crc64.MakeTable(crc64.ECMA)) + // FNV1 uses the non-cryptographic hash function FNV-1. + FNV1 hash.Hash64 = fnv.New64() + // FNV1a uses the non-cryptographic hash function FNV-1a. + FNV1a hash.Hash64 = fnv.New64a() +) diff --git a/jump_test.go b/jump_test.go index 34460fc..5cd989e 100644 --- a/jump_test.go +++ b/jump_test.go @@ -2,16 +2,16 @@ package jump import ( "fmt" + "hash" + "strconv" "testing" ) -type testVector struct { - key uint64 - buckets int32 - output int32 -} - -var jumpTestVectors = []testVector{ +var jumpTestVectors = []struct { + key uint64 + buckets int32 + expected int32 +}{ {1, 1, 0}, {42, 57, 43}, {0xDEAD10CC, 1, 0}, @@ -25,8 +25,32 @@ var jumpTestVectors = []testVector{ func TestJumpHash(t *testing.T) { for _, v := range jumpTestVectors { h := Hash(v.key, v.buckets) - if h != v.output { - t.Errorf("expected bucket to be %d, got %d", v.output, h) + if h != v.expected { + t.Errorf("expected bucket for key=%d to be %d, got %d", + v.key, v.expected, h) + } + } +} + +var jumpStringTestVectors = []struct { + key string + buckets int32 + hasher hash.Hash64 + expected int32 +}{ + {"localhost", 10, CRC64, 6}, + {"ёлка", 10, CRC64, 6}, + {"ветер", 10, FNV1, 3}, + {"中国", 10, FNV1a, 5}, + {"日本", 10, CRC64, 6}, +} + +func TestJumpHashString(t *testing.T) { + for _, v := range jumpStringTestVectors { + h := HashString(v.key, v.buckets, v.hasher) + if h != 
v.expected { + t.Errorf("expected bucket for key=%s to be %d, got %d", + strconv.Quote(v.key), v.expected, h) } } } @@ -36,8 +60,34 @@ func ExampleHash() { // Output: 520 } +func ExampleHashString() { + fmt.Print(HashString("127.0.0.1", 8, CRC64)) + // Output: 7 +} + func BenchmarkHash(b *testing.B) { for i := 0; i < b.N; i++ { Hash(uint64(i), int32(i)) } } + +func BenchmarkHashStringCRC64(b *testing.B) { + s := "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat." + for i := 0; i < b.N; i++ { + HashString(s, int32(i), CRC64) + } +} + +func BenchmarkHashStringFNV1(b *testing.B) { + s := "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat." + for i := 0; i < b.N; i++ { + HashString(s, int32(i), FNV1) + } +} + +func BenchmarkHashStringFNV1a(b *testing.B) { + s := "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat." + for i := 0; i < b.N; i++ { + HashString(s, int32(i), FNV1a) + } +}