Skip to content

Commit

Permalink
Add helper function to use a string as key
Browse files Browse the repository at this point in the history
  • Loading branch information
Peter Renström committed Aug 20, 2015
1 parent 7705bc8 commit 5e49b75
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 9 deletions.
31 changes: 31 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,37 @@ func main() {
}
```

Includes a helper function for using a `string` as the key instead of a `uint64`. This requires a hasher that converts the string into a `uint64` key accepted by `Hash()`. A hasher that uses [CRC-64 (ECMA)](https://en.wikipedia.org/wiki/Cyclic_redundancy_check) is also included for convenience.

```go
h := jump.HashString("127.0.0.1", 8, jump.CRC64) // h = 7
```

If you want to use your own algorithm, you must implement the `Hasher` interface, which is a subset of the `hash.Hash64` interface available in the standard library.

Here's an example of a custom `Hasher` that uses Google's [FarmHash](https://github.com/google/farmhash) algorithm (the successor of CityHash) to compute the final key.

```go
// FarmHash is a custom Hasher. It collects every written byte in a buffer
// and hashes the whole buffer when Sum64 is called.
type FarmHash struct {
	data bytes.Buffer
}

// Write appends p to the buffered input.
func (fh *FarmHash) Write(p []byte) (n int, err error) {
	n, err = fh.data.Write(p)
	return n, err
}

// Reset throws away everything written so far.
func (fh *FarmHash) Reset() {
	fh.data.Reset()
}

// Sum64 hashes the buffered input with the FarmHash implementation from
// https://github.com/dgryski/go-farm
func (fh *FarmHash) Sum64() uint64 {
	buffered := fh.data.Bytes()
	return farm.Hash64(buffered)
}

h := jump.HashString("127.0.0.1", 8, &FarmHash{}) // h = 5
```

## License

MIT
44 changes: 44 additions & 0 deletions jump.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
package jump

import (
"hash"
"hash/crc64"
"hash/fnv"
"io"
)

// Hash takes a 64 bit key and the number of buckets. It outputs a bucket
// number in the range [0, buckets).
func Hash(key uint64, buckets int32) int32 {
Expand All @@ -17,3 +24,40 @@ func Hash(key uint64, buckets int32) int32 {

return int32(b)
}

// Hasher is the part of the standard library's hash.Hash64 interface that
// HashString needs in order to turn a string into a 64 bit key.
type Hasher interface {
	// Writer supplies Write, which feeds more bytes into the running hash.
	// Implementations are expected to never return an error from Write.
	io.Writer

	// Sum64 returns the hash of the bytes written so far.
	Sum64() uint64

	// Reset puts the Hasher back into its initial state.
	Reset()
}

// HashString is the string-keyed counterpart of Hash(). It resets h, writes
// key into it and passes the resulting 64 bit sum to Hash() together with
// buckets, returning the bucket number that Hash() produces.
//
// A Hasher is not supposed to fail on Write; if it does anyway, HashString
// panics with the returned error.
func HashString(key string, buckets int32, h Hasher) int32 {
	// Start from a clean state so earlier writes to h cannot leak into the key.
	h.Reset()
	if _, err := io.WriteString(h, key); err != nil {
		panic(err)
	}
	sum := h.Sum64()
	return Hash(sum, buckets)
}

// Ready-made hashers from the standard library for use with HashString().
//
// Each variable holds one shared hash instance, and HashString() resets and
// writes to the hasher it is given, so concurrent callers should not share
// the same variable without their own synchronization.

// CRC64 computes the 64-bit Cyclic Redundancy Check (CRC-64) using the ECMA
// polynomial.
var CRC64 hash.Hash64 = crc64.New(crc64.MakeTable(crc64.ECMA))

// FNV1 computes the non-cryptographic 64-bit FNV-1 hash.
var FNV1 hash.Hash64 = fnv.New64()

// FNV1a computes the non-cryptographic 64-bit FNV-1a hash.
var FNV1a hash.Hash64 = fnv.New64a()
68 changes: 59 additions & 9 deletions jump_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@ package jump

import (
"fmt"
"hash"
"strconv"
"testing"
)

type testVector struct {
key uint64
buckets int32
output int32
}

var jumpTestVectors = []testVector{
var jumpTestVectors = []struct {
key uint64
buckets int32
expected int32
}{
{1, 1, 0},
{42, 57, 43},
{0xDEAD10CC, 1, 0},
Expand All @@ -25,8 +25,32 @@ var jumpTestVectors = []testVector{
func TestJumpHash(t *testing.T) {
for _, v := range jumpTestVectors {
h := Hash(v.key, v.buckets)
if h != v.output {
t.Errorf("expected bucket to be %d, got %d", v.output, h)
if h != v.expected {
t.Errorf("expected bucket for key=%d to be %d, got %d",
v.key, v.expected, h)
}
}
}

// jumpStringTestVectors lists string keys together with the hasher to run
// them through and the bucket HashString is expected to return.
var jumpStringTestVectors = []struct {
	key      string
	buckets  int32
	hasher   hash.Hash64
	expected int32
}{
	{key: "localhost", buckets: 10, hasher: CRC64, expected: 6},
	{key: "ёлка", buckets: 10, hasher: CRC64, expected: 6},
	{key: "ветер", buckets: 10, hasher: FNV1, expected: 3},
	{key: "中国", buckets: 10, hasher: FNV1a, expected: 5},
	{key: "日本", buckets: 10, hasher: CRC64, expected: 6},
}

// TestJumpHashString runs every entry of jumpStringTestVectors through
// HashString and reports each entry whose bucket differs from the expected
// one.
func TestJumpHashString(t *testing.T) {
	for i := range jumpStringTestVectors {
		tc := jumpStringTestVectors[i]
		got := HashString(tc.key, tc.buckets, tc.hasher)
		if got == tc.expected {
			continue
		}
		// Quote the key so non-ASCII and empty keys stay readable in the log.
		t.Errorf("expected bucket for key=%s to be %d, got %d",
			strconv.Quote(tc.key), tc.expected, got)
	}
}
Expand All @@ -36,8 +60,34 @@ func ExampleHash() {
// Output: 520
}

// ExampleHashString prints the bucket chosen for an IP address string among
// 8 buckets when the CRC64 hasher is used.
func ExampleHashString() {
	bucket := HashString("127.0.0.1", 8, CRC64)
	fmt.Print(bucket)
	// Output: 7
}

// BenchmarkHash measures Hash. The loop counter is used both as the key and
// as the number of buckets, so the bucket count grows with every iteration.
func BenchmarkHash(b *testing.B) {
	rounds := b.N
	for n := 0; n < rounds; n++ {
		Hash(uint64(n), int32(n))
	}
}

// BenchmarkHashStringCRC64 measures HashString with the CRC64 hasher on a
// fixed sentence, using the loop counter as the number of buckets.
func BenchmarkHashStringCRC64(b *testing.B) {
	const text = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat."
	rounds := b.N
	for n := 0; n < rounds; n++ {
		HashString(text, int32(n), CRC64)
	}
}

// BenchmarkHashStringFNV1 measures HashString with the FNV1 hasher on a
// fixed sentence, using the loop counter as the number of buckets.
func BenchmarkHashStringFNV1(b *testing.B) {
	const text = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat."
	rounds := b.N
	for n := 0; n < rounds; n++ {
		HashString(text, int32(n), FNV1)
	}
}

// BenchmarkHashStringFNV1a measures HashString with the FNV1a hasher on a
// fixed sentence, using the loop counter as the number of buckets.
func BenchmarkHashStringFNV1a(b *testing.B) {
	const text = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat."
	rounds := b.N
	for n := 0; n < rounds; n++ {
		HashString(text, int32(n), FNV1a)
	}
}

0 comments on commit 5e49b75

Please sign in to comment.