-
Notifications
You must be signed in to change notification settings - Fork 37
/
Copy pathcfilter.go
99 lines (82 loc) · 2.77 KB
/
cfilter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
package cfilter
import (
"hash"
"math/rand"
)
// CFilter represents a Cuckoo Filter, a probabilistic data store
// for approximated set membership queries.
type CFilter struct {
hashfn hash.Hash // Hash function used for fingerprinting
buckets []bucket // Buckets where fingerprints are stored
count uint // Total number of elements currently in the Filter
bSize uint8 // Bucket size
fpSize uint8 // Fingerprint size
size uint // Number of buckets in the filter
kicks uint // Maximum number of times we kick down items from buckets
}
// New returns a new CFilter object. It's Insert, Lookup, Delete and
// Size behave as their names suggest.
// Takes zero or more of the following option functions and applies them in
// order to the Filter:
// - cfilter.Size(uint) sets the number of buckets in the filter
// - cfilter.BucketSize(uint8) sets the size of each bucket
// - cfilter.FingerprintSize(uint8) sets the size of the fingerprint
// - cfilter.MaximumKicks(uint) sets the maximum number of bucket kicks
// - cfilter.HashFn(hash.Hash) sets the fingerprinting hashing function
func New(opts ...option) *CFilter {
cf := new(CFilter)
for _, opt := range opts {
opt(cf)
}
configure(cf)
cf.buckets = make([]bucket, cf.size, cf.size)
for i := range cf.buckets {
cf.buckets[i] = make([]fingerprint, cf.bSize, cf.bSize)
}
return cf
}
// Insert adds an element (in byte-array form) to the Cuckoo filter,
// returns true if successful and false otherwise.
func (cf *CFilter) Insert(item []byte) bool {
f := fprint(item, cf.fpSize, cf.hashfn)
j := hashfp(item) % cf.size
k := (j ^ hashfp(f)) % cf.size
if cf.buckets[j].insert(f) || cf.buckets[k].insert(f) {
cf.count++
return true
}
i := [2]uint{j, k}[rand.Intn(2)]
for n := uint(0); n < cf.kicks; n++ {
f = cf.buckets[i].swap(f)
i = (i ^ hashfp(f)) % cf.size
if cf.buckets[i].insert(f) {
cf.count++
return true
}
}
return false
}
// Lookup checks if an element (in byte-array form) exists in the Cuckoo
// Filter, returns true if found and false otherwise.
func (cf *CFilter) Lookup(item []byte) bool {
f := fprint(item, cf.fpSize, cf.hashfn)
j := hashfp(item) % cf.size
k := (j ^ hashfp(f)) % cf.size
return cf.buckets[j].lookup(f) || cf.buckets[k].lookup(f)
}
// Delete removes an element (in byte-array form) from the Cuckoo Filter,
// returns true if element existed prior and false otherwise.
func (cf *CFilter) Delete(item []byte) bool {
f := fprint(item, cf.fpSize, cf.hashfn)
j := hashfp(item) % cf.size
k := (j ^ hashfp(f)) % cf.size
if cf.buckets[j].remove(f) || cf.buckets[k].remove(f) {
cf.count--
return true
}
return false
}
// Count returns the total number of elements currently in the Cuckoo Filter.
func (cf *CFilter) Count() uint {
return cf.count
}