headerfs: add sub buckets to index
With this commit we store the index keys (hash->height) in sub-
buckets with the first two bytes of the hash as the bucket name.
Storing a large number of keys in the same bucket has a large
impact on memory usage in bbolt if small-ish batch sizes are
used (the b+ tree needs to be copied with every resize operation).
Using sub buckets is a compromise between memory usage and
access time. 2 bytes (=max 65536 sub buckets) seems to be the
sweet spot (-50% memory usage, +30% access time). We take the
bytes from the beginning of the byte-serialized hash since all
Bitcoin hashes are reverse-serialized when displayed as
strings. That means the leading zeroes of a block hash
are actually at the end of the byte slice.
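
For readers unfamiliar with bucket nesting in bbolt, here is a minimal, self-contained sketch of the same layout written against raw bbolt rather than the walletdb wrappers used in the diff below; the bucket name "header-index", the database file name and the dummy hash are made up for illustration:

package main

import (
	"encoding/binary"
	"fmt"
	"log"

	bolt "go.etcd.io/bbolt"
)

// Mirrors the constant introduced by this commit: the first two bytes of the
// byte-serialized hash name the sub bucket.
const numSubBucketBytes = 2

func main() {
	// Hypothetical database file and root bucket name, for illustration only.
	db, err := bolt.Open("index-demo.db", 0600, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// A dummy 32-byte block hash; in a real byte-serialized hash the
	// displayed leading zeroes sit at the end of this slice, so the first
	// two bytes are well distributed across sub buckets.
	var hash [32]byte
	hash[0], hash[1] = 0xab, 0xcd
	height := uint32(700000)

	// Write path: root bucket -> 2-byte sub bucket -> hash => height.
	err = db.Update(func(tx *bolt.Tx) error {
		root, err := tx.CreateBucketIfNotExists([]byte("header-index"))
		if err != nil {
			return err
		}
		sub, err := root.CreateBucketIfNotExists(hash[0:numSubBucketBytes])
		if err != nil {
			return err
		}
		var heightBytes [4]byte
		binary.BigEndian.PutUint32(heightBytes[:], height)
		return sub.Put(hash[:], heightBytes[:])
	})
	if err != nil {
		log.Fatal(err)
	}

	// Read path: the lookup has to descend into the same sub bucket first.
	err = db.View(func(tx *bolt.Tx) error {
		sub := tx.Bucket([]byte("header-index")).Bucket(hash[0:numSubBucketBytes])
		if sub == nil {
			return fmt.Errorf("hash not found")
		}
		heightBytes := sub.Get(hash[:])
		if heightBytes == nil {
			return fmt.Errorf("hash not found")
		}
		fmt.Println("height:", binary.BigEndian.Uint32(heightBytes))
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}

The extra bucket level keeps each b+ tree small, so the copy that bbolt performs when a bucket grows touches far less data; the cost is one additional bucket lookup on every read.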
guggero committed Mar 1, 2021
1 parent d9c1bf6 commit aa2c1b0
Showing 2 changed files with 70 additions and 7 deletions.
50 changes: 46 additions & 4 deletions headerfs/index.go
@@ -65,6 +65,19 @@ const (
 	// RegularFilterHeaderSize is the size in bytes of the RegularFilter
 	// header type.
 	RegularFilterHeaderSize = 32
+
+	// numSubBucketBytes is the number of bytes of a hash that's used as a
+	// sub bucket to store the index keys (hash->height) in. Storing a large
+	// number of keys in the same bucket has a large impact on memory usage
+	// in bbolt if small-ish batch sizes are used (the b+ tree needs to be
+	// copied with every resize operation). Using sub buckets is a
+	// compromise between memory usage and access time. 2 bytes (=max 65536
+	// sub buckets) seems to be the sweet spot (-50% memory usage,
+	// +30% access time). We take the bytes from the beginning of the byte-
+	// serialized hash since all Bitcoin hashes are reverse-serialized when
+	// displayed as strings. That means the leading zeroes of a block hash
+	// are actually at the end of the byte slice.
+	numSubBucketBytes = 2
 )

 // headerIndex is an index stored within the database that allows for random
@@ -171,9 +184,19 @@ func (h *headerIndex) addHeaders(batch headerBatch) error {
 		)

 		for _, header := range batch {
+			// Place key in a sub bucket to improve bbolt memory
+			// behavior at the expense of a slight increase in
+			// access latency.
+			subBucket, err := rootBucket.CreateBucketIfNotExists(
+				header.hash[0:numSubBucketBytes],
+			)
+			if err != nil {
+				return err
+			}
+
 			var heightBytes [4]byte
 			binary.BigEndian.PutUint32(heightBytes[:], header.height)
-			err := rootBucket.Put(header.hash[:], heightBytes[:])
+			err = subBucket.Put(header.hash[:], heightBytes[:])
 			if err != nil {
 				return err
 			}
@@ -216,7 +239,12 @@ func (h *headerIndex) heightFromHashWithTx(tx walletdb.ReadTx,

 	rootBucket := tx.ReadBucket(indexBucket)

-	heightBytes := rootBucket.Get(hash[:])
+	subBucket := rootBucket.NestedReadBucket(hash[0:numSubBucketBytes])
+	if subBucket == nil {
+		return 0, ErrHashNotFound
+	}
+
+	heightBytes := subBucket.Get(hash[:])
 	if heightBytes == nil {
 		// If the hash wasn't found, then we don't know of this hash
 		// within the index.
@@ -270,7 +298,14 @@ func (h *headerIndex) chainTipWithTx(tx walletdb.ReadTx) (*chainhash.Hash,
 	// fetch the hash for this tip, then using that we'll fetch the height
 	// that corresponds to that hash.
 	tipHashBytes := rootBucket.Get(tipKey)
-	tipHeightBytes := rootBucket.Get(tipHashBytes)
+
+	subBucket := rootBucket.NestedReadBucket(
+		tipHashBytes[0:numSubBucketBytes],
+	)
+	if subBucket == nil {
+		return nil, 0, ErrHashNotFound
+	}
+	tipHeightBytes := subBucket.Get(tipHashBytes)
 	if len(tipHeightBytes) != 4 {
 		return nil, 0, ErrHeightNotFound
 	}
@@ -312,7 +347,14 @@ func (h *headerIndex) truncateIndex(newTip *chainhash.Hash, delete bool) error {
 		// being rolled back.
 		if delete {
 			prevTipHash := rootBucket.Get(tipKey)
-			if err := rootBucket.Delete(prevTipHash); err != nil {
+
+			subBucket := rootBucket.NestedReadWriteBucket(
+				prevTipHash[0:numSubBucketBytes],
+			)
+			if subBucket == nil {
+				return ErrHashNotFound
+			}
+			if err := subBucket.Delete(prevTipHash); err != nil {
 				return err
 			}
 		}
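As a quick aside on the byte order the new numSubBucketBytes comment relies on, the sketch below (assuming the btcd chainhash package, with the genesis block hash purely as an example) shows that the zeroes displayed at the front of a block hash string sit at the end of the byte-serialized slice, so the first two bytes make a well-distributed sub bucket name:

package main

import (
	"fmt"

	"github.com/btcsuite/btcd/chaincfg/chainhash"
)

func main() {
	// The Bitcoin genesis block hash as it is usually displayed.
	h, err := chainhash.NewHashFromStr(
		"000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f",
	)
	if err != nil {
		panic(err)
	}

	// chainhash stores the byte-serialized form, which is the reverse of
	// the display string: the displayed leading zeroes end up at the end.
	fmt.Printf("sub bucket name: %x\n", h[:2])  // 6fe2
	fmt.Printf("tail of slice:   %x\n", h[28:]) // 00000000
}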
27 changes: 24 additions & 3 deletions headerfs/store_test.go
@@ -287,9 +287,16 @@ func TestFilterHeaderStoreOperations(t *testing.T) {
 		rootBucket := tx.ReadWriteBucket(indexBucket)

 		for _, header := range blockHeaders {
+			subBucket, err := rootBucket.CreateBucketIfNotExists(
+				header.HeaderHash[0:numSubBucketBytes],
+			)
+			if err != nil {
+				return err
+			}
+
 			var heightBytes [4]byte
 			binary.BigEndian.PutUint32(heightBytes[:], header.Height)
-			err := rootBucket.Put(header.HeaderHash[:], heightBytes[:])
+			err = subBucket.Put(header.HeaderHash[:], heightBytes[:])
 			if err != nil {
 				return err
 			}
@@ -399,9 +406,16 @@ func TestFilterHeaderStoreRecovery(t *testing.T) {
 		rootBucket := tx.ReadWriteBucket(indexBucket)

 		for _, header := range blockHeaders {
+			subBucket, err := rootBucket.CreateBucketIfNotExists(
+				header.HeaderHash[0:numSubBucketBytes],
+			)
+			if err != nil {
+				return err
+			}
+
 			var heightBytes [4]byte
 			binary.BigEndian.PutUint32(heightBytes[:], header.Height)
-			err := rootBucket.Put(header.HeaderHash[:], heightBytes[:])
+			err = subBucket.Put(header.HeaderHash[:], heightBytes[:])
 			if err != nil {
 				return err
 			}
@@ -537,11 +551,18 @@ func TestFilterHeaderStateAssertion(t *testing.T) {
 		rootBucket := tx.ReadWriteBucket(indexBucket)

 		for _, header := range filterHeaderChain {
+			subBucket, err := rootBucket.CreateBucketIfNotExists(
+				header.HeaderHash[0:numSubBucketBytes],
+			)
+			if err != nil {
+				return err
+			}
+
 			var heightBytes [4]byte
 			binary.BigEndian.PutUint32(
 				heightBytes[:], header.Height,
 			)
-			err := rootBucket.Put(
+			err = subBucket.Put(
 				header.HeaderHash[:], heightBytes[:],
 			)
 			if err != nil {
