diff --git a/erigon-lib/seg/decompress.go b/erigon-lib/seg/decompress.go index 8621b2a4ab8..ca257a11bae 100644 --- a/erigon-lib/seg/decompress.go +++ b/erigon-lib/seg/decompress.go @@ -1066,31 +1066,27 @@ func (g *Getter) FastNext(buf []byte) ([]byte, uint64) { return buf[:wordLen], postLoopPos } -// BinarySearch - !expecting sorted file -func (g *Getter) BinarySearch(fromPrefix []byte, count int, f func(i uint64) (offset uint64)) (value []byte, ok bool) { +// BinarySearch - !expecting sorted file - seeks `g` to the first key which is >= `seek` by using binary search - means suboptimal and touching many places in file +// use `.Next` to read the found key +// at `ok = false` leaves `g` in an unpredictable state +func (g *Getter) BinarySearch(seek []byte, count int, getOffset func(i uint64) (offset uint64)) (foundOffset uint64, ok bool) { + var key []byte foundItem := sort.Search(count, func(i int) bool { - offset := f(uint64(i)) + offset := getOffset(uint64(i)) g.Reset(offset) if g.HasNext() { - key, _ := g.Next(nil) - return bytes.Compare(key, fromPrefix) >= 0 + key, _ = g.Next(key[:0]) + return bytes.Compare(key, seek) >= 0 } return false }) if foundItem == count { // `Search` returns `n` if not found - return nil, false + return 0, false } - foundOffset := f(uint64(foundItem)) + foundOffset = getOffset(uint64(foundItem)) g.Reset(foundOffset) - if g.HasNext() { - value, _ = g.Next(nil) - } - if dbg.AssertEnabled && foundItem > 2 { - g.Reset(f(uint64(foundItem - 2))) // prev key - prevKey, _ := g.Next(nil) - if bytes.Compare(prevKey, fromPrefix) >= 0 { - panic(fmt.Errorf("see smaller key: fromPrefix=%x, prevKey=%x", fromPrefix, prevKey)) - } + if !g.HasNext() { + return 0, false } - return value, true + return foundOffset, true } diff --git a/erigon-lib/seg/decompress_test.go b/erigon-lib/seg/decompress_test.go index f77dc51d325..9e6ba470323 100644 --- a/erigon-lib/seg/decompress_test.go +++ b/erigon-lib/seg/decompress_test.go @@ -297,33 +297,43 @@ func TestUncompressed(t 
*testing.T) { offsets = append(offsets, offset) } - t.Run("BinarySearch", func(t *testing.T) { + t.Run("BinarySearch middle", func(t *testing.T) { require := require.New(t) - k, ok := g.BinarySearch([]byte("ipsum"), d.Count(), func(i uint64) (offset uint64) { return offsets[i] }) + _, ok := g.BinarySearch([]byte("ipsum"), d.Count(), func(i uint64) (offset uint64) { return offsets[i] }) require.True(ok) + k, _ := g.Next(nil) require.Equal("ipsum 38", string(k)) - k, ok = g.BinarySearch([]byte("ipsu"), d.Count(), func(i uint64) (offset uint64) { return offsets[i] }) + _, ok = g.BinarySearch([]byte("ipsu"), d.Count(), func(i uint64) (offset uint64) { return offsets[i] }) require.True(ok) + k, _ = g.Next(nil) require.Equal("ipsum 38", string(k)) - + }) + t.Run("BinarySearch end of file", func(t *testing.T) { + require := require.New(t) //last word is `voluptate` - k, ok = g.BinarySearch([]byte("voluptate"), d.Count(), func(i uint64) (offset uint64) { return offsets[i] }) + _, ok := g.BinarySearch([]byte("voluptate"), d.Count(), func(i uint64) (offset uint64) { return offsets[i] }) require.True(ok) + k, _ := g.Next(nil) require.Equal("voluptate 69", string(k)) - k, ok = g.BinarySearch([]byte("voluptat"), d.Count(), func(i uint64) (offset uint64) { return offsets[i] }) + _, ok = g.BinarySearch([]byte("voluptat"), d.Count(), func(i uint64) (offset uint64) { return offsets[i] }) require.True(ok) + k, _ = g.Next(nil) require.Equal("voluptate 69", string(k)) - k, ok = g.BinarySearch([]byte("voluptatez"), d.Count(), func(i uint64) (offset uint64) { return offsets[i] }) + _, ok = g.BinarySearch([]byte("voluptatez"), d.Count(), func(i uint64) (offset uint64) { return offsets[i] }) require.False(ok) - require.Equal("", string(k)) + }) + t.Run("BinarySearch begin of file", func(t *testing.T) { + require := require.New(t) //first word is `` - k, ok = g.BinarySearch([]byte(""), d.Count(), func(i uint64) (offset uint64) { return offsets[i] }) + _, ok := g.BinarySearch([]byte(""), 
d.Count(), func(i uint64) (offset uint64) { return offsets[i] }) require.True(ok) + k, _ := g.Next(nil) require.Equal("", string(k)) - k, ok = g.BinarySearch(nil, d.Count(), func(i uint64) (offset uint64) { return offsets[i] }) + _, ok = g.BinarySearch(nil, d.Count(), func(i uint64) (offset uint64) { return offsets[i] }) require.True(ok) + k, _ = g.Next(nil) require.Equal("", string(k)) }) diff --git a/erigon-lib/state/history.go b/erigon-lib/state/history.go index 483b572cd31..8f2f85f2cf6 100644 --- a/erigon-lib/state/history.go +++ b/erigon-lib/state/history.go @@ -1365,7 +1365,11 @@ func (ht *HistoryRoTx) WalkAsOf(ctx context.Context, startTxNum uint64, from, to var offset uint64 if len(from) > 0 { n := item.src.decompressor.Count() / 2 - offset = g.BinarySearch(from, n, idx.OrdinalLookup) + var ok bool + offset, ok = g.BinarySearch(from, n, idx.OrdinalLookup) + if !ok { + offset = 0 + } } g.Reset(offset) if g.HasNext() {