From 93ecfcb01254a689282bb8be5155e298085626c6 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 8 Feb 2024 00:29:19 +0000 Subject: [PATCH] fix(deps): update module github.com/roaringbitmap/roaring to v1.9.0 --- go.mod | 4 +- go.sum | 4 + .../RoaringBitmap/roaring/.drone.yml | 1 - .../github.com/RoaringBitmap/roaring/Makefile | 107 ---- .../RoaringBitmap/roaring/README.md | 12 +- .../RoaringBitmap/roaring/arraycontainer.go | 11 +- .../RoaringBitmap/roaring/bitmapcontainer.go | 1 - .../roaring/internal/byte_input.go | 63 ++- .../RoaringBitmap/roaring/roaring.go | 267 ++++++++-- .../RoaringBitmap/roaring/roaringarray.go | 19 +- .../RoaringBitmap/roaring/runcontainer.go | 48 +- .../RoaringBitmap/roaring/serialization.go | 1 - .../roaring/serialization_littleendian.go | 94 ++-- .../bits-and-blooms/bitset/README.md | 65 ++- .../bits-and-blooms/bitset/SECURITY.md | 5 + .../bits-and-blooms/bitset/bitset.go | 455 ++++++++++++------ .../bits-and-blooms/bitset/popcnt_19.go | 17 + .../bits-and-blooms/bitset/popcnt_amd64.go | 4 +- .../bits-and-blooms/bitset/popcnt_generic.go | 1 + .../bitset/trailing_zeros_18.go | 1 + .../bitset/trailing_zeros_19.go | 1 + vendor/modules.txt | 6 +- 22 files changed, 808 insertions(+), 379 deletions(-) delete mode 100644 vendor/github.com/RoaringBitmap/roaring/Makefile create mode 100644 vendor/github.com/bits-and-blooms/bitset/SECURITY.md diff --git a/go.mod b/go.mod index 5b4f19f6..475818ab 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/longhorn/backing-image-manager go 1.21 require ( - github.com/RoaringBitmap/roaring v1.2.3 + github.com/RoaringBitmap/roaring v1.9.0 github.com/golang/protobuf v1.5.3 github.com/gorilla/mux v1.8.0 github.com/longhorn/backupstore v0.0.0-20240207023845-915898cee71c @@ -25,7 +25,7 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v0.3.0 // indirect github.com/aws/aws-sdk-go v1.34.2 // indirect github.com/beorn7/perks v1.0.1 // indirect - github.com/bits-and-blooms/bitset v1.2.0 // indirect + github.com/bits-and-blooms/bitset v1.12.0 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect github.com/felixge/httpsnoop v1.0.1 // indirect diff --git a/go.sum b/go.sum index 9b66a364..a29dc41f 100644 --- a/go.sum +++ b/go.sum @@ -7,6 +7,8 @@ github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v0.3.0/go.mod h1:tPaiy8S5bQ github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/RoaringBitmap/roaring v1.2.3 h1:yqreLINqIrX22ErkKI0vY47/ivtJr6n+kMhVOVmhWBY= github.com/RoaringBitmap/roaring v1.2.3/go.mod h1:plvDsJQpxOC5bw8LRteu/MLWHsHez/3y6cubLI4/1yE= +github.com/RoaringBitmap/roaring v1.9.0 h1:lwKhr90/j0jVXJyh5X+vQN1VVn77rQFfYnh6RDRGCcE= +github.com/RoaringBitmap/roaring v1.9.0/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90= github.com/aws/aws-sdk-go v1.34.2 h1:9vCknCdTAmmV4ht7lPuda7aJXzllXwEQyCMZKJHjBrM= github.com/aws/aws-sdk-go v1.34.2/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= @@ -14,6 +16,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA= github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= +github.com/bits-and-blooms/bitset v1.12.0 h1:U/q1fAF7xXRhFCrhROzIfffYnu+dlS38vCZtmFVPHmA= +github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= diff --git a/vendor/github.com/RoaringBitmap/roaring/.drone.yml b/vendor/github.com/RoaringBitmap/roaring/.drone.yml index 698cd0e7..7936bfe8 100644 --- a/vendor/github.com/RoaringBitmap/roaring/.drone.yml +++ b/vendor/github.com/RoaringBitmap/roaring/.drone.yml @@ -11,7 +11,6 @@ steps: commands: - go get -t - go test - - go test -race -run TestConcurrent* - go build -tags appengine - go test -tags appengine - GOARCH=386 go build diff --git a/vendor/github.com/RoaringBitmap/roaring/Makefile b/vendor/github.com/RoaringBitmap/roaring/Makefile deleted file mode 100644 index 0a4f9f0a..00000000 --- a/vendor/github.com/RoaringBitmap/roaring/Makefile +++ /dev/null @@ -1,107 +0,0 @@ -.PHONY: help all test format fmtcheck vet lint qa deps clean nuke ser fetch-real-roaring-datasets - - - - - - - - -# Display general help about this command -help: - @echo "" - @echo "The following commands are available:" - @echo "" - @echo " make qa : Run all the tests" - @echo " make test : Run the unit tests" - @echo "" - @echo " make format : Format the source code" - @echo " make fmtcheck : Check if the source code has been formatted" - @echo " make vet : Check for suspicious constructs" - @echo " make lint : Check for style errors" - @echo "" - @echo " make deps : Get the dependencies" - @echo " make clean : Remove any build artifact" - @echo " make nuke : Deletes any intermediate file" - @echo "" - @echo " make fuzz-smat : Fuzzy testing with smat" - @echo " make fuzz-stream : Fuzzy testing with stream deserialization" - @echo " make fuzz-buffer : Fuzzy testing with buffer deserialization" - @echo "" - -# Alias for help target -all: help -test: - go test - go test -race -run TestConcurrent* -# Format the source code -format: - @find ./ -type f -name "*.go" -exec gofmt -w {} \; - -# Check if the source code has been formatted -fmtcheck: - @mkdir -p target - @find ./ -type f -name "*.go" -exec gofmt -d {} \; | tee target/format.diff - @test ! -s target/format.diff || { echo "ERROR: the source code has not been formatted - please use 'make format' or 'gofmt'"; exit 1; } - -# Check for syntax errors -vet: - GOPATH=$(GOPATH) go vet ./... - -# Check for style errors -lint: - GOPATH=$(GOPATH) PATH=$(GOPATH)/bin:$(PATH) golint ./... - - - - - -# Alias to run all quality-assurance checks -qa: fmtcheck test vet lint - -# --- INSTALL --- - -# Get the dependencies -deps: - GOPATH=$(GOPATH) go get github.com/stretchr/testify - GOPATH=$(GOPATH) go get github.com/bits-and-blooms/bitset - GOPATH=$(GOPATH) go get github.com/golang/lint/golint - GOPATH=$(GOPATH) go get github.com/mschoch/smat - GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz - GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz-build - GOPATH=$(GOPATH) go get github.com/glycerine/go-unsnap-stream - GOPATH=$(GOPATH) go get github.com/philhofer/fwd - GOPATH=$(GOPATH) go get github.com/jtolds/gls - -fuzz-smat: - go test -tags=gofuzz -run=TestGenerateSmatCorpus - go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring - go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200 - - -fuzz-stream: - go-fuzz-build -func FuzzSerializationStream github.com/RoaringBitmap/roaring - go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200 - - -fuzz-buffer: - go-fuzz-build -func FuzzSerializationBuffer github.com/RoaringBitmap/roaring - go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200 - -# Remove any build artifact -clean: - GOPATH=$(GOPATH) go clean ./... - -# Deletes any intermediate file -nuke: - rm -rf ./target - GOPATH=$(GOPATH) go clean -i ./... - -cover: - go test -coverprofile=coverage.out - go tool cover -html=coverage.out - -fetch-real-roaring-datasets: - # pull github.com/RoaringBitmap/real-roaring-datasets -> testdata/real-roaring-datasets - git submodule init - git submodule update diff --git a/vendor/github.com/RoaringBitmap/roaring/README.md b/vendor/github.com/RoaringBitmap/roaring/README.md index 753b8068..8c780afd 100644 --- a/vendor/github.com/RoaringBitmap/roaring/README.md +++ b/vendor/github.com/RoaringBitmap/roaring/README.md @@ -1,5 +1,7 @@ -roaring [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring/roaring64?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring/roaring64) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring) -[![Build Status](https://cloud.drone.io/api/badges/RoaringBitmap/roaring/status.svg)](https://cloud.drone.io/RoaringBitmap/roaring) +# roaring + +[![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring) + ![Go-CI](https://github.com/RoaringBitmap/roaring/workflows/Go-CI/badge.svg) ![Go-ARM-CI](https://github.com/RoaringBitmap/roaring/workflows/Go-ARM-CI/badge.svg) ![Go-Windows-CI](https://github.com/RoaringBitmap/roaring/workflows/Go-Windows-CI/badge.svg) @@ -31,10 +33,10 @@ Roaring bitmaps are found to work well in many important applications: The ``roaring`` Go library is used by * [anacrolix/torrent] -* [runv](https://github.com/hyperhq/runv) * [InfluxDB](https://www.influxdata.com) * [Pilosa](https://www.pilosa.com/) * [Bleve](http://www.blevesearch.com) +* [Weaviate](https://github.com/weaviate/weaviate) * [lindb](https://github.com/lindb/lindb) * [Elasticell](https://github.com/deepfabric/elasticell) * [SourceGraph](https://github.com/sourcegraph/sourcegraph) @@ -99,7 +101,7 @@ whether you like it or not. That can become very wasteful. This being said, there are definitively cases where attempting to use compressed bitmaps is wasteful. For example, if you have a small universe size. E.g., your bitmaps represent sets of integers -from [0,n) where n is small (e.g., n=64 or n=128). If you are able to uncompressed BitSet and +from [0,n) where n is small (e.g., n=64 or n=128). If you can use uncompressed BitSet and it does not blow up your memory usage, then compressed bitmaps are probably not useful to you. In fact, if you do not need compression, then a BitSet offers remarkable speed. @@ -134,7 +136,7 @@ There is a big problem with these formats however that can hurt you badly in som Roaring solves this problem. It works in the following manner. It divides the data into chunks of 216 integers (e.g., [0, 216), [216, 2 x 216), ...). Within a chunk, it can use an uncompressed bitmap, a simple list of integers, -or a list of runs. Whatever format it uses, they all allow you to check for the present of any one value quickly +or a list of runs. Whatever format it uses, they all allow you to check for the presence of any one value quickly (e.g., with a binary search). The net result is that Roaring can compute many operations much faster than run-length-encoded formats like WAH, EWAH, Concise... Maybe surprisingly, Roaring also generally offers better compression ratios. diff --git a/vendor/github.com/RoaringBitmap/roaring/arraycontainer.go b/vendor/github.com/RoaringBitmap/roaring/arraycontainer.go index 9541fd53..a575caff 100644 --- a/vendor/github.com/RoaringBitmap/roaring/arraycontainer.go +++ b/vendor/github.com/RoaringBitmap/roaring/arraycontainer.go @@ -17,8 +17,17 @@ func (ac *arrayContainer) String() string { } func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uint32) int { + if i < 0 { + panic("negative index") + } + if len(ac.content) == 0 { + return i + } + _ = x[len(ac.content)-1+i] + _ = ac.content[len(ac.content)-1] for k := 0; k < len(ac.content); k++ { - x[k+i] = uint32(ac.content[k]) | mask + x[k+i] = + uint32(ac.content[k]) | mask } return i + len(ac.content) } diff --git a/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go b/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go index 71029f4f..35e68438 100644 --- a/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go +++ b/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go @@ -1062,7 +1062,6 @@ func (bc *bitmapContainer) PrevSetBit(i int) int { // reference the java implementation // https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/BitmapContainer.java#L875-L892 -// func (bc *bitmapContainer) numberOfRuns() int { if bc.cardinality == 0 { return 0 diff --git a/vendor/github.com/RoaringBitmap/roaring/internal/byte_input.go b/vendor/github.com/RoaringBitmap/roaring/internal/byte_input.go index 3e5490a9..d5ebb91a 100644 --- a/vendor/github.com/RoaringBitmap/roaring/internal/byte_input.go +++ b/vendor/github.com/RoaringBitmap/roaring/internal/byte_input.go @@ -10,6 +10,11 @@ type ByteInput interface { // Next returns a slice containing the next n bytes from the buffer, // advancing the buffer as if the bytes had been returned by Read. Next(n int) ([]byte, error) + // NextReturnsSafeSlice returns true if Next() returns a safe slice as opposed + // to a slice that points to an underlying buffer possibly owned by another system. + // When NextReturnsSafeSlice returns false, the result from Next() should be copied + // before it is modified (i.e., it is immutable). + NextReturnsSafeSlice() bool // ReadUInt32 reads uint32 with LittleEndian order ReadUInt32() (uint32, error) // ReadUInt16 reads uint16 with LittleEndian order @@ -42,6 +47,25 @@ type ByteBuffer struct { off int } +// NewByteBuffer creates a new ByteBuffer. +func NewByteBuffer(buf []byte) *ByteBuffer { + return &ByteBuffer{ + buf: buf, + } +} + +var _ io.Reader = (*ByteBuffer)(nil) + +// Read implements io.Reader. +func (b *ByteBuffer) Read(p []byte) (int, error) { + data, err := b.Next(len(p)) + if err != nil { + return 0, err + } + copy(p, data) + return len(data), nil +} + // Next returns a slice containing the next n bytes from the reader // If there are fewer bytes than the given n, io.ErrUnexpectedEOF will be returned func (b *ByteBuffer) Next(n int) ([]byte, error) { @@ -57,6 +81,12 @@ func (b *ByteBuffer) Next(n int) ([]byte, error) { return data, nil } +// NextReturnsSafeSlice returns false since ByteBuffer might hold +// an array owned by some other systems. +func (b *ByteBuffer) NextReturnsSafeSlice() bool { + return false +} + // ReadUInt32 reads uint32 with LittleEndian order func (b *ByteBuffer) ReadUInt32() (uint32, error) { if len(b.buf)-b.off < 4 { @@ -109,26 +139,45 @@ func (b *ByteBuffer) Reset(buf []byte) { type ByteInputAdapter struct { r io.Reader readBytes int + buf [4]byte +} + +var _ io.Reader = (*ByteInputAdapter)(nil) + +// Read implements io.Reader. +func (b *ByteInputAdapter) Read(buf []byte) (int, error) { + m, err := io.ReadAtLeast(b.r, buf, len(buf)) + b.readBytes += m + + if err != nil { + return 0, err + } + + return m, nil } // Next returns a slice containing the next n bytes from the buffer, // advancing the buffer as if the bytes had been returned by Read. func (b *ByteInputAdapter) Next(n int) ([]byte, error) { buf := make([]byte, n) - m, err := io.ReadAtLeast(b.r, buf, n) - b.readBytes += m + _, err := b.Read(buf) if err != nil { return nil, err } - return buf, nil } +// NextReturnsSafeSlice returns true since ByteInputAdapter always returns a slice +// allocated with make([]byte, ...) +func (b *ByteInputAdapter) NextReturnsSafeSlice() bool { + return true +} + // ReadUInt32 reads uint32 with LittleEndian order func (b *ByteInputAdapter) ReadUInt32() (uint32, error) { - buf, err := b.Next(4) - + buf := b.buf[:4] + _, err := b.Read(buf) if err != nil { return 0, err } @@ -138,8 +187,8 @@ func (b *ByteInputAdapter) ReadUInt32() (uint32, error) { // ReadUInt16 reads uint16 with LittleEndian order func (b *ByteInputAdapter) ReadUInt16() (uint16, error) { - buf, err := b.Next(2) - + buf := b.buf[:2] + _, err := b.Read(buf) if err != nil { return 0, err } diff --git a/vendor/github.com/RoaringBitmap/roaring/roaring.go b/vendor/github.com/RoaringBitmap/roaring/roaring.go index 7220da27..a31cdbd9 100644 --- a/vendor/github.com/RoaringBitmap/roaring/roaring.go +++ b/vendor/github.com/RoaringBitmap/roaring/roaring.go @@ -13,6 +13,7 @@ import ( "strconv" "github.com/RoaringBitmap/roaring/internal" + "github.com/bits-and-blooms/bitset" ) // Bitmap represents a compressed bitmap where you can add integers. @@ -53,17 +54,186 @@ func (rb *Bitmap) ToBytes() ([]byte, error) { return rb.highlowcontainer.toBytes() } +const wordSize = uint64(64) +const log2WordSize = uint64(6) +const capacity = ^uint64(0) +const bitmapContainerSize = (1 << 16) / 64 // bitmap size in words + +// DenseSize returns the size of the bitmap when stored as a dense bitmap. +func (rb *Bitmap) DenseSize() uint64 { + if rb.highlowcontainer.size() == 0 { + return 0 + } + + maximum := 1 + uint64(rb.Maximum()) + if maximum > (capacity - wordSize + 1) { + return uint64(capacity >> log2WordSize) + } + + return uint64((maximum + (wordSize - 1)) >> log2WordSize) +} + +// ToDense returns a slice of uint64s representing the bitmap as a dense bitmap. +// Useful to convert a roaring bitmap to a format that can be used by other libraries +// like https://github.com/bits-and-blooms/bitset or https://github.com/kelindar/bitmap +func (rb *Bitmap) ToDense() []uint64 { + sz := rb.DenseSize() + if sz == 0 { + return nil + } + + bitmap := make([]uint64, sz) + rb.WriteDenseTo(bitmap) + return bitmap +} + +// FromDense creates a bitmap from a slice of uint64s representing the bitmap as a dense bitmap. +// Useful to convert bitmaps from libraries like https://github.com/bits-and-blooms/bitset or +// https://github.com/kelindar/bitmap into roaring bitmaps fast and with convenience. +// +// This function will not create any run containers, only array and bitmap containers. It's up to +// the caller to call RunOptimize if they want to further compress the runs of consecutive values. +// +// When doCopy is true, the bitmap is copied into a new slice for each bitmap container. +// This is useful when the bitmap is going to be modified after this function returns or if it's +// undesirable to hold references to large bitmaps which the GC would not be able to collect. +// One copy can still happen even when doCopy is false if the bitmap length is not divisible +// by bitmapContainerSize. +// +// See also FromBitSet. +func FromDense(bitmap []uint64, doCopy bool) *Bitmap { + sz := (len(bitmap) + bitmapContainerSize - 1) / bitmapContainerSize // round up + rb := &Bitmap{ + highlowcontainer: roaringArray{ + containers: make([]container, 0, sz), + keys: make([]uint16, 0, sz), + needCopyOnWrite: make([]bool, 0, sz), + }, + } + rb.FromDense(bitmap, doCopy) + return rb +} + +// FromDense unmarshalls from a slice of uint64s representing the bitmap as a dense bitmap. +// Useful to convert bitmaps from libraries like https://github.com/bits-and-blooms/bitset or +// https://github.com/kelindar/bitmap into roaring bitmaps fast and with convenience. +// Callers are responsible for ensuring that the bitmap is empty before calling this function. +// +// This function will not create any run containers, only array and bitmap containers. It is up to +// the caller to call RunOptimize if they want to further compress the runs of consecutive values. +// +// When doCopy is true, the bitmap is copied into a new slice for each bitmap container. +// This is useful when the bitmap is going to be modified after this function returns or if it's +// undesirable to hold references to large bitmaps which the GC would not be able to collect. +// One copy can still happen even when doCopy is false if the bitmap length is not divisible +// by bitmapContainerSize. +// +// See FromBitSet. +func (rb *Bitmap) FromDense(bitmap []uint64, doCopy bool) { + if len(bitmap) == 0 { + return + } + + var k uint16 + const size = bitmapContainerSize + + for len(bitmap) > 0 { + hi := size + if len(bitmap) < size { + hi = len(bitmap) + } + + words := bitmap[:hi] + count := int(popcntSlice(words)) + + switch { + case count > arrayDefaultMaxSize: + c := &bitmapContainer{cardinality: count, bitmap: words} + cow := true + + if doCopy || len(words) < size { + c.bitmap = make([]uint64, size) + copy(c.bitmap, words) + cow = false + } + + rb.highlowcontainer.appendContainer(k, c, cow) + + case count > 0: + c := &arrayContainer{content: make([]uint16, count)} + var pos, base int + for _, w := range words { + for w != 0 { + t := w & -w + c.content[pos] = uint16(base + int(popcount(t-1))) + pos++ + w ^= t + } + base += 64 + } + rb.highlowcontainer.appendContainer(k, c, false) + } + + bitmap = bitmap[hi:] + k++ + } +} + +// WriteDenseTo writes to a slice of uint64s representing the bitmap as a dense bitmap. +// Callers are responsible for allocating enough space in the bitmap using DenseSize. +// Useful to convert a roaring bitmap to a format that can be used by other libraries +// like https://github.com/bits-and-blooms/bitset or https://github.com/kelindar/bitmap +func (rb *Bitmap) WriteDenseTo(bitmap []uint64) { + for i, ct := range rb.highlowcontainer.containers { + hb := uint32(rb.highlowcontainer.keys[i]) << 16 + + switch c := ct.(type) { + case *arrayContainer: + for _, x := range c.content { + n := int(hb | uint32(x)) + bitmap[n>>log2WordSize] |= uint64(1) << uint(x%64) + } + + case *bitmapContainer: + copy(bitmap[int(hb)>>log2WordSize:], c.bitmap) + + case *runContainer16: + for j := range c.iv { + start := uint32(c.iv[j].start) + end := start + uint32(c.iv[j].length) + 1 + lo := int(hb|start) >> log2WordSize + hi := int(hb|(end-1)) >> log2WordSize + + if lo == hi { + bitmap[lo] |= (^uint64(0) << uint(start%64)) & + (^uint64(0) >> (uint(-end) % 64)) + continue + } + + bitmap[lo] |= ^uint64(0) << uint(start%64) + for n := lo + 1; n < hi; n++ { + bitmap[n] = ^uint64(0) + } + bitmap[hi] |= ^uint64(0) >> (uint(-end) % 64) + } + default: + panic("unsupported container type") + } + } +} + // Checksum computes a hash (currently FNV-1a) for a bitmap that is suitable for // using bitmaps as elements in hash sets or as keys in hash maps, as well as // generally quicker comparisons. // The implementation is biased towards efficiency in little endian machines, so // expect some extra CPU cycles and memory to be used if your machine is big endian. -// Likewise, don't use this to verify integrity unless you're certain you'll load -// the bitmap on a machine with the same endianess used to create it. +// Likewise, do not use this to verify integrity unless you are certain you will load +// the bitmap on a machine with the same endianess used to create it. (Thankfully +// very few people use big endian machines these days.) func (rb *Bitmap) Checksum() uint64 { const ( offset = 14695981039346656037 - prime = 1099511628211 + prime = 1099511628211 ) var bytes []byte @@ -106,6 +276,20 @@ func (rb *Bitmap) Checksum() uint64 { return hash } +// FromUnsafeBytes reads a serialized version of this bitmap from the byte buffer without copy. +// It is the caller's responsibility to ensure that the input data is not modified and remains valid for the entire lifetime of this bitmap. +// This method avoids small allocations but holds references to the input data buffer. It is GC-friendly, but it may consume more memory eventually. +// The containers in the resulting bitmap are immutable containers tied to the provided byte array and they rely on +// copy-on-write which means that modifying them creates copies. Thus FromUnsafeBytes is more likely to be appropriate for read-only use cases, +// when the resulting bitmap can be considered immutable. +// +// See also the FromBuffer function. +// See https://github.com/RoaringBitmap/roaring/pull/395 for more details. +func (rb *Bitmap) FromUnsafeBytes(data []byte, cookieHeader ...byte) (p int64, err error) { + stream := internal.NewByteBuffer(data) + return rb.ReadFrom(stream) +} + // ReadFrom reads a serialized version of this bitmap from stream. // The format is compatible with other RoaringBitmap // implementations (Java, C) and is documented here: @@ -114,12 +298,18 @@ func (rb *Bitmap) Checksum() uint64 { // So add cookieHeader to accept the 4-byte data that has been read in roaring64.ReadFrom. // It is not necessary to pass cookieHeader when call roaring.ReadFrom to read the roaring32 data directly. func (rb *Bitmap) ReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err error) { - stream := internal.ByteInputAdapterPool.Get().(*internal.ByteInputAdapter) - stream.Reset(reader) + stream, ok := reader.(internal.ByteInput) + if !ok { + byteInputAdapter := internal.ByteInputAdapterPool.Get().(*internal.ByteInputAdapter) + byteInputAdapter.Reset(reader) + stream = byteInputAdapter + } p, err = rb.highlowcontainer.readFrom(stream, cookieHeader...) - internal.ByteInputAdapterPool.Put(stream) + if !ok { + internal.ByteInputAdapterPool.Put(stream.(*internal.ByteInputAdapter)) + } return } @@ -139,12 +329,17 @@ func (rb *Bitmap) ReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err // You should *not* change the copy-on-write status of the resulting // bitmaps (SetCopyOnWrite). // +// Thus FromBuffer is more likely to be appropriate for read-only use cases, +// when the resulting bitmap can be considered immutable. +// // If buf becomes unavailable, then a bitmap created with // FromBuffer would be effectively broken. Furthermore, any // bitmap derived from this bitmap (e.g., via Or, And) might // also be broken. Thus, before making buf unavailable, you should // call CloneCopyOnWriteContainers on all such bitmaps. // +// See also the FromUnsafeBytes function which can have better performance +// in some cases. func (rb *Bitmap) FromBuffer(buf []byte) (p int64, err error) { stream := internal.ByteBufferPool.Get().(*internal.ByteBuffer) stream.Reset(buf) @@ -194,6 +389,16 @@ func (rb *Bitmap) Clear() { rb.highlowcontainer.clear() } +// ToBitSet copies the content of the RoaringBitmap into a bitset.BitSet instance +func (rb *Bitmap) ToBitSet() *bitset.BitSet { + return bitset.From(rb.ToDense()) +} + +// FromBitSet creates a new RoaringBitmap from a bitset.BitSet instance +func FromBitSet(bitset *bitset.BitSet) *Bitmap { + return FromDense(bitset.Bytes(), false) +} + // ToArray creates a new slice containing all of the integers stored in the Bitmap in sorted order func (rb *Bitmap) ToArray() []uint32 { array := make([]uint32, rb.GetCardinality()) @@ -233,7 +438,7 @@ func BoundSerializedSizeInBytes(cardinality uint64, universeSize uint64) uint64 contnbr := (universeSize + uint64(65535)) / uint64(65536) if contnbr > cardinality { contnbr = cardinality - // we can't have more containers than we have values + // we cannot have more containers than we have values } headermax := 8*contnbr + 4 if 4 > (contnbr+7)/8 { @@ -276,9 +481,9 @@ type intIterator struct { // This way, instead of making up-to 64k allocations per full iteration // we get a single allocation and simply reinitialize the appropriate // iterator and point to it in the generic `iter` member on each key bound. - shortIter shortIterator - runIter runIterator16 - bitmapIter bitmapContainerShortIterator + shortIter shortIterator + runIter runIterator16 + bitmapIter bitmapContainerShortIterator } // HasNext returns true if there are more integers to iterate over @@ -341,14 +546,13 @@ func (ii *intIterator) AdvanceIfNeeded(minval uint32) { // IntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap) type IntIterator = intIterator - // Initialize configures the existing iterator so that it can iterate through the values of // the provided bitmap. // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove). -func (p *intIterator) Initialize(a *Bitmap) { - p.pos = 0 - p.highlowcontainer = &a.highlowcontainer - p.init() +func (ii *intIterator) Initialize(a *Bitmap) { + ii.pos = 0 + ii.highlowcontainer = &a.highlowcontainer + ii.init() } type intReverseIterator struct { @@ -357,9 +561,9 @@ type intReverseIterator struct { iter shortIterable highlowcontainer *roaringArray - shortIter reverseIterator - runIter runReverseIterator16 - bitmapIter reverseBitmapContainerShortIterator + shortIter reverseIterator + runIter runReverseIterator16 + bitmapIter reverseBitmapContainerShortIterator } // HasNext returns true if there are more integers to iterate over @@ -414,10 +618,10 @@ type IntReverseIterator = intReverseIterator // Initialize configures the existing iterator so that it can iterate through the values of // the provided bitmap. // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove). -func (p *intReverseIterator) Initialize(a *Bitmap) { - p.highlowcontainer = &a.highlowcontainer - p.pos = a.highlowcontainer.size() - 1 - p.init() +func (ii *intReverseIterator) Initialize(a *Bitmap) { + ii.highlowcontainer = &a.highlowcontainer + ii.pos = a.highlowcontainer.size() - 1 + ii.init() } // ManyIntIterable allows you to iterate over the values in a Bitmap @@ -434,9 +638,9 @@ type manyIntIterator struct { iter manyIterable highlowcontainer *roaringArray - shortIter shortIterator - runIter runIterator16 - bitmapIter bitmapContainerManyIterator + shortIter shortIterator + runIter runIterator16 + bitmapIter bitmapContainerManyIterator } func (ii *manyIntIterator) init() { @@ -495,17 +699,16 @@ func (ii *manyIntIterator) NextMany64(hs64 uint64, buf []uint64) int { return n } - // ManyIntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap) type ManyIntIterator = manyIntIterator // Initialize configures the existing iterator so that it can iterate through the values of // the provided bitmap. // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove). -func (p *manyIntIterator) Initialize(a *Bitmap) { - p.pos = 0 - p.highlowcontainer = &a.highlowcontainer - p.init() +func (ii *manyIntIterator) Initialize(a *Bitmap) { + ii.pos = 0 + ii.highlowcontainer = &a.highlowcontainer + ii.init() } // String creates a string representation of the Bitmap @@ -569,7 +772,7 @@ func (rb *Bitmap) Iterate(cb func(x uint32) bool) { // Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order; // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). func (rb *Bitmap) Iterator() IntPeekable { - p := new(intIterator) + p := new(intIterator) p.Initialize(rb) return p } @@ -847,7 +1050,7 @@ func (rb *Bitmap) Select(x uint32) (uint32, error) { return uint32(key)<<16 + uint32(c.selectInt(uint16(remaining))), nil } } - return 0, fmt.Errorf("can't find %dth integer in a bitmap with only %d items", x, rb.GetCardinality()) + return 0, fmt.Errorf("cannot find %dth integer in a bitmap with only %d items", x, rb.GetCardinality()) } // And computes the intersection between two bitmaps and stores the result in the current bitmap diff --git a/vendor/github.com/RoaringBitmap/roaring/roaringarray.go b/vendor/github.com/RoaringBitmap/roaring/roaringarray.go index eeb3d313..079195dd 100644 --- a/vendor/github.com/RoaringBitmap/roaring/roaringarray.go +++ b/vendor/github.com/RoaringBitmap/roaring/roaringarray.go @@ -4,8 +4,9 @@ import ( "bytes" "encoding/binary" "fmt" - "github.com/RoaringBitmap/roaring/internal" "io" + + "github.com/RoaringBitmap/roaring/internal" ) type container interface { @@ -112,9 +113,10 @@ func newRoaringArray() *roaringArray { // runOptimize compresses the element containers to minimize space consumed. // Q: how does this interact with copyOnWrite and needCopyOnWrite? // A: since we aren't changing the logical content, just the representation, -// we don't bother to check the needCopyOnWrite bits. We replace -// (possibly all) elements of ra.containers in-place with space -// optimized versions. +// +// we don't bother to check the needCopyOnWrite bits. We replace +// (possibly all) elements of ra.containers in-place with space +// optimized versions. func (ra *roaringArray) runOptimize() { for i := range ra.containers { ra.containers[i] = ra.containers[i].toEfficientContainer() @@ -465,9 +467,7 @@ func (ra *roaringArray) serializedSizeInBytes() uint64 { return answer } -// // spec: https://github.com/RoaringBitmap/RoaringFormatSpec -// func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) { hasRun := ra.hasRunCompression() isRunSizeInBytes := 0 @@ -544,15 +544,14 @@ func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) { return n, nil } -// // spec: https://github.com/RoaringBitmap/RoaringFormatSpec -// func (ra *roaringArray) toBytes() ([]byte, error) { var buf bytes.Buffer _, err := ra.writeTo(&buf) return buf.Bytes(), err } +// Reads a serialized roaringArray from a byte slice. func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte) (int64, error) { var cookie uint32 var err error @@ -567,6 +566,8 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte return stream.GetReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err) } } + // If NextReturnsSafeSlice is false, then willNeedCopyOnWrite should be true + willNeedCopyOnWrite := !stream.NextReturnsSafeSlice() var size uint32 var isRunBitmap []byte @@ -631,7 +632,7 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte key := keycard[2*i] card := int(keycard[2*i+1]) + 1 ra.keys[i] = key - ra.needCopyOnWrite[i] = true + ra.needCopyOnWrite[i] = willNeedCopyOnWrite if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 { // run container diff --git a/vendor/github.com/RoaringBitmap/roaring/runcontainer.go b/vendor/github.com/RoaringBitmap/roaring/runcontainer.go index 4ce48a29..f4829a62 100644 --- a/vendor/github.com/RoaringBitmap/roaring/runcontainer.go +++ b/vendor/github.com/RoaringBitmap/roaring/runcontainer.go @@ -253,10 +253,8 @@ func newRunContainer16FromBitmapContainer(bc *bitmapContainer) *runContainer16 { } -// // newRunContainer16FromArray populates a new // runContainer16 from the contents of arr. -// func newRunContainer16FromArray(arr *arrayContainer) *runContainer16 { // keep this in sync with newRunContainer16FromVals above @@ -834,24 +832,23 @@ func (rc *runContainer16) numIntervals() int { // If key is not already present, then whichInterval16 is // set as follows: // -// a) whichInterval16 == len(rc.iv)-1 if key is beyond our -// last interval16 in rc.iv; +// a) whichInterval16 == len(rc.iv)-1 if key is beyond our +// last interval16 in rc.iv; // -// b) whichInterval16 == -1 if key is before our first -// interval16 in rc.iv; +// b) whichInterval16 == -1 if key is before our first +// interval16 in rc.iv; // -// c) whichInterval16 is set to the minimum index of rc.iv -// which comes strictly before the key; -// so rc.iv[whichInterval16].last < key, -// and if whichInterval16+1 exists, then key < rc.iv[whichInterval16+1].start -// (Note that whichInterval16+1 won't exist when -// whichInterval16 is the last interval.) +// c) whichInterval16 is set to the minimum index of rc.iv +// which comes strictly before the key; +// so rc.iv[whichInterval16].last < key, +// and if whichInterval16+1 exists, then key < rc.iv[whichInterval16+1].start +// (Note that whichInterval16+1 won't exist when +// whichInterval16 is the last interval.) // // runContainer16.search always returns whichInterval16 < len(rc.iv). // // The search space is from startIndex to endxIndex. If endxIndex is set to zero, then there // no upper bound. -// func (rc *runContainer16) searchRange(key int, startIndex int, endxIndex int) (whichInterval16 int, alreadyPresent bool, numCompares int) { n := int(len(rc.iv)) if n == 0 { @@ -937,21 +934,20 @@ func (rc *runContainer16) searchRange(key int, startIndex int, endxIndex int) (w // If key is not already present, then whichInterval16 is // set as follows: // -// a) whichInterval16 == len(rc.iv)-1 if key is beyond our -// last interval16 in rc.iv; +// a) whichInterval16 == len(rc.iv)-1 if key is beyond our +// last interval16 in rc.iv; // -// b) whichInterval16 == -1 if key is before our first -// interval16 in rc.iv; +// b) whichInterval16 == -1 if key is before our first +// interval16 in rc.iv; // -// c) whichInterval16 is set to the minimum index of rc.iv -// which comes strictly before the key; -// so rc.iv[whichInterval16].last < key, -// and if whichInterval16+1 exists, then key < rc.iv[whichInterval16+1].start -// (Note that whichInterval16+1 won't exist when -// whichInterval16 is the last interval.) +// c) whichInterval16 is set to the minimum index of rc.iv +// which comes strictly before the key; +// so rc.iv[whichInterval16].last < key, +// and if whichInterval16+1 exists, then key < rc.iv[whichInterval16+1].start +// (Note that whichInterval16+1 won't exist when +// whichInterval16 is the last interval.) // // runContainer16.search always returns whichInterval16 < len(rc.iv). -// func (rc *runContainer16) search(key int) (whichInterval16 int, alreadyPresent bool, numCompares int) { return rc.searchRange(key, 0, 0) } @@ -994,7 +990,6 @@ func newRunContainer16() *runContainer16 { // newRunContainer16CopyIv creates a run container, initializing // with a copy of the supplied iv slice. -// func newRunContainer16CopyIv(iv []interval16) *runContainer16 { rc := &runContainer16{ iv: make([]interval16, len(iv)), @@ -1011,7 +1006,6 @@ func (rc *runContainer16) Clone() *runContainer16 { // newRunContainer16TakeOwnership returns a new runContainer16 // backed by the provided iv slice, which we will // assume exclusive control over from now on. -// func newRunContainer16TakeOwnership(iv []interval16) *runContainer16 { rc := &runContainer16{ iv: iv, @@ -2006,7 +2000,6 @@ func (rc *runContainer16) not(firstOfRange, endx int) container { // Current routine is correct but // makes 2 more passes through the arrays than should be // strictly necessary. Measure both ways though--this may not matter. -// func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 { if firstOfRange > endx { @@ -2329,7 +2322,6 @@ func runArrayUnionToRuns(rc *runContainer16, ac *arrayContainer) ([]interval16, // the backing array, and then you write // the answer at the beginning. What this // trick does is minimize memory allocations. -// func (rc *runContainer16) lazyIOR(a container) container { // not lazy at the moment return rc.ior(a) diff --git a/vendor/github.com/RoaringBitmap/roaring/serialization.go b/vendor/github.com/RoaringBitmap/roaring/serialization.go index 70e3bbcc..dbfecc84 100644 --- a/vendor/github.com/RoaringBitmap/roaring/serialization.go +++ b/vendor/github.com/RoaringBitmap/roaring/serialization.go @@ -7,7 +7,6 @@ import ( // writeTo for runContainer16 follows this // spec: https://github.com/RoaringBitmap/RoaringFormatSpec -// func (b *runContainer16) writeTo(stream io.Writer) (int, error) { buf := make([]byte, 2+4*len(b.iv)) binary.LittleEndian.PutUint16(buf[0:], uint16(len(b.iv))) diff --git a/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go b/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go index 2e4ea595..6e3a5d55 100644 --- a/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go +++ b/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go @@ -79,12 +79,12 @@ func (bc *bitmapContainer) asLittleEndianByteSlice() []byte { // Deserialization code follows -//// +// // // These methods (byteSliceAsUint16Slice,...) do not make copies, // they are pointer-based (unsafe). The caller is responsible to // ensure that the input slice does not get garbage collected, deleted // or modified while you hold the returned slince. -//// +// // func byteSliceAsUint16Slice(slice []byte) (result []uint16) { // here we create a new slice holder if len(slice)%2 != 0 { panic("Slice size should be divisible by 2") @@ -295,7 +295,6 @@ func byteSliceAsBoolSlice(slice []byte) (result []bool) { // bitmap derived from this bitmap (e.g., via Or, And) might // also be broken. Thus, before making buf unavailable, you should // call CloneCopyOnWriteContainers on all such bitmaps. -// func (rb *Bitmap) FrozenView(buf []byte) error { return rb.highlowcontainer.frozenView(buf) } @@ -313,7 +312,7 @@ func (rb *Bitmap) FrozenView(buf []byte) error { * uint8_t[num_containers] *
uint32_t * - *
is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits) + *
is a 4-byte value which is a bit union of frozenCookie (15 bits) * and the number of containers (17 bits). * * stores number of elements for every container. @@ -329,43 +328,50 @@ func (rb *Bitmap) FrozenView(buf []byte) error { * All members have their native alignments during deserilization except
, * which is not guaranteed to be aligned by 4 bytes. */ -const FROZEN_COOKIE = 13766 +const frozenCookie = 13766 var ( - FrozenBitmapInvalidCookie = errors.New("header does not contain the FROZEN_COOKIE") - FrozenBitmapBigEndian = errors.New("loading big endian frozen bitmaps is not supported") - FrozenBitmapIncomplete = errors.New("input buffer too small to contain a frozen bitmap") - FrozenBitmapOverpopulated = errors.New("too many containers") - FrozenBitmapUnexpectedData = errors.New("spurious data in input") - FrozenBitmapInvalidTypecode = errors.New("unrecognized typecode") - FrozenBitmapBufferTooSmall = errors.New("buffer too small") + // ErrFrozenBitmapInvalidCookie is returned when the header does not contain the frozenCookie. + ErrFrozenBitmapInvalidCookie = errors.New("header does not contain the frozenCookie") + // ErrFrozenBitmapBigEndian is returned when the header is big endian. + ErrFrozenBitmapBigEndian = errors.New("loading big endian frozen bitmaps is not supported") + // ErrFrozenBitmapIncomplete is returned when the buffer is too small to contain a frozen bitmap. + ErrFrozenBitmapIncomplete = errors.New("input buffer too small to contain a frozen bitmap") + // ErrFrozenBitmapOverpopulated is returned when the number of containers is too large. + ErrFrozenBitmapOverpopulated = errors.New("too many containers") + // ErrFrozenBitmapUnexpectedData is returned when the buffer contains unexpected data. + ErrFrozenBitmapUnexpectedData = errors.New("spurious data in input") + // ErrFrozenBitmapInvalidTypecode is returned when the typecode is invalid. + ErrFrozenBitmapInvalidTypecode = errors.New("unrecognized typecode") + // ErrFrozenBitmapBufferTooSmall is returned when the buffer is too small. + ErrFrozenBitmapBufferTooSmall = errors.New("buffer too small") ) func (ra *roaringArray) frozenView(buf []byte) error { if len(buf) < 4 { - return FrozenBitmapIncomplete + return ErrFrozenBitmapIncomplete } headerBE := binary.BigEndian.Uint32(buf[len(buf)-4:]) - if headerBE&0x7fff == FROZEN_COOKIE { - return FrozenBitmapBigEndian + if headerBE&0x7fff == frozenCookie { + return ErrFrozenBitmapBigEndian } header := binary.LittleEndian.Uint32(buf[len(buf)-4:]) buf = buf[:len(buf)-4] - if header&0x7fff != FROZEN_COOKIE { - return FrozenBitmapInvalidCookie + if header&0x7fff != frozenCookie { + return ErrFrozenBitmapInvalidCookie } nCont := int(header >> 15) if nCont > (1 << 16) { - return FrozenBitmapOverpopulated + return ErrFrozenBitmapOverpopulated } // 1 byte per type, 2 bytes per key, 2 bytes per count. if len(buf) < 5*nCont { - return FrozenBitmapIncomplete + return ErrFrozenBitmapIncomplete } types := buf[len(buf)-nCont:] @@ -390,12 +396,12 @@ func (ra *roaringArray) frozenView(buf []byte) error { nRun++ nRunEl += int(counts[i]) default: - return FrozenBitmapInvalidTypecode + return ErrFrozenBitmapInvalidTypecode } } if len(buf) < (1<<13)*nBitmap+4*nRunEl+2*nArrayEl { - return FrozenBitmapIncomplete + return ErrFrozenBitmapIncomplete } bitsetsArena := byteSliceAsUint64Slice(buf[:(1<<13)*nBitmap]) @@ -408,15 +414,15 @@ func (ra *roaringArray) frozenView(buf []byte) error { buf = buf[2*nArrayEl:] if len(buf) != 0 { - return FrozenBitmapUnexpectedData + return ErrFrozenBitmapUnexpectedData } var c container - containersSz := int(unsafe.Sizeof(c))*nCont - bitsetsSz := int(unsafe.Sizeof(bitmapContainer{}))*nBitmap - arraysSz := int(unsafe.Sizeof(arrayContainer{}))*nArray - runsSz := int(unsafe.Sizeof(runContainer16{}))*nRun - needCOWSz := int(unsafe.Sizeof(true))*nCont + containersSz := int(unsafe.Sizeof(c)) * nCont + bitsetsSz := int(unsafe.Sizeof(bitmapContainer{})) * nBitmap + arraysSz := int(unsafe.Sizeof(arrayContainer{})) * nArray + runsSz := int(unsafe.Sizeof(runContainer16{})) * nRun + needCOWSz := int(unsafe.Sizeof(true)) * nCont bitmapArenaSz := containersSz + bitsetsSz + arraysSz + runsSz + needCOWSz bitmapArena := make([]byte, bitmapArenaSz) @@ -475,9 +481,10 @@ func (ra *roaringArray) frozenView(buf []byte) error { return nil } -func (bm *Bitmap) GetFrozenSizeInBytes() uint64 { +// GetFrozenSizeInBytes returns the size in bytes of the frozen bitmap. +func (rb *Bitmap) GetFrozenSizeInBytes() uint64 { nBits, nArrayEl, nRunEl := uint64(0), uint64(0), uint64(0) - for _, c := range bm.highlowcontainer.containers { + for _, c := range rb.highlowcontainer.containers { switch v := c.(type) { case *bitmapContainer: nBits++ @@ -487,19 +494,21 @@ func (bm *Bitmap) GetFrozenSizeInBytes() uint64 { nRunEl += uint64(len(v.iv)) } } - return 4 + 5*uint64(len(bm.highlowcontainer.containers)) + + return 4 + 5*uint64(len(rb.highlowcontainer.containers)) + (nBits << 13) + 2*nArrayEl + 4*nRunEl } -func (bm *Bitmap) Freeze() ([]byte, error) { - sz := bm.GetFrozenSizeInBytes() +// Freeze serializes the bitmap in the CRoaring's frozen format. +func (rb *Bitmap) Freeze() ([]byte, error) { + sz := rb.GetFrozenSizeInBytes() buf := make([]byte, sz) - _, err := bm.FreezeTo(buf) + _, err := rb.FreezeTo(buf) return buf, err } -func (bm *Bitmap) FreezeTo(buf []byte) (int, error) { - containers := bm.highlowcontainer.containers +// FreezeTo serializes the bitmap in the CRoaring's frozen format. +func (rb *Bitmap) FreezeTo(buf []byte) (int, error) { + containers := rb.highlowcontainer.containers nCont := len(containers) nBits, nArrayEl, nRunEl := 0, 0, 0 @@ -516,7 +525,7 @@ func (bm *Bitmap) FreezeTo(buf []byte) (int, error) { serialSize := 4 + 5*nCont + (1<<13)*nBits + 4*nRunEl + 2*nArrayEl if len(buf) < serialSize { - return 0, FrozenBitmapBufferTooSmall + return 0, ErrFrozenBitmapBufferTooSmall } bitsArena := byteSliceAsUint64Slice(buf[:(1<<13)*nBits]) @@ -537,10 +546,10 @@ func (bm *Bitmap) FreezeTo(buf []byte) (int, error) { types := buf[:nCont] buf = buf[nCont:] - header := uint32(FROZEN_COOKIE | (nCont << 15)) + header := uint32(frozenCookie | (nCont << 15)) binary.LittleEndian.PutUint32(buf[:4], header) - copy(keys, bm.highlowcontainer.keys[:]) + copy(keys, rb.highlowcontainer.keys[:]) for i, c := range containers { switch v := c.(type) { @@ -567,11 +576,12 @@ func (bm *Bitmap) FreezeTo(buf []byte) (int, error) { return serialSize, nil } -func (bm *Bitmap) WriteFrozenTo(wr io.Writer) (int, error) { +// WriteFrozenTo serializes the bitmap in the CRoaring's frozen format. +func (rb *Bitmap) WriteFrozenTo(wr io.Writer) (int, error) { // FIXME: this is a naive version that iterates 4 times through the // containers and allocates 3*len(containers) bytes; it's quite likely // it can be done more efficiently. - containers := bm.highlowcontainer.containers + containers := rb.highlowcontainer.containers written := 0 for _, c := range containers { @@ -610,7 +620,7 @@ func (bm *Bitmap) WriteFrozenTo(wr io.Writer) (int, error) { } } - n, err := wr.Write(uint16SliceAsByteSlice(bm.highlowcontainer.keys)) + n, err := wr.Write(uint16SliceAsByteSlice(rb.highlowcontainer.keys)) written += n if err != nil { return written, err @@ -642,7 +652,7 @@ func (bm *Bitmap) WriteFrozenTo(wr io.Writer) (int, error) { return written, err } - header := uint32(FROZEN_COOKIE | (len(containers) << 15)) + header := uint32(frozenCookie | (len(containers) << 15)) if err := binary.Write(wr, binary.LittleEndian, header); err != nil { return written, err } diff --git a/vendor/github.com/bits-and-blooms/bitset/README.md b/vendor/github.com/bits-and-blooms/bitset/README.md index 97e83071..848234e2 100644 --- a/vendor/github.com/bits-and-blooms/bitset/README.md +++ b/vendor/github.com/bits-and-blooms/bitset/README.md @@ -7,6 +7,15 @@ [![PkgGoDev](https://pkg.go.dev/badge/github.com/bits-and-blooms/bitset?tab=doc)](https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc) +This library is part of the [awesome go collection](https://github.com/avelino/awesome-go). It is used in production by several important systems: + +* [beego](https://github.com/beego/beego) +* [CubeFS](https://github.com/cubefs/cubefs) +* [Amazon EKS Distro](https://github.com/aws/eks-distro) +* [sourcegraph](https://github.com/sourcegraph/sourcegraph) +* [torrent](https://github.com/anacrolix/torrent) + + ## Description Package bitset implements bitsets, a mapping between non-negative integers and boolean values. @@ -60,19 +69,69 @@ func main() { } ``` -As an alternative to BitSets, one should check out the 'big' package, which provides a (less set-theoretical) view of bitsets. Package documentation is at: https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc +## Serialization + + +You may serialize a bitset safely and portably to a stream +of bytes as follows: +```Go + const length = 9585 + const oneEvery = 97 + bs := bitset.New(length) + // Add some bits + for i := uint(0); i < length; i += oneEvery { + bs = bs.Set(i) + } + + var buf bytes.Buffer + n, err := bs.WriteTo(&buf) + if err != nil { + // failure + } + // Here n == buf.Len() +``` +You can later deserialize the result as follows: + +```Go + // Read back from buf + bs = bitset.New() + n, err = bs.ReadFrom(&buf) + if err != nil { + // error + } + // n is the number of bytes read +``` + +The `ReadFrom` function attempts to read the data into the existing +BitSet instance, to minimize memory allocations. + + +*Performance tip*: +When reading and writing to a file or a network connection, you may get better performance by +wrapping your streams with `bufio` instances. + +E.g., +```Go + f, err := os.Create("myfile") + w := bufio.NewWriter(f) +``` +```Go + f, err := os.Open("myfile") + r := bufio.NewReader(f) +``` + ## Memory Usage -The memory usage of a bitset using N bits is at least N/8 bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](http://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring). +The memory usage of a bitset using `N` bits is at least `N/8` bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](http://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring). ## Implementation Note Go 1.9 introduced a native `math/bits` library. We provide backward compatibility to Go 1.7, which might be removed. -It is possible that a later version will match the `math/bits` return signature for counts (which is `int`, rather than our library's `unit64`). If so, the version will be bumped. +It is possible that a later version will match the `math/bits` return signature for counts (which is `int`, rather than our library's `uint64`). If so, the version will be bumped. ## Installation diff --git a/vendor/github.com/bits-and-blooms/bitset/SECURITY.md b/vendor/github.com/bits-and-blooms/bitset/SECURITY.md new file mode 100644 index 00000000..f888420c --- /dev/null +++ b/vendor/github.com/bits-and-blooms/bitset/SECURITY.md @@ -0,0 +1,5 @@ +# Security Policy + +## Reporting a Vulnerability + +You can report privately a vulnerability by email at daniel@lemire.me (current maintainer). diff --git a/vendor/github.com/bits-and-blooms/bitset/bitset.go b/vendor/github.com/bits-and-blooms/bitset/bitset.go index d688806a..8fb9e9fa 100644 --- a/vendor/github.com/bits-and-blooms/bitset/bitset.go +++ b/vendor/github.com/bits-and-blooms/bitset/bitset.go @@ -33,12 +33,10 @@ Example use: As an alternative to BitSets, one should check out the 'big' package, which provides a (less set-theoretical) view of bitsets. - */ package bitset import ( - "bufio" "bytes" "encoding/base64" "encoding/binary" @@ -52,6 +50,9 @@ import ( // the wordSize of a bit set const wordSize = uint(64) +// the wordSize of a bit set in bytes +const wordBytes = wordSize / 8 + // log2WordSize is lg(wordSize) const log2WordSize = uint(6) @@ -87,9 +88,20 @@ func (b *BitSet) safeSet() []uint64 { return b.set } +// SetBitsetFrom fills the bitset with an array of integers without creating a new BitSet instance +func (b *BitSet) SetBitsetFrom(buf []uint64) { + b.length = uint(len(buf)) * 64 + b.set = buf +} + // From is a constructor used to create a BitSet from an array of integers func From(buf []uint64) *BitSet { - return &BitSet{uint(len(buf)) * 64, buf} + return FromWithLength(uint(len(buf))*64, buf) +} + +// FromWithLength constructs from an array of integers and length. +func FromWithLength(len uint, set []uint64) *BitSet { + return &BitSet{len, set} } // Bytes returns the bitset as array of integers @@ -105,6 +117,17 @@ func wordsNeeded(i uint) int { return int((i + (wordSize - 1)) >> log2WordSize) } +// wordsNeededUnbound calculates the number of words needed for i bits, possibly exceeding the capacity. +// This function is useful if you know that the capacity cannot be exceeded (e.g., you have an existing bitmap). +func wordsNeededUnbound(i uint) int { + return int((i + (wordSize - 1)) >> log2WordSize) +} + +// wordsIndex calculates the index of words in a `uint64` +func wordsIndex(i uint) uint { + return i & (wordSize - 1) +} + // New creates a new BitSet with a hint that length bits will be required func New(length uint) (bset *BitSet) { defer func() { @@ -135,24 +158,22 @@ func (b *BitSet) Len() uint { return b.length } -// extendSetMaybe adds additional words to incorporate new bits if needed -func (b *BitSet) extendSetMaybe(i uint) { - if i >= b.length { // if we need more bits, make 'em - if i >= Cap() { - panic("You are exceeding the capacity") - } - nsize := wordsNeeded(i + 1) - if b.set == nil { - b.set = make([]uint64, nsize) - } else if cap(b.set) >= nsize { - b.set = b.set[:nsize] // fast resize - } else if len(b.set) < nsize { - newset := make([]uint64, nsize, 2*nsize) // increase capacity 2x - copy(newset, b.set) - b.set = newset - } - b.length = i + 1 +// extendSet adds additional words to incorporate new bits if needed +func (b *BitSet) extendSet(i uint) { + if i >= Cap() { + panic("You are exceeding the capacity") } + nsize := wordsNeeded(i + 1) + if b.set == nil { + b.set = make([]uint64, nsize) + } else if cap(b.set) >= nsize { + b.set = b.set[:nsize] // fast resize + } else if len(b.set) < nsize { + newset := make([]uint64, nsize, 2*nsize) // increase capacity 2x + copy(newset, b.set) + b.set = newset + } + b.length = i + 1 } // Test whether bit i is set. @@ -160,7 +181,7 @@ func (b *BitSet) Test(i uint) bool { if i >= b.length { return false } - return b.set[i>>log2WordSize]&(1<<(i&(wordSize-1))) != 0 + return b.set[i>>log2WordSize]&(1<>log2WordSize] |= 1 << (i & (wordSize - 1)) + if i >= b.length { // if we need more bits, make 'em + b.extendSet(i) + } + b.set[i>>log2WordSize] |= 1 << wordsIndex(i) return b } @@ -180,7 +203,7 @@ func (b *BitSet) Clear(i uint) *BitSet { if i >= b.length { return b } - b.set[i>>log2WordSize] &^= 1 << (i & (wordSize - 1)) + b.set[i>>log2WordSize] &^= 1 << wordsIndex(i) return b } @@ -205,7 +228,7 @@ func (b *BitSet) Flip(i uint) *BitSet { if i >= b.length { return b.Set(i) } - b.set[i>>log2WordSize] ^= 1 << (i & (wordSize - 1)) + b.set[i>>log2WordSize] ^= 1 << wordsIndex(i) return b } @@ -218,15 +241,23 @@ func (b *BitSet) FlipRange(start, end uint) *BitSet { if start >= end { return b } - - b.extendSetMaybe(end - 1) + if end-1 >= b.length { // if we need more bits, make 'em + b.extendSet(end - 1) + } var startWord uint = start >> log2WordSize var endWord uint = end >> log2WordSize - b.set[startWord] ^= ^(^uint64(0) << (start & (wordSize - 1))) - for i := startWord; i < endWord; i++ { - b.set[i] = ^b.set[i] + b.set[startWord] ^= ^(^uint64(0) << wordsIndex(start)) + if endWord > 0 { + // bounds check elimination + data := b.set + _ = data[endWord-1] + for i := startWord; i < endWord; i++ { + data[i] = ^data[i] + } + } + if end&(wordSize-1) != 0 { + b.set[endWord] ^= ^uint64(0) >> wordsIndex(-end) } - b.set[endWord] ^= ^uint64(0) >> (-end & (wordSize - 1)) return b } @@ -254,7 +285,10 @@ func (b *BitSet) Shrink(lastbitindex uint) *BitSet { copy(shrunk, b.set[:idx]) b.set = shrunk b.length = length - b.set[idx-1] &= (allBits >> (uint64(64) - uint64(length&(wordSize-1)))) + lastWordUsedBits := length % 64 + if lastWordUsedBits != 0 { + b.set[idx-1] &= allBits >> uint64(64-wordsIndex(lastWordUsedBits)) + } return b } @@ -283,7 +317,7 @@ func (b *BitSet) Compact() *BitSet { // this method could be extremely slow and in some cases might cause the entire BitSet // to be recopied. func (b *BitSet) InsertAt(idx uint) *BitSet { - insertAtElement := (idx >> log2WordSize) + insertAtElement := idx >> log2WordSize // if length of set is a multiple of wordSize we need to allocate more space first if b.isLenExactMultiple() { @@ -302,13 +336,13 @@ func (b *BitSet) InsertAt(idx uint) *BitSet { // generate a mask to extract the data that we need to shift left // within the element where we insert a bit - dataMask := ^(uint64(1)<> (i & (wordSize - 1)) + w = w >> wordsIndex(i) if w != 0 { return i + trailingZeroes64(w), true } - x = x + 1 + x++ + // bounds check elimination in the loop + if x < 0 { + return 0, false + } for x < len(b.set) { if b.set[x] != 0 { return uint(x)*wordSize + trailingZeroes64(b.set[x]), true } - x = x + 1 + x++ } return 0, false @@ -413,21 +451,20 @@ func (b *BitSet) NextSet(i uint) (uint, bool) { // including possibly the current index and up to cap(buffer). // If the returned slice has len zero, then no more set bits were found // -// buffer := make([]uint, 256) // this should be reused -// j := uint(0) -// j, buffer = bitmap.NextSetMany(j, buffer) -// for ; len(buffer) > 0; j, buffer = bitmap.NextSetMany(j,buffer) { -// for k := range buffer { -// do something with buffer[k] -// } -// j += 1 -// } -// +// buffer := make([]uint, 256) // this should be reused +// j := uint(0) +// j, buffer = bitmap.NextSetMany(j, buffer) +// for ; len(buffer) > 0; j, buffer = bitmap.NextSetMany(j,buffer) { +// for k := range buffer { +// do something with buffer[k] +// } +// j += 1 +// } // // It is possible to retrieve all set bits as follow: // -// indices := make([]uint, bitmap.Count()) -// bitmap.NextSetMany(0, indices) +// indices := make([]uint, bitmap.Count()) +// bitmap.NextSetMany(0, indices) // // However if bitmap.Count() is large, it might be preferable to // use several calls to NextSetMany, for performance reasons. @@ -438,7 +475,7 @@ func (b *BitSet) NextSetMany(i uint, buffer []uint) (uint, []uint) { if x >= len(b.set) || capacity == 0 { return 0, myanswer[:0] } - skip := i & (wordSize - 1) + skip := wordsIndex(i) word := b.set[x] >> skip myanswer = myanswer[:capacity] size := int(0) @@ -481,17 +518,23 @@ func (b *BitSet) NextClear(i uint) (uint, bool) { return 0, false } w := b.set[x] - w = w >> (i & (wordSize - 1)) - wA := allBits >> (i & (wordSize - 1)) + w = w >> wordsIndex(i) + wA := allBits >> wordsIndex(i) index := i + trailingZeroes64(^w) if w != wA && index < b.length { return index, true } x++ + // bounds check elimination in the loop + if x < 0 { + return 0, false + } for x < len(b.set) { - index = uint(x)*wordSize + trailingZeroes64(^b.set[x]) - if b.set[x] != allBits && index < b.length { - return index, true + if b.set[x] != allBits { + index = uint(x)*wordSize + trailingZeroes64(^b.set[x]) + if index < b.length { + return index, true + } } x++ } @@ -510,7 +553,7 @@ func (b *BitSet) ClearAll() *BitSet { // wordCount returns the number of words used in a bit set func (b *BitSet) wordCount() int { - return len(b.set) + return wordsNeededUnbound(b.length) } // Clone this BitSet @@ -522,9 +565,10 @@ func (b *BitSet) Clone() *BitSet { return c } -// Copy into a destination BitSet -// Returning the size of the destination BitSet -// like array copy +// Copy into a destination BitSet using the Go array copy semantics: +// the number of bits copied is the minimum of the number of bits in the current +// BitSet (Len()) and the destination Bitset. +// We return the number of bits copied in the destination BitSet. func (b *BitSet) Copy(c *BitSet) (count uint) { if c == nil { return @@ -536,9 +580,33 @@ func (b *BitSet) Copy(c *BitSet) (count uint) { if b.length < c.length { count = b.length } + // Cleaning the last word is needed to keep the invariant that other functions, such as Count, require + // that any bits in the last word that would exceed the length of the bitmask are set to 0. + c.cleanLastWord() return } +// CopyFull copies into a destination BitSet such that the destination is +// identical to the source after the operation, allocating memory if necessary. +func (b *BitSet) CopyFull(c *BitSet) { + if c == nil { + return + } + c.length = b.length + if len(b.set) == 0 { + if c.set != nil { + c.set = c.set[:0] + } + } else { + if cap(c.set) < len(b.set) { + c.set = make([]uint64, len(b.set)) + } else { + c.set = c.set[:len(b.set)] + } + copy(c.set, b.set) + } +} + // Count (number of set bits). // Also known as "popcount" or "population count". func (b *BitSet) Count() uint { @@ -561,10 +629,15 @@ func (b *BitSet) Equal(c *BitSet) bool { if b.length == 0 { // if they have both length == 0, then could have nil set return true } - // testing for equality shoud not transform the bitset (no call to safeSet) - - for p, v := range b.set { - if c.set[p] != v { + wn := b.wordCount() + // bounds check elimination + if wn <= 0 { + return true + } + _ = b.set[wn-1] + _ = c.set[wn-1] + for p := 0; p < wn; p++ { + if c.set[p] != b.set[p] { return false } } @@ -583,9 +656,9 @@ func (b *BitSet) Difference(compare *BitSet) (result *BitSet) { panicIfNull(b) panicIfNull(compare) result = b.Clone() // clone b (in case b is bigger than compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() } for i := 0; i < l; i++ { result.set[i] = b.set[i] &^ compare.set[i] @@ -597,9 +670,9 @@ func (b *BitSet) Difference(compare *BitSet) (result *BitSet) { func (b *BitSet) DifferenceCardinality(compare *BitSet) uint { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() } cnt := uint64(0) cnt += popcntMaskSlice(b.set[:l], compare.set[:l]) @@ -612,12 +685,19 @@ func (b *BitSet) DifferenceCardinality(compare *BitSet) uint { func (b *BitSet) InPlaceDifference(compare *BitSet) { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() + } + if l <= 0 { + return } + // bounds check elimination + data, cmpData := b.set, compare.set + _ = data[l-1] + _ = cmpData[l-1] for i := 0; i < l; i++ { - b.set[i] &^= compare.set[i] + data[i] &^= cmpData[i] } } @@ -660,18 +740,29 @@ func (b *BitSet) IntersectionCardinality(compare *BitSet) uint { func (b *BitSet) InPlaceIntersection(compare *BitSet) { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) - } - for i := 0; i < l; i++ { - b.set[i] &= compare.set[i] + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() + } + if l > 0 { + // bounds check elimination + data, cmpData := b.set, compare.set + _ = data[l-1] + _ = cmpData[l-1] + + for i := 0; i < l; i++ { + data[i] &= cmpData[i] + } } - for i := l; i < len(b.set); i++ { - b.set[i] = 0 + if l >= 0 { + for i := l; i < len(b.set); i++ { + b.set[i] = 0 + } } if compare.length > 0 { - b.extendSetMaybe(compare.length - 1) + if compare.length-1 >= b.length { + b.extendSet(compare.length - 1) + } } } @@ -706,15 +797,22 @@ func (b *BitSet) UnionCardinality(compare *BitSet) uint { func (b *BitSet) InPlaceUnion(compare *BitSet) { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() } - if compare.length > 0 { - b.extendSetMaybe(compare.length - 1) + if compare.length > 0 && compare.length-1 >= b.length { + b.extendSet(compare.length - 1) } - for i := 0; i < l; i++ { - b.set[i] |= compare.set[i] + if l > 0 { + // bounds check elimination + data, cmpData := b.set, compare.set + _ = data[l-1] + _ = cmpData[l-1] + + for i := 0; i < l; i++ { + data[i] |= cmpData[i] + } } if len(compare.set) > l { for i := l; i < len(compare.set); i++ { @@ -754,15 +852,21 @@ func (b *BitSet) SymmetricDifferenceCardinality(compare *BitSet) uint { func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) - } - if compare.length > 0 { - b.extendSetMaybe(compare.length - 1) - } - for i := 0; i < l; i++ { - b.set[i] ^= compare.set[i] + l := compare.wordCount() + if l > b.wordCount() { + l = b.wordCount() + } + if compare.length > 0 && compare.length-1 >= b.length { + b.extendSet(compare.length - 1) + } + if l > 0 { + // bounds check elimination + data, cmpData := b.set, compare.set + _ = data[l-1] + _ = cmpData[l-1] + for i := 0; i < l; i++ { + data[i] ^= cmpData[i] + } } if len(compare.set) > l { for i := l; i < len(compare.set); i++ { @@ -773,17 +877,17 @@ func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) { // Is the length an exact multiple of word sizes? func (b *BitSet) isLenExactMultiple() bool { - return b.length%wordSize == 0 + return wordsIndex(b.length) == 0 } // Clean last word by setting unused bits to 0 func (b *BitSet) cleanLastWord() { if !b.isLenExactMultiple() { - b.set[len(b.set)-1] &= allBits >> (wordSize - b.length%wordSize) + b.set[len(b.set)-1] &= allBits >> (wordSize - wordsIndex(b.length)) } } -// Complement computes the (local) complement of a biset (up to length bits) +// Complement computes the (local) complement of a bitset (up to length bits) func (b *BitSet) Complement() (result *BitSet) { panicIfNull(b) result = New(b.length) @@ -811,7 +915,6 @@ func (b *BitSet) None() bool { return false } } - return true } return true } @@ -824,12 +927,16 @@ func (b *BitSet) Any() bool { // IsSuperSet returns true if this is a superset of the other set func (b *BitSet) IsSuperSet(other *BitSet) bool { - for i, e := other.NextSet(0); e; i, e = other.NextSet(i + 1) { - if !b.Test(i) { + l := other.wordCount() + if b.wordCount() < l { + l = b.wordCount() + } + for i, word := range other.set[:l] { + if b.set[i]&word != word { return false } } - return true + return popcntSlice(other.set[l:]) == 0 } // IsStrictSuperSet returns true if this is a strict superset of the other set @@ -850,78 +957,156 @@ func (b *BitSet) DumpAsBits() string { return buffer.String() } -// BinaryStorageSize returns the binary storage requirements +// BinaryStorageSize returns the binary storage requirements (see WriteTo) in bytes. func (b *BitSet) BinaryStorageSize() int { - return binary.Size(uint64(0)) + binary.Size(b.set) + return int(wordBytes + wordBytes*uint(b.wordCount())) +} + +func readUint64Array(reader io.Reader, data []uint64) error { + length := len(data) + bufferSize := 128 + buffer := make([]byte, bufferSize*int(wordBytes)) + for i := 0; i < length; i += bufferSize { + end := i + bufferSize + if end > length { + end = length + buffer = buffer[:wordBytes*uint(end-i)] + } + chunk := data[i:end] + if _, err := io.ReadFull(reader, buffer); err != nil { + return err + } + for i := range chunk { + chunk[i] = uint64(binaryOrder.Uint64(buffer[8*i:])) + } + } + return nil } -// WriteTo writes a BitSet to a stream +func writeUint64Array(writer io.Writer, data []uint64) error { + bufferSize := 128 + buffer := make([]byte, bufferSize*int(wordBytes)) + for i := 0; i < len(data); i += bufferSize { + end := i + bufferSize + if end > len(data) { + end = len(data) + buffer = buffer[:wordBytes*uint(end-i)] + } + chunk := data[i:end] + for i, x := range chunk { + binaryOrder.PutUint64(buffer[8*i:], x) + } + _, err := writer.Write(buffer) + if err != nil { + return err + } + } + return nil +} + +// WriteTo writes a BitSet to a stream. The format is: +// 1. uint64 length +// 2. []uint64 set +// Upon success, the number of bytes written is returned. +// +// Performance: if this function is used to write to a disk or network +// connection, it might be beneficial to wrap the stream in a bufio.Writer. +// E.g., +// +// f, err := os.Create("myfile") +// w := bufio.NewWriter(f) func (b *BitSet) WriteTo(stream io.Writer) (int64, error) { length := uint64(b.length) - // Write length - err := binary.Write(stream, binaryOrder, length) + err := binary.Write(stream, binaryOrder, &length) if err != nil { - return 0, err + // Upon failure, we do not guarantee that we + // return the number of bytes written. + return int64(0), err } - - // Write set - err = binary.Write(stream, binaryOrder, b.set) - return int64(b.BinaryStorageSize()), err + err = writeUint64Array(stream, b.set[:b.wordCount()]) + if err != nil { + // Upon failure, we do not guarantee that we + // return the number of bytes written. + return int64(wordBytes), err + } + return int64(b.BinaryStorageSize()), nil } // ReadFrom reads a BitSet from a stream written using WriteTo +// The format is: +// 1. uint64 length +// 2. []uint64 set +// Upon success, the number of bytes read is returned. +// If the current BitSet is not large enough to hold the data, +// it is extended. In case of error, the BitSet is either +// left unchanged or made empty if the error occurs too late +// to preserve the content. +// +// Performance: if this function is used to read from a disk or network +// connection, it might be beneficial to wrap the stream in a bufio.Reader. +// E.g., +// +// f, err := os.Open("myfile") +// r := bufio.NewReader(f) func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) { var length uint64 - - // Read length first err := binary.Read(stream, binaryOrder, &length) if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } return 0, err } - newset := New(uint(length)) + newlength := uint(length) - if uint64(newset.length) != length { + if uint64(newlength) != length { return 0, errors.New("unmarshalling error: type mismatch") } + nWords := wordsNeeded(uint(newlength)) + if cap(b.set) >= nWords { + b.set = b.set[:nWords] + } else { + b.set = make([]uint64, nWords) + } + + b.length = newlength - // Read remaining bytes as set - err = binary.Read(stream, binaryOrder, newset.set) + err = readUint64Array(stream, b.set) if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + // We do not want to leave the BitSet partially filled as + // it is error prone. + b.set = b.set[:0] + b.length = 0 return 0, err } - *b = *newset return int64(b.BinaryStorageSize()), nil } // MarshalBinary encodes a BitSet into a binary form and returns the result. func (b *BitSet) MarshalBinary() ([]byte, error) { var buf bytes.Buffer - writer := bufio.NewWriter(&buf) - - _, err := b.WriteTo(writer) + _, err := b.WriteTo(&buf) if err != nil { return []byte{}, err } - err = writer.Flush() - return buf.Bytes(), err } // UnmarshalBinary decodes the binary form generated by MarshalBinary. func (b *BitSet) UnmarshalBinary(data []byte) error { buf := bytes.NewReader(data) - reader := bufio.NewReader(buf) - - _, err := b.ReadFrom(reader) - + _, err := b.ReadFrom(buf) return err } // MarshalJSON marshals a BitSet as a JSON structure -func (b *BitSet) MarshalJSON() ([]byte, error) { +func (b BitSet) MarshalJSON() ([]byte, error) { buffer := bytes.NewBuffer(make([]byte, 0, b.BinaryStorageSize())) _, err := b.WriteTo(buffer) if err != nil { diff --git a/vendor/github.com/bits-and-blooms/bitset/popcnt_19.go b/vendor/github.com/bits-and-blooms/bitset/popcnt_19.go index fc8ff4f3..7855c04b 100644 --- a/vendor/github.com/bits-and-blooms/bitset/popcnt_19.go +++ b/vendor/github.com/bits-and-blooms/bitset/popcnt_19.go @@ -1,3 +1,4 @@ +//go:build go1.9 // +build go1.9 package bitset @@ -14,6 +15,10 @@ func popcntSlice(s []uint64) uint64 { func popcntMaskSlice(s, m []uint64) uint64 { var cnt int + // this explicit check eliminates a bounds check in the loop + if len(m) < len(s) { + panic("mask slice is too short") + } for i := range s { cnt += bits.OnesCount64(s[i] &^ m[i]) } @@ -22,6 +27,10 @@ func popcntMaskSlice(s, m []uint64) uint64 { func popcntAndSlice(s, m []uint64) uint64 { var cnt int + // this explicit check eliminates a bounds check in the loop + if len(m) < len(s) { + panic("mask slice is too short") + } for i := range s { cnt += bits.OnesCount64(s[i] & m[i]) } @@ -30,6 +39,10 @@ func popcntAndSlice(s, m []uint64) uint64 { func popcntOrSlice(s, m []uint64) uint64 { var cnt int + // this explicit check eliminates a bounds check in the loop + if len(m) < len(s) { + panic("mask slice is too short") + } for i := range s { cnt += bits.OnesCount64(s[i] | m[i]) } @@ -38,6 +51,10 @@ func popcntOrSlice(s, m []uint64) uint64 { func popcntXorSlice(s, m []uint64) uint64 { var cnt int + // this explicit check eliminates a bounds check in the loop + if len(m) < len(s) { + panic("mask slice is too short") + } for i := range s { cnt += bits.OnesCount64(s[i] ^ m[i]) } diff --git a/vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.go b/vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.go index 4cf64f24..116e0444 100644 --- a/vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.go +++ b/vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.go @@ -1,5 +1,5 @@ -// +build !go1.9 -// +build amd64,!appengine +//go:build !go1.9 && amd64 && !appengine +// +build !go1.9,amd64,!appengine package bitset diff --git a/vendor/github.com/bits-and-blooms/bitset/popcnt_generic.go b/vendor/github.com/bits-and-blooms/bitset/popcnt_generic.go index 21e0ff7b..9e0ad464 100644 --- a/vendor/github.com/bits-and-blooms/bitset/popcnt_generic.go +++ b/vendor/github.com/bits-and-blooms/bitset/popcnt_generic.go @@ -1,3 +1,4 @@ +//go:build !go1.9 && (!amd64 || appengine) // +build !go1.9 // +build !amd64 appengine diff --git a/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_18.go b/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_18.go index c52b61be..12336e76 100644 --- a/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_18.go +++ b/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_18.go @@ -1,3 +1,4 @@ +//go:build !go1.9 // +build !go1.9 package bitset diff --git a/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_19.go b/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_19.go index 36a988e7..cfb0a840 100644 --- a/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_19.go +++ b/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_19.go @@ -1,3 +1,4 @@ +//go:build go1.9 // +build go1.9 package bitset diff --git a/vendor/modules.txt b/vendor/modules.txt index 0fbb595e..862d9b0e 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -20,7 +20,7 @@ github.com/Azure/azure-sdk-for-go/sdk/internal/uuid ## explicit; go 1.16 github.com/Azure/azure-sdk-for-go/sdk/storage/azblob github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/internal -# github.com/RoaringBitmap/roaring v1.2.3 +# github.com/RoaringBitmap/roaring v1.9.0 ## explicit; go 1.14 github.com/RoaringBitmap/roaring github.com/RoaringBitmap/roaring/internal @@ -72,8 +72,8 @@ github.com/aws/aws-sdk-go/service/sts/stsiface # github.com/beorn7/perks v1.0.1 ## explicit; go 1.11 github.com/beorn7/perks/quantile -# github.com/bits-and-blooms/bitset v1.2.0 -## explicit; go 1.14 +# github.com/bits-and-blooms/bitset v1.12.0 +## explicit; go 1.16 github.com/bits-and-blooms/bitset # github.com/cespare/xxhash/v2 v2.2.0 ## explicit; go 1.11