diff options
Diffstat (limited to 'vendor/github.com')
102 files changed, 3715 insertions, 4827 deletions
diff --git a/vendor/github.com/RoaringBitmap/roaring/.drone.yml b/vendor/github.com/RoaringBitmap/roaring/.drone.yml new file mode 100644 index 0000000000..698cd0e7a7 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/.drone.yml @@ -0,0 +1,20 @@ +kind: pipeline +name: default + +workspace: + base: /go + path: src/github.com/RoaringBitmap/roaring + +steps: +- name: test + image: golang + commands: + - go get -t + - go test + - go test -race -run TestConcurrent* + - go build -tags appengine + - go test -tags appengine + - GOARCH=386 go build + - GOARCH=386 go test + - GOARCH=arm go build + - GOARCH=arm64 go build diff --git a/vendor/github.com/RoaringBitmap/roaring/.travis.yml b/vendor/github.com/RoaringBitmap/roaring/.travis.yml index 1fdcf3e057..8839c14fd0 100644 --- a/vendor/github.com/RoaringBitmap/roaring/.travis.yml +++ b/vendor/github.com/RoaringBitmap/roaring/.travis.yml @@ -8,10 +8,12 @@ install: notifications: email: false go: -- 1.7.x -- 1.8.x -- 1.9.x -- 1.10.x +- "1.7.x" +- "1.8.x" +- "1.9.x" +- "1.10.x" +- "1.11.x" +- "1.12.x" - tip # whitelist @@ -21,10 +23,14 @@ branches: script: - goveralls -v -service travis-ci -ignore arraycontainer_gen.go,bitmapcontainer_gen.go,rle16_gen.go,rle_gen.go,roaringarray_gen.go,rle.go || go test - go test -race -run TestConcurrent* +- go build -tags appengine +- go test -tags appengine - GOARCH=arm64 go build - GOARCH=386 go build - GOARCH=386 go test - GOARCH=arm go build +- GOARCH=arm64 go build + matrix: allow_failures: - go: tip diff --git a/vendor/github.com/RoaringBitmap/roaring/AUTHORS b/vendor/github.com/RoaringBitmap/roaring/AUTHORS index 08c074047f..26ec99de9d 100644 --- a/vendor/github.com/RoaringBitmap/roaring/AUTHORS +++ b/vendor/github.com/RoaringBitmap/roaring/AUTHORS @@ -7,4 +7,5 @@ Bob Potter (@bpot), Tyson Maly (@tvmaly), Will Glynn (@willglynn), Brent Pedersen (@brentp) -Maciej Biłas (@maciej) +Maciej Biłas (@maciej), +Joe Nall (@joenall) diff --git a/vendor/github.com/RoaringBitmap/roaring/CONTRIBUTORS b/vendor/github.com/RoaringBitmap/roaring/CONTRIBUTORS index 70b4735dad..b1e3a379f0 100644 --- a/vendor/github.com/RoaringBitmap/roaring/CONTRIBUTORS +++ b/vendor/github.com/RoaringBitmap/roaring/CONTRIBUTORS @@ -9,4 +9,8 @@ Will Glynn (@willglynn), Brent Pedersen (@brentp), Jason E. Aten (@glycerine), Vali Malinoiu (@0x4139), -Forud Ghafouri (@fzerorubigd)
\ No newline at end of file +Forud Ghafouri (@fzerorubigd), +Joe Nall (@joenall), +(@fredim), +Edd Robinson (@e-dard), +Alexander Petrov (@alldroll) diff --git a/vendor/github.com/RoaringBitmap/roaring/LICENSE b/vendor/github.com/RoaringBitmap/roaring/LICENSE index aff5f9999b..3ccdd00084 100644 --- a/vendor/github.com/RoaringBitmap/roaring/LICENSE +++ b/vendor/github.com/RoaringBitmap/roaring/LICENSE @@ -200,3 +200,36 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + +================================================================================ + +Portions of runcontainer.go are from the Go standard library, which is licensed +under: + +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/RoaringBitmap/roaring/Makefile b/vendor/github.com/RoaringBitmap/roaring/Makefile index d5259b4c9e..906bd72569 100644 --- a/vendor/github.com/RoaringBitmap/roaring/Makefile +++ b/vendor/github.com/RoaringBitmap/roaring/Makefile @@ -1,4 +1,4 @@ -.PHONY: help all test format fmtcheck vet lint qa deps clean nuke rle backrle ser fetch-real-roaring-datasets +.PHONY: help all test format fmtcheck vet lint qa deps clean nuke ser fetch-real-roaring-datasets @@ -63,7 +63,7 @@ qa: fmtcheck test vet lint # Get the dependencies deps: - GOPATH=$(GOPATH) go get github.com/smartystreets/goconvey/convey + GOPATH=$(GOPATH) go get github.com/stretchr/testify GOPATH=$(GOPATH) go get github.com/willf/bitset GOPATH=$(GOPATH) go get github.com/golang/lint/golint GOPATH=$(GOPATH) go get github.com/mschoch/smat @@ -97,18 +97,8 @@ nuke: rm -rf ./target GOPATH=$(GOPATH) go clean -i ./... -rle: - cp rle.go rle16.go - perl -pi -e 's/32/16/g' rle16.go - cp rle_test.go rle16_test.go - perl -pi -e 's/32/16/g' rle16_test.go -backrle: - cp rle16.go rle.go - perl -pi -e 's/16/32/g' rle.go - perl -pi -e 's/2032/2016/g' rle.go - -ser: rle +ser: go generate cover: diff --git a/vendor/github.com/RoaringBitmap/roaring/README.md b/vendor/github.com/RoaringBitmap/roaring/README.md index 2c096ce8e6..b711d09ec2 100644 --- a/vendor/github.com/RoaringBitmap/roaring/README.md +++ b/vendor/github.com/RoaringBitmap/roaring/README.md @@ -1,4 +1,5 @@ roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https://travis-ci.org/RoaringBitmap/roaring) [![Coverage Status](https://coveralls.io/repos/github/RoaringBitmap/roaring/badge.svg?branch=master)](https://coveralls.io/github/RoaringBitmap/roaring?branch=master) [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring) +[![Build Status](https://cloud.drone.io/api/badges/RoaringBitmap/roaring/status.svg)](https://cloud.drone.io/RoaringBitmap/roaring) ============= This is a go version of the Roaring bitmap data structure. @@ -6,12 +7,12 @@ This is a go version of the Roaring bitmap data structure. Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and -[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. +[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. [lucene]: https://lucene.apache.org/ [solr]: https://lucene.apache.org/solr/ [elasticsearch]: https://www.elastic.co/products/elasticsearch -[druid]: http://druid.io/ +[druid]: https://druid.apache.org/ [spark]: https://spark.apache.org/ [opensearchserver]: http://www.opensearchserver.com [cloudtorrent]: https://github.com/jpillora/cloud-torrent @@ -61,7 +62,6 @@ http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/r Dependencies are fetched automatically by giving the `-t` flag to `go get`. they include - - github.com/smartystreets/goconvey/convey - github.com/willf/bitset - github.com/mschoch/smat - github.com/glycerine/go-unsnap-stream @@ -133,6 +133,7 @@ func main() { if rb1.Equals(newrb) { fmt.Println("I wrote the content to a byte stream and read it back.") } + // you can iterate over bitmaps using ReverseIterator(), Iterator, ManyIterator() } ``` @@ -206,7 +207,7 @@ You can use roaring with gore: - go get -u github.com/motemen/gore - Make sure that ``$GOPATH/bin`` is in your ``$PATH``. -- go get github/RoaringBitmap/roaring +- go get github.com/RoaringBitmap/roaring ```go $ gore diff --git a/vendor/github.com/RoaringBitmap/roaring/arraycontainer.go b/vendor/github.com/RoaringBitmap/roaring/arraycontainer.go index c395868210..621616f5df 100644 --- a/vendor/github.com/RoaringBitmap/roaring/arraycontainer.go +++ b/vendor/github.com/RoaringBitmap/roaring/arraycontainer.go @@ -24,12 +24,16 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uin } } -func (ac *arrayContainer) getShortIterator() shortIterable { +func (ac *arrayContainer) getShortIterator() shortPeekable { return &shortIterator{ac.content, 0} } +func (ac *arrayContainer) getReverseIterator() shortIterable { + return &reverseIterator{ac.content, len(ac.content) - 1} +} + func (ac *arrayContainer) getManyIterator() manyIterable { - return &manyIterator{ac.content, 0} + return &shortIterator{ac.content, 0} } func (ac *arrayContainer) minimum() uint16 { @@ -115,7 +119,6 @@ func (ac *arrayContainer) iremoveRange(firstOfRange, endx int) container { // flip the values in the range [firstOfRange,endx) func (ac *arrayContainer) not(firstOfRange, endx int) container { if firstOfRange >= endx { - //p("arrayContainer.not(): exiting early with ac.clone()") return ac.clone() } return ac.notClose(firstOfRange, endx-1) // remove everything in [firstOfRange,endx-1] @@ -124,18 +127,15 @@ func (ac *arrayContainer) not(firstOfRange, endx int) container { // flip the values in the range [firstOfRange,lastOfRange] func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container { if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange] - //p("arrayContainer.notClose(): exiting early with ac.clone()") return ac.clone() } // determine the span of array indices to be affected^M startIndex := binarySearch(ac.content, uint16(firstOfRange)) - //p("startIndex=%v", startIndex) if startIndex < 0 { startIndex = -startIndex - 1 } lastIndex := binarySearch(ac.content, uint16(lastOfRange)) - //p("lastIndex=%v", lastIndex) if lastIndex < 0 { lastIndex = -lastIndex - 2 } @@ -144,9 +144,7 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container { newValuesInRange := spanToBeFlipped - currentValuesInRange cardinalityChange := newValuesInRange - currentValuesInRange newCardinality := len(ac.content) + cardinalityChange - //p("new card is %v", newCardinality) if newCardinality > arrayDefaultMaxSize { - //p("new card over arrayDefaultMaxSize, so returning bitmap") return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1) } answer := newArrayContainer() @@ -503,7 +501,6 @@ func (ac *arrayContainer) lazyorArray(value2 *arrayContainer) container { } func (ac *arrayContainer) and(a container) container { - //p("ac.and() called") switch x := a.(type) { case *arrayContainer: return ac.andArray(x) @@ -550,7 +547,7 @@ func (ac *arrayContainer) iand(a container) container { return ac.iandBitmap(x) case *runContainer16: if x.isFull() { - return ac.clone() + return ac } return x.andArray(ac) } @@ -722,7 +719,6 @@ func (ac *arrayContainer) inot(firstOfRange, endx int) container { // flip the values in the range [firstOfRange,lastOfRange] func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container { - //p("ac.inotClose() starting") if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange] return ac } @@ -745,7 +741,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container { if cardinalityChange > 0 { if newCardinality > len(ac.content) { if newCardinality > arrayDefaultMaxSize { - //p("ac.inotClose() converting to bitmap and doing inot there") bcRet := ac.toBitmapContainer() bcRet.inot(firstOfRange, lastOfRange+1) *ac = *bcRet.toArrayContainer() @@ -766,7 +761,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container { } } ac.content = ac.content[:newCardinality] - //p("bottom of ac.inotClose(): returning ac") return ac } @@ -958,3 +952,17 @@ func (ac *arrayContainer) toEfficientContainer() container { func (ac *arrayContainer) containerType() contype { return arrayContype } + +func (ac *arrayContainer) addOffset(x uint16) []container { + low := &arrayContainer{} + high := &arrayContainer{} + for _, val := range ac.content { + y := uint32(val) + uint32(x) + if highbits(y) > 0 { + high.content = append(high.content, lowbits(y)) + } else { + low.content = append(low.content, lowbits(y)) + } + } + return []container{low, high} +} diff --git a/vendor/github.com/RoaringBitmap/roaring/arraycontainer_gen.go b/vendor/github.com/RoaringBitmap/roaring/arraycontainer_gen.go index cba6e53e30..6ee670ee51 100644 --- a/vendor/github.com/RoaringBitmap/roaring/arraycontainer_gen.go +++ b/vendor/github.com/RoaringBitmap/roaring/arraycontainer_gen.go @@ -6,7 +6,7 @@ package roaring import "github.com/tinylib/msgp/msgp" -// DecodeMsg implements msgp.Decodable +// Deprecated: DecodeMsg implements msgp.Decodable func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field @@ -49,7 +49,7 @@ func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) { return } -// EncodeMsg implements msgp.Encodable +// Deprecated: EncodeMsg implements msgp.Encodable func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) { // map header, size 1 // write "content" @@ -70,7 +70,7 @@ func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) { return } -// MarshalMsg implements msgp.Marshaler +// Deprecated: MarshalMsg implements msgp.Marshaler func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) // map header, size 1 @@ -83,7 +83,7 @@ func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) { return } -// UnmarshalMsg implements msgp.Unmarshaler +// Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field @@ -127,7 +127,7 @@ func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { return } -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z *arrayContainer) Msgsize() (s int) { s = 1 + 8 + msgp.ArrayHeaderSize + (len(z.content) * (msgp.Uint16Size)) return diff --git a/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go b/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go index 5e58b31f2b..e749721bb2 100644 --- a/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go +++ b/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go @@ -110,14 +110,54 @@ func (bcsi *bitmapContainerShortIterator) hasNext() bool { return bcsi.i >= 0 } +func (bcsi *bitmapContainerShortIterator) peekNext() uint16 { + return uint16(bcsi.i) +} + +func (bcsi *bitmapContainerShortIterator) advanceIfNeeded(minval uint16) { + if bcsi.hasNext() && bcsi.peekNext() < minval { + bcsi.i = bcsi.ptr.NextSetBit(int(minval)) + } +} + func newBitmapContainerShortIterator(a *bitmapContainer) *bitmapContainerShortIterator { return &bitmapContainerShortIterator{a, a.NextSetBit(0)} } -func (bc *bitmapContainer) getShortIterator() shortIterable { +func (bc *bitmapContainer) getShortIterator() shortPeekable { return newBitmapContainerShortIterator(bc) } +type reverseBitmapContainerShortIterator struct { + ptr *bitmapContainer + i int +} + +func (bcsi *reverseBitmapContainerShortIterator) next() uint16 { + if bcsi.i == -1 { + panic("reverseBitmapContainerShortIterator.next() going beyond what is available") + } + + j := bcsi.i + bcsi.i = bcsi.ptr.PrevSetBit(bcsi.i - 1) + return uint16(j) +} + +func (bcsi *reverseBitmapContainerShortIterator) hasNext() bool { + return bcsi.i >= 0 +} + +func newReverseBitmapContainerShortIterator(a *bitmapContainer) *reverseBitmapContainerShortIterator { + if a.cardinality == 0 { + return &reverseBitmapContainerShortIterator{a, -1} + } + return &reverseBitmapContainerShortIterator{a, int(a.maximum())} +} + +func (bc *bitmapContainer) getReverseIterator() shortIterable { + return newReverseBitmapContainerShortIterator(bc) +} + type bitmapContainerManyIterator struct { ptr *bitmapContainer base int @@ -131,7 +171,7 @@ func (bcmi *bitmapContainerManyIterator) nextMany(hs uint32, buf []uint32) int { for n < len(buf) { if bitset == 0 { - base += 1 + base++ if base >= len(bcmi.ptr.bitmap) { bcmi.base = base bcmi.bitset = bitset @@ -177,16 +217,13 @@ func bitmapContainerSizeInBytes() int { func bitmapEquals(a, b []uint64) bool { if len(a) != len(b) { - //p("bitmaps differ on length. len(a)=%v; len(b)=%v", len(a), len(b)) return false } for i, v := range a { if v != b[i] { - //p("bitmaps differ on element i=%v", i) return false } } - //p("bitmapEquals returning true") return true } @@ -209,9 +246,7 @@ func (bc *bitmapContainer) fillLeastSignificant16bits(x []uint32, i int, mask ui func (bc *bitmapContainer) equals(o container) bool { srb, ok := o.(*bitmapContainer) if ok { - //p("bitmapContainers.equals: both are bitmapContainers") if srb.cardinality != bc.cardinality { - //p("bitmapContainers.equals: card differs: %v vs %v", srb.cardinality, bc.cardinality) return false } return bitmapEquals(bc.bitmap, srb.bitmap) @@ -261,12 +296,6 @@ func (bc *bitmapContainer) iremoveReturnMinimized(i uint16) container { // iremove returns true if i was found. func (bc *bitmapContainer) iremove(i uint16) bool { - /* branchless code - w := bc.bitmap[i>>6] - mask := uint64(1) << (i % 64) - neww := w &^ mask - bc.cardinality -= int((w ^ neww) >> (i % 64)) - bc.bitmap[i>>6] = neww */ if bc.contains(i) { bc.cardinality-- bc.bitmap[i/64] &^= (uint64(1) << (i % 64)) @@ -306,14 +335,10 @@ func (bc *bitmapContainer) iremoveRange(firstOfRange, lastOfRange int) container // flip all values in range [firstOfRange,endx) func (bc *bitmapContainer) inot(firstOfRange, endx int) container { - p("bc.inot() called with [%v, %v)", firstOfRange, endx) if endx-firstOfRange == maxCapacity { - //p("endx-firstOfRange == maxCapacity") flipBitmapRange(bc.bitmap, firstOfRange, endx) bc.cardinality = maxCapacity - bc.cardinality - //p("bc.cardinality is now %v", bc.cardinality) } else if endx-firstOfRange > maxCapacity/2 { - //p("endx-firstOfRange > maxCapacity/2") flipBitmapRange(bc.bitmap, firstOfRange, endx) bc.computeCardinality() } else { @@ -517,11 +542,31 @@ func (bc *bitmapContainer) iorBitmap(value2 *bitmapContainer) container { func (bc *bitmapContainer) lazyIORArray(value2 *arrayContainer) container { answer := bc c := value2.getCardinality() - for k := 0; k < c; k++ { + for k := 0; k+3 < c; k += 4 { + content := (*[4]uint16)(unsafe.Pointer(&value2.content[k])) + vc0 := content[0] + i0 := uint(vc0) >> 6 + answer.bitmap[i0] = answer.bitmap[i0] | (uint64(1) << (vc0 % 64)) + + vc1 := content[1] + i1 := uint(vc1) >> 6 + answer.bitmap[i1] = answer.bitmap[i1] | (uint64(1) << (vc1 % 64)) + + vc2 := content[2] + i2 := uint(vc2) >> 6 + answer.bitmap[i2] = answer.bitmap[i2] | (uint64(1) << (vc2 % 64)) + + vc3 := content[3] + i3 := uint(vc3) >> 6 + answer.bitmap[i3] = answer.bitmap[i3] | (uint64(1) << (vc3 % 64)) + } + + for k := c &^ 3; k < c; k++ { vc := value2.content[k] i := uint(vc) >> 6 answer.bitmap[i] = answer.bitmap[i] | (uint64(1) << (vc % 64)) } + answer.cardinality = invalidCardinality return answer } @@ -789,8 +834,6 @@ func (bc *bitmapContainer) andNotRun16(rc *runContainer16) container { } func (bc *bitmapContainer) iandNot(a container) container { - //p("bitmapContainer.iandNot() starting") - switch x := a.(type) { case *arrayContainer: return bc.iandNotArray(x) @@ -844,12 +887,15 @@ func (bc *bitmapContainer) andNotBitmap(value2 *bitmapContainer) container { return ac } -func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) *bitmapContainer { +func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) container { newCardinality := int(popcntMaskSlice(bc.bitmap, value2.bitmap)) for k := 0; k < len(bc.bitmap); k++ { bc.bitmap[k] = bc.bitmap[k] &^ value2.bitmap[k] } bc.cardinality = newCardinality + if bc.getCardinality() <= arrayDefaultMaxSize { + return bc.toArrayContainer() + } return bc } @@ -917,6 +963,32 @@ func (bc *bitmapContainer) NextSetBit(i int) int { return -1 } +func (bc *bitmapContainer) PrevSetBit(i int) int { + if i < 0 { + return -1 + } + x := i / 64 + if x >= len(bc.bitmap) { + return -1 + } + + w := bc.bitmap[x] + + b := i % 64 + + w = w << uint(63-b) + if w != 0 { + return i - countLeadingZeros(w) + } + x-- + for ; x >= 0; x-- { + if bc.bitmap[x] != 0 { + return (x * 64) + 63 - countLeadingZeros(bc.bitmap[x]) + } + } + return -1 +} + // reference the java implementation // https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/BitmapContainer.java#L875-L892 // @@ -980,3 +1052,35 @@ func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer { func (bc *bitmapContainer) containerType() contype { return bitmapContype } + +func (bc *bitmapContainer) addOffset(x uint16) []container { + low := newBitmapContainer() + high := newBitmapContainer() + b := uint32(x) >> 6 + i := uint32(x) % 64 + end := uint32(1024) - b + if i == 0 { + copy(low.bitmap[b:], bc.bitmap[:end]) + copy(high.bitmap[:b], bc.bitmap[end:]) + } else { + low.bitmap[b] = bc.bitmap[0] << i + for k := uint32(1); k < end; k++ { + newval := bc.bitmap[k] << i + if newval == 0 { + newval = bc.bitmap[k-1] >> (64 - i) + } + low.bitmap[b+k] = newval + } + for k := end; k < 1024; k++ { + newval := bc.bitmap[k] << i + if newval == 0 { + newval = bc.bitmap[k-1] >> (64 - i) + } + high.bitmap[k-end] = newval + } + high.bitmap[b] = bc.bitmap[1023] >> (64 - i) + } + low.computeCardinality() + high.computeCardinality() + return []container{low, high} +} diff --git a/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer_gen.go b/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer_gen.go index f6c053e650..9b5a465f38 100644 --- a/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer_gen.go +++ b/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer_gen.go @@ -6,7 +6,7 @@ package roaring import "github.com/tinylib/msgp/msgp" -// DecodeMsg implements msgp.Decodable +// Deprecated: DecodeMsg implements msgp.Decodable func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field @@ -54,7 +54,7 @@ func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) { return } -// EncodeMsg implements msgp.Encodable +// Deprecated: EncodeMsg implements msgp.Encodable func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) { // map header, size 2 // write "cardinality" @@ -84,7 +84,7 @@ func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) { return } -// MarshalMsg implements msgp.Marshaler +// Deprecated: MarshalMsg implements msgp.Marshaler func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) // map header, size 2 @@ -100,7 +100,7 @@ func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) { return } -// UnmarshalMsg implements msgp.Unmarshaler +// Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field @@ -149,13 +149,13 @@ func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { return } -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z *bitmapContainer) Msgsize() (s int) { s = 1 + 12 + msgp.IntSize + 7 + msgp.ArrayHeaderSize + (len(z.bitmap) * (msgp.Uint64Size)) return } -// DecodeMsg implements msgp.Decodable +// Deprecated: DecodeMsg implements msgp.Decodable func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field @@ -239,7 +239,7 @@ func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) { return } -// EncodeMsg implements msgp.Encodable +// Deprecated: EncodeMsg implements msgp.Encodable func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) { // map header, size 2 // write "ptr" @@ -291,7 +291,7 @@ func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) { return } -// MarshalMsg implements msgp.Marshaler +// Deprecated: MarshalMsg implements msgp.Marshaler func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) // map header, size 2 @@ -317,7 +317,7 @@ func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error return } -// UnmarshalMsg implements msgp.Unmarshaler +// Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field @@ -402,7 +402,7 @@ func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err e return } -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z *bitmapContainerShortIterator) Msgsize() (s int) { s = 1 + 4 if z.ptr == nil { diff --git a/vendor/github.com/RoaringBitmap/roaring/byte_input.go b/vendor/github.com/RoaringBitmap/roaring/byte_input.go new file mode 100644 index 0000000000..f7a98a1d40 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/byte_input.go @@ -0,0 +1,161 @@ +package roaring + +import ( + "encoding/binary" + "io" +) + +type byteInput interface { + // next returns a slice containing the next n bytes from the buffer, + // advancing the buffer as if the bytes had been returned by Read. + next(n int) ([]byte, error) + // readUInt32 reads uint32 with LittleEndian order + readUInt32() (uint32, error) + // readUInt16 reads uint16 with LittleEndian order + readUInt16() (uint16, error) + // getReadBytes returns read bytes + getReadBytes() int64 + // skipBytes skips exactly n bytes + skipBytes(n int) error +} + +func newByteInputFromReader(reader io.Reader) byteInput { + return &byteInputAdapter{ + r: reader, + readBytes: 0, + } +} + +func newByteInput(buf []byte) byteInput { + return &byteBuffer{ + buf: buf, + off: 0, + } +} + +type byteBuffer struct { + buf []byte + off int +} + +// next returns a slice containing the next n bytes from the reader +// If there are fewer bytes than the given n, io.ErrUnexpectedEOF will be returned +func (b *byteBuffer) next(n int) ([]byte, error) { + m := len(b.buf) - b.off + + if n > m { + return nil, io.ErrUnexpectedEOF + } + + data := b.buf[b.off : b.off+n] + b.off += n + + return data, nil +} + +// readUInt32 reads uint32 with LittleEndian order +func (b *byteBuffer) readUInt32() (uint32, error) { + if len(b.buf)-b.off < 4 { + return 0, io.ErrUnexpectedEOF + } + + v := binary.LittleEndian.Uint32(b.buf[b.off:]) + b.off += 4 + + return v, nil +} + +// readUInt16 reads uint16 with LittleEndian order +func (b *byteBuffer) readUInt16() (uint16, error) { + if len(b.buf)-b.off < 2 { + return 0, io.ErrUnexpectedEOF + } + + v := binary.LittleEndian.Uint16(b.buf[b.off:]) + b.off += 2 + + return v, nil +} + +// getReadBytes returns read bytes +func (b *byteBuffer) getReadBytes() int64 { + return int64(b.off) +} + +// skipBytes skips exactly n bytes +func (b *byteBuffer) skipBytes(n int) error { + m := len(b.buf) - b.off + + if n > m { + return io.ErrUnexpectedEOF + } + + b.off += n + + return nil +} + +// reset resets the given buffer with a new byte slice +func (b *byteBuffer) reset(buf []byte) { + b.buf = buf + b.off = 0 +} + +type byteInputAdapter struct { + r io.Reader + readBytes int +} + +// next returns a slice containing the next n bytes from the buffer, +// advancing the buffer as if the bytes had been returned by Read. +func (b *byteInputAdapter) next(n int) ([]byte, error) { + buf := make([]byte, n) + m, err := io.ReadAtLeast(b.r, buf, n) + b.readBytes += m + + if err != nil { + return nil, err + } + + return buf, nil +} + +// readUInt32 reads uint32 with LittleEndian order +func (b *byteInputAdapter) readUInt32() (uint32, error) { + buf, err := b.next(4) + + if err != nil { + return 0, err + } + + return binary.LittleEndian.Uint32(buf), nil +} + +// readUInt16 reads uint16 with LittleEndian order +func (b *byteInputAdapter) readUInt16() (uint16, error) { + buf, err := b.next(2) + + if err != nil { + return 0, err + } + + return binary.LittleEndian.Uint16(buf), nil +} + +// getReadBytes returns read bytes +func (b *byteInputAdapter) getReadBytes() int64 { + return int64(b.readBytes) +} + +// skipBytes skips exactly n bytes +func (b *byteInputAdapter) skipBytes(n int) error { + _, err := b.next(n) + + return err +} + +// reset resets the given buffer with a new stream +func (b *byteInputAdapter) reset(stream io.Reader) { + b.r = stream + b.readBytes = 0 +} diff --git a/vendor/github.com/RoaringBitmap/roaring/clz.go b/vendor/github.com/RoaringBitmap/roaring/clz.go new file mode 100644 index 0000000000..bcd80d32f0 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/clz.go @@ -0,0 +1,11 @@ +// +build go1.9 +// "go1.9", from Go version 1.9 onward +// See https://golang.org/pkg/go/build/#hdr-Build_Constraints + +package roaring + +import "math/bits" + +func countLeadingZeros(x uint64) int { + return bits.LeadingZeros64(x) +} diff --git a/vendor/github.com/RoaringBitmap/roaring/clz_compat.go b/vendor/github.com/RoaringBitmap/roaring/clz_compat.go new file mode 100644 index 0000000000..eeef4de35b --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/clz_compat.go @@ -0,0 +1,36 @@ +// +build !go1.9 + +package roaring + +// LeadingZeroBits returns the number of consecutive most significant zero +// bits of x. +func countLeadingZeros(i uint64) int { + if i == 0 { + return 64 + } + n := 1 + x := uint32(i >> 32) + if x == 0 { + n += 32 + x = uint32(i) + } + if (x >> 16) == 0 { + n += 16 + x <<= 16 + } + if (x >> 24) == 0 { + n += 8 + x <<= 8 + } + if x>>28 == 0 { + n += 4 + x <<= 4 + } + if x>>30 == 0 { + n += 2 + x <<= 2 + + } + n -= int(x >> 31) + return n +} diff --git a/vendor/github.com/RoaringBitmap/roaring/go.mod b/vendor/github.com/RoaringBitmap/roaring/go.mod new file mode 100644 index 0000000000..f5aebf3967 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/go.mod @@ -0,0 +1,16 @@ +module github.com/RoaringBitmap/roaring + +go 1.12 + +require ( + github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 + github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 // indirect + github.com/golang/snappy v0.0.1 // indirect + github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 // indirect + github.com/jtolds/gls v4.20.0+incompatible // indirect + github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae + github.com/philhofer/fwd v1.0.0 // indirect + github.com/stretchr/testify v1.4.0 + github.com/tinylib/msgp v1.1.0 + github.com/willf/bitset v1.1.10 +) diff --git a/vendor/github.com/RoaringBitmap/roaring/go.sum b/vendor/github.com/RoaringBitmap/roaring/go.sum new file mode 100644 index 0000000000..2e27dbb6e6 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/go.sum @@ -0,0 +1,30 @@ +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 h1:Ujru1hufTHVb++eG6OuNDKMxZnGIvF6o/u8q/8h2+I4= +github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE= +github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8= +github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24= +github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= +github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw= +github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= +github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= +github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= +github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae h1:VeRdUYdCw49yizlSbMEn2SZ+gT+3IUKx8BqxyQdz+BY= +github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae/go.mod h1:qAyveg+e4CE+eKJXWVjKXM4ck2QobLqTDytGJbLLhJg= +github.com/philhofer/fwd v1.0.0 h1:UbZqGr5Y38ApvM/V/jEljVxwocdweyH+vmYvRPBnbqQ= +github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU= +github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= +github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc= +github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/vendor/github.com/RoaringBitmap/roaring/manyiterator.go b/vendor/github.com/RoaringBitmap/roaring/manyiterator.go index b4f630a7b4..3007563775 100644 --- a/vendor/github.com/RoaringBitmap/roaring/manyiterator.go +++ b/vendor/github.com/RoaringBitmap/roaring/manyiterator.go @@ -4,12 +4,7 @@ type manyIterable interface { nextMany(hs uint32, buf []uint32) int } -type manyIterator struct { - slice []uint16 - loc int -} - -func (si *manyIterator) nextMany(hs uint32, buf []uint32) int { +func (si *shortIterator) nextMany(hs uint32, buf []uint32) int { n := 0 l := si.loc s := si.slice diff --git a/vendor/github.com/RoaringBitmap/roaring/parallel.go b/vendor/github.com/RoaringBitmap/roaring/parallel.go index 09f94fe83c..2af1aed48e 100644 --- a/vendor/github.com/RoaringBitmap/roaring/parallel.go +++ b/vendor/github.com/RoaringBitmap/roaring/parallel.go @@ -143,8 +143,8 @@ func toBitmapContainer(c container) container { func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) { expectedKeys := -1 appendedKeys := 0 - keys := make([]uint16, 0) - containers := make([]container, 0) + var keys []uint16 + var containers []container for appendedKeys != expectedKeys { select { case item := <-resultChan: @@ -337,7 +337,7 @@ func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap { // (if it is set to 0, a default number of workers is chosen) func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap { var lKey uint16 = MaxUint16 - var hKey uint16 = 0 + var hKey uint16 bitmapsFiltered := bitmaps[:0] for _, b := range bitmaps { diff --git a/vendor/github.com/RoaringBitmap/roaring/rle.go b/vendor/github.com/RoaringBitmap/roaring/rle.go deleted file mode 100644 index 8f3d4edd68..0000000000 --- a/vendor/github.com/RoaringBitmap/roaring/rle.go +++ /dev/null @@ -1,1667 +0,0 @@ -package roaring - -// -// Copyright (c) 2016 by the roaring authors. -// Licensed under the Apache License, Version 2.0. -// -// We derive a few lines of code from the sort.Search -// function in the golang standard library. That function -// is Copyright 2009 The Go Authors, and licensed -// under the following BSD-style license. -/* -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -import ( - "fmt" - "sort" - "unsafe" -) - -//go:generate msgp -unexported - -// runContainer32 does run-length encoding of sets of -// uint32 integers. -type runContainer32 struct { - iv []interval32 - card int64 - - // avoid allocation during search - myOpts searchOptions `msg:"-"` -} - -// interval32 is the internal to runContainer32 -// structure that maintains the individual [Start, last] -// closed intervals. -type interval32 struct { - start uint32 - last uint32 -} - -// runlen returns the count of integers in the interval. -func (iv interval32) runlen() int64 { - return 1 + int64(iv.last) - int64(iv.start) -} - -// String produces a human viewable string of the contents. -func (iv interval32) String() string { - return fmt.Sprintf("[%d, %d]", iv.start, iv.last) -} - -func ivalString32(iv []interval32) string { - var s string - var j int - var p interval32 - for j, p = range iv { - s += fmt.Sprintf("%v:[%d, %d], ", j, p.start, p.last) - } - return s -} - -// String produces a human viewable string of the contents. -func (rc *runContainer32) String() string { - if len(rc.iv) == 0 { - return "runContainer32{}" - } - is := ivalString32(rc.iv) - return `runContainer32{` + is + `}` -} - -// uint32Slice is a sort.Sort convenience method -type uint32Slice []uint32 - -// Len returns the length of p. -func (p uint32Slice) Len() int { return len(p) } - -// Less returns p[i] < p[j] -func (p uint32Slice) Less(i, j int) bool { return p[i] < p[j] } - -// Swap swaps elements i and j. -func (p uint32Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } - -//msgp:ignore addHelper - -// addHelper helps build a runContainer32. -type addHelper32 struct { - runstart uint32 - runlen uint32 - actuallyAdded uint32 - m []interval32 - rc *runContainer32 -} - -func (ah *addHelper32) storeIval(runstart, runlen uint32) { - mi := interval32{start: runstart, last: runstart + runlen} - ah.m = append(ah.m, mi) -} - -func (ah *addHelper32) add(cur, prev uint32, i int) { - if cur == prev+1 { - ah.runlen++ - ah.actuallyAdded++ - } else { - if cur < prev { - panic(fmt.Sprintf("newRunContainer32FromVals sees "+ - "unsorted vals; vals[%v]=cur=%v < prev=%v. Sort your vals"+ - " before calling us with alreadySorted == true.", i, cur, prev)) - } - if cur == prev { - // ignore duplicates - } else { - ah.actuallyAdded++ - ah.storeIval(ah.runstart, ah.runlen) - ah.runstart = cur - ah.runlen = 0 - } - } -} - -// newRunContainerRange makes a new container made of just the specified closed interval [rangestart,rangelast] -func newRunContainer32Range(rangestart uint32, rangelast uint32) *runContainer32 { - rc := &runContainer32{} - rc.iv = append(rc.iv, interval32{start: rangestart, last: rangelast}) - return rc -} - -// newRunContainer32FromVals makes a new container from vals. -// -// For efficiency, vals should be sorted in ascending order. -// Ideally vals should not contain duplicates, but we detect and -// ignore them. If vals is already sorted in ascending order, then -// pass alreadySorted = true. Otherwise, for !alreadySorted, -// we will sort vals before creating a runContainer32 of them. -// We sort the original vals, so this will change what the -// caller sees in vals as a side effect. -func newRunContainer32FromVals(alreadySorted bool, vals ...uint32) *runContainer32 { - // keep this in sync with newRunContainer32FromArray below - - rc := &runContainer32{} - ah := addHelper32{rc: rc} - - if !alreadySorted { - sort.Sort(uint32Slice(vals)) - } - n := len(vals) - var cur, prev uint32 - switch { - case n == 0: - // nothing more - case n == 1: - ah.m = append(ah.m, interval32{start: vals[0], last: vals[0]}) - ah.actuallyAdded++ - default: - ah.runstart = vals[0] - ah.actuallyAdded++ - for i := 1; i < n; i++ { - prev = vals[i-1] - cur = vals[i] - ah.add(cur, prev, i) - } - ah.storeIval(ah.runstart, ah.runlen) - } - rc.iv = ah.m - rc.card = int64(ah.actuallyAdded) - return rc -} - -// newRunContainer32FromBitmapContainer makes a new run container from bc, -// somewhat efficiently. For reference, see the Java -// https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/RunContainer.java#L145-L192 -func newRunContainer32FromBitmapContainer(bc *bitmapContainer) *runContainer32 { - - rc := &runContainer32{} - nbrRuns := bc.numberOfRuns() - if nbrRuns == 0 { - return rc - } - rc.iv = make([]interval32, nbrRuns) - - longCtr := 0 // index of current long in bitmap - curWord := bc.bitmap[0] // its value - runCount := 0 - for { - // potentially multiword advance to first 1 bit - for curWord == 0 && longCtr < len(bc.bitmap)-1 { - longCtr++ - curWord = bc.bitmap[longCtr] - } - - if curWord == 0 { - // wrap up, no more runs - return rc - } - localRunStart := countTrailingZeros(curWord) - runStart := localRunStart + 64*longCtr - // stuff 1s into number's LSBs - curWordWith1s := curWord | (curWord - 1) - - // find the next 0, potentially in a later word - runEnd := 0 - for curWordWith1s == maxWord && longCtr < len(bc.bitmap)-1 { - longCtr++ - curWordWith1s = bc.bitmap[longCtr] - } - - if curWordWith1s == maxWord { - // a final unterminated run of 1s - runEnd = wordSizeInBits + longCtr*64 - rc.iv[runCount].start = uint32(runStart) - rc.iv[runCount].last = uint32(runEnd) - 1 - return rc - } - localRunEnd := countTrailingZeros(^curWordWith1s) - runEnd = localRunEnd + longCtr*64 - rc.iv[runCount].start = uint32(runStart) - rc.iv[runCount].last = uint32(runEnd) - 1 - runCount++ - // now, zero out everything right of runEnd. - curWord = curWordWith1s & (curWordWith1s + 1) - // We've lathered and rinsed, so repeat... - } - -} - -// -// newRunContainer32FromArray populates a new -// runContainer32 from the contents of arr. -// -func newRunContainer32FromArray(arr *arrayContainer) *runContainer32 { - // keep this in sync with newRunContainer32FromVals above - - rc := &runContainer32{} - ah := addHelper32{rc: rc} - - n := arr.getCardinality() - var cur, prev uint32 - switch { - case n == 0: - // nothing more - case n == 1: - ah.m = append(ah.m, interval32{start: uint32(arr.content[0]), last: uint32(arr.content[0])}) - ah.actuallyAdded++ - default: - ah.runstart = uint32(arr.content[0]) - ah.actuallyAdded++ - for i := 1; i < n; i++ { - prev = uint32(arr.content[i-1]) - cur = uint32(arr.content[i]) - ah.add(cur, prev, i) - } - ah.storeIval(ah.runstart, ah.runlen) - } - rc.iv = ah.m - rc.card = int64(ah.actuallyAdded) - return rc -} - -// set adds the integers in vals to the set. Vals -// must be sorted in increasing order; if not, you should set -// alreadySorted to false, and we will sort them in place for you. -// (Be aware of this side effect -- it will affect the callers -// view of vals). -// -// If you have a small number of additions to an already -// big runContainer32, calling Add() may be faster. -func (rc *runContainer32) set(alreadySorted bool, vals ...uint32) { - - rc2 := newRunContainer32FromVals(alreadySorted, vals...) - un := rc.union(rc2) - rc.iv = un.iv - rc.card = 0 -} - -// canMerge returns true if the intervals -// a and b either overlap or they are -// contiguous and so can be merged into -// a single interval. -func canMerge32(a, b interval32) bool { - if int64(a.last)+1 < int64(b.start) { - return false - } - return int64(b.last)+1 >= int64(a.start) -} - -// haveOverlap differs from canMerge in that -// it tells you if the intersection of a -// and b would contain an element (otherwise -// it would be the empty set, and we return -// false). -func haveOverlap32(a, b interval32) bool { - if int64(a.last)+1 <= int64(b.start) { - return false - } - return int64(b.last)+1 > int64(a.start) -} - -// mergeInterval32s joins a and b into a -// new interval, and panics if it cannot. -func mergeInterval32s(a, b interval32) (res interval32) { - if !canMerge32(a, b) { - panic(fmt.Sprintf("cannot merge %#v and %#v", a, b)) - } - if b.start < a.start { - res.start = b.start - } else { - res.start = a.start - } - if b.last > a.last { - res.last = b.last - } else { - res.last = a.last - } - return -} - -// intersectInterval32s returns the intersection -// of a and b. The isEmpty flag will be true if -// a and b were disjoint. -func intersectInterval32s(a, b interval32) (res interval32, isEmpty bool) { - if !haveOverlap32(a, b) { - isEmpty = true - return - } - if b.start > a.start { - res.start = b.start - } else { - res.start = a.start - } - if b.last < a.last { - res.last = b.last - } else { - res.last = a.last - } - return -} - -// union merges two runContainer32s, producing -// a new runContainer32 with the union of rc and b. -func (rc *runContainer32) union(b *runContainer32) *runContainer32 { - - // rc is also known as 'a' here, but golint insisted we - // call it rc for consistency with the rest of the methods. - - var m []interval32 - - alim := int64(len(rc.iv)) - blim := int64(len(b.iv)) - - var na int64 // next from a - var nb int64 // next from b - - // merged holds the current merge output, which might - // get additional merges before being appended to m. - var merged interval32 - var mergedUsed bool // is merged being used at the moment? - - var cura interval32 // currently considering this interval32 from a - var curb interval32 // currently considering this interval32 from b - - pass := 0 - for na < alim && nb < blim { - pass++ - cura = rc.iv[na] - curb = b.iv[nb] - - if mergedUsed { - mergedUpdated := false - if canMerge32(cura, merged) { - merged = mergeInterval32s(cura, merged) - na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1) - mergedUpdated = true - } - if canMerge32(curb, merged) { - merged = mergeInterval32s(curb, merged) - nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1) - mergedUpdated = true - } - if !mergedUpdated { - // we know that merged is disjoint from cura and curb - m = append(m, merged) - mergedUsed = false - } - continue - - } else { - // !mergedUsed - if !canMerge32(cura, curb) { - if cura.start < curb.start { - m = append(m, cura) - na++ - } else { - m = append(m, curb) - nb++ - } - } else { - merged = mergeInterval32s(cura, curb) - mergedUsed = true - na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1) - nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1) - } - } - } - var aDone, bDone bool - if na >= alim { - aDone = true - } - if nb >= blim { - bDone = true - } - // finish by merging anything remaining into merged we can: - if mergedUsed { - if !aDone { - aAdds: - for na < alim { - cura = rc.iv[na] - if canMerge32(cura, merged) { - merged = mergeInterval32s(cura, merged) - na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1) - } else { - break aAdds - } - } - - } - - if !bDone { - bAdds: - for nb < blim { - curb = b.iv[nb] - if canMerge32(curb, merged) { - merged = mergeInterval32s(curb, merged) - nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1) - } else { - break bAdds - } - } - - } - - m = append(m, merged) - } - if na < alim { - m = append(m, rc.iv[na:]...) - } - if nb < blim { - m = append(m, b.iv[nb:]...) - } - - res := &runContainer32{iv: m} - return res -} - -// unionCardinality returns the cardinality of the merger of two runContainer32s, the union of rc and b. -func (rc *runContainer32) unionCardinality(b *runContainer32) uint64 { - - // rc is also known as 'a' here, but golint insisted we - // call it rc for consistency with the rest of the methods. - answer := uint64(0) - - alim := int64(len(rc.iv)) - blim := int64(len(b.iv)) - - var na int64 // next from a - var nb int64 // next from b - - // merged holds the current merge output, which might - // get additional merges before being appended to m. - var merged interval32 - var mergedUsed bool // is merged being used at the moment? - - var cura interval32 // currently considering this interval32 from a - var curb interval32 // currently considering this interval32 from b - - pass := 0 - for na < alim && nb < blim { - pass++ - cura = rc.iv[na] - curb = b.iv[nb] - - if mergedUsed { - mergedUpdated := false - if canMerge32(cura, merged) { - merged = mergeInterval32s(cura, merged) - na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1) - mergedUpdated = true - } - if canMerge32(curb, merged) { - merged = mergeInterval32s(curb, merged) - nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1) - mergedUpdated = true - } - if !mergedUpdated { - // we know that merged is disjoint from cura and curb - //m = append(m, merged) - answer += uint64(merged.last) - uint64(merged.start) + 1 - mergedUsed = false - } - continue - - } else { - // !mergedUsed - if !canMerge32(cura, curb) { - if cura.start < curb.start { - answer += uint64(cura.last) - uint64(cura.start) + 1 - //m = append(m, cura) - na++ - } else { - answer += uint64(curb.last) - uint64(curb.start) + 1 - //m = append(m, curb) - nb++ - } - } else { - merged = mergeInterval32s(cura, curb) - mergedUsed = true - na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1) - nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1) - } - } - } - var aDone, bDone bool - if na >= alim { - aDone = true - } - if nb >= blim { - bDone = true - } - // finish by merging anything remaining into merged we can: - if mergedUsed { - if !aDone { - aAdds: - for na < alim { - cura = rc.iv[na] - if canMerge32(cura, merged) { - merged = mergeInterval32s(cura, merged) - na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1) - } else { - break aAdds - } - } - - } - - if !bDone { - bAdds: - for nb < blim { - curb = b.iv[nb] - if canMerge32(curb, merged) { - merged = mergeInterval32s(curb, merged) - nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1) - } else { - break bAdds - } - } - - } - - //m = append(m, merged) - answer += uint64(merged.last) - uint64(merged.start) + 1 - } - for _, r := range rc.iv[na:] { - answer += uint64(r.last) - uint64(r.start) + 1 - } - for _, r := range b.iv[nb:] { - answer += uint64(r.last) - uint64(r.start) + 1 - } - return answer -} - -// indexOfIntervalAtOrAfter is a helper for union. -func (rc *runContainer32) indexOfIntervalAtOrAfter(key int64, startIndex int64) int64 { - rc.myOpts.startIndex = startIndex - rc.myOpts.endxIndex = 0 - - w, already, _ := rc.search(key, &rc.myOpts) - if already { - return w - } - return w + 1 -} - -// intersect returns a new runContainer32 holding the -// intersection of rc (also known as 'a') and b. -func (rc *runContainer32) intersect(b *runContainer32) *runContainer32 { - - a := rc - numa := int64(len(a.iv)) - numb := int64(len(b.iv)) - res := &runContainer32{} - if numa == 0 || numb == 0 { - return res - } - - if numa == 1 && numb == 1 { - if !haveOverlap32(a.iv[0], b.iv[0]) { - return res - } - } - - var output []interval32 - - var acuri int64 - var bcuri int64 - - astart := int64(a.iv[acuri].start) - bstart := int64(b.iv[bcuri].start) - - var intersection interval32 - var leftoverstart int64 - var isOverlap, isLeftoverA, isLeftoverB bool - var done bool - pass := 0 -toploop: - for acuri < numa && bcuri < numb { - pass++ - - isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = intersectWithLeftover32(astart, int64(a.iv[acuri].last), bstart, int64(b.iv[bcuri].last)) - - if !isOverlap { - switch { - case astart < bstart: - acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart) - if done { - break toploop - } - astart = int64(a.iv[acuri].start) - - case astart > bstart: - bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart) - if done { - break toploop - } - bstart = int64(b.iv[bcuri].start) - - //default: - // panic("impossible that astart == bstart, since !isOverlap") - } - - } else { - // isOverlap - output = append(output, intersection) - switch { - case isLeftoverA: - // note that we change astart without advancing acuri, - // since we need to capture any 2ndary intersections with a.iv[acuri] - astart = leftoverstart - bcuri++ - if bcuri >= numb { - break toploop - } - bstart = int64(b.iv[bcuri].start) - case isLeftoverB: - // note that we change bstart without advancing bcuri, - // since we need to capture any 2ndary intersections with b.iv[bcuri] - bstart = leftoverstart - acuri++ - if acuri >= numa { - break toploop - } - astart = int64(a.iv[acuri].start) - default: - // neither had leftover, both completely consumed - // optionally, assert for sanity: - //if a.iv[acuri].endx != b.iv[bcuri].endx { - // panic("huh? should only be possible that endx agree now!") - //} - - // advance to next a interval - acuri++ - if acuri >= numa { - break toploop - } - astart = int64(a.iv[acuri].start) - - // advance to next b interval - bcuri++ - if bcuri >= numb { - break toploop - } - bstart = int64(b.iv[bcuri].start) - } - } - } // end for toploop - - if len(output) == 0 { - return res - } - - res.iv = output - return res -} - -// intersectCardinality returns the cardinality of the -// intersection of rc (also known as 'a') and b. -func (rc *runContainer32) intersectCardinality(b *runContainer32) int64 { - answer := int64(0) - - a := rc - numa := int64(len(a.iv)) - numb := int64(len(b.iv)) - if numa == 0 || numb == 0 { - return 0 - } - - if numa == 1 && numb == 1 { - if !haveOverlap32(a.iv[0], b.iv[0]) { - return 0 - } - } - - var acuri int64 - var bcuri int64 - - astart := int64(a.iv[acuri].start) - bstart := int64(b.iv[bcuri].start) - - var intersection interval32 - var leftoverstart int64 - var isOverlap, isLeftoverA, isLeftoverB bool - var done bool - pass := 0 -toploop: - for acuri < numa && bcuri < numb { - pass++ - - isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = intersectWithLeftover32(astart, int64(a.iv[acuri].last), bstart, int64(b.iv[bcuri].last)) - - if !isOverlap { - switch { - case astart < bstart: - acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart) - if done { - break toploop - } - astart = int64(a.iv[acuri].start) - - case astart > bstart: - bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart) - if done { - break toploop - } - bstart = int64(b.iv[bcuri].start) - - //default: - // panic("impossible that astart == bstart, since !isOverlap") - } - - } else { - // isOverlap - answer += int64(intersection.last) - int64(intersection.start) + 1 - switch { - case isLeftoverA: - // note that we change astart without advancing acuri, - // since we need to capture any 2ndary intersections with a.iv[acuri] - astart = leftoverstart - bcuri++ - if bcuri >= numb { - break toploop - } - bstart = int64(b.iv[bcuri].start) - case isLeftoverB: - // note that we change bstart without advancing bcuri, - // since we need to capture any 2ndary intersections with b.iv[bcuri] - bstart = leftoverstart - acuri++ - if acuri >= numa { - break toploop - } - astart = int64(a.iv[acuri].start) - default: - // neither had leftover, both completely consumed - // optionally, assert for sanity: - //if a.iv[acuri].endx != b.iv[bcuri].endx { - // panic("huh? should only be possible that endx agree now!") - //} - - // advance to next a interval - acuri++ - if acuri >= numa { - break toploop - } - astart = int64(a.iv[acuri].start) - - // advance to next b interval - bcuri++ - if bcuri >= numb { - break toploop - } - bstart = int64(b.iv[bcuri].start) - } - } - } // end for toploop - - return answer -} - -// get returns true if key is in the container. -func (rc *runContainer32) contains(key uint32) bool { - _, in, _ := rc.search(int64(key), nil) - return in -} - -// numIntervals returns the count of intervals in the container. -func (rc *runContainer32) numIntervals() int { - return len(rc.iv) -} - -// search returns alreadyPresent to indicate if the -// key is already in one of our interval32s. -// -// If key is alreadyPresent, then whichInterval32 tells -// you where. -// -// If key is not already present, then whichInterval32 is -// set as follows: -// -// a) whichInterval32 == len(rc.iv)-1 if key is beyond our -// last interval32 in rc.iv; -// -// b) whichInterval32 == -1 if key is before our first -// interval32 in rc.iv; -// -// c) whichInterval32 is set to the minimum index of rc.iv -// which comes strictly before the key; -// so rc.iv[whichInterval32].last < key, -// and if whichInterval32+1 exists, then key < rc.iv[whichInterval32+1].start -// (Note that whichInterval32+1 won't exist when -// whichInterval32 is the last interval.) -// -// runContainer32.search always returns whichInterval32 < len(rc.iv). -// -// If not nil, opts can be used to further restrict -// the search space. -// -func (rc *runContainer32) search(key int64, opts *searchOptions) (whichInterval32 int64, alreadyPresent bool, numCompares int) { - n := int64(len(rc.iv)) - if n == 0 { - return -1, false, 0 - } - - startIndex := int64(0) - endxIndex := n - if opts != nil { - startIndex = opts.startIndex - - // let endxIndex == 0 mean no effect - if opts.endxIndex > 0 { - endxIndex = opts.endxIndex - } - } - - // sort.Search returns the smallest index i - // in [0, n) at which f(i) is true, assuming that on the range [0, n), - // f(i) == true implies f(i+1) == true. - // If there is no such index, Search returns n. - - // For correctness, this began as verbatim snippet from - // sort.Search in the Go standard lib. - // We inline our comparison function for speed, and - // annotate with numCompares - // to observe and test that extra bounds are utilized. - i, j := startIndex, endxIndex - for i < j { - h := i + (j-i)/2 // avoid overflow when computing h as the bisector - // i <= h < j - numCompares++ - if !(key < int64(rc.iv[h].start)) { - i = h + 1 - } else { - j = h - } - } - below := i - // end std lib snippet. - - // The above is a simple in-lining and annotation of: - /* below := sort.Search(n, - func(i int) bool { - return key < rc.iv[i].start - }) - */ - whichInterval32 = below - 1 - - if below == n { - // all falses => key is >= start of all interval32s - // ... so does it belong to the last interval32? - if key < int64(rc.iv[n-1].last)+1 { - // yes, it belongs to the last interval32 - alreadyPresent = true - return - } - // no, it is beyond the last interval32. - // leave alreadyPreset = false - return - } - - // INVAR: key is below rc.iv[below] - if below == 0 { - // key is before the first first interval32. - // leave alreadyPresent = false - return - } - - // INVAR: key is >= rc.iv[below-1].start and - // key is < rc.iv[below].start - - // is key in below-1 interval32? - if key >= int64(rc.iv[below-1].start) && key < int64(rc.iv[below-1].last)+1 { - // yes, it is. key is in below-1 interval32. - alreadyPresent = true - return - } - - // INVAR: key >= rc.iv[below-1].endx && key < rc.iv[below].start - // leave alreadyPresent = false - return -} - -// cardinality returns the count of the integers stored in the -// runContainer32. -func (rc *runContainer32) cardinality() int64 { - if len(rc.iv) == 0 { - rc.card = 0 - return 0 - } - if rc.card > 0 { - return rc.card // already cached - } - // have to compute it - var n int64 - for _, p := range rc.iv { - n += p.runlen() - } - rc.card = n // cache it - return n -} - -// AsSlice decompresses the contents into a []uint32 slice. -func (rc *runContainer32) AsSlice() []uint32 { - s := make([]uint32, rc.cardinality()) - j := 0 - for _, p := range rc.iv { - for i := p.start; i <= p.last; i++ { - s[j] = i - j++ - } - } - return s -} - -// newRunContainer32 creates an empty run container. -func newRunContainer32() *runContainer32 { - return &runContainer32{} -} - -// newRunContainer32CopyIv creates a run container, initializing -// with a copy of the supplied iv slice. -// -func newRunContainer32CopyIv(iv []interval32) *runContainer32 { - rc := &runContainer32{ - iv: make([]interval32, len(iv)), - } - copy(rc.iv, iv) - return rc -} - -func (rc *runContainer32) Clone() *runContainer32 { - rc2 := newRunContainer32CopyIv(rc.iv) - return rc2 -} - -// newRunContainer32TakeOwnership returns a new runContainer32 -// backed by the provided iv slice, which we will -// assume exclusive control over from now on. -// -func newRunContainer32TakeOwnership(iv []interval32) *runContainer32 { - rc := &runContainer32{ - iv: iv, - } - return rc -} - -const baseRc32Size = int(unsafe.Sizeof(runContainer32{})) -const perIntervalRc32Size = int(unsafe.Sizeof(interval32{})) - -const baseDiskRc32Size = int(unsafe.Sizeof(uint32(0))) - -// see also runContainer32SerializedSizeInBytes(numRuns int) int - -// getSizeInBytes returns the number of bytes of memory -// required by this runContainer32. -func (rc *runContainer32) getSizeInBytes() int { - return perIntervalRc32Size*len(rc.iv) + baseRc32Size -} - -// runContainer32SerializedSizeInBytes returns the number of bytes of disk -// required to hold numRuns in a runContainer32. -func runContainer32SerializedSizeInBytes(numRuns int) int { - return perIntervalRc32Size*numRuns + baseDiskRc32Size -} - -// Add adds a single value k to the set. -func (rc *runContainer32) Add(k uint32) (wasNew bool) { - // TODO comment from runContainer32.java: - // it might be better and simpler to do return - // toBitmapOrArrayContainer(getCardinality()).add(k) - // but note that some unit tests use this method to build up test - // runcontainers without calling runOptimize - - k64 := int64(k) - - index, present, _ := rc.search(k64, nil) - if present { - return // already there - } - wasNew = true - - // increment card if it is cached already - if rc.card > 0 { - rc.card++ - } - n := int64(len(rc.iv)) - if index == -1 { - // we may need to extend the first run - if n > 0 { - if rc.iv[0].start == k+1 { - rc.iv[0].start = k - return - } - } - // nope, k stands alone, starting the new first interval32. - rc.iv = append([]interval32{{start: k, last: k}}, rc.iv...) - return - } - - // are we off the end? handle both index == n and index == n-1: - if index >= n-1 { - if int64(rc.iv[n-1].last)+1 == k64 { - rc.iv[n-1].last++ - return - } - rc.iv = append(rc.iv, interval32{start: k, last: k}) - return - } - - // INVAR: index and index+1 both exist, and k goes between them. - // - // Now: add k into the middle, - // possibly fusing with index or index+1 interval32 - // and possibly resulting in fusing of two interval32s - // that had a one integer gap. - - left := index - right := index + 1 - - // are we fusing left and right by adding k? - if int64(rc.iv[left].last)+1 == k64 && int64(rc.iv[right].start) == k64+1 { - // fuse into left - rc.iv[left].last = rc.iv[right].last - // remove redundant right - rc.iv = append(rc.iv[:left+1], rc.iv[right+1:]...) - return - } - - // are we an addition to left? - if int64(rc.iv[left].last)+1 == k64 { - // yes - rc.iv[left].last++ - return - } - - // are we an addition to right? - if int64(rc.iv[right].start) == k64+1 { - // yes - rc.iv[right].start = k - return - } - - // k makes a standalone new interval32, inserted in the middle - tail := append([]interval32{{start: k, last: k}}, rc.iv[right:]...) - rc.iv = append(rc.iv[:left+1], tail...) - return -} - -//msgp:ignore runIterator - -// runIterator32 advice: you must call Next() at least once -// before calling Cur(); and you should call HasNext() -// before calling Next() to insure there are contents. -type runIterator32 struct { - rc *runContainer32 - curIndex int64 - curPosInIndex uint32 - curSeq int64 -} - -// newRunIterator32 returns a new empty run container. -func (rc *runContainer32) newRunIterator32() *runIterator32 { - return &runIterator32{rc: rc, curIndex: -1} -} - -// HasNext returns false if calling Next will panic. It -// returns true when there is at least one more value -// available in the iteration sequence. -func (ri *runIterator32) hasNext() bool { - if len(ri.rc.iv) == 0 { - return false - } - if ri.curIndex == -1 { - return true - } - return ri.curSeq+1 < ri.rc.cardinality() -} - -// cur returns the current value pointed to by the iterator. -func (ri *runIterator32) cur() uint32 { - return ri.rc.iv[ri.curIndex].start + ri.curPosInIndex -} - -// Next returns the next value in the iteration sequence. -func (ri *runIterator32) next() uint32 { - if !ri.hasNext() { - panic("no Next available") - } - if ri.curIndex >= int64(len(ri.rc.iv)) { - panic("runIterator.Next() going beyond what is available") - } - if ri.curIndex == -1 { - // first time is special - ri.curIndex = 0 - } else { - ri.curPosInIndex++ - if int64(ri.rc.iv[ri.curIndex].start)+int64(ri.curPosInIndex) == int64(ri.rc.iv[ri.curIndex].last)+1 { - ri.curPosInIndex = 0 - ri.curIndex++ - } - ri.curSeq++ - } - return ri.cur() -} - -// remove removes the element that the iterator -// is on from the run container. You can use -// Cur if you want to double check what is about -// to be deleted. -func (ri *runIterator32) remove() uint32 { - n := ri.rc.cardinality() - if n == 0 { - panic("runIterator.Remove called on empty runContainer32") - } - cur := ri.cur() - - ri.rc.deleteAt(&ri.curIndex, &ri.curPosInIndex, &ri.curSeq) - return cur -} - -// remove removes key from the container. -func (rc *runContainer32) removeKey(key uint32) (wasPresent bool) { - - var index int64 - var curSeq int64 - index, wasPresent, _ = rc.search(int64(key), nil) - if !wasPresent { - return // already removed, nothing to do. - } - pos := key - rc.iv[index].start - rc.deleteAt(&index, &pos, &curSeq) - return -} - -// internal helper functions - -func (rc *runContainer32) deleteAt(curIndex *int64, curPosInIndex *uint32, curSeq *int64) { - rc.card-- - (*curSeq)-- - ci := *curIndex - pos := *curPosInIndex - - // are we first, last, or in the middle of our interval32? - switch { - case pos == 0: - if int64(rc.iv[ci].start) == int64(rc.iv[ci].last) { - // our interval disappears - rc.iv = append(rc.iv[:ci], rc.iv[ci+1:]...) - // curIndex stays the same, since the delete did - // the advance for us. - *curPosInIndex = 0 - } else { - rc.iv[ci].start++ // no longer overflowable - } - case int64(pos) == rc.iv[ci].runlen()-1: - // last - rc.iv[ci].last-- - // our interval32 cannot disappear, else we would have been pos == 0, case first above. - (*curPosInIndex)-- - // if we leave *curIndex alone, then Next() will work properly even after the delete. - default: - //middle - // split into two, adding an interval32 - new0 := interval32{ - start: rc.iv[ci].start, - last: rc.iv[ci].start + *curPosInIndex - 1} - - new1start := int64(rc.iv[ci].start) + int64(*curPosInIndex) + 1 - if new1start > int64(MaxUint32) { - panic("overflow?!?!") - } - new1 := interval32{ - start: uint32(new1start), - last: rc.iv[ci].last} - tail := append([]interval32{new0, new1}, rc.iv[ci+1:]...) - rc.iv = append(rc.iv[:ci], tail...) - // update curIndex and curPosInIndex - (*curIndex)++ - *curPosInIndex = 0 - } - -} - -func have4Overlap32(astart, alast, bstart, blast int64) bool { - if alast+1 <= bstart { - return false - } - return blast+1 > astart -} - -func intersectWithLeftover32(astart, alast, bstart, blast int64) (isOverlap, isLeftoverA, isLeftoverB bool, leftoverstart int64, intersection interval32) { - if !have4Overlap32(astart, alast, bstart, blast) { - return - } - isOverlap = true - - // do the intersection: - if bstart > astart { - intersection.start = uint32(bstart) - } else { - intersection.start = uint32(astart) - } - switch { - case blast < alast: - isLeftoverA = true - leftoverstart = blast + 1 - intersection.last = uint32(blast) - case alast < blast: - isLeftoverB = true - leftoverstart = alast + 1 - intersection.last = uint32(alast) - default: - // alast == blast - intersection.last = uint32(alast) - } - - return -} - -func (rc *runContainer32) findNextIntervalThatIntersectsStartingFrom(startIndex int64, key int64) (index int64, done bool) { - - rc.myOpts.startIndex = startIndex - rc.myOpts.endxIndex = 0 - - w, _, _ := rc.search(key, &rc.myOpts) - // rc.search always returns w < len(rc.iv) - if w < startIndex { - // not found and comes before lower bound startIndex, - // so just use the lower bound. - if startIndex == int64(len(rc.iv)) { - // also this bump up means that we are done - return startIndex, true - } - return startIndex, false - } - - return w, false -} - -func sliceToString32(m []interval32) string { - s := "" - for i := range m { - s += fmt.Sprintf("%v: %s, ", i, m[i]) - } - return s -} - -// selectInt32 returns the j-th value in the container. -// We panic of j is out of bounds. -func (rc *runContainer32) selectInt32(j uint32) int { - n := rc.cardinality() - if int64(j) > n { - panic(fmt.Sprintf("Cannot select %v since Cardinality is %v", j, n)) - } - - var offset int64 - for k := range rc.iv { - nextOffset := offset + rc.iv[k].runlen() + 1 - if nextOffset > int64(j) { - return int(int64(rc.iv[k].start) + (int64(j) - offset)) - } - offset = nextOffset - } - panic(fmt.Sprintf("Cannot select %v since Cardinality is %v", j, n)) -} - -// helper for invert -func (rc *runContainer32) invertlastInterval(origin uint32, lastIdx int) []interval32 { - cur := rc.iv[lastIdx] - if cur.last == MaxUint32 { - if cur.start == origin { - return nil // empty container - } - return []interval32{{start: origin, last: cur.start - 1}} - } - if cur.start == origin { - return []interval32{{start: cur.last + 1, last: MaxUint32}} - } - // invert splits - return []interval32{ - {start: origin, last: cur.start - 1}, - {start: cur.last + 1, last: MaxUint32}, - } -} - -// invert returns a new container (not inplace), that is -// the inversion of rc. For each bit b in rc, the -// returned value has !b -func (rc *runContainer32) invert() *runContainer32 { - ni := len(rc.iv) - var m []interval32 - switch ni { - case 0: - return &runContainer32{iv: []interval32{{0, MaxUint32}}} - case 1: - return &runContainer32{iv: rc.invertlastInterval(0, 0)} - } - var invstart int64 - ult := ni - 1 - for i, cur := range rc.iv { - if i == ult { - // invertlastInteval will add both intervals (b) and (c) in - // diagram below. - m = append(m, rc.invertlastInterval(uint32(invstart), i)...) - break - } - // INVAR: i and cur are not the last interval, there is a next at i+1 - // - // ........[cur.start, cur.last] ...... [next.start, next.last].... - // ^ ^ ^ - // (a) (b) (c) - // - // Now: we add interval (a); but if (a) is empty, for cur.start==0, we skip it. - if cur.start > 0 { - m = append(m, interval32{start: uint32(invstart), last: cur.start - 1}) - } - invstart = int64(cur.last + 1) - } - return &runContainer32{iv: m} -} - -func (iv interval32) equal(b interval32) bool { - if iv.start == b.start { - return iv.last == b.last - } - return false -} - -func (iv interval32) isSuperSetOf(b interval32) bool { - return iv.start <= b.start && b.last <= iv.last -} - -func (iv interval32) subtractInterval(del interval32) (left []interval32, delcount int64) { - isect, isEmpty := intersectInterval32s(iv, del) - - if isEmpty { - return nil, 0 - } - if del.isSuperSetOf(iv) { - return nil, iv.runlen() - } - - switch { - case isect.start > iv.start && isect.last < iv.last: - new0 := interval32{start: iv.start, last: isect.start - 1} - new1 := interval32{start: isect.last + 1, last: iv.last} - return []interval32{new0, new1}, isect.runlen() - case isect.start == iv.start: - return []interval32{{start: isect.last + 1, last: iv.last}}, isect.runlen() - default: - return []interval32{{start: iv.start, last: isect.start - 1}}, isect.runlen() - } -} - -func (rc *runContainer32) isubtract(del interval32) { - origiv := make([]interval32, len(rc.iv)) - copy(origiv, rc.iv) - n := int64(len(rc.iv)) - if n == 0 { - return // already done. - } - - _, isEmpty := intersectInterval32s( - interval32{ - start: rc.iv[0].start, - last: rc.iv[n-1].last, - }, del) - if isEmpty { - return // done - } - // INVAR there is some intersection between rc and del - istart, startAlready, _ := rc.search(int64(del.start), nil) - ilast, lastAlready, _ := rc.search(int64(del.last), nil) - rc.card = -1 - if istart == -1 { - if ilast == n-1 && !lastAlready { - rc.iv = nil - return - } - } - // some intervals will remain - switch { - case startAlready && lastAlready: - res0, _ := rc.iv[istart].subtractInterval(del) - - // would overwrite values in iv b/c res0 can have len 2. so - // write to origiv instead. - lost := 1 + ilast - istart - changeSize := int64(len(res0)) - lost - newSize := int64(len(rc.iv)) + changeSize - - // rc.iv = append(pre, caboose...) - // return - - if ilast != istart { - res1, _ := rc.iv[ilast].subtractInterval(del) - res0 = append(res0, res1...) - changeSize = int64(len(res0)) - lost - newSize = int64(len(rc.iv)) + changeSize - } - switch { - case changeSize < 0: - // shrink - copy(rc.iv[istart+int64(len(res0)):], rc.iv[ilast+1:]) - copy(rc.iv[istart:istart+int64(len(res0))], res0) - rc.iv = rc.iv[:newSize] - return - case changeSize == 0: - // stay the same - copy(rc.iv[istart:istart+int64(len(res0))], res0) - return - default: - // changeSize > 0 is only possible when ilast == istart. - // Hence we now know: changeSize == 1 and len(res0) == 2 - rc.iv = append(rc.iv, interval32{}) - // len(rc.iv) is correct now, no need to rc.iv = rc.iv[:newSize] - - // copy the tail into place - copy(rc.iv[ilast+2:], rc.iv[ilast+1:]) - // copy the new item(s) into place - copy(rc.iv[istart:istart+2], res0) - return - } - - case !startAlready && !lastAlready: - // we get to discard whole intervals - - // from the search() definition: - - // if del.start is not present, then istart is - // set as follows: - // - // a) istart == n-1 if del.start is beyond our - // last interval32 in rc.iv; - // - // b) istart == -1 if del.start is before our first - // interval32 in rc.iv; - // - // c) istart is set to the minimum index of rc.iv - // which comes strictly before the del.start; - // so del.start > rc.iv[istart].last, - // and if istart+1 exists, then del.start < rc.iv[istart+1].startx - - // if del.last is not present, then ilast is - // set as follows: - // - // a) ilast == n-1 if del.last is beyond our - // last interval32 in rc.iv; - // - // b) ilast == -1 if del.last is before our first - // interval32 in rc.iv; - // - // c) ilast is set to the minimum index of rc.iv - // which comes strictly before the del.last; - // so del.last > rc.iv[ilast].last, - // and if ilast+1 exists, then del.last < rc.iv[ilast+1].start - - // INVAR: istart >= 0 - pre := rc.iv[:istart+1] - if ilast == n-1 { - rc.iv = pre - return - } - // INVAR: ilast < n-1 - lost := ilast - istart - changeSize := -lost - newSize := int64(len(rc.iv)) + changeSize - if changeSize != 0 { - copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:]) - } - rc.iv = rc.iv[:newSize] - return - - case startAlready && !lastAlready: - // we can only shrink or stay the same size - // i.e. we either eliminate the whole interval, - // or just cut off the right side. - res0, _ := rc.iv[istart].subtractInterval(del) - if len(res0) > 0 { - // len(res) must be 1 - rc.iv[istart] = res0[0] - } - lost := 1 + (ilast - istart) - changeSize := int64(len(res0)) - lost - newSize := int64(len(rc.iv)) + changeSize - if changeSize != 0 { - copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:]) - } - rc.iv = rc.iv[:newSize] - return - - case !startAlready && lastAlready: - // we can only shrink or stay the same size - res1, _ := rc.iv[ilast].subtractInterval(del) - lost := ilast - istart - changeSize := int64(len(res1)) - lost - newSize := int64(len(rc.iv)) + changeSize - if changeSize != 0 { - // move the tail first to make room for res1 - copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:]) - } - copy(rc.iv[istart+1:], res1) - rc.iv = rc.iv[:newSize] - return - } -} - -// compute rc minus b, and return the result as a new value (not inplace). -// port of run_container_andnot from CRoaring... -// https://github.com/RoaringBitmap/CRoaring/blob/master/src/containers/run.c#L435-L496 -func (rc *runContainer32) AndNotRunContainer32(b *runContainer32) *runContainer32 { - - if len(b.iv) == 0 || len(rc.iv) == 0 { - return rc - } - - dst := newRunContainer32() - apos := 0 - bpos := 0 - - a := rc - - astart := a.iv[apos].start - alast := a.iv[apos].last - bstart := b.iv[bpos].start - blast := b.iv[bpos].last - - alen := len(a.iv) - blen := len(b.iv) - - for apos < alen && bpos < blen { - switch { - case alast < bstart: - // output the first run - dst.iv = append(dst.iv, interval32{start: astart, last: alast}) - apos++ - if apos < alen { - astart = a.iv[apos].start - alast = a.iv[apos].last - } - case blast < astart: - // exit the second run - bpos++ - if bpos < blen { - bstart = b.iv[bpos].start - blast = b.iv[bpos].last - } - default: - // a: [ ] - // b: [ ] - // alast >= bstart - // blast >= astart - if astart < bstart { - dst.iv = append(dst.iv, interval32{start: astart, last: bstart - 1}) - } - if alast > blast { - astart = blast + 1 - } else { - apos++ - if apos < alen { - astart = a.iv[apos].start - alast = a.iv[apos].last - } - } - } - } - if apos < alen { - dst.iv = append(dst.iv, interval32{start: astart, last: alast}) - apos++ - if apos < alen { - dst.iv = append(dst.iv, a.iv[apos:]...) - } - } - - return dst -} - -func (rc *runContainer32) numberOfRuns() (nr int) { - return len(rc.iv) -} - -func (rc *runContainer32) containerType() contype { - return run32Contype -} - -func (rc *runContainer32) equals32(srb *runContainer32) bool { - //p("both rc32") - // Check if the containers are the same object. - if rc == srb { - //p("same object") - return true - } - - if len(srb.iv) != len(rc.iv) { - //p("iv len differ") - return false - } - - for i, v := range rc.iv { - if v != srb.iv[i] { - //p("differ at iv i=%v, srb.iv[i]=%v, rc.iv[i]=%v", i, srb.iv[i], rc.iv[i]) - return false - } - } - //p("all intervals same, returning true") - return true -} diff --git a/vendor/github.com/RoaringBitmap/roaring/rle_gen.go b/vendor/github.com/RoaringBitmap/roaring/rle_gen.go deleted file mode 100644 index bc9da75f3a..0000000000 --- a/vendor/github.com/RoaringBitmap/roaring/rle_gen.go +++ /dev/null @@ -1,1118 +0,0 @@ -package roaring - -// NOTE: THIS FILE WAS PRODUCED BY THE -// MSGP CODE GENERATION TOOL (github.com/tinylib/msgp) -// DO NOT EDIT - -import "github.com/tinylib/msgp/msgp" - -// DecodeMsg implements msgp.Decodable -func (z *addHelper32) DecodeMsg(dc *msgp.Reader) (err error) { - var field []byte - _ = field - var zbai uint32 - zbai, err = dc.ReadMapHeader() - if err != nil { - return - } - for zbai > 0 { - zbai-- - field, err = dc.ReadMapKeyPtr() - if err != nil { - return - } - switch msgp.UnsafeString(field) { - case "runstart": - z.runstart, err = dc.ReadUint32() - if err != nil { - return - } - case "runlen": - z.runlen, err = dc.ReadUint32() - if err != nil { - return - } - case "actuallyAdded": - z.actuallyAdded, err = dc.ReadUint32() - if err != nil { - return - } - case "m": - var zcmr uint32 - zcmr, err = dc.ReadArrayHeader() - if err != nil { - return - } - if cap(z.m) >= int(zcmr) { - z.m = (z.m)[:zcmr] - } else { - z.m = make([]interval32, zcmr) - } - for zxvk := range z.m { - var zajw uint32 - zajw, err = dc.ReadMapHeader() - if err != nil { - return - } - for zajw > 0 { - zajw-- - field, err = dc.ReadMapKeyPtr() - if err != nil { - return - } - switch msgp.UnsafeString(field) { - case "start": - z.m[zxvk].start, err = dc.ReadUint32() - if err != nil { - return - } - case "last": - z.m[zxvk].last, err = dc.ReadUint32() - if err != nil { - return - } - default: - err = dc.Skip() - if err != nil { - return - } - } - } - } - case "rc": - if dc.IsNil() { - err = dc.ReadNil() - if err != nil { - return - } - z.rc = nil - } else { - if z.rc == nil { - z.rc = new(runContainer32) - } - var zwht uint32 - zwht, err = dc.ReadMapHeader() - if err != nil { - return - } - for zwht > 0 { - zwht-- - field, err = dc.ReadMapKeyPtr() - if err != nil { - return - } - switch msgp.UnsafeString(field) { - case "iv": - var zhct uint32 - zhct, err = dc.ReadArrayHeader() - if err != nil { - return - } - if cap(z.rc.iv) >= int(zhct) { - z.rc.iv = (z.rc.iv)[:zhct] - } else { - z.rc.iv = make([]interval32, zhct) - } - for zbzg := range z.rc.iv { - var zcua uint32 - zcua, err = dc.ReadMapHeader() - if err != nil { - return - } - for zcua > 0 { - zcua-- - field, err = dc.ReadMapKeyPtr() - if err != nil { - return - } - switch msgp.UnsafeString(field) { - case "start": - z.rc.iv[zbzg].start, err = dc.ReadUint32() - if err != nil { - return - } - case "last": - z.rc.iv[zbzg].last, err = dc.ReadUint32() - if err != nil { - return - } - default: - err = dc.Skip() - if err != nil { - return - } - } - } - } - case "card": - z.rc.card, err = dc.ReadInt64() - if err != nil { - return - } - default: - err = dc.Skip() - if err != nil { - return - } - } - } - } - default: - err = dc.Skip() - if err != nil { - return - } - } - } - return -} - -// EncodeMsg implements msgp.Encodable -func (z *addHelper32) EncodeMsg(en *msgp.Writer) (err error) { - // map header, size 5 - // write "runstart" - err = en.Append(0x85, 0xa8, 0x72, 0x75, 0x6e, 0x73, 0x74, 0x61, 0x72, 0x74) - if err != nil { - return err - } - err = en.WriteUint32(z.runstart) - if err != nil { - return - } - // write "runlen" - err = en.Append(0xa6, 0x72, 0x75, 0x6e, 0x6c, 0x65, 0x6e) - if err != nil { - return err - } - err = en.WriteUint32(z.runlen) - if err != nil { - return - } - // write "actuallyAdded" - err = en.Append(0xad, 0x61, 0x63, 0x74, 0x75, 0x61, 0x6c, 0x6c, 0x79, 0x41, 0x64, 0x64, 0x65, 0x64) - if err != nil { - return err - } - err = en.WriteUint32(z.actuallyAdded) - if err != nil { - return - } - // write "m" - err = en.Append(0xa1, 0x6d) - if err != nil { - return err - } - err = en.WriteArrayHeader(uint32(len(z.m))) - if err != nil { - return - } - for zxvk := range z.m { - // map header, size 2 - // write "start" - err = en.Append(0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) - if err != nil { - return err - } - err = en.WriteUint32(z.m[zxvk].start) - if err != nil { - return - } - // write "last" - err = en.Append(0xa4, 0x6c, 0x61, 0x73, 0x74) - if err != nil { - return err - } - err = en.WriteUint32(z.m[zxvk].last) - if err != nil { - return - } - } - // write "rc" - err = en.Append(0xa2, 0x72, 0x63) - if err != nil { - return err - } - if z.rc == nil { - err = en.WriteNil() - if err != nil { - return - } - } else { - // map header, size 2 - // write "iv" - err = en.Append(0x82, 0xa2, 0x69, 0x76) - if err != nil { - return err - } - err = en.WriteArrayHeader(uint32(len(z.rc.iv))) - if err != nil { - return - } - for zbzg := range z.rc.iv { - // map header, size 2 - // write "start" - err = en.Append(0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) - if err != nil { - return err - } - err = en.WriteUint32(z.rc.iv[zbzg].start) - if err != nil { - return - } - // write "last" - err = en.Append(0xa4, 0x6c, 0x61, 0x73, 0x74) - if err != nil { - return err - } - err = en.WriteUint32(z.rc.iv[zbzg].last) - if err != nil { - return - } - } - // write "card" - err = en.Append(0xa4, 0x63, 0x61, 0x72, 0x64) - if err != nil { - return err - } - err = en.WriteInt64(z.rc.card) - if err != nil { - return - } - } - return -} - -// MarshalMsg implements msgp.Marshaler -func (z *addHelper32) MarshalMsg(b []byte) (o []byte, err error) { - o = msgp.Require(b, z.Msgsize()) - // map header, size 5 - // string "runstart" - o = append(o, 0x85, 0xa8, 0x72, 0x75, 0x6e, 0x73, 0x74, 0x61, 0x72, 0x74) - o = msgp.AppendUint32(o, z.runstart) - // string "runlen" - o = append(o, 0xa6, 0x72, 0x75, 0x6e, 0x6c, 0x65, 0x6e) - o = msgp.AppendUint32(o, z.runlen) - // string "actuallyAdded" - o = append(o, 0xad, 0x61, 0x63, 0x74, 0x75, 0x61, 0x6c, 0x6c, 0x79, 0x41, 0x64, 0x64, 0x65, 0x64) - o = msgp.AppendUint32(o, z.actuallyAdded) - // string "m" - o = append(o, 0xa1, 0x6d) - o = msgp.AppendArrayHeader(o, uint32(len(z.m))) - for zxvk := range z.m { - // map header, size 2 - // string "start" - o = append(o, 0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) - o = msgp.AppendUint32(o, z.m[zxvk].start) - // string "last" - o = append(o, 0xa4, 0x6c, 0x61, 0x73, 0x74) - o = msgp.AppendUint32(o, z.m[zxvk].last) - } - // string "rc" - o = append(o, 0xa2, 0x72, 0x63) - if z.rc == nil { - o = msgp.AppendNil(o) - } else { - // map header, size 2 - // string "iv" - o = append(o, 0x82, 0xa2, 0x69, 0x76) - o = msgp.AppendArrayHeader(o, uint32(len(z.rc.iv))) - for zbzg := range z.rc.iv { - // map header, size 2 - // string "start" - o = append(o, 0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) - o = msgp.AppendUint32(o, z.rc.iv[zbzg].start) - // string "last" - o = append(o, 0xa4, 0x6c, 0x61, 0x73, 0x74) - o = msgp.AppendUint32(o, z.rc.iv[zbzg].last) - } - // string "card" - o = append(o, 0xa4, 0x63, 0x61, 0x72, 0x64) - o = msgp.AppendInt64(o, z.rc.card) - } - return -} - -// UnmarshalMsg implements msgp.Unmarshaler -func (z *addHelper32) UnmarshalMsg(bts []byte) (o []byte, err error) { - var field []byte - _ = field - var zxhx uint32 - zxhx, bts, err = msgp.ReadMapHeaderBytes(bts) - if err != nil { - return - } - for zxhx > 0 { - zxhx-- - field, bts, err = msgp.ReadMapKeyZC(bts) - if err != nil { - return - } - switch msgp.UnsafeString(field) { - case "runstart": - z.runstart, bts, err = msgp.ReadUint32Bytes(bts) - if err != nil { - return - } - case "runlen": - z.runlen, bts, err = msgp.ReadUint32Bytes(bts) - if err != nil { - return - } - case "actuallyAdded": - z.actuallyAdded, bts, err = msgp.ReadUint32Bytes(bts) - if err != nil { - return - } - case "m": - var zlqf uint32 - zlqf, bts, err = msgp.ReadArrayHeaderBytes(bts) - if err != nil { - return - } - if cap(z.m) >= int(zlqf) { - z.m = (z.m)[:zlqf] - } else { - z.m = make([]interval32, zlqf) - } - for zxvk := range z.m { - var zdaf uint32 - zdaf, bts, err = msgp.ReadMapHeaderBytes(bts) - if err != nil { - return - } - for zdaf > 0 { - zdaf-- - field, bts, err = msgp.ReadMapKeyZC(bts) - if err != nil { - return - } - switch msgp.UnsafeString(field) { - case "start": - z.m[zxvk].start, bts, err = msgp.ReadUint32Bytes(bts) - if err != nil { - return - } - case "last": - z.m[zxvk].last, bts, err = msgp.ReadUint32Bytes(bts) - if err != nil { - return - } - default: - bts, err = msgp.Skip(bts) - if err != nil { - return - } - } - } - } - case "rc": - if msgp.IsNil(bts) { - bts, err = msgp.ReadNilBytes(bts) - if err != nil { - return - } - z.rc = nil - } else { - if z.rc == nil { - z.rc = new(runContainer32) - } - var zpks uint32 - zpks, bts, err = msgp.ReadMapHeaderBytes(bts) - if err != nil { - return - } - for zpks > 0 { - zpks-- - field, bts, err = msgp.ReadMapKeyZC(bts) - if err != nil { - return - } - switch msgp.UnsafeString(field) { - case "iv": - var zjfb uint32 - zjfb, bts, err = msgp.ReadArrayHeaderBytes(bts) - if err != nil { - return - } - if cap(z.rc.iv) >= int(zjfb) { - z.rc.iv = (z.rc.iv)[:zjfb] - } else { - z.rc.iv = make([]interval32, zjfb) - } - for zbzg := range z.rc.iv { - var zcxo uint32 - zcxo, bts, err = msgp.ReadMapHeaderBytes(bts) - if err != nil { - return - } - for zcxo > 0 { - zcxo-- - field, bts, err = msgp.ReadMapKeyZC(bts) - if err != nil { - return - } - switch msgp.UnsafeString(field) { - case "start": - z.rc.iv[zbzg].start, bts, err = msgp.ReadUint32Bytes(bts) - if err != nil { - return - } - case "last": - z.rc.iv[zbzg].last, bts, err = msgp.ReadUint32Bytes(bts) - if err != nil { - return - } - default: - bts, err = msgp.Skip(bts) - if err != nil { - return - } - } - } - } - case "card": - z.rc.card, bts, err = msgp.ReadInt64Bytes(bts) - if err != nil { - return - } - default: - bts, err = msgp.Skip(bts) - if err != nil { - return - } - } - } - } - default: - bts, err = msgp.Skip(bts) - if err != nil { - return - } - } - } - o = bts - return -} - -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message -func (z *addHelper32) Msgsize() (s int) { - s = 1 + 9 + msgp.Uint32Size + 7 + msgp.Uint32Size + 14 + msgp.Uint32Size + 2 + msgp.ArrayHeaderSize + (len(z.m) * (12 + msgp.Uint32Size + msgp.Uint32Size)) + 3 - if z.rc == nil { - s += msgp.NilSize - } else { - s += 1 + 3 + msgp.ArrayHeaderSize + (len(z.rc.iv) * (12 + msgp.Uint32Size + msgp.Uint32Size)) + 5 + msgp.Int64Size - } - return -} - -// DecodeMsg implements msgp.Decodable -func (z *interval32) DecodeMsg(dc *msgp.Reader) (err error) { - var field []byte - _ = field - var zeff uint32 - zeff, err = dc.ReadMapHeader() - if err != nil { - return - } - for zeff > 0 { - zeff-- - field, err = dc.ReadMapKeyPtr() - if err != nil { - return - } - switch msgp.UnsafeString(field) { - case "start": - z.start, err = dc.ReadUint32() - if err != nil { - return - } - case "last": - z.last, err = dc.ReadUint32() - if err != nil { - return - } - default: - err = dc.Skip() - if err != nil { - return - } - } - } - return -} - -// EncodeMsg implements msgp.Encodable -func (z interval32) EncodeMsg(en *msgp.Writer) (err error) { - // map header, size 2 - // write "start" - err = en.Append(0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) - if err != nil { - return err - } - err = en.WriteUint32(z.start) - if err != nil { - return - } - // write "last" - err = en.Append(0xa4, 0x6c, 0x61, 0x73, 0x74) - if err != nil { - return err - } - err = en.WriteUint32(z.last) - if err != nil { - return - } - return -} - -// MarshalMsg implements msgp.Marshaler -func (z interval32) MarshalMsg(b []byte) (o []byte, err error) { - o = msgp.Require(b, z.Msgsize()) - // map header, size 2 - // string "start" - o = append(o, 0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) - o = msgp.AppendUint32(o, z.start) - // string "last" - o = append(o, 0xa4, 0x6c, 0x61, 0x73, 0x74) - o = msgp.AppendUint32(o, z.last) - return -} - -// UnmarshalMsg implements msgp.Unmarshaler -func (z *interval32) UnmarshalMsg(bts []byte) (o []byte, err error) { - var field []byte - _ = field - var zrsw uint32 - zrsw, bts, err = msgp.ReadMapHeaderBytes(bts) - if err != nil { - return - } - for zrsw > 0 { - zrsw-- - field, bts, err = msgp.ReadMapKeyZC(bts) - if err != nil { - return - } - switch msgp.UnsafeString(field) { - case "start": - z.start, bts, err = msgp.ReadUint32Bytes(bts) - if err != nil { - return - } - case "last": - z.last, bts, err = msgp.ReadUint32Bytes(bts) - if err != nil { - return - } - default: - bts, err = msgp.Skip(bts) - if err != nil { - return - } - } - } - o = bts - return -} - -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message -func (z interval32) Msgsize() (s int) { - s = 1 + 6 + msgp.Uint32Size + 5 + msgp.Uint32Size - return -} - -// DecodeMsg implements msgp.Decodable -func (z *runContainer32) DecodeMsg(dc *msgp.Reader) (err error) { - var field []byte - _ = field - var zdnj uint32 - zdnj, err = dc.ReadMapHeader() - if err != nil { - return - } - for zdnj > 0 { - zdnj-- - field, err = dc.ReadMapKeyPtr() - if err != nil { - return - } - switch msgp.UnsafeString(field) { - case "iv": - var zobc uint32 - zobc, err = dc.ReadArrayHeader() - if err != nil { - return - } - if cap(z.iv) >= int(zobc) { - z.iv = (z.iv)[:zobc] - } else { - z.iv = make([]interval32, zobc) - } - for zxpk := range z.iv { - var zsnv uint32 - zsnv, err = dc.ReadMapHeader() - if err != nil { - return - } - for zsnv > 0 { - zsnv-- - field, err = dc.ReadMapKeyPtr() - if err != nil { - return - } - switch msgp.UnsafeString(field) { - case "start": - z.iv[zxpk].start, err = dc.ReadUint32() - if err != nil { - return - } - case "last": - z.iv[zxpk].last, err = dc.ReadUint32() - if err != nil { - return - } - default: - err = dc.Skip() - if err != nil { - return - } - } - } - } - case "card": - z.card, err = dc.ReadInt64() - if err != nil { - return - } - default: - err = dc.Skip() - if err != nil { - return - } - } - } - return -} - -// EncodeMsg implements msgp.Encodable -func (z *runContainer32) EncodeMsg(en *msgp.Writer) (err error) { - // map header, size 2 - // write "iv" - err = en.Append(0x82, 0xa2, 0x69, 0x76) - if err != nil { - return err - } - err = en.WriteArrayHeader(uint32(len(z.iv))) - if err != nil { - return - } - for zxpk := range z.iv { - // map header, size 2 - // write "start" - err = en.Append(0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) - if err != nil { - return err - } - err = en.WriteUint32(z.iv[zxpk].start) - if err != nil { - return - } - // write "last" - err = en.Append(0xa4, 0x6c, 0x61, 0x73, 0x74) - if err != nil { - return err - } - err = en.WriteUint32(z.iv[zxpk].last) - if err != nil { - return - } - } - // write "card" - err = en.Append(0xa4, 0x63, 0x61, 0x72, 0x64) - if err != nil { - return err - } - err = en.WriteInt64(z.card) - if err != nil { - return - } - return -} - -// MarshalMsg implements msgp.Marshaler -func (z *runContainer32) MarshalMsg(b []byte) (o []byte, err error) { - o = msgp.Require(b, z.Msgsize()) - // map header, size 2 - // string "iv" - o = append(o, 0x82, 0xa2, 0x69, 0x76) - o = msgp.AppendArrayHeader(o, uint32(len(z.iv))) - for zxpk := range z.iv { - // map header, size 2 - // string "start" - o = append(o, 0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) - o = msgp.AppendUint32(o, z.iv[zxpk].start) - // string "last" - o = append(o, 0xa4, 0x6c, 0x61, 0x73, 0x74) - o = msgp.AppendUint32(o, z.iv[zxpk].last) - } - // string "card" - o = append(o, 0xa4, 0x63, 0x61, 0x72, 0x64) - o = msgp.AppendInt64(o, z.card) - return -} - -// UnmarshalMsg implements msgp.Unmarshaler -func (z *runContainer32) UnmarshalMsg(bts []byte) (o []byte, err error) { - var field []byte - _ = field - var zkgt uint32 - zkgt, bts, err = msgp.ReadMapHeaderBytes(bts) - if err != nil { - return - } - for zkgt > 0 { - zkgt-- - field, bts, err = msgp.ReadMapKeyZC(bts) - if err != nil { - return - } - switch msgp.UnsafeString(field) { - case "iv": - var zema uint32 - zema, bts, err = msgp.ReadArrayHeaderBytes(bts) - if err != nil { - return - } - if cap(z.iv) >= int(zema) { - z.iv = (z.iv)[:zema] - } else { - z.iv = make([]interval32, zema) - } - for zxpk := range z.iv { - var zpez uint32 - zpez, bts, err = msgp.ReadMapHeaderBytes(bts) - if err != nil { - return - } - for zpez > 0 { - zpez-- - field, bts, err = msgp.ReadMapKeyZC(bts) - if err != nil { - return - } - switch msgp.UnsafeString(field) { - case "start": - z.iv[zxpk].start, bts, err = msgp.ReadUint32Bytes(bts) - if err != nil { - return - } - case "last": - z.iv[zxpk].last, bts, err = msgp.ReadUint32Bytes(bts) - if err != nil { - return - } - default: - bts, err = msgp.Skip(bts) - if err != nil { - return - } - } - } - } - case "card": - z.card, bts, err = msgp.ReadInt64Bytes(bts) - if err != nil { - return - } - default: - bts, err = msgp.Skip(bts) - if err != nil { - return - } - } - } - o = bts - return -} - -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message -func (z *runContainer32) Msgsize() (s int) { - s = 1 + 3 + msgp.ArrayHeaderSize + (len(z.iv) * (12 + msgp.Uint32Size + msgp.Uint32Size)) + 5 + msgp.Int64Size - return -} - -// DecodeMsg implements msgp.Decodable -func (z *runIterator32) DecodeMsg(dc *msgp.Reader) (err error) { - var field []byte - _ = field - var zqke uint32 - zqke, err = dc.ReadMapHeader() - if err != nil { - return - } - for zqke > 0 { - zqke-- - field, err = dc.ReadMapKeyPtr() - if err != nil { - return - } - switch msgp.UnsafeString(field) { - case "rc": - if dc.IsNil() { - err = dc.ReadNil() - if err != nil { - return - } - z.rc = nil - } else { - if z.rc == nil { - z.rc = new(runContainer32) - } - err = z.rc.DecodeMsg(dc) - if err != nil { - return - } - } - case "curIndex": - z.curIndex, err = dc.ReadInt64() - if err != nil { - return - } - case "curPosInIndex": - z.curPosInIndex, err = dc.ReadUint32() - if err != nil { - return - } - case "curSeq": - z.curSeq, err = dc.ReadInt64() - if err != nil { - return - } - default: - err = dc.Skip() - if err != nil { - return - } - } - } - return -} - -// EncodeMsg implements msgp.Encodable -func (z *runIterator32) EncodeMsg(en *msgp.Writer) (err error) { - // map header, size 4 - // write "rc" - err = en.Append(0x84, 0xa2, 0x72, 0x63) - if err != nil { - return err - } - if z.rc == nil { - err = en.WriteNil() - if err != nil { - return - } - } else { - err = z.rc.EncodeMsg(en) - if err != nil { - return - } - } - // write "curIndex" - err = en.Append(0xa8, 0x63, 0x75, 0x72, 0x49, 0x6e, 0x64, 0x65, 0x78) - if err != nil { - return err - } - err = en.WriteInt64(z.curIndex) - if err != nil { - return - } - // write "curPosInIndex" - err = en.Append(0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78) - if err != nil { - return err - } - err = en.WriteUint32(z.curPosInIndex) - if err != nil { - return - } - // write "curSeq" - err = en.Append(0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71) - if err != nil { - return err - } - err = en.WriteInt64(z.curSeq) - if err != nil { - return - } - return -} - -// MarshalMsg implements msgp.Marshaler -func (z *runIterator32) MarshalMsg(b []byte) (o []byte, err error) { - o = msgp.Require(b, z.Msgsize()) - // map header, size 4 - // string "rc" - o = append(o, 0x84, 0xa2, 0x72, 0x63) - if z.rc == nil { - o = msgp.AppendNil(o) - } else { - o, err = z.rc.MarshalMsg(o) - if err != nil { - return - } - } - // string "curIndex" - o = append(o, 0xa8, 0x63, 0x75, 0x72, 0x49, 0x6e, 0x64, 0x65, 0x78) - o = msgp.AppendInt64(o, z.curIndex) - // string "curPosInIndex" - o = append(o, 0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78) - o = msgp.AppendUint32(o, z.curPosInIndex) - // string "curSeq" - o = append(o, 0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71) - o = msgp.AppendInt64(o, z.curSeq) - return -} - -// UnmarshalMsg implements msgp.Unmarshaler -func (z *runIterator32) UnmarshalMsg(bts []byte) (o []byte, err error) { - var field []byte - _ = field - var zqyh uint32 - zqyh, bts, err = msgp.ReadMapHeaderBytes(bts) - if err != nil { - return - } - for zqyh > 0 { - zqyh-- - field, bts, err = msgp.ReadMapKeyZC(bts) - if err != nil { - return - } - switch msgp.UnsafeString(field) { - case "rc": - if msgp.IsNil(bts) { - bts, err = msgp.ReadNilBytes(bts) - if err != nil { - return - } - z.rc = nil - } else { - if z.rc == nil { - z.rc = new(runContainer32) - } - bts, err = z.rc.UnmarshalMsg(bts) - if err != nil { - return - } - } - case "curIndex": - z.curIndex, bts, err = msgp.ReadInt64Bytes(bts) - if err != nil { - return - } - case "curPosInIndex": - z.curPosInIndex, bts, err = msgp.ReadUint32Bytes(bts) - if err != nil { - return - } - case "curSeq": - z.curSeq, bts, err = msgp.ReadInt64Bytes(bts) - if err != nil { - return - } - default: - bts, err = msgp.Skip(bts) - if err != nil { - return - } - } - } - o = bts - return -} - -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message -func (z *runIterator32) Msgsize() (s int) { - s = 1 + 3 - if z.rc == nil { - s += msgp.NilSize - } else { - s += z.rc.Msgsize() - } - s += 9 + msgp.Int64Size + 14 + msgp.Uint32Size + 7 + msgp.Int64Size - return -} - -// DecodeMsg implements msgp.Decodable -func (z *uint32Slice) DecodeMsg(dc *msgp.Reader) (err error) { - var zjpj uint32 - zjpj, err = dc.ReadArrayHeader() - if err != nil { - return - } - if cap((*z)) >= int(zjpj) { - (*z) = (*z)[:zjpj] - } else { - (*z) = make(uint32Slice, zjpj) - } - for zywj := range *z { - (*z)[zywj], err = dc.ReadUint32() - if err != nil { - return - } - } - return -} - -// EncodeMsg implements msgp.Encodable -func (z uint32Slice) EncodeMsg(en *msgp.Writer) (err error) { - err = en.WriteArrayHeader(uint32(len(z))) - if err != nil { - return - } - for zzpf := range z { - err = en.WriteUint32(z[zzpf]) - if err != nil { - return - } - } - return -} - -// MarshalMsg implements msgp.Marshaler -func (z uint32Slice) MarshalMsg(b []byte) (o []byte, err error) { - o = msgp.Require(b, z.Msgsize()) - o = msgp.AppendArrayHeader(o, uint32(len(z))) - for zzpf := range z { - o = msgp.AppendUint32(o, z[zzpf]) - } - return -} - -// UnmarshalMsg implements msgp.Unmarshaler -func (z *uint32Slice) UnmarshalMsg(bts []byte) (o []byte, err error) { - var zgmo uint32 - zgmo, bts, err = msgp.ReadArrayHeaderBytes(bts) - if err != nil { - return - } - if cap((*z)) >= int(zgmo) { - (*z) = (*z)[:zgmo] - } else { - (*z) = make(uint32Slice, zgmo) - } - for zrfe := range *z { - (*z)[zrfe], bts, err = msgp.ReadUint32Bytes(bts) - if err != nil { - return - } - } - o = bts - return -} - -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message -func (z uint32Slice) Msgsize() (s int) { - s = msgp.ArrayHeaderSize + (len(z) * (msgp.Uint32Size)) - return -} diff --git a/vendor/github.com/RoaringBitmap/roaring/rlecommon.go b/vendor/github.com/RoaringBitmap/roaring/rlecommon.go deleted file mode 100644 index 133636787a..0000000000 --- a/vendor/github.com/RoaringBitmap/roaring/rlecommon.go +++ /dev/null @@ -1,163 +0,0 @@ -package roaring - -import ( - "fmt" -) - -// common to rle32.go and rle16.go - -// rleVerbose controls whether p() prints show up. -// The testing package sets this based on -// testing.Verbose(). -var rleVerbose bool - -// p is a shorthand for fmt.Printf with beginning and -// trailing newlines. p() makes it easy -// to add diagnostic print statements. -func p(format string, args ...interface{}) { - if rleVerbose { - fmt.Printf("\n"+format+"\n", args...) - } -} - -// MaxUint32 is the largest uint32 value. -const MaxUint32 = 4294967295 - -// MaxUint16 is the largest 16 bit unsigned int. -// This is the largest value an interval16 can store. -const MaxUint16 = 65535 - -// searchOptions allows us to accelerate runContainer32.search with -// prior knowledge of (mostly lower) bounds. This is used by Union -// and Intersect. -type searchOptions struct { - // start here instead of at 0 - startIndex int64 - - // upper bound instead of len(rc.iv); - // endxIndex == 0 means ignore the bound and use - // endxIndex == n ==len(rc.iv) which is also - // naturally the default for search() - // when opt = nil. - endxIndex int64 -} - -// And finds the intersection of rc and b. -func (rc *runContainer32) And(b *Bitmap) *Bitmap { - out := NewBitmap() - for _, p := range rc.iv { - for i := p.start; i <= p.last; i++ { - if b.Contains(i) { - out.Add(i) - } - } - } - return out -} - -// Xor returns the exclusive-or of rc and b. -func (rc *runContainer32) Xor(b *Bitmap) *Bitmap { - out := b.Clone() - for _, p := range rc.iv { - for v := p.start; v <= p.last; v++ { - if out.Contains(v) { - out.RemoveRange(uint64(v), uint64(v+1)) - } else { - out.Add(v) - } - } - } - return out -} - -// Or returns the union of rc and b. -func (rc *runContainer32) Or(b *Bitmap) *Bitmap { - out := b.Clone() - for _, p := range rc.iv { - for v := p.start; v <= p.last; v++ { - out.Add(v) - } - } - return out -} - -// trial is used in the randomized testing of runContainers -type trial struct { - n int - percentFill float64 - ntrial int - - // only in the union test - // only subtract test - percentDelete float64 - - // only in 067 randomized operations - // we do this + 1 passes - numRandomOpsPass int - - // allow sampling range control - // only recent tests respect this. - srang *interval16 -} - -// And finds the intersection of rc and b. -func (rc *runContainer16) And(b *Bitmap) *Bitmap { - out := NewBitmap() - for _, p := range rc.iv { - plast := p.last() - for i := p.start; i <= plast; i++ { - if b.Contains(uint32(i)) { - out.Add(uint32(i)) - } - } - } - return out -} - -// Xor returns the exclusive-or of rc and b. -func (rc *runContainer16) Xor(b *Bitmap) *Bitmap { - out := b.Clone() - for _, p := range rc.iv { - plast := p.last() - for v := p.start; v <= plast; v++ { - w := uint32(v) - if out.Contains(w) { - out.RemoveRange(uint64(w), uint64(w+1)) - } else { - out.Add(w) - } - } - } - return out -} - -// Or returns the union of rc and b. -func (rc *runContainer16) Or(b *Bitmap) *Bitmap { - out := b.Clone() - for _, p := range rc.iv { - plast := p.last() - for v := p.start; v <= plast; v++ { - out.Add(uint32(v)) - } - } - return out -} - -//func (rc *runContainer32) and(container) container { -// panic("TODO. not yet implemented") -//} - -// serializedSizeInBytes returns the number of bytes of memory -// required by this runContainer16. This is for the -// Roaring format, as specified https://github.com/RoaringBitmap/RoaringFormatSpec/ -func (rc *runContainer16) serializedSizeInBytes() int { - // number of runs in one uint16, then each run - // needs two more uint16 - return 2 + len(rc.iv)*4 -} - -// serializedSizeInBytes returns the number of bytes of memory -// required by this runContainer32. -func (rc *runContainer32) serializedSizeInBytes() int { - return 4 + len(rc.iv)*8 -} diff --git a/vendor/github.com/RoaringBitmap/roaring/rlei.go b/vendor/github.com/RoaringBitmap/roaring/rlei.go deleted file mode 100644 index a15a017e47..0000000000 --- a/vendor/github.com/RoaringBitmap/roaring/rlei.go +++ /dev/null @@ -1,695 +0,0 @@ -package roaring - -/////////////////////////////////////////////////// -// -// container interface methods for runContainer16 -// -/////////////////////////////////////////////////// - -import ( - "fmt" -) - -// compile time verify we meet interface requirements -var _ container = &runContainer16{} - -func (rc *runContainer16) clone() container { - return newRunContainer16CopyIv(rc.iv) -} - -func (rc *runContainer16) minimum() uint16 { - return rc.iv[0].start // assume not empty -} - -func (rc *runContainer16) maximum() uint16 { - return rc.iv[len(rc.iv)-1].last() // assume not empty -} - -func (rc *runContainer16) isFull() bool { - return (len(rc.iv) == 1) && ((rc.iv[0].start == 0) && (rc.iv[0].last() == MaxUint16)) -} - -func (rc *runContainer16) and(a container) container { - if rc.isFull() { - return a.clone() - } - switch c := a.(type) { - case *runContainer16: - return rc.intersect(c) - case *arrayContainer: - return rc.andArray(c) - case *bitmapContainer: - return rc.andBitmapContainer(c) - } - panic("unsupported container type") -} - -func (rc *runContainer16) andCardinality(a container) int { - switch c := a.(type) { - case *runContainer16: - return int(rc.intersectCardinality(c)) - case *arrayContainer: - return rc.andArrayCardinality(c) - case *bitmapContainer: - return rc.andBitmapContainerCardinality(c) - } - panic("unsupported container type") -} - -// andBitmapContainer finds the intersection of rc and b. -func (rc *runContainer16) andBitmapContainer(bc *bitmapContainer) container { - bc2 := newBitmapContainerFromRun(rc) - return bc2.andBitmap(bc) -} - -func (rc *runContainer16) andArrayCardinality(ac *arrayContainer) int { - pos := 0 - answer := 0 - maxpos := ac.getCardinality() - if maxpos == 0 { - return 0 // won't happen in actual code - } - v := ac.content[pos] -mainloop: - for _, p := range rc.iv { - for v < p.start { - pos++ - if pos == maxpos { - break mainloop - } - v = ac.content[pos] - } - for v <= p.last() { - answer++ - pos++ - if pos == maxpos { - break mainloop - } - v = ac.content[pos] - } - } - return answer -} - -func (rc *runContainer16) iand(a container) container { - if rc.isFull() { - return a.clone() - } - switch c := a.(type) { - case *runContainer16: - return rc.inplaceIntersect(c) - case *arrayContainer: - return rc.andArray(c) - case *bitmapContainer: - return rc.iandBitmapContainer(c) - } - panic("unsupported container type") -} - -func (rc *runContainer16) inplaceIntersect(rc2 *runContainer16) container { - // TODO: optimize by doing less allocation, possibly? - - // sect will be new - sect := rc.intersect(rc2) - *rc = *sect - return rc -} - -func (rc *runContainer16) iandBitmapContainer(bc *bitmapContainer) container { - isect := rc.andBitmapContainer(bc) - *rc = *newRunContainer16FromContainer(isect) - return rc -} - -func (rc *runContainer16) andArray(ac *arrayContainer) container { - if len(rc.iv) == 0 { - return newArrayContainer() - } - - acCardinality := ac.getCardinality() - c := newArrayContainerCapacity(acCardinality) - - for rlePos, arrayPos := 0, 0; arrayPos < acCardinality; { - iv := rc.iv[rlePos] - arrayVal := ac.content[arrayPos] - - for iv.last() < arrayVal { - rlePos++ - if rlePos == len(rc.iv) { - return c - } - iv = rc.iv[rlePos] - } - - if iv.start > arrayVal { - arrayPos = advanceUntil(ac.content, arrayPos, len(ac.content), iv.start) - } else { - c.content = append(c.content, arrayVal) - arrayPos++ - } - } - return c -} - -func (rc *runContainer16) andNot(a container) container { - switch c := a.(type) { - case *arrayContainer: - return rc.andNotArray(c) - case *bitmapContainer: - return rc.andNotBitmap(c) - case *runContainer16: - return rc.andNotRunContainer16(c) - } - panic("unsupported container type") -} - -func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uint32) { - k := 0 - var val int64 - for _, p := range rc.iv { - n := p.runlen() - for j := int64(0); j < n; j++ { - val = int64(p.start) + j - x[k+i] = uint32(val) | mask - k++ - } - } -} - -func (rc *runContainer16) getShortIterator() shortIterable { - return rc.newRunIterator16() -} - -func (rc *runContainer16) getManyIterator() manyIterable { - return rc.newManyRunIterator16() -} - -// add the values in the range [firstOfRange, endx). endx -// is still abe to express 2^16 because it is an int not an uint16. -func (rc *runContainer16) iaddRange(firstOfRange, endx int) container { - - if firstOfRange >= endx { - panic(fmt.Sprintf("invalid %v = endx >= firstOfRange", endx)) - } - addme := newRunContainer16TakeOwnership([]interval16{ - { - start: uint16(firstOfRange), - length: uint16(endx - 1 - firstOfRange), - }, - }) - *rc = *rc.union(addme) - return rc -} - -// remove the values in the range [firstOfRange,endx) -func (rc *runContainer16) iremoveRange(firstOfRange, endx int) container { - if firstOfRange >= endx { - panic(fmt.Sprintf("request to iremove empty set [%v, %v),"+ - " nothing to do.", firstOfRange, endx)) - //return rc - } - x := newInterval16Range(uint16(firstOfRange), uint16(endx-1)) - rc.isubtract(x) - return rc -} - -// not flip the values in the range [firstOfRange,endx) -func (rc *runContainer16) not(firstOfRange, endx int) container { - if firstOfRange >= endx { - panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange)) - } - - return rc.Not(firstOfRange, endx) -} - -// Not flips the values in the range [firstOfRange,endx). -// This is not inplace. Only the returned value has the flipped bits. -// -// Currently implemented as (!A intersect B) union (A minus B), -// where A is rc, and B is the supplied [firstOfRange, endx) interval. -// -// TODO(time optimization): convert this to a single pass -// algorithm by copying AndNotRunContainer16() and modifying it. -// Current routine is correct but -// makes 2 more passes through the arrays than should be -// strictly necessary. Measure both ways though--this may not matter. -// -func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 { - - if firstOfRange >= endx { - panic(fmt.Sprintf("invalid %v = endx >= firstOfRange == %v", endx, firstOfRange)) - } - - if firstOfRange >= endx { - return rc.Clone() - } - - a := rc - // algo: - // (!A intersect B) union (A minus B) - - nota := a.invert() - - bs := []interval16{newInterval16Range(uint16(firstOfRange), uint16(endx-1))} - b := newRunContainer16TakeOwnership(bs) - - notAintersectB := nota.intersect(b) - - aMinusB := a.AndNotRunContainer16(b) - - rc2 := notAintersectB.union(aMinusB) - return rc2 -} - -// equals is now logical equals; it does not require the -// same underlying container type. -func (rc *runContainer16) equals(o container) bool { - srb, ok := o.(*runContainer16) - - if !ok { - // maybe value instead of pointer - val, valok := o.(*runContainer16) - if valok { - srb = val - ok = true - } - } - if ok { - // Check if the containers are the same object. - if rc == srb { - return true - } - - if len(srb.iv) != len(rc.iv) { - return false - } - - for i, v := range rc.iv { - if v != srb.iv[i] { - return false - } - } - return true - } - - // use generic comparison - if o.getCardinality() != rc.getCardinality() { - return false - } - rit := rc.getShortIterator() - bit := o.getShortIterator() - - //k := 0 - for rit.hasNext() { - if bit.next() != rit.next() { - return false - } - //k++ - } - return true -} - -func (rc *runContainer16) iaddReturnMinimized(x uint16) container { - rc.Add(x) - return rc -} - -func (rc *runContainer16) iadd(x uint16) (wasNew bool) { - return rc.Add(x) -} - -func (rc *runContainer16) iremoveReturnMinimized(x uint16) container { - rc.removeKey(x) - return rc -} - -func (rc *runContainer16) iremove(x uint16) bool { - return rc.removeKey(x) -} - -func (rc *runContainer16) or(a container) container { - if rc.isFull() { - return rc.clone() - } - switch c := a.(type) { - case *runContainer16: - return rc.union(c) - case *arrayContainer: - return rc.orArray(c) - case *bitmapContainer: - return rc.orBitmapContainer(c) - } - panic("unsupported container type") -} - -func (rc *runContainer16) orCardinality(a container) int { - switch c := a.(type) { - case *runContainer16: - return int(rc.unionCardinality(c)) - case *arrayContainer: - return rc.orArrayCardinality(c) - case *bitmapContainer: - return rc.orBitmapContainerCardinality(c) - } - panic("unsupported container type") -} - -// orBitmapContainer finds the union of rc and bc. -func (rc *runContainer16) orBitmapContainer(bc *bitmapContainer) container { - bc2 := newBitmapContainerFromRun(rc) - return bc2.iorBitmap(bc) -} - -func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int { - answer := 0 - for i := range rc.iv { - answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1) - } - //bc.computeCardinality() - return answer -} - -func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int { - return rc.getCardinality() + bc.getCardinality() - rc.andBitmapContainerCardinality(bc) -} - -// orArray finds the union of rc and ac. -func (rc *runContainer16) orArray(ac *arrayContainer) container { - bc1 := newBitmapContainerFromRun(rc) - bc2 := ac.toBitmapContainer() - return bc1.orBitmap(bc2) -} - -// orArray finds the union of rc and ac. -func (rc *runContainer16) orArrayCardinality(ac *arrayContainer) int { - return ac.getCardinality() + rc.getCardinality() - rc.andArrayCardinality(ac) -} - -func (rc *runContainer16) ior(a container) container { - if rc.isFull() { - return rc - } - switch c := a.(type) { - case *runContainer16: - return rc.inplaceUnion(c) - case *arrayContainer: - return rc.iorArray(c) - case *bitmapContainer: - return rc.iorBitmapContainer(c) - } - panic("unsupported container type") -} - -func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container { - p("rc.inplaceUnion with len(rc2.iv)=%v", len(rc2.iv)) - for _, p := range rc2.iv { - last := int64(p.last()) - for i := int64(p.start); i <= last; i++ { - rc.Add(uint16(i)) - } - } - return rc -} - -func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container { - - it := bc.getShortIterator() - for it.hasNext() { - rc.Add(it.next()) - } - return rc -} - -func (rc *runContainer16) iorArray(ac *arrayContainer) container { - it := ac.getShortIterator() - for it.hasNext() { - rc.Add(it.next()) - } - return rc -} - -// lazyIOR is described (not yet implemented) in -// this nice note from @lemire on -// https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737 -// -// Description of lazyOR and lazyIOR from @lemire: -// -// Lazy functions are optional and can be simply -// wrapper around non-lazy functions. -// -// The idea of "laziness" is as follows. It is -// inspired by the concept of lazy evaluation -// you might be familiar with (functional programming -// and all that). So a roaring bitmap is -// such that all its containers are, in some -// sense, chosen to use as little memory as -// possible. This is nice. Also, all bitsets -// are "cardinality aware" so that you can do -// fast rank/select queries, or query the -// cardinality of the whole bitmap... very fast, -// without latency. -// -// However, imagine that you are aggregating 100 -// bitmaps together. So you OR the first two, then OR -// that with the third one and so forth. Clearly, -// intermediate bitmaps don't need to be as -// compressed as possible, right? They can be -// in a "dirty state". You only need the end -// result to be in a nice state... which you -// can achieve by calling repairAfterLazy at the end. -// -// The Java/C code does something special for -// the in-place lazy OR runs. The idea is that -// instead of taking two run containers and -// generating a new one, we actually try to -// do the computation in-place through a -// technique invented by @gssiyankai (pinging him!). -// What you do is you check whether the host -// run container has lots of extra capacity. -// If it does, you move its data at the end of -// the backing array, and then you write -// the answer at the beginning. What this -// trick does is minimize memory allocations. -// -func (rc *runContainer16) lazyIOR(a container) container { - // not lazy at the moment - // TODO: make it lazy - return rc.ior(a) - - /* - switch c := a.(type) { - case *arrayContainer: - return rc.lazyIorArray(c) - case *bitmapContainer: - return rc.lazyIorBitmap(c) - case *runContainer16: - return rc.lazyIorRun16(c) - } - panic("unsupported container type") - */ -} - -// lazyOR is described above in lazyIOR. -func (rc *runContainer16) lazyOR(a container) container { - - // not lazy at the moment - // TODO: make it lazy - return rc.or(a) - - /* - switch c := a.(type) { - case *arrayContainer: - return rc.lazyOrArray(c) - case *bitmapContainer: - return rc.lazyOrBitmap(c) - case *runContainer16: - return rc.lazyOrRunContainer16(c) - } - panic("unsupported container type") - */ -} - -func (rc *runContainer16) intersects(a container) bool { - // TODO: optimize by doing inplace/less allocation, possibly? - isect := rc.and(a) - return isect.getCardinality() > 0 -} - -func (rc *runContainer16) xor(a container) container { - switch c := a.(type) { - case *arrayContainer: - return rc.xorArray(c) - case *bitmapContainer: - return rc.xorBitmap(c) - case *runContainer16: - return rc.xorRunContainer16(c) - } - panic("unsupported container type") -} - -func (rc *runContainer16) iandNot(a container) container { - switch c := a.(type) { - case *arrayContainer: - return rc.iandNotArray(c) - case *bitmapContainer: - return rc.iandNotBitmap(c) - case *runContainer16: - return rc.iandNotRunContainer16(c) - } - panic("unsupported container type") -} - -// flip the values in the range [firstOfRange,endx) -func (rc *runContainer16) inot(firstOfRange, endx int) container { - if firstOfRange >= endx { - panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange)) - } - // TODO: minimize copies, do it all inplace; not() makes a copy. - rc = rc.Not(firstOfRange, endx) - return rc -} - -func (rc *runContainer16) getCardinality() int { - return int(rc.cardinality()) -} - -func (rc *runContainer16) rank(x uint16) int { - n := int64(len(rc.iv)) - xx := int64(x) - w, already, _ := rc.search(xx, nil) - if w < 0 { - return 0 - } - if !already && w == n-1 { - return rc.getCardinality() - } - var rnk int64 - if !already { - for i := int64(0); i <= w; i++ { - rnk += rc.iv[i].runlen() - } - return int(rnk) - } - for i := int64(0); i < w; i++ { - rnk += rc.iv[i].runlen() - } - rnk += int64(x-rc.iv[w].start) + 1 - return int(rnk) -} - -func (rc *runContainer16) selectInt(x uint16) int { - return rc.selectInt16(x) -} - -func (rc *runContainer16) andNotRunContainer16(b *runContainer16) container { - return rc.AndNotRunContainer16(b) -} - -func (rc *runContainer16) andNotArray(ac *arrayContainer) container { - rcb := rc.toBitmapContainer() - acb := ac.toBitmapContainer() - return rcb.andNotBitmap(acb) -} - -func (rc *runContainer16) andNotBitmap(bc *bitmapContainer) container { - rcb := rc.toBitmapContainer() - return rcb.andNotBitmap(bc) -} - -func (rc *runContainer16) toBitmapContainer() *bitmapContainer { - p("run16 toBitmap starting; rc has %v ranges", len(rc.iv)) - bc := newBitmapContainer() - for i := range rc.iv { - bc.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1) - } - bc.computeCardinality() - return bc -} - -func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container { - rcb := rc.toBitmapContainer() - x2b := x2.toBitmapContainer() - rcb.iandNotBitmapSurely(x2b) - // TODO: check size and optimize the return value - // TODO: is inplace modification really required? If not, elide the copy. - rc2 := newRunContainer16FromBitmapContainer(rcb) - *rc = *rc2 - return rc -} - -func (rc *runContainer16) iandNotArray(ac *arrayContainer) container { - rcb := rc.toBitmapContainer() - acb := ac.toBitmapContainer() - rcb.iandNotBitmapSurely(acb) - // TODO: check size and optimize the return value - // TODO: is inplace modification really required? If not, elide the copy. - rc2 := newRunContainer16FromBitmapContainer(rcb) - *rc = *rc2 - return rc -} - -func (rc *runContainer16) iandNotBitmap(bc *bitmapContainer) container { - rcb := rc.toBitmapContainer() - rcb.iandNotBitmapSurely(bc) - // TODO: check size and optimize the return value - // TODO: is inplace modification really required? If not, elide the copy. - rc2 := newRunContainer16FromBitmapContainer(rcb) - *rc = *rc2 - return rc -} - -func (rc *runContainer16) xorRunContainer16(x2 *runContainer16) container { - rcb := rc.toBitmapContainer() - x2b := x2.toBitmapContainer() - return rcb.xorBitmap(x2b) -} - -func (rc *runContainer16) xorArray(ac *arrayContainer) container { - rcb := rc.toBitmapContainer() - acb := ac.toBitmapContainer() - return rcb.xorBitmap(acb) -} - -func (rc *runContainer16) xorBitmap(bc *bitmapContainer) container { - rcb := rc.toBitmapContainer() - return rcb.xorBitmap(bc) -} - -// convert to bitmap or array *if needed* -func (rc *runContainer16) toEfficientContainer() container { - - // runContainer16SerializedSizeInBytes(numRuns) - sizeAsRunContainer := rc.getSizeInBytes() - sizeAsBitmapContainer := bitmapContainerSizeInBytes() - card := int(rc.cardinality()) - sizeAsArrayContainer := arrayContainerSizeInBytes(card) - if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) { - return rc - } - if card <= arrayDefaultMaxSize { - return rc.toArrayContainer() - } - bc := newBitmapContainerFromRun(rc) - return bc -} - -func (rc *runContainer16) toArrayContainer() *arrayContainer { - ac := newArrayContainer() - for i := range rc.iv { - ac.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1) - } - return ac -} - -func newRunContainer16FromContainer(c container) *runContainer16 { - - switch x := c.(type) { - case *runContainer16: - return x.Clone() - case *arrayContainer: - return newRunContainer16FromArray(x) - case *bitmapContainer: - return newRunContainer16FromBitmapContainer(x) - } - panic("unsupported container type") -} diff --git a/vendor/github.com/RoaringBitmap/roaring/roaring.go b/vendor/github.com/RoaringBitmap/roaring/roaring.go index 5045a41933..df58cc30b5 100644 --- a/vendor/github.com/RoaringBitmap/roaring/roaring.go +++ b/vendor/github.com/RoaringBitmap/roaring/roaring.go @@ -6,12 +6,12 @@ package roaring import ( - "bufio" "bytes" "encoding/base64" "fmt" "io" "strconv" + "sync" ) // Bitmap represents a compressed bitmap where you can add integers. @@ -52,7 +52,7 @@ func (rb *Bitmap) ToBytes() ([]byte, error) { return rb.highlowcontainer.toBytes() } -// WriteToMsgpack writes a msgpack2/snappy-streaming compressed serialized +// Deprecated: WriteToMsgpack writes a msgpack2/snappy-streaming compressed serialized // version of this bitmap to stream. The format is not // compatible with the WriteTo() format, and is // experimental: it may produce smaller on disk @@ -67,8 +67,14 @@ func (rb *Bitmap) WriteToMsgpack(stream io.Writer) (int64, error) { // The format is compatible with other RoaringBitmap // implementations (Java, C) and is documented here: // https://github.com/RoaringBitmap/RoaringFormatSpec -func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) { - return rb.highlowcontainer.readFrom(stream) +func (rb *Bitmap) ReadFrom(reader io.Reader) (p int64, err error) { + stream := byteInputAdapterPool.Get().(*byteInputAdapter) + stream.reset(reader) + + p, err = rb.highlowcontainer.readFrom(stream) + byteInputAdapterPool.Put(stream) + + return } // FromBuffer creates a bitmap from its serialized version stored in buffer @@ -87,10 +93,36 @@ func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) { // You should *not* change the copy-on-write status of the resulting // bitmaps (SetCopyOnWrite). // -func (rb *Bitmap) FromBuffer(buf []byte) (int64, error) { - return rb.highlowcontainer.fromBuffer(buf) +// If buf becomes unavailable, then a bitmap created with +// FromBuffer would be effectively broken. Furthermore, any +// bitmap derived from this bitmap (e.g., via Or, And) might +// also be broken. Thus, before making buf unavailable, you should +// call CloneCopyOnWriteContainers on all such bitmaps. +// +func (rb *Bitmap) FromBuffer(buf []byte) (p int64, err error) { + stream := byteBufferPool.Get().(*byteBuffer) + stream.reset(buf) + + p, err = rb.highlowcontainer.readFrom(stream) + byteBufferPool.Put(stream) + + return } +var ( + byteBufferPool = sync.Pool{ + New: func() interface{} { + return &byteBuffer{} + }, + } + + byteInputAdapterPool = sync.Pool{ + New: func() interface{} { + return &byteInputAdapter{} + }, + } +) + // RunOptimize attempts to further compress the runs of consecutive values found in the bitmap func (rb *Bitmap) RunOptimize() { rb.highlowcontainer.runOptimize() @@ -101,7 +133,7 @@ func (rb *Bitmap) HasRunCompression() bool { return rb.highlowcontainer.hasRunCompression() } -// ReadFromMsgpack reads a msgpack2/snappy-streaming serialized +// Deprecated: ReadFromMsgpack reads a msgpack2/snappy-streaming serialized // version of this bitmap from stream. The format is // expected is that written by the WriteToMsgpack() // call; see additional notes there. @@ -110,29 +142,15 @@ func (rb *Bitmap) ReadFromMsgpack(stream io.Reader) (int64, error) { } // MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap +// (same as ToBytes) func (rb *Bitmap) MarshalBinary() ([]byte, error) { - var buf bytes.Buffer - writer := bufio.NewWriter(&buf) - _, err := rb.WriteTo(writer) - if err != nil { - return nil, err - } - err = writer.Flush() - if err != nil { - return nil, err - } - return buf.Bytes(), nil + return rb.ToBytes() } // UnmarshalBinary implements the encoding.BinaryUnmarshaler interface for the bitmap func (rb *Bitmap) UnmarshalBinary(data []byte) error { - var buf bytes.Buffer - _, err := buf.Write(data) - if err != nil { - return err - } - reader := bufio.NewReader(&buf) - _, err = rb.ReadFrom(reader) + r := bytes.NewReader(data) + _, err := rb.ReadFrom(r) return err } @@ -215,10 +233,20 @@ type IntIterable interface { Next() uint32 } +// IntPeekable allows you to look at the next value without advancing and +// advance as long as the next value is smaller than minval +type IntPeekable interface { + IntIterable + // PeekNext peeks the next value without advancing the iterator + PeekNext() uint32 + // AdvanceIfNeeded advances as long as the next value is smaller than minval + AdvanceIfNeeded(minval uint32) +} + type intIterator struct { pos int hs uint32 - iter shortIterable + iter shortPeekable highlowcontainer *roaringArray } @@ -244,6 +272,30 @@ func (ii *intIterator) Next() uint32 { return x } +// PeekNext peeks the next value without advancing the iterator +func (ii *intIterator) PeekNext() uint32 { + return uint32(ii.iter.peekNext()&maxLowBit) | ii.hs +} + +// AdvanceIfNeeded advances as long as the next value is smaller than minval +func (ii *intIterator) AdvanceIfNeeded(minval uint32) { + to := minval >> 16 + + for ii.HasNext() && (ii.hs>>16) < to { + ii.pos++ + ii.init() + } + + if ii.HasNext() && (ii.hs>>16) == to { + ii.iter.advanceIfNeeded(lowbits(minval)) + + if !ii.iter.hasNext() { + ii.pos++ + ii.init() + } + } +} + func newIntIterator(a *Bitmap) *intIterator { p := new(intIterator) p.pos = 0 @@ -252,6 +304,45 @@ func newIntIterator(a *Bitmap) *intIterator { return p } +type intReverseIterator struct { + pos int + hs uint32 + iter shortIterable + highlowcontainer *roaringArray +} + +// HasNext returns true if there are more integers to iterate over +func (ii *intReverseIterator) HasNext() bool { + return ii.pos >= 0 +} + +func (ii *intReverseIterator) init() { + if ii.pos >= 0 { + ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getReverseIterator() + ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16 + } else { + ii.iter = nil + } +} + +// Next returns the next integer +func (ii *intReverseIterator) Next() uint32 { + x := uint32(ii.iter.next()) | ii.hs + if !ii.iter.hasNext() { + ii.pos = ii.pos - 1 + ii.init() + } + return x +} + +func newIntReverseIterator(a *Bitmap) *intReverseIterator { + p := new(intReverseIterator) + p.highlowcontainer = &a.highlowcontainer + p.pos = a.highlowcontainer.size() - 1 + p.init() + return p +} + // ManyIntIterable allows you to iterate over the values in a Bitmap type ManyIntIterable interface { // pass in a buffer to fill up with values, returns how many values were returned @@ -325,12 +416,20 @@ func (rb *Bitmap) String() string { return buffer.String() } -// Iterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order -func (rb *Bitmap) Iterator() IntIterable { +// Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order; +// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). +func (rb *Bitmap) Iterator() IntPeekable { return newIntIterator(rb) } -// Iterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order +// ReverseIterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order; +// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). +func (rb *Bitmap) ReverseIterator() IntIterable { + return newIntReverseIterator(rb) +} + +// ManyIterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order; +// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). func (rb *Bitmap) ManyIterator() ManyIntIterable { return newManyIntIterator(rb) } @@ -374,6 +473,46 @@ func (rb *Bitmap) Equals(o interface{}) bool { return false } +// AddOffset adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process +func AddOffset(x *Bitmap, offset uint32) (answer *Bitmap) { + containerOffset := highbits(offset) + inOffset := lowbits(offset) + if inOffset == 0 { + answer = x.Clone() + for pos := 0; pos < answer.highlowcontainer.size(); pos++ { + key := answer.highlowcontainer.getKeyAtIndex(pos) + key += containerOffset + answer.highlowcontainer.keys[pos] = key + } + } else { + answer = New() + for pos := 0; pos < x.highlowcontainer.size(); pos++ { + key := x.highlowcontainer.getKeyAtIndex(pos) + key += containerOffset + c := x.highlowcontainer.getContainerAtIndex(pos) + offsetted := c.addOffset(inOffset) + if offsetted[0].getCardinality() > 0 { + curSize := answer.highlowcontainer.size() + lastkey := uint16(0) + if curSize > 0 { + lastkey = answer.highlowcontainer.getKeyAtIndex(curSize - 1) + } + if curSize > 0 && lastkey == key { + prev := answer.highlowcontainer.getContainerAtIndex(curSize - 1) + orrseult := prev.ior(offsetted[0]) + answer.highlowcontainer.setContainerAtIndex(curSize-1, orrseult) + } else { + answer.highlowcontainer.appendContainer(key, offsetted[0], false) + } + } + if offsetted[1].getCardinality() > 0 { + answer.highlowcontainer.appendContainer(key+1, offsetted[1], false) + } + } + } + return answer +} + // Add the integer x to the bitmap func (rb *Bitmap) Add(x uint32) { hb := highbits(x) @@ -794,11 +933,6 @@ main: } } -/*func (rb *Bitmap) Or(x2 *Bitmap) { - results := Or(rb, x2) // Todo: could be computed in-place for reduced memory usage - rb.highlowcontainer = results.highlowcontainer -}*/ - // AndNot computes the difference between two bitmaps and stores the result in the current bitmap func (rb *Bitmap) AndNot(x2 *Bitmap) { pos1 := 0 @@ -1086,10 +1220,10 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) { return } - hbStart := highbits(uint32(rangeStart)) - lbStart := lowbits(uint32(rangeStart)) - hbLast := highbits(uint32(rangeEnd - 1)) - lbLast := lowbits(uint32(rangeEnd - 1)) + hbStart := uint32(highbits(uint32(rangeStart))) + lbStart := uint32(lowbits(uint32(rangeStart))) + hbLast := uint32(highbits(uint32(rangeEnd - 1))) + lbLast := uint32(lowbits(uint32(rangeEnd - 1))) var max uint32 = maxLowBit for hb := hbStart; hb <= hbLast; hb++ { @@ -1102,7 +1236,7 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) { containerLast = uint32(lbLast) } - i := rb.highlowcontainer.getIndex(hb) + i := rb.highlowcontainer.getIndex(uint16(hb)) if i >= 0 { c := rb.highlowcontainer.getWritableContainerAtIndex(i).inot(int(containerStart), int(containerLast)+1) @@ -1113,7 +1247,7 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) { } } else { // *think* the range of ones must never be // empty. - rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, rangeOfOnes(int(containerStart), int(containerLast))) + rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast))) } } } @@ -1139,24 +1273,24 @@ func (rb *Bitmap) AddRange(rangeStart, rangeEnd uint64) { lbLast := uint32(lowbits(uint32(rangeEnd - 1))) var max uint32 = maxLowBit - for hb := uint16(hbStart); hb <= uint16(hbLast); hb++ { + for hb := hbStart; hb <= hbLast; hb++ { containerStart := uint32(0) - if hb == uint16(hbStart) { + if hb == hbStart { containerStart = lbStart } containerLast := max - if hb == uint16(hbLast) { + if hb == hbLast { containerLast = lbLast } - i := rb.highlowcontainer.getIndex(hb) + i := rb.highlowcontainer.getIndex(uint16(hb)) if i >= 0 { c := rb.highlowcontainer.getWritableContainerAtIndex(i).iaddRange(int(containerStart), int(containerLast)+1) rb.highlowcontainer.setContainerAtIndex(i, c) } else { // *think* the range of ones must never be // empty. - rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, rangeOfOnes(int(containerStart), int(containerLast))) + rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast))) } } } @@ -1243,13 +1377,13 @@ func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap { } answer := NewBitmap() - hbStart := highbits(uint32(rangeStart)) - lbStart := lowbits(uint32(rangeStart)) - hbLast := highbits(uint32(rangeEnd - 1)) - lbLast := lowbits(uint32(rangeEnd - 1)) + hbStart := uint32(highbits(uint32(rangeStart))) + lbStart := uint32(lowbits(uint32(rangeStart))) + hbLast := uint32(highbits(uint32(rangeEnd - 1))) + lbLast := uint32(lowbits(uint32(rangeEnd - 1))) // copy the containers before the active area - answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, hbStart) + answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, uint16(hbStart)) var max uint32 = maxLowBit for hb := hbStart; hb <= hbLast; hb++ { @@ -1262,23 +1396,23 @@ func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap { containerLast = uint32(lbLast) } - i := bm.highlowcontainer.getIndex(hb) - j := answer.highlowcontainer.getIndex(hb) + i := bm.highlowcontainer.getIndex(uint16(hb)) + j := answer.highlowcontainer.getIndex(uint16(hb)) if i >= 0 { c := bm.highlowcontainer.getContainerAtIndex(i).not(int(containerStart), int(containerLast)+1) if c.getCardinality() > 0 { - answer.highlowcontainer.insertNewKeyValueAt(-j-1, hb, c) + answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb), c) } } else { // *think* the range of ones must never be // empty. - answer.highlowcontainer.insertNewKeyValueAt(-j-1, hb, + answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast))) } } // copy the containers after the active area. - answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, hbLast) + answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, uint16(hbLast)) return answer } @@ -1296,6 +1430,21 @@ func (rb *Bitmap) GetCopyOnWrite() (val bool) { return rb.highlowcontainer.copyOnWrite } +// CloneCopyOnWriteContainers clones all containers which have +// needCopyOnWrite set to true. +// This can be used to make sure it is safe to munmap a []byte +// that the roaring array may still have a reference to, after +// calling FromBuffer. +// More generally this function is useful if you call FromBuffer +// to construct a bitmap with a backing array buf +// and then later discard the buf array. Note that you should call +// CloneCopyOnWriteContainers on all bitmaps that were derived +// from the 'FromBuffer' bitmap since they map have dependencies +// on the buf array as well. +func (rb *Bitmap) CloneCopyOnWriteContainers() { + rb.highlowcontainer.cloneCopyOnWriteContainers() +} + // FlipInt calls Flip after casting the parameters (convenience method) func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap { return Flip(bm, uint64(rangeStart), uint64(rangeEnd)) diff --git a/vendor/github.com/RoaringBitmap/roaring/roaringarray.go b/vendor/github.com/RoaringBitmap/roaring/roaringarray.go index d9659159d6..d9d5edda73 100644 --- a/vendor/github.com/RoaringBitmap/roaring/roaringarray.go +++ b/vendor/github.com/RoaringBitmap/roaring/roaringarray.go @@ -4,16 +4,16 @@ import ( "bytes" "encoding/binary" "fmt" - "io" - "io/ioutil" - snappy "github.com/glycerine/go-unsnap-stream" "github.com/tinylib/msgp/msgp" + "io" ) //go:generate msgp -unexported type container interface { + addOffset(uint16) []container + clone() container and(container) container andCardinality(container) int @@ -37,7 +37,8 @@ type container interface { not(start, final int) container // range is [firstOfRange,lastOfRange) inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx) xor(r container) container - getShortIterator() shortIterable + getShortIterator() shortPeekable + getReverseIterator() shortIterable getManyIterator() manyIterable contains(i uint16) bool maximum() uint16 @@ -61,7 +62,6 @@ type container interface { iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange) selectInt(x uint16) int // selectInt returns the xth integer in the container serializedSizeInBytes() int - readFrom(io.Reader) (int, error) writeTo(io.Writer) (int, error) numberOfRuns() int @@ -280,6 +280,18 @@ func (ra *roaringArray) clone() *roaringArray { return &sa } +// clone all containers which have needCopyOnWrite set to true +// This can be used to make sure it is safe to munmap a []byte +// that the roaring array may still have a reference to. +func (ra *roaringArray) cloneCopyOnWriteContainers() { + for i, needCopyOnWrite := range ra.needCopyOnWrite { + if needCopyOnWrite { + ra.containers[i] = ra.containers[i].clone() + ra.needCopyOnWrite[i] = false + } + } +} + // unused function: //func (ra *roaringArray) containsKey(x uint16) bool { // return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0) @@ -456,8 +468,7 @@ func (ra *roaringArray) serializedSizeInBytes() uint64 { // // spec: https://github.com/RoaringBitmap/RoaringFormatSpec // -func (ra *roaringArray) toBytes() ([]byte, error) { - stream := &bytes.Buffer{} +func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) { hasRun := ra.hasRunCompression() isRunSizeInBytes := 0 cookieSize := 8 @@ -522,79 +533,77 @@ func (ra *roaringArray) toBytes() ([]byte, error) { } } - _, err := stream.Write(buf[:nw]) + written, err := w.Write(buf[:nw]) if err != nil { - return nil, err + return n, err } - for i, c := range ra.containers { - _ = i - _, err := c.writeTo(stream) + n += int64(written) + + for _, c := range ra.containers { + written, err := c.writeTo(w) if err != nil { - return nil, err + return n, err } + n += int64(written) } - return stream.Bytes(), nil + return n, nil } // // spec: https://github.com/RoaringBitmap/RoaringFormatSpec // -func (ra *roaringArray) writeTo(out io.Writer) (int64, error) { - by, err := ra.toBytes() - if err != nil { - return 0, err - } - n, err := out.Write(by) - if err == nil && n < len(by) { - err = io.ErrShortWrite - } - return int64(n), err +func (ra *roaringArray) toBytes() ([]byte, error) { + var buf bytes.Buffer + _, err := ra.writeTo(&buf) + return buf.Bytes(), err } -func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) { - pos := 0 - if len(buf) < 8 { - return 0, fmt.Errorf("buffer too small, expecting at least 8 bytes, was %d", len(buf)) +func (ra *roaringArray) readFrom(stream byteInput) (int64, error) { + cookie, err := stream.readUInt32() + + if err != nil { + return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err) } - cookie := binary.LittleEndian.Uint32(buf) - pos += 4 - var size uint32 // number of containers - haveRunContainers := false + var size uint32 var isRunBitmap []byte - // cookie header if cookie&0x0000FFFF == serialCookie { - haveRunContainers = true - size = uint32(uint16(cookie>>16) + 1) // number of containers - + size = uint32(uint16(cookie>>16) + 1) // create is-run-container bitmap isRunBitmapSize := (int(size) + 7) / 8 - if pos+isRunBitmapSize > len(buf) { - return 0, fmt.Errorf("malformed bitmap, is-run bitmap overruns buffer at %d", pos+isRunBitmapSize) - } + isRunBitmap, err = stream.next(isRunBitmapSize) - isRunBitmap = buf[pos : pos+isRunBitmapSize] - pos += isRunBitmapSize + if err != nil { + return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err) + } } else if cookie == serialCookieNoRunContainer { - size = binary.LittleEndian.Uint32(buf[pos:]) - pos += 4 + size, err = stream.readUInt32() + + if err != nil { + return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err) + } } else { - return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header") + return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header") } + if size > (1 << 16) { - return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.") + return stream.getReadBytes(), fmt.Errorf("it is logically impossible to have more than (1<<16) containers") } + // descriptive header - // keycard - is {key, cardinality} tuple slice - if pos+2*2*int(size) > len(buf) { - return 0, fmt.Errorf("malfomred bitmap, key-cardinality slice overruns buffer at %d", pos+2*2*int(size)) + buf, err := stream.next(2 * 2 * int(size)) + + if err != nil { + return stream.getReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err) } - keycard := byteSliceAsUint16Slice(buf[pos : pos+2*2*int(size)]) - pos += 2 * 2 * int(size) - if !haveRunContainers || size >= noOffsetThreshold { - pos += 4 * int(size) + keycard := byteSliceAsUint16Slice(buf) + + if isRunBitmap == nil || size >= noOffsetThreshold { + if err := stream.skipBytes(int(size) * 4); err != nil { + return stream.getReadBytes(), fmt.Errorf("failed to skip bytes: %s", err) + } } // Allocate slices upfront as number of containers is known @@ -603,11 +612,13 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) { } else { ra.containers = make([]container, size) } + if cap(ra.keys) >= int(size) { ra.keys = ra.keys[:size] } else { ra.keys = make([]uint16, size) } + if cap(ra.needCopyOnWrite) >= int(size) { ra.needCopyOnWrite = ra.needCopyOnWrite[:size] } else { @@ -615,129 +626,62 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) { } for i := uint32(0); i < size; i++ { - key := uint16(keycard[2*i]) + key := keycard[2*i] card := int(keycard[2*i+1]) + 1 ra.keys[i] = key ra.needCopyOnWrite[i] = true - if haveRunContainers && isRunBitmap[i/8]&(1<<(i%8)) != 0 { + if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 { // run container - nr := binary.LittleEndian.Uint16(buf[pos:]) - pos += 2 - if pos+int(nr)*4 > len(buf) { - return 0, fmt.Errorf("malformed bitmap, a run container overruns buffer at %d:%d", pos, pos+int(nr)*4) + nr, err := stream.readUInt16() + + if err != nil { + return 0, fmt.Errorf("failed to read runtime container size: %s", err) + } + + buf, err := stream.next(int(nr) * 4) + + if err != nil { + return stream.getReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err) } + nb := runContainer16{ - iv: byteSliceAsInterval16Slice(buf[pos : pos+int(nr)*4]), + iv: byteSliceAsInterval16Slice(buf), card: int64(card), } - pos += int(nr) * 4 + ra.containers[i] = &nb } else if card > arrayDefaultMaxSize { // bitmap container + buf, err := stream.next(arrayDefaultMaxSize * 2) + + if err != nil { + return stream.getReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err) + } + nb := bitmapContainer{ cardinality: card, - bitmap: byteSliceAsUint64Slice(buf[pos : pos+arrayDefaultMaxSize*2]), + bitmap: byteSliceAsUint64Slice(buf), } - pos += arrayDefaultMaxSize * 2 + ra.containers[i] = &nb } else { // array container - nb := arrayContainer{ - byteSliceAsUint16Slice(buf[pos : pos+card*2]), - } - pos += card * 2 - ra.containers[i] = &nb - } - } - - return int64(pos), nil -} + buf, err := stream.next(card * 2) -func (ra *roaringArray) readFrom(stream io.Reader) (int64, error) { - pos := 0 - var cookie uint32 - err := binary.Read(stream, binary.LittleEndian, &cookie) - if err != nil { - return 0, fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err) - } - pos += 4 - var size uint32 - haveRunContainers := false - var isRun *bitmapContainer - if cookie&0x0000FFFF == serialCookie { - haveRunContainers = true - size = uint32(uint16(cookie>>16) + 1) - bytesToRead := (int(size) + 7) / 8 - numwords := (bytesToRead + 7) / 8 - by := make([]byte, bytesToRead, numwords*8) - nr, err := io.ReadFull(stream, by) - if err != nil { - return 8 + int64(nr), fmt.Errorf("error in readFrom: could not read the "+ - "runContainer bit flags of length %v bytes: %v", bytesToRead, err) - } - pos += bytesToRead - by = by[:cap(by)] - isRun = newBitmapContainer() - for i := 0; i < numwords; i++ { - isRun.bitmap[i] = binary.LittleEndian.Uint64(by) - by = by[8:] - } - } else if cookie == serialCookieNoRunContainer { - err = binary.Read(stream, binary.LittleEndian, &size) - if err != nil { - return 0, fmt.Errorf("error in roaringArray.readFrom: when reading size, got: %s", err) - } - pos += 4 - } else { - return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header") - } - if size > (1 << 16) { - return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.") - } - // descriptive header - keycard := make([]uint16, 2*size, 2*size) - err = binary.Read(stream, binary.LittleEndian, keycard) - if err != nil { - return 0, err - } - pos += 2 * 2 * int(size) - // offset header - if !haveRunContainers || size >= noOffsetThreshold { - io.CopyN(ioutil.Discard, stream, 4*int64(size)) // we never skip ahead so this data can be ignored - pos += 4 * int(size) - } - for i := uint32(0); i < size; i++ { - key := int(keycard[2*i]) - card := int(keycard[2*i+1]) + 1 - if haveRunContainers && isRun.contains(uint16(i)) { - nb := newRunContainer16() - nr, err := nb.readFrom(stream) if err != nil { - return 0, err + return stream.getReadBytes(), fmt.Errorf("failed to read array container: %s", err) } - pos += nr - ra.appendContainer(uint16(key), nb, false) - } else if card > arrayDefaultMaxSize { - nb := newBitmapContainer() - nr, err := nb.readFrom(stream) - if err != nil { - return 0, err - } - nb.cardinality = card - pos += nr - ra.appendContainer(keycard[2*i], nb, false) - } else { - nb := newArrayContainerSize(card) - nr, err := nb.readFrom(stream) - if err != nil { - return 0, err + + nb := arrayContainer{ + byteSliceAsUint16Slice(buf), } - pos += nr - ra.appendContainer(keycard[2*i], nb, false) + + ra.containers[i] = &nb } } - return int64(pos), nil + + return stream.getReadBytes(), nil } func (ra *roaringArray) hasRunCompression() bool { diff --git a/vendor/github.com/RoaringBitmap/roaring/roaringarray_gen.go b/vendor/github.com/RoaringBitmap/roaring/roaringarray_gen.go index 99fb0f6972..dcd718756a 100644 --- a/vendor/github.com/RoaringBitmap/roaring/roaringarray_gen.go +++ b/vendor/github.com/RoaringBitmap/roaring/roaringarray_gen.go @@ -8,7 +8,7 @@ import ( "github.com/tinylib/msgp/msgp" ) -// DecodeMsg implements msgp.Decodable +// Deprecated: DecodeMsg implements msgp.Decodable func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field @@ -48,7 +48,7 @@ func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) { return } -// EncodeMsg implements msgp.Encodable +// Deprecated: EncodeMsg implements msgp.Encodable func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) { // map header, size 2 // write "t" @@ -72,7 +72,7 @@ func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) { return } -// MarshalMsg implements msgp.Marshaler +// Deprecated: MarshalMsg implements msgp.Marshaler func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) // map header, size 2 @@ -88,7 +88,7 @@ func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) { return } -// UnmarshalMsg implements msgp.Unmarshaler +// Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field @@ -129,13 +129,13 @@ func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) { return } -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z *containerSerz) Msgsize() (s int) { s = 1 + 2 + msgp.Uint8Size + 2 + z.r.Msgsize() return } -// DecodeMsg implements msgp.Decodable +// Deprecated: DecodeMsg implements msgp.Decodable func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) { { var zajw uint8 @@ -148,7 +148,7 @@ func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) { return } -// EncodeMsg implements msgp.Encodable +// Deprecated: EncodeMsg implements msgp.Encodable func (z contype) EncodeMsg(en *msgp.Writer) (err error) { err = en.WriteUint8(uint8(z)) if err != nil { @@ -157,14 +157,14 @@ func (z contype) EncodeMsg(en *msgp.Writer) (err error) { return } -// MarshalMsg implements msgp.Marshaler +// Deprecated: MarshalMsg implements msgp.Marshaler func (z contype) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) o = msgp.AppendUint8(o, uint8(z)) return } -// UnmarshalMsg implements msgp.Unmarshaler +// Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) { { var zwht uint8 @@ -178,13 +178,13 @@ func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) { return } -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z contype) Msgsize() (s int) { s = msgp.Uint8Size return } -// DecodeMsg implements msgp.Decodable +// Deprecated: DecodeMsg implements msgp.Decodable func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field @@ -295,7 +295,7 @@ func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) { return } -// EncodeMsg implements msgp.Encodable +// Deprecated: EncodeMsg implements msgp.Encodable func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) { // map header, size 4 // write "keys" @@ -370,7 +370,7 @@ func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) { return } -// MarshalMsg implements msgp.Marshaler +// Deprecated: MarshalMsg implements msgp.Marshaler func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) // map header, size 4 @@ -407,7 +407,7 @@ func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) { return } -// UnmarshalMsg implements msgp.Unmarshaler +// Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field @@ -519,7 +519,7 @@ func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) { return } -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z *roaringArray) Msgsize() (s int) { s = 1 + 5 + msgp.ArrayHeaderSize + (len(z.keys) * (msgp.Uint16Size)) + 16 + msgp.ArrayHeaderSize + (len(z.needCopyOnWrite) * (msgp.BoolSize)) + 12 + msgp.BoolSize + 8 + msgp.ArrayHeaderSize for zxhx := range z.conserz { diff --git a/vendor/github.com/RoaringBitmap/roaring/rle16.go b/vendor/github.com/RoaringBitmap/roaring/runcontainer.go index 951af65f3f..cbffdaf24d 100644 --- a/vendor/github.com/RoaringBitmap/roaring/rle16.go +++ b/vendor/github.com/RoaringBitmap/roaring/runcontainer.go @@ -853,6 +853,21 @@ func (rc *runContainer16) numIntervals() int { return len(rc.iv) } +// searchOptions allows us to accelerate search with +// prior knowledge of (mostly lower) bounds. This is used by Union +// and Intersect. +type searchOptions struct { + // start here instead of at 0 + startIndex int64 + + // upper bound instead of len(rc.iv); + // endxIndex == 0 means ignore the bound and use + // endxIndex == n ==len(rc.iv) which is also + // naturally the default for search() + // when opt = nil. + endxIndex int64 +} + // search returns alreadyPresent to indicate if the // key is already in one of our interval16s. // @@ -1134,135 +1149,163 @@ func (rc *runContainer16) Add(k uint16) (wasNew bool) { //msgp:ignore runIterator -// runIterator16 advice: you must call Next() at least once -// before calling Cur(); and you should call HasNext() -// before calling Next() to insure there are contents. +// runIterator16 advice: you must call hasNext() +// before calling next()/peekNext() to insure there are contents. type runIterator16 struct { rc *runContainer16 curIndex int64 curPosInIndex uint16 - curSeq int64 } // newRunIterator16 returns a new empty run container. func (rc *runContainer16) newRunIterator16() *runIterator16 { - return &runIterator16{rc: rc, curIndex: -1} + return &runIterator16{rc: rc, curIndex: 0, curPosInIndex: 0} } -// HasNext returns false if calling Next will panic. It +// hasNext returns false if calling next will panic. It // returns true when there is at least one more value // available in the iteration sequence. func (ri *runIterator16) hasNext() bool { - if len(ri.rc.iv) == 0 { - return false - } - if ri.curIndex == -1 { - return true + return int64(len(ri.rc.iv)) > ri.curIndex+1 || + (int64(len(ri.rc.iv)) == ri.curIndex+1 && ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex) +} + +// next returns the next value in the iteration sequence. +func (ri *runIterator16) next() uint16 { + next := ri.rc.iv[ri.curIndex].start + ri.curPosInIndex + + if ri.curPosInIndex == ri.rc.iv[ri.curIndex].length { + ri.curPosInIndex = 0 + ri.curIndex++ + } else { + ri.curPosInIndex++ } - return ri.curSeq+1 < ri.rc.cardinality() + + return next } -// cur returns the current value pointed to by the iterator. -func (ri *runIterator16) cur() uint16 { +// peekNext returns the next value in the iteration sequence without advancing the iterator +func (ri *runIterator16) peekNext() uint16 { return ri.rc.iv[ri.curIndex].start + ri.curPosInIndex } -// Next returns the next value in the iteration sequence. -func (ri *runIterator16) next() uint16 { - if !ri.hasNext() { - panic("no Next available") +// advanceIfNeeded advances as long as the next value is smaller than minval +func (ri *runIterator16) advanceIfNeeded(minval uint16) { + if !ri.hasNext() || ri.peekNext() >= minval { + return } - if ri.curIndex >= int64(len(ri.rc.iv)) { - panic("runIterator.Next() going beyond what is available") + + opt := &searchOptions{ + startIndex: ri.curIndex, + endxIndex: int64(len(ri.rc.iv)), } - if ri.curIndex == -1 { - // first time is special - ri.curIndex = 0 + + // interval cannot be -1 because of minval > peekNext + interval, isPresent, _ := ri.rc.search(int64(minval), opt) + + // if the minval is present, set the curPosIndex at the right position + if isPresent { + ri.curIndex = interval + ri.curPosInIndex = minval - ri.rc.iv[ri.curIndex].start } else { - ri.curPosInIndex++ - if int64(ri.rc.iv[ri.curIndex].start)+int64(ri.curPosInIndex) == int64(ri.rc.iv[ri.curIndex].last())+1 { - ri.curPosInIndex = 0 - ri.curIndex++ - } - ri.curSeq++ + // otherwise interval is set to to the minimum index of rc.iv + // which comes strictly before the key, that's why we set the next interval + ri.curIndex = interval + 1 + ri.curPosInIndex = 0 } - return ri.cur() } -// remove removes the element that the iterator -// is on from the run container. You can use -// Cur if you want to double check what is about -// to be deleted. -func (ri *runIterator16) remove() uint16 { - n := ri.rc.cardinality() - if n == 0 { - panic("runIterator.Remove called on empty runContainer16") +// runReverseIterator16 advice: you must call hasNext() +// before calling next() to insure there are contents. +type runReverseIterator16 struct { + rc *runContainer16 + curIndex int64 // index into rc.iv + curPosInIndex uint16 // offset in rc.iv[curIndex] +} + +// newRunReverseIterator16 returns a new empty run iterator. +func (rc *runContainer16) newRunReverseIterator16() *runReverseIterator16 { + index := int64(len(rc.iv)) - 1 + pos := uint16(0) + + if index >= 0 { + pos = rc.iv[index].length } - cur := ri.cur() - ri.rc.deleteAt(&ri.curIndex, &ri.curPosInIndex, &ri.curSeq) - return cur + return &runReverseIterator16{ + rc: rc, + curIndex: index, + curPosInIndex: pos, + } } -type manyRunIterator16 struct { - rc *runContainer16 - curIndex int64 - curPosInIndex uint16 - curSeq int64 +// hasNext returns false if calling next will panic. It +// returns true when there is at least one more value +// available in the iteration sequence. +func (ri *runReverseIterator16) hasNext() bool { + return ri.curIndex > 0 || ri.curIndex == 0 && ri.curPosInIndex >= 0 } -func (rc *runContainer16) newManyRunIterator16() *manyRunIterator16 { - return &manyRunIterator16{rc: rc, curIndex: -1} -} +// next returns the next value in the iteration sequence. +func (ri *runReverseIterator16) next() uint16 { + next := ri.rc.iv[ri.curIndex].start + ri.curPosInIndex -func (ri *manyRunIterator16) hasNext() bool { - if len(ri.rc.iv) == 0 { - return false - } - if ri.curIndex == -1 { - return true + if ri.curPosInIndex > 0 { + ri.curPosInIndex-- + } else { + ri.curIndex-- + + if ri.curIndex >= 0 { + ri.curPosInIndex = ri.rc.iv[ri.curIndex].length + } } - return ri.curSeq+1 < ri.rc.cardinality() + + return next +} + +func (rc *runContainer16) newManyRunIterator16() *runIterator16 { + return rc.newRunIterator16() } // hs are the high bits to include to avoid needing to reiterate over the buffer in NextMany -func (ri *manyRunIterator16) nextMany(hs uint32, buf []uint32) int { +func (ri *runIterator16) nextMany(hs uint32, buf []uint32) int { n := 0 + if !ri.hasNext() { return n } + // start and end are inclusive for n < len(buf) { - if ri.curIndex == -1 || int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex) <= 0 { + moreVals := 0 + + if ri.rc.iv[ri.curIndex].length >= ri.curPosInIndex { + // add as many as you can from this seq + moreVals = minOfInt(int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex)+1, len(buf)-n) + base := uint32(ri.rc.iv[ri.curIndex].start+ri.curPosInIndex) | hs + + // allows BCE + buf2 := buf[n : n+moreVals] + for i := range buf2 { + buf2[i] = base + uint32(i) + } + + // update values + n += moreVals + } + + if moreVals+int(ri.curPosInIndex) > int(ri.rc.iv[ri.curIndex].length) { ri.curPosInIndex = 0 ri.curIndex++ + if ri.curIndex == int64(len(ri.rc.iv)) { break } - buf[n] = uint32(ri.rc.iv[ri.curIndex].start) | hs - if ri.curIndex != 0 { - ri.curSeq += 1 - } - n += 1 - // not strictly necessarily due to len(buf)-n min check, but saves some work - continue - } - // add as many as you can from this seq - moreVals := minOfInt(int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex), len(buf)-n) - - base := uint32(ri.rc.iv[ri.curIndex].start+ri.curPosInIndex+1) | hs - - // allows BCE - buf2 := buf[n : n+moreVals] - for i := range buf2 { - buf2[i] = base + uint32(i) + } else { + ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16 } - - // update values - ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16 - ri.curSeq += int64(moreVals) - n += moreVals } + return n } @@ -1270,21 +1313,19 @@ func (ri *manyRunIterator16) nextMany(hs uint32, buf []uint32) int { func (rc *runContainer16) removeKey(key uint16) (wasPresent bool) { var index int64 - var curSeq int64 index, wasPresent, _ = rc.search(int64(key), nil) if !wasPresent { return // already removed, nothing to do. } pos := key - rc.iv[index].start - rc.deleteAt(&index, &pos, &curSeq) + rc.deleteAt(&index, &pos) return } // internal helper functions -func (rc *runContainer16) deleteAt(curIndex *int64, curPosInIndex *uint16, curSeq *int64) { +func (rc *runContainer16) deleteAt(curIndex *int64, curPosInIndex *uint16) { rc.card-- - *curSeq-- ci := *curIndex pos := *curPosInIndex @@ -1401,7 +1442,7 @@ func (rc *runContainer16) selectInt16(j uint16) int { var offset int64 for k := range rc.iv { - nextOffset := offset + rc.iv[k].runlen() + 1 + nextOffset := offset + rc.iv[k].runlen() if nextOffset > int64(j) { return int(int64(rc.iv[k].start) + (int64(j) - offset)) } @@ -1724,24 +1765,750 @@ func (rc *runContainer16) containerType() contype { } func (rc *runContainer16) equals16(srb *runContainer16) bool { - //p("both rc16") // Check if the containers are the same object. if rc == srb { - //p("same object") return true } if len(srb.iv) != len(rc.iv) { - //p("iv len differ") return false } for i, v := range rc.iv { if v != srb.iv[i] { - //p("differ at iv i=%v, srb.iv[i]=%v, rc.iv[i]=%v", i, srb.iv[i], rc.iv[i]) return false } } - //p("all intervals same, returning true") return true } + +// compile time verify we meet interface requirements +var _ container = &runContainer16{} + +func (rc *runContainer16) clone() container { + return newRunContainer16CopyIv(rc.iv) +} + +func (rc *runContainer16) minimum() uint16 { + return rc.iv[0].start // assume not empty +} + +func (rc *runContainer16) maximum() uint16 { + return rc.iv[len(rc.iv)-1].last() // assume not empty +} + +func (rc *runContainer16) isFull() bool { + return (len(rc.iv) == 1) && ((rc.iv[0].start == 0) && (rc.iv[0].last() == MaxUint16)) +} + +func (rc *runContainer16) and(a container) container { + if rc.isFull() { + return a.clone() + } + switch c := a.(type) { + case *runContainer16: + return rc.intersect(c) + case *arrayContainer: + return rc.andArray(c) + case *bitmapContainer: + return rc.andBitmapContainer(c) + } + panic("unsupported container type") +} + +func (rc *runContainer16) andCardinality(a container) int { + switch c := a.(type) { + case *runContainer16: + return int(rc.intersectCardinality(c)) + case *arrayContainer: + return rc.andArrayCardinality(c) + case *bitmapContainer: + return rc.andBitmapContainerCardinality(c) + } + panic("unsupported container type") +} + +// andBitmapContainer finds the intersection of rc and b. +func (rc *runContainer16) andBitmapContainer(bc *bitmapContainer) container { + bc2 := newBitmapContainerFromRun(rc) + return bc2.andBitmap(bc) +} + +func (rc *runContainer16) andArrayCardinality(ac *arrayContainer) int { + pos := 0 + answer := 0 + maxpos := ac.getCardinality() + if maxpos == 0 { + return 0 // won't happen in actual code + } + v := ac.content[pos] +mainloop: + for _, p := range rc.iv { + for v < p.start { + pos++ + if pos == maxpos { + break mainloop + } + v = ac.content[pos] + } + for v <= p.last() { + answer++ + pos++ + if pos == maxpos { + break mainloop + } + v = ac.content[pos] + } + } + return answer +} + +func (rc *runContainer16) iand(a container) container { + if rc.isFull() { + return a.clone() + } + switch c := a.(type) { + case *runContainer16: + return rc.inplaceIntersect(c) + case *arrayContainer: + return rc.andArray(c) + case *bitmapContainer: + return rc.iandBitmapContainer(c) + } + panic("unsupported container type") +} + +func (rc *runContainer16) inplaceIntersect(rc2 *runContainer16) container { + // TODO: optimize by doing less allocation, possibly? + // sect will be new + sect := rc.intersect(rc2) + *rc = *sect + return rc +} + +func (rc *runContainer16) iandBitmapContainer(bc *bitmapContainer) container { + isect := rc.andBitmapContainer(bc) + *rc = *newRunContainer16FromContainer(isect) + return rc +} + +func (rc *runContainer16) andArray(ac *arrayContainer) container { + if len(rc.iv) == 0 { + return newArrayContainer() + } + + acCardinality := ac.getCardinality() + c := newArrayContainerCapacity(acCardinality) + + for rlePos, arrayPos := 0, 0; arrayPos < acCardinality; { + iv := rc.iv[rlePos] + arrayVal := ac.content[arrayPos] + + for iv.last() < arrayVal { + rlePos++ + if rlePos == len(rc.iv) { + return c + } + iv = rc.iv[rlePos] + } + + if iv.start > arrayVal { + arrayPos = advanceUntil(ac.content, arrayPos, len(ac.content), iv.start) + } else { + c.content = append(c.content, arrayVal) + arrayPos++ + } + } + return c +} + +func (rc *runContainer16) andNot(a container) container { + switch c := a.(type) { + case *arrayContainer: + return rc.andNotArray(c) + case *bitmapContainer: + return rc.andNotBitmap(c) + case *runContainer16: + return rc.andNotRunContainer16(c) + } + panic("unsupported container type") +} + +func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uint32) { + k := 0 + var val int64 + for _, p := range rc.iv { + n := p.runlen() + for j := int64(0); j < n; j++ { + val = int64(p.start) + j + x[k+i] = uint32(val) | mask + k++ + } + } +} + +func (rc *runContainer16) getShortIterator() shortPeekable { + return rc.newRunIterator16() +} + +func (rc *runContainer16) getReverseIterator() shortIterable { + return rc.newRunReverseIterator16() +} + +func (rc *runContainer16) getManyIterator() manyIterable { + return rc.newManyRunIterator16() +} + +// add the values in the range [firstOfRange, endx). endx +// is still abe to express 2^16 because it is an int not an uint16. +func (rc *runContainer16) iaddRange(firstOfRange, endx int) container { + + if firstOfRange >= endx { + panic(fmt.Sprintf("invalid %v = endx >= firstOfRange", endx)) + } + addme := newRunContainer16TakeOwnership([]interval16{ + { + start: uint16(firstOfRange), + length: uint16(endx - 1 - firstOfRange), + }, + }) + *rc = *rc.union(addme) + return rc +} + +// remove the values in the range [firstOfRange,endx) +func (rc *runContainer16) iremoveRange(firstOfRange, endx int) container { + if firstOfRange >= endx { + panic(fmt.Sprintf("request to iremove empty set [%v, %v),"+ + " nothing to do.", firstOfRange, endx)) + //return rc + } + x := newInterval16Range(uint16(firstOfRange), uint16(endx-1)) + rc.isubtract(x) + return rc +} + +// not flip the values in the range [firstOfRange,endx) +func (rc *runContainer16) not(firstOfRange, endx int) container { + if firstOfRange >= endx { + panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange)) + } + + return rc.Not(firstOfRange, endx) +} + +// Not flips the values in the range [firstOfRange,endx). +// This is not inplace. Only the returned value has the flipped bits. +// +// Currently implemented as (!A intersect B) union (A minus B), +// where A is rc, and B is the supplied [firstOfRange, endx) interval. +// +// TODO(time optimization): convert this to a single pass +// algorithm by copying AndNotRunContainer16() and modifying it. +// Current routine is correct but +// makes 2 more passes through the arrays than should be +// strictly necessary. Measure both ways though--this may not matter. +// +func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 { + + if firstOfRange >= endx { + panic(fmt.Sprintf("invalid %v = endx >= firstOfRange == %v", endx, firstOfRange)) + } + + if firstOfRange >= endx { + return rc.Clone() + } + + a := rc + // algo: + // (!A intersect B) union (A minus B) + + nota := a.invert() + + bs := []interval16{newInterval16Range(uint16(firstOfRange), uint16(endx-1))} + b := newRunContainer16TakeOwnership(bs) + + notAintersectB := nota.intersect(b) + + aMinusB := a.AndNotRunContainer16(b) + + rc2 := notAintersectB.union(aMinusB) + return rc2 +} + +// equals is now logical equals; it does not require the +// same underlying container type. +func (rc *runContainer16) equals(o container) bool { + srb, ok := o.(*runContainer16) + + if !ok { + // maybe value instead of pointer + val, valok := o.(*runContainer16) + if valok { + srb = val + ok = true + } + } + if ok { + // Check if the containers are the same object. + if rc == srb { + return true + } + + if len(srb.iv) != len(rc.iv) { + return false + } + + for i, v := range rc.iv { + if v != srb.iv[i] { + return false + } + } + return true + } + + // use generic comparison + if o.getCardinality() != rc.getCardinality() { + return false + } + rit := rc.getShortIterator() + bit := o.getShortIterator() + + //k := 0 + for rit.hasNext() { + if bit.next() != rit.next() { + return false + } + //k++ + } + return true +} + +func (rc *runContainer16) iaddReturnMinimized(x uint16) container { + rc.Add(x) + return rc +} + +func (rc *runContainer16) iadd(x uint16) (wasNew bool) { + return rc.Add(x) +} + +func (rc *runContainer16) iremoveReturnMinimized(x uint16) container { + rc.removeKey(x) + return rc +} + +func (rc *runContainer16) iremove(x uint16) bool { + return rc.removeKey(x) +} + +func (rc *runContainer16) or(a container) container { + if rc.isFull() { + return rc.clone() + } + switch c := a.(type) { + case *runContainer16: + return rc.union(c) + case *arrayContainer: + return rc.orArray(c) + case *bitmapContainer: + return rc.orBitmapContainer(c) + } + panic("unsupported container type") +} + +func (rc *runContainer16) orCardinality(a container) int { + switch c := a.(type) { + case *runContainer16: + return int(rc.unionCardinality(c)) + case *arrayContainer: + return rc.orArrayCardinality(c) + case *bitmapContainer: + return rc.orBitmapContainerCardinality(c) + } + panic("unsupported container type") +} + +// orBitmapContainer finds the union of rc and bc. +func (rc *runContainer16) orBitmapContainer(bc *bitmapContainer) container { + bc2 := newBitmapContainerFromRun(rc) + return bc2.iorBitmap(bc) +} + +func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int { + answer := 0 + for i := range rc.iv { + answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1) + } + //bc.computeCardinality() + return answer +} + +func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int { + return rc.getCardinality() + bc.getCardinality() - rc.andBitmapContainerCardinality(bc) +} + +// orArray finds the union of rc and ac. +func (rc *runContainer16) orArray(ac *arrayContainer) container { + bc1 := newBitmapContainerFromRun(rc) + bc2 := ac.toBitmapContainer() + return bc1.orBitmap(bc2) +} + +// orArray finds the union of rc and ac. +func (rc *runContainer16) orArrayCardinality(ac *arrayContainer) int { + return ac.getCardinality() + rc.getCardinality() - rc.andArrayCardinality(ac) +} + +func (rc *runContainer16) ior(a container) container { + if rc.isFull() { + return rc + } + switch c := a.(type) { + case *runContainer16: + return rc.inplaceUnion(c) + case *arrayContainer: + return rc.iorArray(c) + case *bitmapContainer: + return rc.iorBitmapContainer(c) + } + panic("unsupported container type") +} + +func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container { + for _, p := range rc2.iv { + last := int64(p.last()) + for i := int64(p.start); i <= last; i++ { + rc.Add(uint16(i)) + } + } + return rc +} + +func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container { + + it := bc.getShortIterator() + for it.hasNext() { + rc.Add(it.next()) + } + return rc +} + +func (rc *runContainer16) iorArray(ac *arrayContainer) container { + it := ac.getShortIterator() + for it.hasNext() { + rc.Add(it.next()) + } + return rc +} + +// lazyIOR is described (not yet implemented) in +// this nice note from @lemire on +// https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737 +// +// Description of lazyOR and lazyIOR from @lemire: +// +// Lazy functions are optional and can be simply +// wrapper around non-lazy functions. +// +// The idea of "laziness" is as follows. It is +// inspired by the concept of lazy evaluation +// you might be familiar with (functional programming +// and all that). So a roaring bitmap is +// such that all its containers are, in some +// sense, chosen to use as little memory as +// possible. This is nice. Also, all bitsets +// are "cardinality aware" so that you can do +// fast rank/select queries, or query the +// cardinality of the whole bitmap... very fast, +// without latency. +// +// However, imagine that you are aggregating 100 +// bitmaps together. So you OR the first two, then OR +// that with the third one and so forth. Clearly, +// intermediate bitmaps don't need to be as +// compressed as possible, right? They can be +// in a "dirty state". You only need the end +// result to be in a nice state... which you +// can achieve by calling repairAfterLazy at the end. +// +// The Java/C code does something special for +// the in-place lazy OR runs. The idea is that +// instead of taking two run containers and +// generating a new one, we actually try to +// do the computation in-place through a +// technique invented by @gssiyankai (pinging him!). +// What you do is you check whether the host +// run container has lots of extra capacity. +// If it does, you move its data at the end of +// the backing array, and then you write +// the answer at the beginning. What this +// trick does is minimize memory allocations. +// +func (rc *runContainer16) lazyIOR(a container) container { + // not lazy at the moment + return rc.ior(a) +} + +// lazyOR is described above in lazyIOR. +func (rc *runContainer16) lazyOR(a container) container { + // not lazy at the moment + return rc.or(a) +} + +func (rc *runContainer16) intersects(a container) bool { + // TODO: optimize by doing inplace/less allocation, possibly? + isect := rc.and(a) + return isect.getCardinality() > 0 +} + +func (rc *runContainer16) xor(a container) container { + switch c := a.(type) { + case *arrayContainer: + return rc.xorArray(c) + case *bitmapContainer: + return rc.xorBitmap(c) + case *runContainer16: + return rc.xorRunContainer16(c) + } + panic("unsupported container type") +} + +func (rc *runContainer16) iandNot(a container) container { + switch c := a.(type) { + case *arrayContainer: + return rc.iandNotArray(c) + case *bitmapContainer: + return rc.iandNotBitmap(c) + case *runContainer16: + return rc.iandNotRunContainer16(c) + } + panic("unsupported container type") +} + +// flip the values in the range [firstOfRange,endx) +func (rc *runContainer16) inot(firstOfRange, endx int) container { + if firstOfRange >= endx { + panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange)) + } + // TODO: minimize copies, do it all inplace; not() makes a copy. + rc = rc.Not(firstOfRange, endx) + return rc +} + +func (rc *runContainer16) getCardinality() int { + return int(rc.cardinality()) +} + +func (rc *runContainer16) rank(x uint16) int { + n := int64(len(rc.iv)) + xx := int64(x) + w, already, _ := rc.search(xx, nil) + if w < 0 { + return 0 + } + if !already && w == n-1 { + return rc.getCardinality() + } + var rnk int64 + if !already { + for i := int64(0); i <= w; i++ { + rnk += rc.iv[i].runlen() + } + return int(rnk) + } + for i := int64(0); i < w; i++ { + rnk += rc.iv[i].runlen() + } + rnk += int64(x-rc.iv[w].start) + 1 + return int(rnk) +} + +func (rc *runContainer16) selectInt(x uint16) int { + return rc.selectInt16(x) +} + +func (rc *runContainer16) andNotRunContainer16(b *runContainer16) container { + return rc.AndNotRunContainer16(b) +} + +func (rc *runContainer16) andNotArray(ac *arrayContainer) container { + rcb := rc.toBitmapContainer() + acb := ac.toBitmapContainer() + return rcb.andNotBitmap(acb) +} + +func (rc *runContainer16) andNotBitmap(bc *bitmapContainer) container { + rcb := rc.toBitmapContainer() + return rcb.andNotBitmap(bc) +} + +func (rc *runContainer16) toBitmapContainer() *bitmapContainer { + bc := newBitmapContainer() + for i := range rc.iv { + bc.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1) + } + bc.computeCardinality() + return bc +} + +func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container { + rcb := rc.toBitmapContainer() + x2b := x2.toBitmapContainer() + rcb.iandNotBitmapSurely(x2b) + // TODO: check size and optimize the return value + // TODO: is inplace modification really required? If not, elide the copy. + rc2 := newRunContainer16FromBitmapContainer(rcb) + *rc = *rc2 + return rc +} + +func (rc *runContainer16) iandNotArray(ac *arrayContainer) container { + rcb := rc.toBitmapContainer() + acb := ac.toBitmapContainer() + rcb.iandNotBitmapSurely(acb) + // TODO: check size and optimize the return value + // TODO: is inplace modification really required? If not, elide the copy. + rc2 := newRunContainer16FromBitmapContainer(rcb) + *rc = *rc2 + return rc +} + +func (rc *runContainer16) iandNotBitmap(bc *bitmapContainer) container { + rcb := rc.toBitmapContainer() + rcb.iandNotBitmapSurely(bc) + // TODO: check size and optimize the return value + // TODO: is inplace modification really required? If not, elide the copy. + rc2 := newRunContainer16FromBitmapContainer(rcb) + *rc = *rc2 + return rc +} + +func (rc *runContainer16) xorRunContainer16(x2 *runContainer16) container { + rcb := rc.toBitmapContainer() + x2b := x2.toBitmapContainer() + return rcb.xorBitmap(x2b) +} + +func (rc *runContainer16) xorArray(ac *arrayContainer) container { + rcb := rc.toBitmapContainer() + acb := ac.toBitmapContainer() + return rcb.xorBitmap(acb) +} + +func (rc *runContainer16) xorBitmap(bc *bitmapContainer) container { + rcb := rc.toBitmapContainer() + return rcb.xorBitmap(bc) +} + +// convert to bitmap or array *if needed* +func (rc *runContainer16) toEfficientContainer() container { + + // runContainer16SerializedSizeInBytes(numRuns) + sizeAsRunContainer := rc.getSizeInBytes() + sizeAsBitmapContainer := bitmapContainerSizeInBytes() + card := int(rc.cardinality()) + sizeAsArrayContainer := arrayContainerSizeInBytes(card) + if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) { + return rc + } + if card <= arrayDefaultMaxSize { + return rc.toArrayContainer() + } + bc := newBitmapContainerFromRun(rc) + return bc +} + +func (rc *runContainer16) toArrayContainer() *arrayContainer { + ac := newArrayContainer() + for i := range rc.iv { + ac.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1) + } + return ac +} + +func newRunContainer16FromContainer(c container) *runContainer16 { + + switch x := c.(type) { + case *runContainer16: + return x.Clone() + case *arrayContainer: + return newRunContainer16FromArray(x) + case *bitmapContainer: + return newRunContainer16FromBitmapContainer(x) + } + panic("unsupported container type") +} + +// And finds the intersection of rc and b. +func (rc *runContainer16) And(b *Bitmap) *Bitmap { + out := NewBitmap() + for _, p := range rc.iv { + plast := p.last() + for i := p.start; i <= plast; i++ { + if b.Contains(uint32(i)) { + out.Add(uint32(i)) + } + } + } + return out +} + +// Xor returns the exclusive-or of rc and b. +func (rc *runContainer16) Xor(b *Bitmap) *Bitmap { + out := b.Clone() + for _, p := range rc.iv { + plast := p.last() + for v := p.start; v <= plast; v++ { + w := uint32(v) + if out.Contains(w) { + out.RemoveRange(uint64(w), uint64(w+1)) + } else { + out.Add(w) + } + } + } + return out +} + +// Or returns the union of rc and b. +func (rc *runContainer16) Or(b *Bitmap) *Bitmap { + out := b.Clone() + for _, p := range rc.iv { + plast := p.last() + for v := p.start; v <= plast; v++ { + out.Add(uint32(v)) + } + } + return out +} + +// serializedSizeInBytes returns the number of bytes of memory +// required by this runContainer16. This is for the +// Roaring format, as specified https://github.com/RoaringBitmap/RoaringFormatSpec/ +func (rc *runContainer16) serializedSizeInBytes() int { + // number of runs in one uint16, then each run + // needs two more uint16 + return 2 + len(rc.iv)*4 +} + +func (rc *runContainer16) addOffset(x uint16) []container { + low := newRunContainer16() + high := newRunContainer16() + + for _, iv := range rc.iv { + val := int(iv.start) + int(x) + finalVal := int(val) + int(iv.length) + if val <= 0xffff { + if finalVal <= 0xffff { + low.iv = append(low.iv, interval16{uint16(val), iv.length}) + } else { + low.iv = append(low.iv, interval16{uint16(val), uint16(0xffff - val)}) + high.iv = append(high.iv, interval16{uint16(0), uint16(finalVal & 0xffff)}) + } + } else { + high.iv = append(high.iv, interval16{uint16(val & 0xffff), iv.length}) + } + } + return []container{low, high} +} diff --git a/vendor/github.com/RoaringBitmap/roaring/rle16_gen.go b/vendor/github.com/RoaringBitmap/roaring/runcontainer_gen.go index 05bf4463f1..84537d087f 100644 --- a/vendor/github.com/RoaringBitmap/roaring/rle16_gen.go +++ b/vendor/github.com/RoaringBitmap/roaring/runcontainer_gen.go @@ -6,7 +6,7 @@ package roaring import "github.com/tinylib/msgp/msgp" -// DecodeMsg implements msgp.Decodable +// Deprecated: DecodeMsg implements msgp.Decodable func (z *addHelper16) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field @@ -169,7 +169,7 @@ func (z *addHelper16) DecodeMsg(dc *msgp.Reader) (err error) { return } -// EncodeMsg implements msgp.Encodable +// Deprecated: EncodeMsg implements msgp.Encodable func (z *addHelper16) EncodeMsg(en *msgp.Writer) (err error) { // map header, size 5 // write "runstart" @@ -284,7 +284,7 @@ func (z *addHelper16) EncodeMsg(en *msgp.Writer) (err error) { return } -// MarshalMsg implements msgp.Marshaler +// Deprecated: MarshalMsg implements msgp.Marshaler func (z *addHelper16) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) // map header, size 5 @@ -334,7 +334,7 @@ func (z *addHelper16) MarshalMsg(b []byte) (o []byte, err error) { return } -// UnmarshalMsg implements msgp.Unmarshaler +// Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *addHelper16) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field @@ -498,7 +498,7 @@ func (z *addHelper16) UnmarshalMsg(bts []byte) (o []byte, err error) { return } -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z *addHelper16) Msgsize() (s int) { s = 1 + 9 + msgp.Uint16Size + 7 + msgp.Uint16Size + 14 + msgp.Uint16Size + 2 + msgp.ArrayHeaderSize + (len(z.m) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 3 if z.rc == nil { @@ -509,7 +509,7 @@ func (z *addHelper16) Msgsize() (s int) { return } -// DecodeMsg implements msgp.Decodable +// Deprecated: DecodeMsg implements msgp.Decodable func (z *interval16) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field @@ -546,7 +546,7 @@ func (z *interval16) DecodeMsg(dc *msgp.Reader) (err error) { return } -// EncodeMsg implements msgp.Encodable +// Deprecated: EncodeMsg implements msgp.Encodable func (z interval16) EncodeMsg(en *msgp.Writer) (err error) { // map header, size 2 // write "start" @@ -570,7 +570,7 @@ func (z interval16) EncodeMsg(en *msgp.Writer) (err error) { return } -// MarshalMsg implements msgp.Marshaler +// Deprecated: MarshalMsg implements msgp.Marshaler func (z interval16) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) // map header, size 2 @@ -583,7 +583,7 @@ func (z interval16) MarshalMsg(b []byte) (o []byte, err error) { return } -// UnmarshalMsg implements msgp.Unmarshaler +// Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *interval16) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field @@ -621,13 +621,13 @@ func (z *interval16) UnmarshalMsg(bts []byte) (o []byte, err error) { return } -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z interval16) Msgsize() (s int) { s = 1 + 6 + msgp.Uint16Size + 5 + msgp.Uint16Size return } -// DecodeMsg implements msgp.Decodable +// Deprecated: DecodeMsg implements msgp.Decodable func (z *runContainer16) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field @@ -701,7 +701,7 @@ func (z *runContainer16) DecodeMsg(dc *msgp.Reader) (err error) { return } -// EncodeMsg implements msgp.Encodable +// Deprecated: EncodeMsg implements msgp.Encodable func (z *runContainer16) EncodeMsg(en *msgp.Writer) (err error) { // map header, size 2 // write "iv" @@ -746,7 +746,7 @@ func (z *runContainer16) EncodeMsg(en *msgp.Writer) (err error) { return } -// MarshalMsg implements msgp.Marshaler +// Deprecated: MarshalMsg implements msgp.Marshaler func (z *runContainer16) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) // map header, size 2 @@ -768,7 +768,7 @@ func (z *runContainer16) MarshalMsg(b []byte) (o []byte, err error) { return } -// UnmarshalMsg implements msgp.Unmarshaler +// Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *runContainer16) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field @@ -843,13 +843,13 @@ func (z *runContainer16) UnmarshalMsg(bts []byte) (o []byte, err error) { return } -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z *runContainer16) Msgsize() (s int) { s = 1 + 3 + msgp.ArrayHeaderSize + (len(z.iv) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 5 + msgp.Int64Size return } -// DecodeMsg implements msgp.Decodable +// Deprecated: DecodeMsg implements msgp.Decodable func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte _ = field @@ -891,11 +891,6 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) { if err != nil { return } - case "curSeq": - z.curSeq, err = dc.ReadInt64() - if err != nil { - return - } default: err = dc.Skip() if err != nil { @@ -906,11 +901,11 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) { return } -// EncodeMsg implements msgp.Encodable +// Deprecated: EncodeMsg implements msgp.Encodable func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) { - // map header, size 4 + // map header, size 3 // write "rc" - err = en.Append(0x84, 0xa2, 0x72, 0x63) + err = en.Append(0x83, 0xa2, 0x72, 0x63) if err != nil { return err } @@ -943,24 +938,15 @@ func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) { if err != nil { return } - // write "curSeq" - err = en.Append(0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71) - if err != nil { - return err - } - err = en.WriteInt64(z.curSeq) - if err != nil { - return - } return } -// MarshalMsg implements msgp.Marshaler +// Deprecated: MarshalMsg implements msgp.Marshaler func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) - // map header, size 4 + // map header, size 3 // string "rc" - o = append(o, 0x84, 0xa2, 0x72, 0x63) + o = append(o, 0x83, 0xa2, 0x72, 0x63) if z.rc == nil { o = msgp.AppendNil(o) } else { @@ -975,13 +961,10 @@ func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) { // string "curPosInIndex" o = append(o, 0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78) o = msgp.AppendUint16(o, z.curPosInIndex) - // string "curSeq" - o = append(o, 0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71) - o = msgp.AppendInt64(o, z.curSeq) return } -// UnmarshalMsg implements msgp.Unmarshaler +// Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field @@ -1023,11 +1006,6 @@ func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) { if err != nil { return } - case "curSeq": - z.curSeq, bts, err = msgp.ReadInt64Bytes(bts) - if err != nil { - return - } default: bts, err = msgp.Skip(bts) if err != nil { @@ -1039,7 +1017,7 @@ func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) { return } -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z *runIterator16) Msgsize() (s int) { s = 1 + 3 if z.rc == nil { @@ -1047,11 +1025,11 @@ func (z *runIterator16) Msgsize() (s int) { } else { s += z.rc.Msgsize() } - s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size + 7 + msgp.Int64Size + s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size return } -// DecodeMsg implements msgp.Decodable +// Deprecated: DecodeMsg implements msgp.Decodable func (z *uint16Slice) DecodeMsg(dc *msgp.Reader) (err error) { var zjpj uint32 zjpj, err = dc.ReadArrayHeader() @@ -1072,7 +1050,7 @@ func (z *uint16Slice) DecodeMsg(dc *msgp.Reader) (err error) { return } -// EncodeMsg implements msgp.Encodable +// Deprecated: EncodeMsg implements msgp.Encodable func (z uint16Slice) EncodeMsg(en *msgp.Writer) (err error) { err = en.WriteArrayHeader(uint32(len(z))) if err != nil { @@ -1087,7 +1065,7 @@ func (z uint16Slice) EncodeMsg(en *msgp.Writer) (err error) { return } -// MarshalMsg implements msgp.Marshaler +// Deprecated: MarshalMsg implements msgp.Marshaler func (z uint16Slice) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) o = msgp.AppendArrayHeader(o, uint32(len(z))) @@ -1097,7 +1075,7 @@ func (z uint16Slice) MarshalMsg(b []byte) (o []byte, err error) { return } -// UnmarshalMsg implements msgp.Unmarshaler +// Deprecated: UnmarshalMsg implements msgp.Unmarshaler func (z *uint16Slice) UnmarshalMsg(bts []byte) (o []byte, err error) { var zgmo uint32 zgmo, bts, err = msgp.ReadArrayHeaderBytes(bts) @@ -1119,7 +1097,7 @@ func (z *uint16Slice) UnmarshalMsg(bts []byte) (o []byte, err error) { return } -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (z uint16Slice) Msgsize() (s int) { s = msgp.ArrayHeaderSize + (len(z) * (msgp.Uint16Size)) return diff --git a/vendor/github.com/RoaringBitmap/roaring/serialization.go b/vendor/github.com/RoaringBitmap/roaring/serialization.go index 59c39a6630..7b7ed29b0a 100644 --- a/vendor/github.com/RoaringBitmap/roaring/serialization.go +++ b/vendor/github.com/RoaringBitmap/roaring/serialization.go @@ -2,8 +2,6 @@ package roaring import ( "encoding/binary" - "errors" - "fmt" "io" "github.com/tinylib/msgp/msgp" @@ -22,14 +20,6 @@ func (b *runContainer16) writeTo(stream io.Writer) (int, error) { return stream.Write(buf) } -func (b *runContainer32) writeToMsgpack(stream io.Writer) (int, error) { - bts, err := b.MarshalMsg(nil) - if err != nil { - return 0, err - } - return stream.Write(bts) -} - func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) { bts, err := b.MarshalMsg(nil) if err != nil { @@ -38,46 +28,7 @@ func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) { return stream.Write(bts) } -func (b *runContainer32) readFromMsgpack(stream io.Reader) (int, error) { - err := msgp.Decode(stream, b) - return 0, err -} - func (b *runContainer16) readFromMsgpack(stream io.Reader) (int, error) { err := msgp.Decode(stream, b) return 0, err } - -var errCorruptedStream = errors.New("insufficient/odd number of stored bytes, corrupted stream detected") - -func (b *runContainer16) readFrom(stream io.Reader) (int, error) { - b.iv = b.iv[:0] - b.card = 0 - var numRuns uint16 - err := binary.Read(stream, binary.LittleEndian, &numRuns) - if err != nil { - return 0, err - } - nr := int(numRuns) - encRun := make([]uint16, 2*nr) - by := make([]byte, 4*nr) - err = binary.Read(stream, binary.LittleEndian, &by) - if err != nil { - return 0, err - } - for i := range encRun { - if len(by) < 2 { - return 0, errCorruptedStream - } - encRun[i] = binary.LittleEndian.Uint16(by) - by = by[2:] - } - for i := 0; i < nr; i++ { - if i > 0 && b.iv[i-1].last() >= encRun[i*2] { - return 0, fmt.Errorf("error: stored runContainer had runs that were not in sorted order!! (b.iv[i-1=%v].last = %v >= encRun[i=%v] = %v)", i-1, b.iv[i-1].last(), i, encRun[i*2]) - } - b.iv = append(b.iv, interval16{start: encRun[i*2], length: encRun[i*2+1]}) - b.card += int64(encRun[i*2+1]) + 1 - } - return 0, err -} diff --git a/vendor/github.com/RoaringBitmap/roaring/serialization_generic.go b/vendor/github.com/RoaringBitmap/roaring/serialization_generic.go index 7fcef7691b..4b9d9e3d48 100644 --- a/vendor/github.com/RoaringBitmap/roaring/serialization_generic.go +++ b/vendor/github.com/RoaringBitmap/roaring/serialization_generic.go @@ -4,6 +4,7 @@ package roaring import ( "encoding/binary" + "errors" "io" ) @@ -26,6 +27,10 @@ func (b *arrayContainer) readFrom(stream io.Reader) (int, error) { } func (b *bitmapContainer) writeTo(stream io.Writer) (int, error) { + if b.cardinality <= arrayDefaultMaxSize { + return 0, errors.New("refusing to write bitmap container with cardinality of array container") + } + // Write set buf := make([]byte, 8*len(b.bitmap)) for i, v := range b.bitmap { @@ -69,6 +74,16 @@ func uint64SliceAsByteSlice(slice []uint64) []byte { return by } +func uint16SliceAsByteSlice(slice []uint16) []byte { + by := make([]byte, len(slice)*2) + + for i, v := range slice { + binary.LittleEndian.PutUint16(by[i*2:], v) + } + + return by +} + func byteSliceAsUint16Slice(slice []byte) []uint16 { if len(slice)%2 != 0 { panic("Slice size should be divisible by 2") diff --git a/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go b/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go index c1d3ad3046..818a06c80b 100644 --- a/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go +++ b/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go @@ -3,8 +3,10 @@ package roaring import ( + "errors" "io" "reflect" + "runtime" "unsafe" ) @@ -14,26 +16,13 @@ func (ac *arrayContainer) writeTo(stream io.Writer) (int, error) { } func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) { + if bc.cardinality <= arrayDefaultMaxSize { + return 0, errors.New("refusing to write bitmap container with cardinality of array container") + } buf := uint64SliceAsByteSlice(bc.bitmap) return stream.Write(buf) } -// readFrom reads an arrayContainer from stream. -// PRE-REQUISITE: you must size the arrayContainer correctly (allocate b.content) -// *before* you call readFrom. We can't guess the size in the stream -// by this point. -func (ac *arrayContainer) readFrom(stream io.Reader) (int, error) { - buf := uint16SliceAsByteSlice(ac.content) - return io.ReadFull(stream, buf) -} - -func (bc *bitmapContainer) readFrom(stream io.Reader) (int, error) { - buf := uint64SliceAsByteSlice(bc.bitmap) - n, err := io.ReadFull(stream, buf) - bc.computeCardinality() - return n, err -} - func uint64SliceAsByteSlice(slice []uint64) []byte { // make a new slice header header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) @@ -42,8 +31,12 @@ func uint64SliceAsByteSlice(slice []uint64) []byte { header.Len *= 8 header.Cap *= 8 + // instantiate result and use KeepAlive so data isn't unmapped. + result := *(*[]byte)(unsafe.Pointer(&header)) + runtime.KeepAlive(&slice) + // return it - return *(*[]byte)(unsafe.Pointer(&header)) + return result } func uint16SliceAsByteSlice(slice []uint16) []byte { @@ -54,8 +47,12 @@ func uint16SliceAsByteSlice(slice []uint16) []byte { header.Len *= 2 header.Cap *= 2 + // instantiate result and use KeepAlive so data isn't unmapped. + result := *(*[]byte)(unsafe.Pointer(&header)) + runtime.KeepAlive(&slice) + // return it - return *(*[]byte)(unsafe.Pointer(&header)) + return result } func (bc *bitmapContainer) asLittleEndianByteSlice() []byte { @@ -64,50 +61,74 @@ func (bc *bitmapContainer) asLittleEndianByteSlice() []byte { // Deserialization code follows -func byteSliceAsUint16Slice(slice []byte) []uint16 { +//// +// These methods (byteSliceAsUint16Slice,...) do not make copies, +// they are pointer-based (unsafe). The caller is responsible to +// ensure that the input slice does not get garbage collected, deleted +// or modified while you hold the returned slince. +//// +func byteSliceAsUint16Slice(slice []byte) (result []uint16) { // here we create a new slice holder if len(slice)%2 != 0 { panic("Slice size should be divisible by 2") } + // reference: https://go101.org/article/unsafe.html // make a new slice header - header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) + bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) + rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result)) - // update its capacity and length - header.Len /= 2 - header.Cap /= 2 + // transfer the data from the given slice to a new variable (our result) + rHeader.Data = bHeader.Data + rHeader.Len = bHeader.Len / 2 + rHeader.Cap = bHeader.Cap / 2 - // return it - return *(*[]uint16)(unsafe.Pointer(&header)) + // instantiate result and use KeepAlive so data isn't unmapped. + runtime.KeepAlive(&slice) // it is still crucial, GC can free it) + + // return result + return } -func byteSliceAsUint64Slice(slice []byte) []uint64 { +func byteSliceAsUint64Slice(slice []byte) (result []uint64) { if len(slice)%8 != 0 { panic("Slice size should be divisible by 8") } + // reference: https://go101.org/article/unsafe.html // make a new slice header - header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) + bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) + rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result)) - // update its capacity and length - header.Len /= 8 - header.Cap /= 8 + // transfer the data from the given slice to a new variable (our result) + rHeader.Data = bHeader.Data + rHeader.Len = bHeader.Len / 8 + rHeader.Cap = bHeader.Cap / 8 - // return it - return *(*[]uint64)(unsafe.Pointer(&header)) + // instantiate result and use KeepAlive so data isn't unmapped. + runtime.KeepAlive(&slice) // it is still crucial, GC can free it) + + // return result + return } -func byteSliceAsInterval16Slice(slice []byte) []interval16 { +func byteSliceAsInterval16Slice(slice []byte) (result []interval16) { if len(slice)%4 != 0 { panic("Slice size should be divisible by 4") } + // reference: https://go101.org/article/unsafe.html // make a new slice header - header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) + bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) + rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result)) - // update its capacity and length - header.Len /= 4 - header.Cap /= 4 + // transfer the data from the given slice to a new variable (our result) + rHeader.Data = bHeader.Data + rHeader.Len = bHeader.Len / 4 + rHeader.Cap = bHeader.Cap / 4 - // return it - return *(*[]interval16)(unsafe.Pointer(&header)) + // instantiate result and use KeepAlive so data isn't unmapped. + runtime.KeepAlive(&slice) // it is still crucial, GC can free it) + + // return result + return } diff --git a/vendor/github.com/RoaringBitmap/roaring/shortiterator.go b/vendor/github.com/RoaringBitmap/roaring/shortiterator.go index ef0acbd1ca..15b78bd0c1 100644 --- a/vendor/github.com/RoaringBitmap/roaring/shortiterator.go +++ b/vendor/github.com/RoaringBitmap/roaring/shortiterator.go @@ -5,6 +5,12 @@ type shortIterable interface { next() uint16 } +type shortPeekable interface { + shortIterable + peekNext() uint16 + advanceIfNeeded(minval uint16) +} + type shortIterator struct { slice []uint16 loc int @@ -19,3 +25,28 @@ func (si *shortIterator) next() uint16 { si.loc++ return a } + +func (si *shortIterator) peekNext() uint16 { + return si.slice[si.loc] +} + +func (si *shortIterator) advanceIfNeeded(minval uint16) { + if si.hasNext() && si.peekNext() < minval { + si.loc = advanceUntil(si.slice, si.loc, len(si.slice), minval) + } +} + +type reverseIterator struct { + slice []uint16 + loc int +} + +func (si *reverseIterator) hasNext() bool { + return si.loc >= 0 +} + +func (si *reverseIterator) next() uint16 { + a := si.slice[si.loc] + si.loc-- + return a +} diff --git a/vendor/github.com/RoaringBitmap/roaring/util.go b/vendor/github.com/RoaringBitmap/roaring/util.go index d212660d58..3a9a47236b 100644 --- a/vendor/github.com/RoaringBitmap/roaring/util.go +++ b/vendor/github.com/RoaringBitmap/roaring/util.go @@ -14,6 +14,17 @@ const ( serialCookie = 12347 // runs, arrays, and bitmaps noOffsetThreshold = 4 + // MaxUint32 is the largest uint32 value. + MaxUint32 = 4294967295 + + // MaxRange is One more than the maximum allowed bitmap bit index. For use as an upper + // bound for ranges. + MaxRange uint64 = MaxUint32 + 1 + + // MaxUint16 is the largest 16 bit unsigned int. + // This is the largest value an interval16 can store. + MaxUint16 = 65535 + // Compute wordSizeInBytes, the size of a word in bytes. _m = ^uint64(0) _logS = _m>>8&1 + _m>>16&1 + _m>>32&1 @@ -114,7 +125,6 @@ func flipBitmapRange(bitmap []uint64, start int, end int) { endword := (end - 1) / 64 bitmap[firstword] ^= ^(^uint64(0) << uint(start%64)) for i := firstword; i < endword; i++ { - //p("flipBitmapRange on i=%v", i) bitmap[i] = ^bitmap[i] } bitmap[endword] ^= ^uint64(0) >> (uint(-end) % 64) @@ -292,24 +302,3 @@ func minOfUint16(a, b uint16) uint16 { } return b } - -func maxInt(a, b int) int { - if a > b { - return a - } - return b -} - -func maxUint16(a, b uint16) uint16 { - if a > b { - return a - } - return b -} - -func minUint16(a, b uint16) uint16 { - if a < b { - return a - } - return b -} diff --git a/vendor/github.com/blevesearch/bleve/.travis.yml b/vendor/github.com/blevesearch/bleve/.travis.yml index 35f7b60f23..e00e7b9948 100644 --- a/vendor/github.com/blevesearch/bleve/.travis.yml +++ b/vendor/github.com/blevesearch/bleve/.travis.yml @@ -3,9 +3,9 @@ sudo: false language: go go: - - "1.9.x" - "1.10.x" - "1.11.x" + - "1.12.x" script: - go get golang.org/x/tools/cmd/cover @@ -15,7 +15,12 @@ script: - gvt restore - go test -race -v $(go list ./... | grep -v vendor/) - go vet $(go list ./... | grep -v vendor/) - - errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/) + - go test ./test -v -indexType scorch + - if [[ ${TRAVIS_GO_VERSION} =~ ^1\.10 ]]; then + echo "errcheck skipped for go version" $TRAVIS_GO_VERSION; + else + errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/); + fi - docs/project-code-coverage.sh - docs/build_children.sh diff --git a/vendor/github.com/blevesearch/bleve/document/field_text.go b/vendor/github.com/blevesearch/bleve/document/field_text.go index c8e871c9d5..6bd74c7127 100644 --- a/vendor/github.com/blevesearch/bleve/document/field_text.go +++ b/vendor/github.com/blevesearch/bleve/document/field_text.go @@ -86,6 +86,10 @@ func (t *TextField) Analyze() (int, analysis.TokenFrequencies) { return fieldLength, tokenFreqs } +func (t *TextField) Analyzer() *analysis.Analyzer { + return t.analyzer +} + func (t *TextField) Value() []byte { return t.value } diff --git a/vendor/github.com/blevesearch/bleve/geo/geo.go b/vendor/github.com/blevesearch/bleve/geo/geo.go index 86861b4f3b..583451e308 100644 --- a/vendor/github.com/blevesearch/bleve/geo/geo.go +++ b/vendor/github.com/blevesearch/bleve/geo/geo.go @@ -37,6 +37,12 @@ var geoTolerance = 1E-6 var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0 var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0 +// Point represents a geo point. +type Point struct { + Lon float64 + Lat float64 +} + // MortonHash computes the morton hash value for the provided geo point // This point is ordered as lon, lat. func MortonHash(lon, lat float64) uint64 { @@ -168,3 +174,35 @@ func checkLongitude(longitude float64) error { } return nil } + +func BoundingRectangleForPolygon(polygon []Point) ( + float64, float64, float64, float64, error) { + err := checkLongitude(polygon[0].Lon) + if err != nil { + return 0, 0, 0, 0, err + } + err = checkLatitude(polygon[0].Lat) + if err != nil { + return 0, 0, 0, 0, err + } + maxY, minY := polygon[0].Lat, polygon[0].Lat + maxX, minX := polygon[0].Lon, polygon[0].Lon + for i := 1; i < len(polygon); i++ { + err := checkLongitude(polygon[i].Lon) + if err != nil { + return 0, 0, 0, 0, err + } + err = checkLatitude(polygon[i].Lat) + if err != nil { + return 0, 0, 0, 0, err + } + + maxY = math.Max(maxY, polygon[i].Lat) + minY = math.Min(minY, polygon[i].Lat) + + maxX = math.Max(maxX, polygon[i].Lon) + minX = math.Min(minX, polygon[i].Lon) + } + + return minX, maxY, maxX, minY, nil +} diff --git a/vendor/github.com/blevesearch/bleve/geo/geohash.go b/vendor/github.com/blevesearch/bleve/geo/geohash.go index 35db720c0f..d3d4dfa8b5 100644 --- a/vendor/github.com/blevesearch/bleve/geo/geohash.go +++ b/vendor/github.com/blevesearch/bleve/geo/geohash.go @@ -1,32 +1,21 @@ -// The code here was obtained from: -// https://github.com/mmcloughlin/geohash - -// The MIT License (MIT) -// Copyright (c) 2015 Michael McLoughlin -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: - -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. - -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. +// Copyright (c) 2019 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// This implementation is inspired from the geohash-js +// ref: https://github.com/davetroy/geohash-js package geo -import ( - "math" -) - // encoding encapsulates an encoding defined by a given base32 alphabet. type encoding struct { enc string @@ -47,128 +36,76 @@ func newEncoding(encoder string) *encoding { return e } -// Decode string into bits of a 64-bit word. The string s may be at most 12 -// characters. -func (e *encoding) decode(s string) uint64 { - x := uint64(0) - for i := 0; i < len(s); i++ { - x = (x << 5) | uint64(e.dec[s[i]]) - } - return x -} - -// Encode bits of 64-bit word into a string. -func (e *encoding) encode(x uint64) string { - b := [12]byte{} - for i := 0; i < 12; i++ { - b[11-i] = e.enc[x&0x1f] - x >>= 5 - } - return string(b[:]) -} - -// Base32Encoding with the Geohash alphabet. +// base32encoding with the Geohash alphabet. var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz") -// BoundingBox returns the region encoded by the given string geohash. -func geoBoundingBox(hash string) geoBox { - bits := uint(5 * len(hash)) - inthash := base32encoding.decode(hash) - return geoBoundingBoxIntWithPrecision(inthash, bits) -} - -// Box represents a rectangle in latitude/longitude space. -type geoBox struct { - minLat float64 - maxLat float64 - minLng float64 - maxLng float64 -} - -// Round returns a point inside the box, making an effort to round to minimal -// precision. -func (b geoBox) round() (lat, lng float64) { - x := maxDecimalPower(b.maxLat - b.minLat) - lat = math.Ceil(b.minLat/x) * x - x = maxDecimalPower(b.maxLng - b.minLng) - lng = math.Ceil(b.minLng/x) * x - return -} - -// precalculated for performance -var exp232 = math.Exp2(32) - -// errorWithPrecision returns the error range in latitude and longitude for in -// integer geohash with bits of precision. -func errorWithPrecision(bits uint) (latErr, lngErr float64) { - b := int(bits) - latBits := b / 2 - lngBits := b - latBits - latErr = math.Ldexp(180.0, -latBits) - lngErr = math.Ldexp(360.0, -lngBits) - return -} - -// minDecimalPlaces returns the minimum number of decimal places such that -// there must exist an number with that many places within any range of width -// r. This is intended for returning minimal precision coordinates inside a -// box. -func maxDecimalPower(r float64) float64 { - m := int(math.Floor(math.Log10(r))) - return math.Pow10(m) -} - -// Encode the position of x within the range -r to +r as a 32-bit integer. -func encodeRange(x, r float64) uint32 { - p := (x + r) / (2 * r) - return uint32(p * exp232) -} - -// Decode the 32-bit range encoding X back to a value in the range -r to +r. -func decodeRange(X uint32, r float64) float64 { - p := float64(X) / exp232 - x := 2*r*p - r - return x -} - -// Squash the even bitlevels of X into a 32-bit word. Odd bitlevels of X are -// ignored, and may take any value. -func squash(X uint64) uint32 { - X &= 0x5555555555555555 - X = (X | (X >> 1)) & 0x3333333333333333 - X = (X | (X >> 2)) & 0x0f0f0f0f0f0f0f0f - X = (X | (X >> 4)) & 0x00ff00ff00ff00ff - X = (X | (X >> 8)) & 0x0000ffff0000ffff - X = (X | (X >> 16)) & 0x00000000ffffffff - return uint32(X) -} +var masks = []uint64{16, 8, 4, 2, 1} + +// DecodeGeoHash decodes the string geohash faster with +// higher precision. This api is in experimental phase. +func DecodeGeoHash(geoHash string) (float64, float64) { + even := true + lat := []float64{-90.0, 90.0} + lon := []float64{-180.0, 180.0} + + for i := 0; i < len(geoHash); i++ { + cd := uint64(base32encoding.dec[geoHash[i]]) + for j := 0; j < 5; j++ { + if even { + if cd&masks[j] > 0 { + lon[0] = (lon[0] + lon[1]) / 2 + } else { + lon[1] = (lon[0] + lon[1]) / 2 + } + } else { + if cd&masks[j] > 0 { + lat[0] = (lat[0] + lat[1]) / 2 + } else { + lat[1] = (lat[0] + lat[1]) / 2 + } + } + even = !even + } + } -// Deinterleave the bits of X into 32-bit words containing the even and odd -// bitlevels of X, respectively. -func deinterleave(X uint64) (uint32, uint32) { - return squash(X), squash(X >> 1) + return (lat[0] + lat[1]) / 2, (lon[0] + lon[1]) / 2 } -// BoundingBoxIntWithPrecision returns the region encoded by the integer -// geohash with the specified precision. -func geoBoundingBoxIntWithPrecision(hash uint64, bits uint) geoBox { - fullHash := hash << (64 - bits) - latInt, lngInt := deinterleave(fullHash) - lat := decodeRange(latInt, 90) - lng := decodeRange(lngInt, 180) - latErr, lngErr := errorWithPrecision(bits) - return geoBox{ - minLat: lat, - maxLat: lat + latErr, - minLng: lng, - maxLng: lng + lngErr, +func EncodeGeoHash(lat, lon float64) string { + even := true + lats := []float64{-90.0, 90.0} + lons := []float64{-180.0, 180.0} + precision := 12 + var ch, bit uint64 + var geoHash string + + for len(geoHash) < precision { + if even { + mid := (lons[0] + lons[1]) / 2 + if lon > mid { + ch |= masks[bit] + lons[0] = mid + } else { + lons[1] = mid + } + } else { + mid := (lats[0] + lats[1]) / 2 + if lat > mid { + ch |= masks[bit] + lats[0] = mid + } else { + lats[1] = mid + } + } + even = !even + if bit < 4 { + bit++ + } else { + geoHash += string(base32encoding.enc[ch]) + ch = 0 + bit = 0 + } } -} - -// ---------------------------------------------------------------------- -// Decode the string geohash to a (lat, lng) point. -func GeoHashDecode(hash string) (lat, lng float64) { - box := geoBoundingBox(hash) - return box.round() + return geoHash } diff --git a/vendor/github.com/blevesearch/bleve/geo/parse.go b/vendor/github.com/blevesearch/bleve/geo/parse.go index 0511fea7b6..5d833d9110 100644 --- a/vendor/github.com/blevesearch/bleve/geo/parse.go +++ b/vendor/github.com/blevesearch/bleve/geo/parse.go @@ -85,7 +85,7 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) { } } else { // geohash - lat, lon = GeoHashDecode(geoStr) + lat, lon = DecodeGeoHash(geoStr) foundLat = true foundLon = true } diff --git a/vendor/github.com/blevesearch/bleve/index.go b/vendor/github.com/blevesearch/bleve/index.go index 99357eee01..ef6ede9343 100644 --- a/vendor/github.com/blevesearch/bleve/index.go +++ b/vendor/github.com/blevesearch/bleve/index.go @@ -117,6 +117,8 @@ func (b *Batch) String() string { // be re-used in the future. func (b *Batch) Reset() { b.internal.Reset() + b.lastDocSize = 0 + b.totalSize = 0 } func (b *Batch) Merge(o *Batch) { diff --git a/vendor/github.com/blevesearch/bleve/index/index.go b/vendor/github.com/blevesearch/bleve/index/index.go index 6aa444cfd8..3e866f3aab 100644 --- a/vendor/github.com/blevesearch/bleve/index/index.go +++ b/vendor/github.com/blevesearch/bleve/index/index.go @@ -121,6 +121,10 @@ type IndexReaderOnly interface { FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error) } +type IndexReaderContains interface { + FieldDictContains(field string) (FieldDictContains, error) +} + // FieldTerms contains the terms used by a document, keyed by field type FieldTerms map[string][]string @@ -230,6 +234,10 @@ type FieldDict interface { Close() error } +type FieldDictContains interface { + Contains(key []byte) (bool, error) +} + // DocIDReader is the interface exposing enumeration of documents identifiers. // Close the reader to release associated resources. type DocIDReader interface { diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go b/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go index 2d04bd38e5..ac627796f5 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go @@ -376,6 +376,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { fileSegments++ } } + } // before the newMerge introduction, need to clean the newly @@ -392,7 +393,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { } } } - // In case where all the docs in the newly merged segment getting // deleted by the time we reach here, can skip the introduction. if nextMerge.new != nil && @@ -424,7 +424,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { newSnapshot.AddRef() // 1 ref for the nextMerge.notify response newSnapshot.updateSize() - s.rootLock.Lock() // swap in new index snapshot newSnapshot.epoch = s.nextSnapshotEpoch @@ -502,7 +501,6 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { } newSnapshot.updateSize() - // swap in new snapshot rootPrev := s.root s.root = newSnapshot diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/merge.go b/vendor/github.com/blevesearch/bleve/index/scorch/merge.go index bcbf5b7106..d7144772fd 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/merge.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/merge.go @@ -18,6 +18,7 @@ import ( "encoding/json" "fmt" "os" + "strings" "sync/atomic" "time" @@ -151,13 +152,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1) return nil } - atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1) atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks))) // process tasks in serial for now var notifications []chan *IndexSnapshot + var filenames []string for _, task := range resultMergePlan.Tasks { if len(task.Segments) == 0 { atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1) @@ -182,6 +183,12 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, segmentsToMerge = append(segmentsToMerge, zapSeg) docsToDrop = append(docsToDrop, segSnapshot.deleted) } + // track the files getting merged for unsetting the + // removal ineligibility. This helps to unflip files + // even with fast merger, slow persister work flows. + path := zapSeg.Path() + filenames = append(filenames, + strings.TrimPrefix(path, s.path+string(os.PathSeparator))) } } } @@ -221,6 +228,11 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) return err } + err = zap.ValidateMerge(segmentsToMerge, nil, docsToDrop, seg.(*zap.Segment)) + if err != nil { + s.unmarkIneligibleForRemoval(filename) + return fmt.Errorf("merge validation failed: %v", err) + } oldNewDocNums = make(map[uint64][]uint64) for i, segNewDocNums := range newDocNums { oldNewDocNums[task.Segments[i].Id()] = segNewDocNums @@ -263,6 +275,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, } } + // once all the newly merged segment introductions are done, + // its safe to unflip the removal ineligibility for the replaced + // older segments + for _, f := range filenames { + s.unmarkIneligibleForRemoval(f) + } + return nil } @@ -311,6 +330,10 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, atomic.AddUint64(&s.stats.TotMemMergeErr, 1) return nil, 0, err } + err = zap.ValidateMerge(nil, sbs, sbsDrops, seg.(*zap.Segment)) + if err != nil { + return nil, 0, fmt.Errorf("in-memory merge validation failed: %v", err) + } // update persisted stats atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count()) diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/persister.go b/vendor/github.com/blevesearch/bleve/index/scorch/persister.go index 349ccdc0e9..064e9e6a85 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/persister.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/persister.go @@ -90,6 +90,9 @@ func (s *Scorch) persisterLoop() { var persistWatchers []*epochWatcher var lastPersistedEpoch, lastMergedEpoch uint64 var ew *epochWatcher + + var unpersistedCallbacks []index.BatchCallback + po, err := s.parsePersisterOptions() if err != nil { s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err)) @@ -111,7 +114,6 @@ OUTER: if ew != nil && ew.epoch > lastMergedEpoch { lastMergedEpoch = ew.epoch } - lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, lastMergedEpoch, persistWatchers, po) @@ -150,11 +152,25 @@ OUTER: _ = ourSnapshot.DecRef() break OUTER } + + // save this current snapshot's persistedCallbacks, to invoke during + // the retry attempt + unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...) + s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err)) _ = ourSnapshot.DecRef() atomic.AddUint64(&s.stats.TotPersistLoopErr, 1) continue OUTER } + + if unpersistedCallbacks != nil { + // in the event of this being a retry attempt for persisting a snapshot + // that had earlier failed, prepend the persistedCallbacks associated + // with earlier segment(s) to the latest persistedCallbacks + ourPersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...) + unpersistedCallbacks = nil + } + for i := range ourPersistedCallbacks { ourPersistedCallbacks[i](err) } @@ -179,7 +195,6 @@ OUTER: s.fireEvent(EventKindPersisterProgress, time.Since(startTime)) if changed { - s.removeOldData() atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1) continue OUTER } @@ -230,20 +245,19 @@ func notifyMergeWatchers(lastPersistedEpoch uint64, return watchersNext } -func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64, - persistWatchers []*epochWatcher, po *persisterOptions) (uint64, []*epochWatcher) { +func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, + lastMergedEpoch uint64, persistWatchers []*epochWatcher, + po *persisterOptions) (uint64, []*epochWatcher) { - // first, let the watchers proceed if they lag behind + // First, let the watchers proceed if they lag behind persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) - // check the merger lag by counting the segment files on disk, + // Check the merger lag by counting the segment files on disk, + numFilesOnDisk, _ := s.diskFileStats() + // On finding fewer files on disk, persister takes a short pause // for sufficient in-memory segments to pile up for the next // memory merge cum persist loop. - // On finding too many files on disk, persister pause until the merger - // catches up to reduce the segment file count under the threshold. - // But if there is memory pressure, then skip this sleep maneuvers. - numFilesOnDisk, _ := s.diskFileStats() if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) && po.PersisterNapTimeMSec > 0 && s.paused() == 0 { select { @@ -261,6 +275,17 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastM return lastMergedEpoch, persistWatchers } + // Finding too many files on disk could be due to two reasons. + // 1. Too many older snapshots awaiting the clean up. + // 2. The merger could be lagging behind on merging the disk files. + if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) { + s.removeOldData() + numFilesOnDisk, _ = s.diskFileStats() + } + + // Persister pause until the merger catches up to reduce the segment + // file count under the threshold. + // But if there is memory pressure, then skip this sleep maneuvers. OUTER: for po.PersisterNapUnderNumFiles > 0 && numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) && @@ -661,13 +686,13 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) { } func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { + rv := &IndexSnapshot{ parent: s, internal: make(map[string][]byte), refs: 1, creator: "loadSnapshot", } - var running uint64 c := snapshot.Cursor() for k, _ := c.First(); k != nil; k, _ = c.Next() { @@ -703,7 +728,6 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { running += segmentSnapshot.segment.Count() } } - return rv, nil } @@ -750,12 +774,11 @@ func (s *Scorch) removeOldData() { if err != nil { s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err)) } + atomic.AddUint64(&s.stats.TotSnapshotsRemovedFromMetaStore, uint64(removed)) - if removed > 0 { - err = s.removeOldZapFiles() - if err != nil { - s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err)) - } + err = s.removeOldZapFiles() + if err != nil { + s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err)) } } diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go b/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go index 3f3d8bffce..44a97d1ea6 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go @@ -41,12 +41,14 @@ const Version uint8 = 2 var ErrClosed = fmt.Errorf("scorch closed") type Scorch struct { + nextSegmentID uint64 + stats Stats + iStats internalStats + readOnly bool version uint8 config map[string]interface{} analysisQueue *index.AnalysisQueue - stats Stats - nextSegmentID uint64 path string unsafeBatch bool @@ -73,8 +75,6 @@ type Scorch struct { onEvent func(event Event) onAsyncError func(err error) - iStats internalStats - pauseLock sync.RWMutex pauseCount uint64 @@ -312,7 +312,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { // FIXME could sort ids list concurrent with analysis? - if len(batch.IndexOps) > 0 { + if numUpdates > 0 { go func() { for _, doc := range batch.IndexOps { if doc != nil { @@ -490,6 +490,9 @@ func (s *Scorch) StatsMap() map[string]interface{} { m["CurOnDiskBytes"] = numBytesUsedDisk m["CurOnDiskFiles"] = numFilesOnDisk + s.rootLock.RLock() + m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval)) + s.rootLock.RUnlock() // TODO: consider one day removing these backwards compatible // names for apps using the old names m["updates"] = m["TotUpdates"] diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go index 165a01bc16..fdc407a747 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go @@ -91,12 +91,20 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte, return &EmptyDictionaryIterator{} } +func (e *EmptyDictionary) Contains(key []byte) (bool, error) { + return false, nil +} + type EmptyDictionaryIterator struct{} func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { return nil, nil } +func (e *EmptyDictionaryIterator) Contains(key []byte) (bool, error) { + return false, nil +} + func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) { return nil, nil } diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go index a4836ebf8a..55299d8f7a 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go @@ -19,7 +19,10 @@ package segment -import "fmt" +import ( + "errors" + "fmt" +) const ( MaxVarintSize = 9 @@ -92,3 +95,82 @@ func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) { } return b[length:], v, nil } + +// ------------------------------------------------------------ + +type MemUvarintReader struct { + C int // index of next byte to read from S + S []byte +} + +func NewMemUvarintReader(s []byte) *MemUvarintReader { + return &MemUvarintReader{S: s} +} + +// Len returns the number of unread bytes. +func (r *MemUvarintReader) Len() int { + n := len(r.S) - r.C + if n < 0 { + return 0 + } + return n +} + +var ErrMemUvarintReaderOverflow = errors.New("MemUvarintReader overflow") + +// ReadUvarint reads an encoded uint64. The original code this was +// based on is at encoding/binary/ReadUvarint(). +func (r *MemUvarintReader) ReadUvarint() (uint64, error) { + var x uint64 + var s uint + var C = r.C + var S = r.S + + for { + b := S[C] + C++ + + if b < 0x80 { + r.C = C + + // why 63? The original code had an 'i += 1' loop var and + // checked for i > 9 || i == 9 ...; but, we no longer + // check for the i var, but instead check here for s, + // which is incremented by 7. So, 7*9 == 63. + // + // why the "extra" >= check? The normal case is that s < + // 63, so we check this single >= guard first so that we + // hit the normal, nil-error return pathway sooner. + if s >= 63 && (s > 63 || s == 63 && b > 1) { + return 0, ErrMemUvarintReaderOverflow + } + + return x | uint64(b)<<s, nil + } + + x |= uint64(b&0x7f) << s + s += 7 + } +} + +// SkipUvarint skips ahead one encoded uint64. +func (r *MemUvarintReader) SkipUvarint() { + for { + b := r.S[r.C] + r.C++ + + if b < 0x80 { + return + } + } +} + +// SkipBytes skips a count number of bytes. +func (r *MemUvarintReader) SkipBytes(count int) { + r.C = r.C + count +} + +func (r *MemUvarintReader) Reset(s []byte) { + r.C = 0 + r.S = s +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go index 3aa151d64d..3a31f41498 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go @@ -55,7 +55,7 @@ func LiteralPrefix(s *syntax.Regexp) string { s = s.Sub[0] } - if s.Op == syntax.OpLiteral { + if s.Op == syntax.OpLiteral && (s.Flags&syntax.FoldCase == 0) { return string(s.Rune) } diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go index b94d6f979f..34c2bc2048 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go @@ -59,6 +59,8 @@ type TermDictionary interface { AutomatonIterator(a vellum.Automaton, startKeyInclusive, endKeyExclusive []byte) DictionaryIterator OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator + + Contains(key []byte) (bool, error) } type DictionaryIterator interface { diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go index 91bfd4e24e..c02333cee0 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go @@ -16,6 +16,7 @@ package zap import ( "bufio" + "github.com/couchbase/vellum" "math" "os" ) @@ -137,6 +138,7 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32, docValueOffset: docValueOffset, dictLocs: dictLocs, fieldDvReaders: make(map[uint16]*docValueReader), + fieldFSTs: make(map[uint16]*vellum.FST), } sb.updateSize() diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go index 2c0e1bf2ad..ad4a8f8dc5 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go @@ -95,6 +95,10 @@ func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) return rv } +func (d *Dictionary) Contains(key []byte) (bool, error) { + return d.fst.Contains(key) +} + // Iterator returns an iterator for this dictionary func (d *Dictionary) Iterator() segment.DictionaryIterator { rv := &DictionaryIterator{ @@ -143,11 +147,14 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator } // need to increment the end position to be inclusive - endBytes := []byte(end) - if endBytes[len(endBytes)-1] < 0xff { - endBytes[len(endBytes)-1]++ - } else { - endBytes = append(endBytes, 0xff) + var endBytes []byte + if len(end) > 0 { + endBytes = []byte(end) + if endBytes[len(endBytes)-1] < 0xff { + endBytes[len(endBytes)-1]++ + } else { + endBytes = append(endBytes, 0xff) + } } if d.fst != nil { diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go index bcc0f94728..a819ca239f 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go @@ -39,7 +39,7 @@ type docNumTermsVisitor func(docNum uint64, terms []byte) error type docVisitState struct { dvrs map[uint16]*docValueReader - segment *Segment + segment *SegmentBase } type docValueReader struct { @@ -88,8 +88,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string, fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) { // get the docValue offset for the given fields if fieldDvLocStart == fieldNotUninverted { - return nil, fmt.Errorf("loadFieldDocValueReader: "+ - "no docValues found for field: %s", field) + // no docValues found, nothing to do + return nil, nil } // read the number of chunks, and chunk offsets position @@ -101,6 +101,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string, chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8]) // acquire position of chunk offsets chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen + } else { + return nil, fmt.Errorf("loadFieldDocValueReader: fieldDvLoc too small: %d-%d", fieldDvLocEnd, fieldDvLocStart) } fdvIter := &docValueReader{ @@ -250,7 +252,7 @@ func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) { // VisitDocumentFieldTerms is an implementation of the // DocumentFieldTermVisitable interface -func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, +func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string, visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) ( segment.DocVisitState, error) { dvs, ok := dvsIn.(*docVisitState) @@ -289,7 +291,7 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil { // check if the chunk is already loaded if docInChunk != dvr.curChunkNumber() { - err := dvr.loadDvChunk(docInChunk, &s.SegmentBase) + err := dvr.loadDvChunk(docInChunk, s) if err != nil { return dvs, err } @@ -304,6 +306,6 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, // VisitableDocValueFields returns the list of fields with // persisted doc value terms ready to be visitable using the // VisitDocumentFieldTerms method. -func (s *Segment) VisitableDocValueFields() ([]string, error) { +func (s *SegmentBase) VisitableDocValueFields() ([]string, error) { return s.fieldDvNames, nil } diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go index 4ef222c1a2..50bd7207a5 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go @@ -31,6 +31,14 @@ import ( var DefaultFileMergerBufferSize = 1024 * 1024 +// ValidateMerge can be set by applications to perform additional checks +// on a new segment produced by a merge, by default this does nothing. +// Caller should provide EITHER segments or memSegments, but not both. +// This API is experimental and may be removed at any time. +var ValidateMerge = func(segments []*Segment, memSegments []*SegmentBase, drops []*roaring.Bitmap, newSegment *Segment) error { + return nil +} + const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc // Merge takes a slice of zap segments and bit masks describing which diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go index 22b69913e4..c108ec16dd 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go @@ -33,6 +33,14 @@ var NewSegmentBufferNumResultsBump int = 100 var NewSegmentBufferNumResultsFactor float64 = 1.0 var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0 +// ValidateDocFields can be set by applications to perform additional checks +// on fields in a document being added to a new segment, by default it does +// nothing. +// This API is experimental and may be removed at any time. +var ValidateDocFields = func(field document.Field) error { + return nil +} + // AnalysisResultsToSegmentBase produces an in-memory zap-encoded // SegmentBase from analysis results func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, @@ -521,6 +529,11 @@ func (s *interim) writeStoredFields() ( if opts.IncludeDocValues() { s.IncludeDocValues[fieldID] = true } + + err := ValidateDocFields(field) + if err != nil { + return 0, err + } } var curr int diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go index 26378c27e0..4c43fdb9b9 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go @@ -15,10 +15,8 @@ package zap import ( - "bytes" "encoding/binary" "fmt" - "io" "math" "reflect" @@ -192,7 +190,7 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, } rv.postings = p - rv.includeFreqNorm = includeFreq || includeNorm + rv.includeFreqNorm = includeFreq || includeNorm || includeLocs rv.includeLocs = includeLocs if p.normBits1Hit != 0 { @@ -264,18 +262,17 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, // Count returns the number of items on this postings list func (p *PostingsList) Count() uint64 { - var n uint64 + var n, e uint64 if p.normBits1Hit != 0 { n = 1 + if p.except != nil && p.except.Contains(uint32(p.docNum1Hit)) { + e = 1 + } } else if p.postings != nil { n = p.postings.GetCardinality() - } - var e uint64 - if p.except != nil { - e = p.except.GetCardinality() - } - if n <= e { - return 0 + if p.except != nil { + e = p.postings.AndCardinality(p.except) + } } return n - e } @@ -327,16 +324,16 @@ func (rv *PostingsList) init1Hit(fstVal uint64) error { // PostingsIterator provides a way to iterate through the postings list type PostingsIterator struct { postings *PostingsList - all roaring.IntIterable - Actual roaring.IntIterable + all roaring.IntPeekable + Actual roaring.IntPeekable ActualBM *roaring.Bitmap currChunk uint32 currChunkFreqNorm []byte currChunkLoc []byte - freqNormReader *bytes.Reader - locReader *bytes.Reader + freqNormReader *segment.MemUvarintReader + locReader *segment.MemUvarintReader freqChunkOffsets []uint64 freqChunkStart uint64 @@ -387,7 +384,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error { end += e i.currChunkFreqNorm = i.postings.sb.mem[start:end] if i.freqNormReader == nil { - i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm) + i.freqNormReader = segment.NewMemUvarintReader(i.currChunkFreqNorm) } else { i.freqNormReader.Reset(i.currChunkFreqNorm) } @@ -405,7 +402,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error { end += e i.currChunkLoc = i.postings.sb.mem[start:end] if i.locReader == nil { - i.locReader = bytes.NewReader(i.currChunkLoc) + i.locReader = segment.NewMemUvarintReader(i.currChunkLoc) } else { i.locReader.Reset(i.currChunkLoc) } @@ -420,18 +417,34 @@ func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) { return 1, i.normBits1Hit, false, nil } - freqHasLocs, err := binary.ReadUvarint(i.freqNormReader) + freqHasLocs, err := i.freqNormReader.ReadUvarint() if err != nil { return 0, 0, false, fmt.Errorf("error reading frequency: %v", err) } + freq, hasLocs := decodeFreqHasLocs(freqHasLocs) - normBits, err := binary.ReadUvarint(i.freqNormReader) + normBits, err := i.freqNormReader.ReadUvarint() if err != nil { return 0, 0, false, fmt.Errorf("error reading norm: %v", err) } - return freq, normBits, hasLocs, err + return freq, normBits, hasLocs, nil +} + +func (i *PostingsIterator) skipFreqNormReadHasLocs() (bool, error) { + if i.normBits1Hit != 0 { + return false, nil + } + + freqHasLocs, err := i.freqNormReader.ReadUvarint() + if err != nil { + return false, fmt.Errorf("error reading freqHasLocs: %v", err) + } + + i.freqNormReader.SkipUvarint() // Skip normBits. + + return freqHasLocs&0x01 != 0, nil // See decodeFreqHasLocs() / hasLocs. } func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 { @@ -449,58 +462,53 @@ func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) { } // readLocation processes all the integers on the stream representing a single -// location. if you care about it, pass in a non-nil location struct, and we -// will fill it. if you don't care about it, pass in nil and we safely consume -// the contents. +// location. func (i *PostingsIterator) readLocation(l *Location) error { // read off field - fieldID, err := binary.ReadUvarint(i.locReader) + fieldID, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location field: %v", err) } // read off pos - pos, err := binary.ReadUvarint(i.locReader) + pos, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location pos: %v", err) } // read off start - start, err := binary.ReadUvarint(i.locReader) + start, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location start: %v", err) } // read off end - end, err := binary.ReadUvarint(i.locReader) + end, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location end: %v", err) } // read off num array pos - numArrayPos, err := binary.ReadUvarint(i.locReader) + numArrayPos, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location num array pos: %v", err) } - // group these together for less branching - if l != nil { - l.field = i.postings.sb.fieldsInv[fieldID] - l.pos = pos - l.start = start - l.end = end - if cap(l.ap) < int(numArrayPos) { - l.ap = make([]uint64, int(numArrayPos)) - } else { - l.ap = l.ap[:int(numArrayPos)] - } + l.field = i.postings.sb.fieldsInv[fieldID] + l.pos = pos + l.start = start + l.end = end + + if cap(l.ap) < int(numArrayPos) { + l.ap = make([]uint64, int(numArrayPos)) + } else { + l.ap = l.ap[:int(numArrayPos)] } // read off array positions for k := 0; k < int(numArrayPos); k++ { - ap, err := binary.ReadUvarint(i.locReader) + ap, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading array position: %v", err) } - if l != nil { - l.ap[k] = ap - } + + l.ap[k] = ap } return nil @@ -557,7 +565,7 @@ func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, err } rv.locs = i.nextSegmentLocs[:0] - numLocsBytes, err := binary.ReadUvarint(i.locReader) + numLocsBytes, err := i.locReader.ReadUvarint() if err != nil { return nil, fmt.Errorf("error reading location numLocsBytes: %v", err) } @@ -613,17 +621,14 @@ func (i *PostingsIterator) nextBytes() ( if hasLocs { startLoc := len(i.currChunkLoc) - i.locReader.Len() - numLocsBytes, err := binary.ReadUvarint(i.locReader) + numLocsBytes, err := i.locReader.ReadUvarint() if err != nil { return 0, 0, 0, nil, nil, fmt.Errorf("error reading location nextBytes numLocs: %v", err) } // skip over all the location bytes - _, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent) - if err != nil { - return 0, 0, 0, nil, nil, err - } + i.locReader.SkipBytes(int(numLocsBytes)) endLoc := len(i.currChunkLoc) - i.locReader.Len() bytesLoc = i.currChunkLoc[startLoc:endLoc] @@ -657,14 +662,14 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, return i.nextDocNumAtOrAfterClean(atOrAfter) } - n := i.Actual.Next() - for uint64(n) < atOrAfter && i.Actual.HasNext() { - n = i.Actual.Next() - } - if uint64(n) < atOrAfter { + i.Actual.AdvanceIfNeeded(uint32(atOrAfter)) + + if !i.Actual.HasNext() { // couldn't find anything return 0, false, nil } + + n := i.Actual.Next() allN := i.all.Next() nChunk := n / i.postings.sb.chunkFactor @@ -701,23 +706,20 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, // no deletions) where the all bitmap is the same as the actual bitmap func (i *PostingsIterator) nextDocNumAtOrAfterClean( atOrAfter uint64) (uint64, bool, error) { - n := i.Actual.Next() if !i.includeFreqNorm { - for uint64(n) < atOrAfter && i.Actual.HasNext() { - n = i.Actual.Next() - } + i.Actual.AdvanceIfNeeded(uint32(atOrAfter)) - if uint64(n) < atOrAfter { + if !i.Actual.HasNext() { return 0, false, nil // couldn't find anything } - return uint64(n), true, nil + return uint64(i.Actual.Next()), true, nil } // freq-norm's needed, so maintain freq-norm chunk reader sameChunkNexts := 0 // # of times we called Next() in the same chunk - + n := i.Actual.Next() nChunk := n / i.postings.sb.chunkFactor for uint64(n) < atOrAfter && i.Actual.HasNext() { @@ -764,22 +766,19 @@ func (i *PostingsIterator) currChunkNext(nChunk uint32) error { } // read off freq/offsets even though we don't care about them - _, _, hasLocs, err := i.readFreqNormHasLocs() + hasLocs, err := i.skipFreqNormReadHasLocs() if err != nil { return err } if i.includeLocs && hasLocs { - numLocsBytes, err := binary.ReadUvarint(i.locReader) + numLocsBytes, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location numLocsBytes: %v", err) } // skip over all the location bytes - _, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent) - if err != nil { - return err - } + i.locReader.SkipBytes(int(numLocsBytes)) } return nil diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go index 7ba28c2366..5aa33a26c9 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go @@ -20,8 +20,8 @@ import ( "fmt" "io" "os" - "reflect" "sync" + "unsafe" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" @@ -35,7 +35,7 @@ var reflectStaticSizeSegmentBase int func init() { var sb SegmentBase - reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size()) + reflectStaticSizeSegmentBase = int(unsafe.Sizeof(sb)) } // Open returns a zap impl of a segment @@ -56,6 +56,7 @@ func Open(path string) (segment.Segment, error) { mem: mm[0 : len(mm)-FooterSize], fieldsMap: make(map[string]uint16), fieldDvReaders: make(map[uint16]*docValueReader), + fieldFSTs: make(map[uint16]*vellum.FST), }, f: f, mm: mm, @@ -101,6 +102,9 @@ type SegmentBase struct { fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field fieldDvNames []string // field names cached in fieldDvReaders size uint64 + + m sync.Mutex + fieldFSTs map[uint16]*vellum.FST } func (sb *SegmentBase) Size() int { @@ -258,19 +262,27 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { dictStart := sb.dictLocs[rv.fieldID] if dictStart > 0 { - // read the length of the vellum data - vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64]) - fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] - if fstBytes != nil { + var ok bool + sb.m.Lock() + if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok { + // read the length of the vellum data + vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64]) + fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] rv.fst, err = vellum.Load(fstBytes) if err != nil { + sb.m.Unlock() return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) } - rv.fstReader, err = rv.fst.Reader() - if err != nil { - return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err) - } + + sb.fieldFSTs[rv.fieldID] = rv.fst } + + sb.m.Unlock() + rv.fstReader, err = rv.fst.Reader() + if err != nil { + return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err) + } + } } @@ -527,7 +539,7 @@ func (s *Segment) DictAddr(field string) (uint64, error) { } func (s *SegmentBase) loadDvReaders() error { - if s.docValueOffset == fieldNotUninverted { + if s.docValueOffset == fieldNotUninverted || s.numDocs == 0 { return nil } @@ -546,7 +558,10 @@ func (s *SegmentBase) loadDvReaders() error { } read += uint64(n) - fieldDvReader, _ := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd) + fieldDvReader, err := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd) + if err != nil { + return err + } if fieldDvReader != nil { s.fieldDvReaders[uint16(fieldID)] = fieldDvReader s.fieldDvNames = append(s.fieldDvNames, field) diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go index 8babb31fa4..47cc809b21 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go @@ -28,13 +28,14 @@ import ( "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" "github.com/couchbase/vellum" - lev2 "github.com/couchbase/vellum/levenshtein2" + lev "github.com/couchbase/vellum/levenshtein" ) // re usable, threadsafe levenshtein builders -var lb1, lb2 *lev2.LevenshteinAutomatonBuilder +var lb1, lb2 *lev.LevenshteinAutomatonBuilder type asynchSegmentResult struct { + dict segment.TermDictionary dictItr segment.DictionaryIterator index int @@ -51,11 +52,11 @@ func init() { var is interface{} = IndexSnapshot{} reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size()) var err error - lb1, err = lev2.NewLevenshteinAutomatonBuilder(1, true) + lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true) if err != nil { panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err)) } - lb2, err = lev2.NewLevenshteinAutomatonBuilder(2, true) + lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true) if err != nil { panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err)) } @@ -126,7 +127,9 @@ func (i *IndexSnapshot) updateSize() { } } -func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { +func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, + makeItr func(i segment.TermDictionary) segment.DictionaryIterator, + randomLookup bool) (*IndexSnapshotFieldDict, error) { results := make(chan *asynchSegmentResult) for index, segment := range i.segment { @@ -135,7 +138,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s if err != nil { results <- &asynchSegmentResult{err: err} } else { - results <- &asynchSegmentResult{dictItr: makeItr(dict)} + if randomLookup { + results <- &asynchSegmentResult{dict: dict} + } else { + results <- &asynchSegmentResult{dictItr: makeItr(dict)} + } } }(index, segment) } @@ -150,14 +157,20 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s if asr.err != nil && err == nil { err = asr.err } else { - next, err2 := asr.dictItr.Next() - if err2 != nil && err == nil { - err = err2 - } - if next != nil { + if !randomLookup { + next, err2 := asr.dictItr.Next() + if err2 != nil && err == nil { + err = err2 + } + if next != nil { + rv.cursors = append(rv.cursors, &segmentDictCursor{ + itr: asr.dictItr, + curr: *next, + }) + } + } else { rv.cursors = append(rv.cursors, &segmentDictCursor{ - itr: asr.dictItr, - curr: *next, + dict: asr.dict, }) } } @@ -166,8 +179,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s if err != nil { return nil, err } - // prepare heap - heap.Init(rv) + + if !randomLookup { + // prepare heap + heap.Init(rv) + } return rv, nil } @@ -175,21 +191,21 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.Iterator() - }) + }, false) } func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.RangeIterator(string(startTerm), string(endTerm)) - }) + }, false) } func (i *IndexSnapshot) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.PrefixIterator(string(termPrefix)) - }) + }, false) } func (i *IndexSnapshot) FieldDictRegexp(field string, @@ -204,7 +220,7 @@ func (i *IndexSnapshot) FieldDictRegexp(field string, return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.AutomatonIterator(a, prefixBeg, prefixEnd) - }) + }, false) } func (i *IndexSnapshot) getLevAutomaton(term string, @@ -232,14 +248,18 @@ func (i *IndexSnapshot) FieldDictFuzzy(field string, return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.AutomatonIterator(a, prefixBeg, prefixEnd) - }) + }, false) } func (i *IndexSnapshot) FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.OnlyIterator(onlyTerms, includeCount) - }) + }, false) +} + +func (i *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) { + return i.newIndexSnapshotFieldDict(field, nil, true) } func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go index abd3bde8c1..47486c2554 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go @@ -22,6 +22,7 @@ import ( ) type segmentDictCursor struct { + dict segment.TermDictionary itr segment.DictionaryIterator curr index.DictEntry } @@ -91,3 +92,17 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) { func (i *IndexSnapshotFieldDict) Close() error { return nil } + +func (i *IndexSnapshotFieldDict) Contains(key []byte) (bool, error) { + if len(i.cursors) == 0 { + return false, nil + } + + for _, cursor := range i.cursors { + if found, _ := cursor.dict.Contains(key); found { + return true, nil + } + } + + return false, nil +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go index f3a2c56a98..96742b4f94 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go @@ -183,9 +183,9 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) { } type cachedDocs struct { + size uint64 m sync.Mutex // As the cache is asynchronously prepared, need a lock cache map[string]*cachedFieldDocs // Keyed by field - size uint64 } func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error { diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/stats.go b/vendor/github.com/blevesearch/bleve/index/scorch/stats.go index 2eb832f2cf..6549fddf51 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/stats.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/stats.go @@ -107,6 +107,9 @@ type Stats struct { TotFileMergeIntroductionsDone uint64 TotFileMergeIntroductionsSkipped uint64 + CurFilesIneligibleForRemoval uint64 + TotSnapshotsRemovedFromMetaStore uint64 + TotMemMergeBeg uint64 TotMemMergeErr uint64 TotMemMergeDone uint64 diff --git a/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go b/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go index e4bc3d8f02..24f5aae949 100644 --- a/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go +++ b/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go @@ -415,7 +415,6 @@ func (udc *UpsideDownCouch) Close() error { func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { // do analysis before acquiring write lock analysisStart := time.Now() - numPlainTextBytes := doc.NumPlainTextBytes() resultChan := make(chan *index.AnalysisResult) aw := index.NewAnalysisWork(udc, doc, resultChan) @@ -452,6 +451,11 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { return } + return udc.UpdateWithAnalysis(doc, result, backIndexRow) +} + +func (udc *UpsideDownCouch) UpdateWithAnalysis(doc *document.Document, + result *index.AnalysisResult, backIndexRow *BackIndexRow) (err error) { // start a writer for this update indexStart := time.Now() var kvwriter store.KVWriter @@ -490,7 +494,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart))) if err == nil { atomic.AddUint64(&udc.stats.updates, 1) - atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes) + atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, doc.NumPlainTextBytes()) } else { atomic.AddUint64(&udc.stats.errors, 1) } @@ -797,6 +801,10 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) [] } func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { + persistedCallback := batch.PersistedCallback() + if persistedCallback != nil { + defer persistedCallback(err) + } analysisStart := time.Now() resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps)) @@ -810,7 +818,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { } } - if len(batch.IndexOps) > 0 { + if numUpdates > 0 { go func() { for _, doc := range batch.IndexOps { if doc != nil { @@ -961,10 +969,6 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { atomic.AddUint64(&udc.stats.errors, 1) } - persistedCallback := batch.PersistedCallback() - if persistedCallback != nil { - persistedCallback(err) - } return } diff --git a/vendor/github.com/blevesearch/bleve/index_alias_impl.go b/vendor/github.com/blevesearch/bleve/index_alias_impl.go index 335fcade2e..4366fc7956 100644 --- a/vendor/github.com/blevesearch/bleve/index_alias_impl.go +++ b/vendor/github.com/blevesearch/bleve/index_alias_impl.go @@ -434,6 +434,8 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest { Sort: req.Sort.Copy(), IncludeLocations: req.IncludeLocations, Score: req.Score, + SearchAfter: req.SearchAfter, + SearchBefore: req.SearchBefore, } return &rv } @@ -451,6 +453,14 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se searchStart := time.Now() asyncResults := make(chan *asyncSearchResult, len(indexes)) + var reverseQueryExecution bool + if req.SearchBefore != nil { + reverseQueryExecution = true + req.Sort.Reverse() + req.SearchAfter = req.SearchBefore + req.SearchBefore = nil + } + // run search on each index in separate go routine var waitGroup sync.WaitGroup @@ -503,7 +513,7 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se // sort all hits with the requested order if len(req.Sort) > 0 { - sorter := newMultiSearchHitSorter(req.Sort, sr.Hits) + sorter := newSearchHitSorter(req.Sort, sr.Hits) sort.Sort(sorter) } @@ -524,6 +534,17 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se sr.Facets.Fixup(name, fr.Size) } + if reverseQueryExecution { + // reverse the sort back to the original + req.Sort.Reverse() + // resort using the original order + mhs := newSearchHitSorter(req.Sort, sr.Hits) + sort.Sort(mhs) + // reset request + req.SearchBefore = req.SearchAfter + req.SearchAfter = nil + } + // fix up original request sr.Request = req searchDuration := time.Since(searchStart) @@ -581,26 +602,3 @@ func (f *indexAliasImplFieldDict) Close() error { defer f.index.mutex.RUnlock() return f.fieldDict.Close() } - -type multiSearchHitSorter struct { - hits search.DocumentMatchCollection - sort search.SortOrder - cachedScoring []bool - cachedDesc []bool -} - -func newMultiSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *multiSearchHitSorter { - return &multiSearchHitSorter{ - sort: sort, - hits: hits, - cachedScoring: sort.CacheIsScore(), - cachedDesc: sort.CacheDescending(), - } -} - -func (m *multiSearchHitSorter) Len() int { return len(m.hits) } -func (m *multiSearchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] } -func (m *multiSearchHitSorter) Less(i, j int) bool { - c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j]) - return c < 0 -} diff --git a/vendor/github.com/blevesearch/bleve/index_impl.go b/vendor/github.com/blevesearch/bleve/index_impl.go index fe61b8064a..6324d960eb 100644 --- a/vendor/github.com/blevesearch/bleve/index_impl.go +++ b/vendor/github.com/blevesearch/bleve/index_impl.go @@ -19,6 +19,7 @@ import ( "encoding/json" "fmt" "os" + "sort" "sync" "sync/atomic" "time" @@ -442,7 +443,20 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr return nil, ErrorIndexClosed } - collector := collector.NewTopNCollector(req.Size, req.From, req.Sort) + var reverseQueryExecution bool + if req.SearchBefore != nil { + reverseQueryExecution = true + req.Sort.Reverse() + req.SearchAfter = req.SearchBefore + req.SearchBefore = nil + } + + var coll *collector.TopNCollector + if req.SearchAfter != nil { + coll = collector.NewTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter) + } else { + coll = collector.NewTopNCollector(req.Size, req.From, req.Sort) + } // open a reader for this search indexReader, err := i.i.Reader() @@ -494,10 +508,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr facetsBuilder.Add(facetName, facetBuilder) } } - collector.SetFacetsBuilder(facetsBuilder) + coll.SetFacetsBuilder(facetsBuilder) } - memNeeded := memNeededForSearch(req, searcher, collector) + memNeeded := memNeededForSearch(req, searcher, coll) if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil { if cbF, ok := cb.(SearchQueryStartCallbackFn); ok { err = cbF(memNeeded) @@ -515,12 +529,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr } } - err = collector.Collect(ctx, searcher, indexReader) + err = coll.Collect(ctx, searcher, indexReader) if err != nil { return nil, err } - hits := collector.Results() + hits := coll.Results() var highlighter highlight.Highlighter @@ -542,71 +556,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr } for _, hit := range hits { - if len(req.Fields) > 0 || highlighter != nil { - doc, err := indexReader.Document(hit.ID) - if err == nil && doc != nil { - if len(req.Fields) > 0 { - fieldsToLoad := deDuplicate(req.Fields) - for _, f := range fieldsToLoad { - for _, docF := range doc.Fields { - if f == "*" || docF.Name() == f { - var value interface{} - switch docF := docF.(type) { - case *document.TextField: - value = string(docF.Value()) - case *document.NumericField: - num, err := docF.Number() - if err == nil { - value = num - } - case *document.DateTimeField: - datetime, err := docF.DateTime() - if err == nil { - value = datetime.Format(time.RFC3339) - } - case *document.BooleanField: - boolean, err := docF.Boolean() - if err == nil { - value = boolean - } - case *document.GeoPointField: - lon, err := docF.Lon() - if err == nil { - lat, err := docF.Lat() - if err == nil { - value = []float64{lon, lat} - } - } - } - if value != nil { - hit.AddFieldValue(docF.Name(), value) - } - } - } - } - } - if highlighter != nil { - highlightFields := req.Highlight.Fields - if highlightFields == nil { - // add all fields with matches - highlightFields = make([]string, 0, len(hit.Locations)) - for k := range hit.Locations { - highlightFields = append(highlightFields, k) - } - } - for _, hf := range highlightFields { - highlighter.BestFragmentsInField(hit, doc, hf, 1) - } - } - } else if doc == nil { - // unexpected case, a doc ID that was found as a search hit - // was unable to be found during document lookup - return nil, ErrorIndexReadInconsistency - } - } if i.name != "" { hit.Index = i.name } + err = LoadAndHighlightFields(hit, req, i.name, indexReader, highlighter) + if err != nil { + return nil, err + } } atomic.AddUint64(&i.stats.searches, 1) @@ -618,6 +574,17 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr logger.Printf("slow search took %s - %v", searchDuration, req) } + if reverseQueryExecution { + // reverse the sort back to the original + req.Sort.Reverse() + // resort using the original order + mhs := newSearchHitSorter(req.Sort, hits) + sort.Sort(mhs) + // reset request + req.SearchBefore = req.SearchAfter + req.SearchAfter = nil + } + return &SearchResult{ Status: &SearchStatus{ Total: 1, @@ -625,13 +592,82 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr }, Request: req, Hits: hits, - Total: collector.Total(), - MaxScore: collector.MaxScore(), + Total: coll.Total(), + MaxScore: coll.MaxScore(), Took: searchDuration, - Facets: collector.FacetResults(), + Facets: coll.FacetResults(), }, nil } +func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest, + indexName string, r index.IndexReader, + highlighter highlight.Highlighter) error { + if len(req.Fields) > 0 || highlighter != nil { + doc, err := r.Document(hit.ID) + if err == nil && doc != nil { + if len(req.Fields) > 0 { + fieldsToLoad := deDuplicate(req.Fields) + for _, f := range fieldsToLoad { + for _, docF := range doc.Fields { + if f == "*" || docF.Name() == f { + var value interface{} + switch docF := docF.(type) { + case *document.TextField: + value = string(docF.Value()) + case *document.NumericField: + num, err := docF.Number() + if err == nil { + value = num + } + case *document.DateTimeField: + datetime, err := docF.DateTime() + if err == nil { + value = datetime.Format(time.RFC3339) + } + case *document.BooleanField: + boolean, err := docF.Boolean() + if err == nil { + value = boolean + } + case *document.GeoPointField: + lon, err := docF.Lon() + if err == nil { + lat, err := docF.Lat() + if err == nil { + value = []float64{lon, lat} + } + } + } + if value != nil { + hit.AddFieldValue(docF.Name(), value) + } + } + } + } + } + if highlighter != nil { + highlightFields := req.Highlight.Fields + if highlightFields == nil { + // add all fields with matches + highlightFields = make([]string, 0, len(hit.Locations)) + for k := range hit.Locations { + highlightFields = append(highlightFields, k) + } + } + for _, hf := range highlightFields { + highlighter.BestFragmentsInField(hit, doc, hf, 1) + } + } + } else if doc == nil { + // unexpected case, a doc ID that was found as a search hit + // was unable to be found during document lookup + return ErrorIndexReadInconsistency + } + } + + return nil +} + // Fields returns the name of all the fields this // Index has operated on. func (i *indexImpl) Fields() (fields []string, err error) { @@ -854,3 +890,26 @@ func deDuplicate(fields []string) []string { } return ret } + +type searchHitSorter struct { + hits search.DocumentMatchCollection + sort search.SortOrder + cachedScoring []bool + cachedDesc []bool +} + +func newSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *searchHitSorter { + return &searchHitSorter{ + sort: sort, + hits: hits, + cachedScoring: sort.CacheIsScore(), + cachedDesc: sort.CacheDescending(), + } +} + +func (m *searchHitSorter) Len() int { return len(m.hits) } +func (m *searchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] } +func (m *searchHitSorter) Less(i, j int) bool { + c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j]) + return c < 0 +} diff --git a/vendor/github.com/blevesearch/bleve/mapping/document.go b/vendor/github.com/blevesearch/bleve/mapping/document.go index f950b59bef..15cb6b5fa1 100644 --- a/vendor/github.com/blevesearch/bleve/mapping/document.go +++ b/vendor/github.com/blevesearch/bleve/mapping/document.go @@ -525,19 +525,27 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, if !propertyValue.IsNil() { switch property := property.(type) { case encoding.TextMarshaler: - - txt, err := property.MarshalText() - if err == nil && subDocMapping != nil { - // index by explicit mapping + // ONLY process TextMarshaler if there is an explicit mapping + // AND all of the fiels are of type text + // OTHERWISE process field without TextMarshaler + if subDocMapping != nil { + allFieldsText := true for _, fieldMapping := range subDocMapping.Fields { - if fieldMapping.Type == "text" { - fieldMapping.processString(string(txt), pathString, path, indexes, context) + if fieldMapping.Type != "text" { + allFieldsText = false + break } } - } else { - dm.walkDocument(property, path, indexes, context) + txt, err := property.MarshalText() + if err == nil && allFieldsText { + txtStr := string(txt) + for _, fieldMapping := range subDocMapping.Fields { + fieldMapping.processString(txtStr, pathString, path, indexes, context) + } + return + } } - + dm.walkDocument(property, path, indexes, context) default: dm.walkDocument(property, path, indexes, context) } diff --git a/vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go b/vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go index 76ea001ba7..29bd0fc5c1 100644 --- a/vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go +++ b/vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go @@ -23,12 +23,26 @@ const ShiftStartInt64 byte = 0x20 type PrefixCoded []byte func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) { + rv, _, err := NewPrefixCodedInt64Prealloc(in, shift, nil) + return rv, err +} + +func NewPrefixCodedInt64Prealloc(in int64, shift uint, prealloc []byte) ( + rv PrefixCoded, preallocRest []byte, err error) { if shift > 63 { - return nil, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift) + return nil, prealloc, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift) } nChars := ((63 - shift) / 7) + 1 - rv := make(PrefixCoded, nChars+1) + + size := int(nChars + 1) + if len(prealloc) >= size { + rv = PrefixCoded(prealloc[0:size]) + preallocRest = prealloc[size:] + } else { + rv = make(PrefixCoded, size) + } + rv[0] = ShiftStartInt64 + byte(shift) sortableBits := int64(uint64(in) ^ 0x8000000000000000) @@ -40,7 +54,8 @@ func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) { nChars-- sortableBits = int64(uint64(sortableBits) >> 7) } - return rv, nil + + return rv, preallocRest, nil } func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded { diff --git a/vendor/github.com/blevesearch/bleve/search.go b/vendor/github.com/blevesearch/bleve/search.go index ebd69971ef..b337edc9e4 100644 --- a/vendor/github.com/blevesearch/bleve/search.go +++ b/vendor/github.com/blevesearch/bleve/search.go @@ -262,6 +262,8 @@ func (h *HighlightRequest) AddField(field string) { // result score explanations. // Sort describes the desired order for the results to be returned. // Score controls the kind of scoring performed +// SearchAfter supports deep paging by providing a minimum sort key +// SearchBefore supports deep paging by providing a maximum sort key // // A special field named "*" can be used to return all fields. type SearchRequest struct { @@ -275,6 +277,8 @@ type SearchRequest struct { Sort search.SortOrder `json:"sort"` IncludeLocations bool `json:"includeLocations"` Score string `json:"score,omitempty"` + SearchAfter []string `json:"search_after"` + SearchBefore []string `json:"search_before"` } func (r *SearchRequest) Validate() error { @@ -285,6 +289,27 @@ func (r *SearchRequest) Validate() error { } } + if r.SearchAfter != nil && r.SearchBefore != nil { + return fmt.Errorf("cannot use search after and search before together") + } + + if r.SearchAfter != nil { + if r.From != 0 { + return fmt.Errorf("cannot use search after with from !=0") + } + if len(r.SearchAfter) != len(r.Sort) { + return fmt.Errorf("search after must have same size as sort order") + } + } + if r.SearchBefore != nil { + if r.From != 0 { + return fmt.Errorf("cannot use search before with from !=0") + } + if len(r.SearchBefore) != len(r.Sort) { + return fmt.Errorf("search before must have same size as sort order") + } + } + return r.Facets.Validate() } @@ -311,6 +336,18 @@ func (r *SearchRequest) SortByCustom(order search.SortOrder) { r.Sort = order } +// SetSearchAfter sets the request to skip over hits with a sort +// value less than the provided sort after key +func (r *SearchRequest) SetSearchAfter(after []string) { + r.SearchAfter = after +} + +// SetSearchBefore sets the request to skip over hits with a sort +// value greater than the provided sort before key +func (r *SearchRequest) SetSearchBefore(before []string) { + r.SearchBefore = before +} + // UnmarshalJSON deserializes a JSON representation of // a SearchRequest func (r *SearchRequest) UnmarshalJSON(input []byte) error { @@ -325,6 +362,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { Sort []json.RawMessage `json:"sort"` IncludeLocations bool `json:"includeLocations"` Score string `json:"score"` + SearchAfter []string `json:"search_after"` + SearchBefore []string `json:"search_before"` } err := json.Unmarshal(input, &temp) @@ -352,6 +391,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { r.Facets = temp.Facets r.IncludeLocations = temp.IncludeLocations r.Score = temp.Score + r.SearchAfter = temp.SearchAfter + r.SearchBefore = temp.SearchBefore r.Query, err = query.ParseQuery(temp.Q) if err != nil { return err diff --git a/vendor/github.com/blevesearch/bleve/search/collector/topn.go b/vendor/github.com/blevesearch/bleve/search/collector/topn.go index 378a7b114a..a027a12c22 100644 --- a/vendor/github.com/blevesearch/bleve/search/collector/topn.go +++ b/vendor/github.com/blevesearch/bleve/search/collector/topn.go @@ -69,6 +69,7 @@ type TopNCollector struct { lowestMatchOutsideResults *search.DocumentMatch updateFieldVisitor index.DocumentFieldTermVisitor dvReader index.DocValueReader + searchAfter *search.DocumentMatch } // CheckDoneEvery controls how frequently we check the context deadline @@ -78,6 +79,21 @@ const CheckDoneEvery = uint64(1024) // skipping over the first 'skip' hits // ordering hits by the provided sort order func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector { + return newTopNCollector(size, skip, sort) +} + +// NewTopNCollector builds a collector to find the top 'size' hits +// skipping over the first 'skip' hits +// ordering hits by the provided sort order +func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector { + rv := newTopNCollector(size, 0, sort) + rv.searchAfter = &search.DocumentMatch{ + Sort: after, + } + return rv +} + +func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector { hc := &TopNCollector{size: size, skip: skip, sort: sort} // pre-allocate space on the store to avoid reslicing @@ -141,6 +157,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, searchContext := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)), Collector: hc, + IndexReader: reader, } hc.dvReader, err = reader.DocValueReader(hc.neededFields) @@ -265,6 +282,19 @@ func MakeTopNDocumentMatchHandler( if d == nil { return nil } + + // support search after based pagination, + // if this hit is <= the search after sort key + // we should skip it + if hc.searchAfter != nil { + // exact sort order matches use hit number to break tie + // but we want to allow for exact match, so we pretend + hc.searchAfter.HitNumber = d.HitNumber + if hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.searchAfter) <= 0 { + return nil + } + } + // optimization, we track lowest sorting hit already removed from heap // with this one comparison, we can avoid all heap operations if // this hit would have been added and then immediately removed diff --git a/vendor/github.com/blevesearch/bleve/search/query/date_range.go b/vendor/github.com/blevesearch/bleve/search/query/date_range.go index ff67a7bb70..3ac0322f55 100644 --- a/vendor/github.com/blevesearch/bleve/search/query/date_range.go +++ b/vendor/github.com/blevesearch/bleve/search/query/date_range.go @@ -41,6 +41,14 @@ type BleveQueryTime struct { time.Time } +var MinRFC3339CompatibleTime time.Time +var MaxRFC3339CompatibleTime time.Time + +func init() { + MinRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "1677-12-01T00:00:00Z") + MaxRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "2262-04-11T11:59:59Z") +} + func queryTimeFromString(t string) (time.Time, error) { dateTimeParser, err := cache.DateTimeParserNamed(QueryDateTimeParser) if err != nil { @@ -143,10 +151,20 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) { min := math.Inf(-1) max := math.Inf(1) if !q.Start.IsZero() { - min = numeric.Int64ToFloat64(q.Start.UnixNano()) + if !isDatetimeCompatible(q.Start) { + // overflow + return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start) + } + startInt64 := q.Start.UnixNano() + min = numeric.Int64ToFloat64(startInt64) } if !q.End.IsZero() { - max = numeric.Int64ToFloat64(q.End.UnixNano()) + if !isDatetimeCompatible(q.End) { + // overflow + return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End) + } + endInt64 := q.End.UnixNano() + max = numeric.Int64ToFloat64(endInt64) } return &min, &max, nil @@ -162,3 +180,12 @@ func (q *DateRangeQuery) Validate() error { } return nil } + +func isDatetimeCompatible(t BleveQueryTime) bool { + if QueryDateTimeFormat == time.RFC3339 && + (t.Before(MinRFC3339CompatibleTime) || t.After(MaxRFC3339CompatibleTime)) { + return false + } + + return true +} diff --git a/vendor/github.com/blevesearch/bleve/search/query/disjunction.go b/vendor/github.com/blevesearch/bleve/search/query/disjunction.go index 2bc1d70441..a1fc1439a6 100644 --- a/vendor/github.com/blevesearch/bleve/search/query/disjunction.go +++ b/vendor/github.com/blevesearch/bleve/search/query/disjunction.go @@ -80,12 +80,6 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, if len(ss) < 1 { return searcher.NewMatchNoneSearcher(i) - } else if len(ss) == 1 && int(q.Min) == ss[0].Min() { - // apply optimization only if both conditions below are satisfied: - // - disjunction searcher has only 1 child searcher - // - parent searcher's min setting is equal to child searcher's min - - return ss[0], nil } return searcher.NewDisjunctionSearcher(i, ss, q.Min, options) diff --git a/vendor/github.com/blevesearch/bleve/search/query/geo_boundingpolygon.go b/vendor/github.com/blevesearch/bleve/search/query/geo_boundingpolygon.go new file mode 100644 index 0000000000..41c7f7f3ab --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/search/query/geo_boundingpolygon.go @@ -0,0 +1,94 @@ +// Copyright (c) 2019 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package query + +import ( + "encoding/json" + "fmt" + + "github.com/blevesearch/bleve/geo" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/mapping" + "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/search/searcher" +) + +type GeoBoundingPolygonQuery struct { + Points []geo.Point `json:"polygon_points"` + FieldVal string `json:"field,omitempty"` + BoostVal *Boost `json:"boost,omitempty"` +} + +func NewGeoBoundingPolygonQuery(points []geo.Point) *GeoBoundingPolygonQuery { + return &GeoBoundingPolygonQuery{ + Points: points} +} + +func (q *GeoBoundingPolygonQuery) SetBoost(b float64) { + boost := Boost(b) + q.BoostVal = &boost +} + +func (q *GeoBoundingPolygonQuery) Boost() float64 { + return q.BoostVal.Value() +} + +func (q *GeoBoundingPolygonQuery) SetField(f string) { + q.FieldVal = f +} + +func (q *GeoBoundingPolygonQuery) Field() string { + return q.FieldVal +} + +func (q *GeoBoundingPolygonQuery) Searcher(i index.IndexReader, + m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { + field := q.FieldVal + if q.FieldVal == "" { + field = m.DefaultSearchField() + } + + return searcher.NewGeoBoundedPolygonSearcher(i, q.Points, field, q.BoostVal.Value(), options) +} + +func (q *GeoBoundingPolygonQuery) Validate() error { + return nil +} + +func (q *GeoBoundingPolygonQuery) UnmarshalJSON(data []byte) error { + tmp := struct { + Points []interface{} `json:"polygon_points"` + FieldVal string `json:"field,omitempty"` + BoostVal *Boost `json:"boost,omitempty"` + }{} + err := json.Unmarshal(data, &tmp) + if err != nil { + return err + } + + q.Points = make([]geo.Point, 0, len(tmp.Points)) + for _, i := range tmp.Points { + // now use our generic point parsing code from the geo package + lon, lat, found := geo.ExtractGeoPoint(i) + if !found { + return fmt.Errorf("geo polygon point: %v is not in a valid format", i) + } + q.Points = append(q.Points, geo.Point{Lon: lon, Lat: lat}) + } + + q.FieldVal = tmp.FieldVal + q.BoostVal = tmp.BoostVal + return nil +} diff --git a/vendor/github.com/blevesearch/bleve/search/query/query.go b/vendor/github.com/blevesearch/bleve/search/query/query.go index c7c1eefb80..18aca228d4 100644 --- a/vendor/github.com/blevesearch/bleve/search/query/query.go +++ b/vendor/github.com/blevesearch/bleve/search/query/query.go @@ -273,6 +273,15 @@ func ParseQuery(input []byte) (Query, error) { } return &rv, nil } + _, hasPoints := tmp["polygon_points"] + if hasPoints { + var rv GeoBoundingPolygonQuery + err := json.Unmarshal(input, &rv) + if err != nil { + return nil, err + } + return &rv, nil + } return nil, fmt.Errorf("unknown query type") } diff --git a/vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go b/vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go index 5544f2d011..718de2ea5e 100644 --- a/vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go +++ b/vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go @@ -40,6 +40,7 @@ type TermQueryScorer struct { idf float64 options search.SearcherOptions idfExplanation *search.Explanation + includeScore bool queryNorm float64 queryWeight float64 queryWeightExplanation *search.Explanation @@ -62,14 +63,15 @@ func (s *TermQueryScorer) Size() int { func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer { rv := TermQueryScorer{ - queryTerm: string(queryTerm), - queryField: queryField, - queryBoost: queryBoost, - docTerm: docTerm, - docTotal: docTotal, - idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)), - options: options, - queryWeight: 1.0, + queryTerm: string(queryTerm), + queryField: queryField, + queryBoost: queryBoost, + docTerm: docTerm, + docTotal: docTotal, + idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)), + options: options, + queryWeight: 1.0, + includeScore: options.Score != "none", } if options.Explain { @@ -113,56 +115,61 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) { } func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch { - var scoreExplanation *search.Explanation - - // need to compute score - var tf float64 - if termMatch.Freq < MaxSqrtCache { - tf = SqrtCache[int(termMatch.Freq)] - } else { - tf = math.Sqrt(float64(termMatch.Freq)) - } - score := tf * termMatch.Norm * s.idf - - if s.options.Explain { - childrenExplanations := make([]*search.Explanation, 3) - childrenExplanations[0] = &search.Explanation{ - Value: tf, - Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq), - } - childrenExplanations[1] = &search.Explanation{ - Value: termMatch.Norm, - Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID), - } - childrenExplanations[2] = s.idfExplanation - scoreExplanation = &search.Explanation{ - Value: score, - Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID), - Children: childrenExplanations, + rv := ctx.DocumentMatchPool.Get() + // perform any score computations only when needed + if s.includeScore || s.options.Explain { + var scoreExplanation *search.Explanation + var tf float64 + if termMatch.Freq < MaxSqrtCache { + tf = SqrtCache[int(termMatch.Freq)] + } else { + tf = math.Sqrt(float64(termMatch.Freq)) } - } + score := tf * termMatch.Norm * s.idf - // if the query weight isn't 1, multiply - if s.queryWeight != 1.0 { - score = score * s.queryWeight if s.options.Explain { - childExplanations := make([]*search.Explanation, 2) - childExplanations[0] = s.queryWeightExplanation - childExplanations[1] = scoreExplanation + childrenExplanations := make([]*search.Explanation, 3) + childrenExplanations[0] = &search.Explanation{ + Value: tf, + Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq), + } + childrenExplanations[1] = &search.Explanation{ + Value: termMatch.Norm, + Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID), + } + childrenExplanations[2] = s.idfExplanation scoreExplanation = &search.Explanation{ Value: score, - Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID), - Children: childExplanations, + Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID), + Children: childrenExplanations, } } + + // if the query weight isn't 1, multiply + if s.queryWeight != 1.0 { + score = score * s.queryWeight + if s.options.Explain { + childExplanations := make([]*search.Explanation, 2) + childExplanations[0] = s.queryWeightExplanation + childExplanations[1] = scoreExplanation + scoreExplanation = &search.Explanation{ + Value: score, + Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID), + Children: childExplanations, + } + } + } + + if s.includeScore { + rv.Score = score + } + + if s.options.Explain { + rv.Expl = scoreExplanation + } } - rv := ctx.DocumentMatchPool.Get() rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...) - rv.Score = score - if s.options.Explain { - rv.Expl = scoreExplanation - } if len(termMatch.Vectors) > 0 { if cap(rv.FieldTermLocations) < len(termMatch.Vectors) { diff --git a/vendor/github.com/blevesearch/bleve/search/search.go b/vendor/github.com/blevesearch/bleve/search/search.go index f8a282d165..8ed23de454 100644 --- a/vendor/github.com/blevesearch/bleve/search/search.go +++ b/vendor/github.com/blevesearch/bleve/search/search.go @@ -17,6 +17,7 @@ package search import ( "fmt" "reflect" + "sort" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/size" @@ -49,6 +50,24 @@ func (ap ArrayPositions) Equals(other ArrayPositions) bool { return true } +func (ap ArrayPositions) Compare(other ArrayPositions) int { + for i, p := range ap { + if i >= len(other) { + return 1 + } + if p < other[i] { + return -1 + } + if p > other[i] { + return 1 + } + } + if len(ap) < len(other) { + return -1 + } + return 0 +} + type Location struct { // Pos is the position of the term within the field, starting at 1 Pos uint64 `json:"pos"` @@ -68,6 +87,46 @@ func (l *Location) Size() int { type Locations []*Location +func (p Locations) Len() int { return len(p) } +func (p Locations) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +func (p Locations) Less(i, j int) bool { + c := p[i].ArrayPositions.Compare(p[j].ArrayPositions) + if c < 0 { + return true + } + if c > 0 { + return false + } + return p[i].Pos < p[j].Pos +} + +func (p Locations) Dedupe() Locations { // destructive! + if len(p) <= 1 { + return p + } + + sort.Sort(p) + + slow := 0 + + for _, pfast := range p { + pslow := p[slow] + if pslow.Pos == pfast.Pos && + pslow.Start == pfast.Start && + pslow.End == pfast.End && + pslow.ArrayPositions.Equals(pfast.ArrayPositions) { + continue // duplicate, so only move fast ahead + } + + slow++ + + p[slow] = pfast + } + + return p[:slow+1] +} + type TermLocationMap map[string]Locations func (t TermLocationMap) AddLocation(term string, location *Location) { @@ -208,6 +267,7 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location { var lastField string var tlm TermLocationMap + var needsDedupe bool for i, ftl := range dm.FieldTermLocations { if lastField != ftl.Field { @@ -231,7 +291,19 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location { loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...) } - tlm[ftl.Term] = append(tlm[ftl.Term], loc) + locs := tlm[ftl.Term] + + // if the loc is before or at the last location, then there + // might be duplicates that need to be deduplicated + if !needsDedupe && len(locs) > 0 { + last := locs[len(locs)-1] + cmp := loc.ArrayPositions.Compare(last.ArrayPositions) + if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) { + needsDedupe = true + } + } + + tlm[ftl.Term] = append(locs, loc) dm.FieldTermLocations[i] = FieldTermLocation{ // recycle Location: Location{ @@ -239,6 +311,14 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location { }, } } + + if needsDedupe { + for _, tlm := range dm.Locations { + for term, locs := range tlm { + tlm[term] = locs.Dedupe() + } + } + } } dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle @@ -279,6 +359,7 @@ type SearcherOptions struct { type SearchContext struct { DocumentMatchPool *DocumentMatchPool Collector Collector + IndexReader index.IndexReader } func (sc *SearchContext) Size() int { diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go index bbbced4795..7f0bfa4246 100644 --- a/vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go +++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go @@ -45,6 +45,7 @@ type BooleanSearcher struct { scorer *scorer.ConjunctionQueryScorer matches []*search.DocumentMatch initialized bool + done bool } func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) { @@ -207,6 +208,10 @@ func (s *BooleanSearcher) SetQueryNorm(qnorm float64) { func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { + if s.done { + return nil, nil + } + if !s.initialized { err := s.initSearchers(ctx) if err != nil { @@ -320,11 +325,19 @@ func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch } } + if rv == nil { + s.done = true + } + return rv, nil } func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) { + if s.done { + return nil, nil + } + if !s.initialized { err := s.initSearchers(ctx) if err != nil { @@ -332,14 +345,8 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter } } - // Advance the searchers only if the currentID cursor is trailing the lookup ID, - // additionally if the mustNotSearcher has been initialized, ensure that the - // cursor used to track the mustNotSearcher (currMustNot, which isn't tracked by - // currentID) is trailing the lookup ID as well - for in the case where currentID - // is nil and currMustNot is already at or ahead of the lookup ID, we MUST NOT - // advance the currentID or the currMustNot cursors. - if (s.currentID == nil || s.currentID.Compare(ID) < 0) && - (s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0) { + // Advance the searcher only if the cursor is trailing the lookup ID + if s.currentID == nil || s.currentID.Compare(ID) < 0 { var err error if s.mustSearcher != nil { if s.currMust != nil { @@ -362,12 +369,17 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter } if s.mustNotSearcher != nil { - if s.currMustNot != nil { - ctx.DocumentMatchPool.Put(s.currMustNot) - } - s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID) - if err != nil { - return nil, err + // Additional check for mustNotSearcher, whose cursor isn't tracked by + // currentID to prevent it from moving when the searcher's tracked + // position is already ahead of or at the requested ID. + if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 { + if s.currMustNot != nil { + ctx.DocumentMatchPool.Put(s.currMustNot) + } + s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID) + if err != nil { + return nil, err + } } } diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go index 289e416782..38cb6467fb 100644 --- a/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go +++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go @@ -22,6 +22,11 @@ import ( "github.com/blevesearch/bleve/search" ) +type filterFunc func(key []byte) bool + +var GeoBitsShift1 = (geo.GeoBits << 1) +var GeoBitsShift1Minus1 = GeoBitsShift1 - 1 + func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, maxLon, maxLat float64, field string, boost float64, options search.SearcherOptions, checkBoundaries bool) ( @@ -36,8 +41,11 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, } // do math to produce list of terms needed for this search - onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, (geo.GeoBits<<1)-1, - minLon, minLat, maxLon, maxLat, checkBoundaries) + onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(0, GeoBitsShift1Minus1, + minLon, minLat, maxLon, maxLat, checkBoundaries, indexReader, field) + if err != nil { + return nil, err + } var onBoundarySearcher search.Searcher dvReader, err := indexReader.DocValueReader([]string{field}) @@ -94,59 +102,123 @@ var geoMaxShift = document.GeoPrecisionStep * 4 var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2 func ComputeGeoRange(term uint64, shift uint, - sminLon, sminLat, smaxLon, smaxLat float64, - checkBoundaries bool) ( - onBoundary [][]byte, notOnBoundary [][]byte) { - split := term | uint64(0x1)<<shift - var upperMax uint64 - if shift < 63 { - upperMax = term | ((uint64(1) << (shift + 1)) - 1) - } else { - upperMax = 0xffffffffffffffff + sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool, + indexReader index.IndexReader, field string) ( + onBoundary [][]byte, notOnBoundary [][]byte, err error) { + preallocBytesLen := 32 + preallocBytes := make([]byte, preallocBytesLen) + + makePrefixCoded := func(in int64, shift uint) (rv numeric.PrefixCoded) { + if len(preallocBytes) <= 0 { + preallocBytesLen = preallocBytesLen * 2 + preallocBytes = make([]byte, preallocBytesLen) + } + + rv, preallocBytes, err = + numeric.NewPrefixCodedInt64Prealloc(in, shift, preallocBytes) + + return rv + } + + var fieldDict index.FieldDictContains + var isIndexed filterFunc + if irr, ok := indexReader.(index.IndexReaderContains); ok { + fieldDict, err = irr.FieldDictContains(field) + if err != nil { + return nil, nil, err + } + + isIndexed = func(term []byte) bool { + found, err := fieldDict.Contains(term) + return err == nil && found + } } - lowerMax := split - 1 - onBoundary, notOnBoundary = relateAndRecurse(term, lowerMax, shift, - sminLon, sminLat, smaxLon, smaxLat, checkBoundaries) - plusOnBoundary, plusNotOnBoundary := relateAndRecurse(split, upperMax, shift, - sminLon, sminLat, smaxLon, smaxLat, checkBoundaries) - onBoundary = append(onBoundary, plusOnBoundary...) - notOnBoundary = append(notOnBoundary, plusNotOnBoundary...) - return -} -func relateAndRecurse(start, end uint64, res uint, - sminLon, sminLat, smaxLon, smaxLat float64, - checkBoundaries bool) ( - onBoundary [][]byte, notOnBoundary [][]byte) { - minLon := geo.MortonUnhashLon(start) - minLat := geo.MortonUnhashLat(start) - maxLon := geo.MortonUnhashLon(end) - maxLat := geo.MortonUnhashLat(end) - - level := ((geo.GeoBits << 1) - res) >> 1 - - within := res%document.GeoPrecisionStep == 0 && - geo.RectWithin(minLon, minLat, maxLon, maxLat, - sminLon, sminLat, smaxLon, smaxLat) - if within || (level == geoDetailLevel && - geo.RectIntersects(minLon, minLat, maxLon, maxLat, - sminLon, sminLat, smaxLon, smaxLat)) { - if !within && checkBoundaries { - return [][]byte{ - numeric.MustNewPrefixCodedInt64(int64(start), res), - }, nil + defer func() { + if fieldDict != nil { + if fd, ok := fieldDict.(index.FieldDict); ok { + cerr := fd.Close() + if cerr != nil { + err = cerr + } + } } - return nil, - [][]byte{ - numeric.MustNewPrefixCodedInt64(int64(start), res), + }() + + if isIndexed == nil { + isIndexed = func(term []byte) bool { + if indexReader != nil { + reader, err := indexReader.TermFieldReader(term, field, false, false, false) + if err != nil || reader == nil { + return false + } + if reader.Count() == 0 { + _ = reader.Close() + return false + } + _ = reader.Close() } - } else if level < geoDetailLevel && - geo.RectIntersects(minLon, minLat, maxLon, maxLat, - sminLon, sminLat, smaxLon, smaxLat) { - return ComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat, - checkBoundaries) + return true + } } - return nil, nil + + var computeGeoRange func(term uint64, shift uint) // declare for recursion + + relateAndRecurse := func(start, end uint64, res, level uint) { + minLon := geo.MortonUnhashLon(start) + minLat := geo.MortonUnhashLat(start) + maxLon := geo.MortonUnhashLon(end) + maxLat := geo.MortonUnhashLat(end) + + within := res%document.GeoPrecisionStep == 0 && + geo.RectWithin(minLon, minLat, maxLon, maxLat, + sminLon, sminLat, smaxLon, smaxLat) + if within || (level == geoDetailLevel && + geo.RectIntersects(minLon, minLat, maxLon, maxLat, + sminLon, sminLat, smaxLon, smaxLat)) { + codedTerm := makePrefixCoded(int64(start), res) + if isIndexed(codedTerm) { + if !within && checkBoundaries { + onBoundary = append(onBoundary, codedTerm) + } else { + notOnBoundary = append(notOnBoundary, codedTerm) + } + } + } else if level < geoDetailLevel && + geo.RectIntersects(minLon, minLat, maxLon, maxLat, + sminLon, sminLat, smaxLon, smaxLat) { + computeGeoRange(start, res-1) + } + } + + computeGeoRange = func(term uint64, shift uint) { + if err != nil { + return + } + + split := term | uint64(0x1)<<shift + var upperMax uint64 + if shift < 63 { + upperMax = term | ((uint64(1) << (shift + 1)) - 1) + } else { + upperMax = 0xffffffffffffffff + } + + lowerMax := split - 1 + + level := (GeoBitsShift1 - shift) >> 1 + + relateAndRecurse(term, lowerMax, shift, level) + relateAndRecurse(split, upperMax, shift, level) + } + + computeGeoRange(term, shift) + + if err != nil { + return nil, nil, err + } + + return onBoundary, notOnBoundary, err } func buildRectFilter(dvReader index.DocValueReader, field string, diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go index a15c194e86..b01ae6a0af 100644 --- a/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go +++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go @@ -34,7 +34,7 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon, // build a searcher for the box boxSearcher, err := boxSearcher(indexReader, topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, - field, boost, options) + field, boost, options, false) if err != nil { return nil, err } @@ -54,19 +54,20 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon, // two boxes joined through a disjunction searcher func boxSearcher(indexReader index.IndexReader, topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64, - field string, boost float64, options search.SearcherOptions) ( + field string, boost float64, options search.SearcherOptions, checkBoundaries bool) ( search.Searcher, error) { if bottomRightLon < topLeftLon { // cross date line, rewrite as two parts leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader, -180, bottomRightLat, bottomRightLon, topLeftLat, - field, boost, options, false) + field, boost, options, checkBoundaries) if err != nil { return nil, err } rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader, - topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, false) + topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, + checkBoundaries) if err != nil { _ = leftSearcher.Close() return nil, err @@ -85,7 +86,7 @@ func boxSearcher(indexReader index.IndexReader, // build geoboundinggox searcher for that bounding box boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader, topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost, - options, false) + options, checkBoundaries) if err != nil { return nil, err } diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go new file mode 100644 index 0000000000..3bb47519d0 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go @@ -0,0 +1,110 @@ +// Copyright (c) 2019 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package searcher + +import ( + "github.com/blevesearch/bleve/geo" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/numeric" + "github.com/blevesearch/bleve/search" + "math" +) + +func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader, + polygon []geo.Point, field string, boost float64, + options search.SearcherOptions) (search.Searcher, error) { + + // compute the bounding box enclosing the polygon + topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err := + geo.BoundingRectangleForPolygon(polygon) + if err != nil { + return nil, err + } + + // build a searcher for the bounding box on the polygon + boxSearcher, err := boxSearcher(indexReader, + topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, + field, boost, options, true) + if err != nil { + return nil, err + } + + dvReader, err := indexReader.DocValueReader([]string{field}) + if err != nil { + return nil, err + } + + // wrap it in a filtering searcher that checks for the polygon inclusivity + return NewFilteringSearcher(boxSearcher, + buildPolygonFilter(dvReader, field, polygon)), nil +} + +const float64EqualityThreshold = 1e-6 + +func almostEqual(a, b float64) bool { + return math.Abs(a-b) <= float64EqualityThreshold +} + +// buildPolygonFilter returns true if the point lies inside the +// polygon. It is based on the ray-casting technique as referred +// here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html +func buildPolygonFilter(dvReader index.DocValueReader, field string, + polygon []geo.Point) FilterFunc { + return func(d *search.DocumentMatch) bool { + var lon, lat float64 + var found bool + + err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) { + // only consider the values which are shifted 0 + prefixCoded := numeric.PrefixCoded(term) + shift, err := prefixCoded.Shift() + if err == nil && shift == 0 { + i64, err := prefixCoded.Int64() + if err == nil { + lon = geo.MortonUnhashLon(uint64(i64)) + lat = geo.MortonUnhashLat(uint64(i64)) + found = true + } + } + }) + + // Note: this approach works for points which are strictly inside + // the polygon. ie it might fail for certain points on the polygon boundaries. + if err == nil && found { + nVertices := len(polygon) + var inside bool + // check for a direct vertex match + if almostEqual(polygon[0].Lat, lat) && + almostEqual(polygon[0].Lon, lon) { + return true + } + + for i := 1; i < nVertices; i++ { + if almostEqual(polygon[i].Lat, lat) && + almostEqual(polygon[i].Lon, lon) { + return true + } + if (polygon[i].Lat > lat) != (polygon[i-1].Lat > lat) && + lon < (polygon[i-1].Lon-polygon[i].Lon)*(lat-polygon[i].Lat)/ + (polygon[i-1].Lat-polygon[i].Lat)+polygon[i].Lon { + inside = !inside + } + } + return inside + + } + return false + } +} diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go index e52ef9a825..83107f0201 100644 --- a/vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go +++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go @@ -53,20 +53,49 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, if !*inclusiveMax && maxInt64 != math.MinInt64 { maxInt64-- } + + var fieldDict index.FieldDictContains + var isIndexed filterFunc + var err error + if irr, ok := indexReader.(index.IndexReaderContains); ok { + fieldDict, err = irr.FieldDictContains(field) + if err != nil { + return nil, err + } + + isIndexed = func(term []byte) bool { + found, err := fieldDict.Contains(term) + return err == nil && found + } + } + // FIXME hard-coded precision, should match field declaration termRanges := splitInt64Range(minInt64, maxInt64, 4) - terms := termRanges.Enumerate() + terms := termRanges.Enumerate(isIndexed) + if fieldDict != nil { + if fd, ok := fieldDict.(index.FieldDict); ok { + cerr := fd.Close() + if cerr != nil { + err = cerr + } + } + } + if len(terms) < 1 { // cannot return MatchNoneSearcher because of interaction with // commit f391b991c20f02681bacd197afc6d8aed444e132 return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options, true) } - var err error - terms, err = filterCandidateTerms(indexReader, terms, field) - if err != nil { - return nil, err + + // for upside_down + if isIndexed == nil { + terms, err = filterCandidateTerms(indexReader, terms, field) + if err != nil { + return nil, err + } } + if tooManyClauses(len(terms)) { return nil, tooManyClausesErr(len(terms)) } @@ -125,11 +154,17 @@ type termRange struct { endTerm []byte } -func (t *termRange) Enumerate() [][]byte { +func (t *termRange) Enumerate(filter filterFunc) [][]byte { var rv [][]byte next := t.startTerm for bytes.Compare(next, t.endTerm) <= 0 { - rv = append(rv, next) + if filter != nil { + if filter(next) { + rv = append(rv, next) + } + } else { + rv = append(rv, next) + } next = incrementBytes(next) } return rv @@ -150,10 +185,10 @@ func incrementBytes(in []byte) []byte { type termRanges []*termRange -func (tr termRanges) Enumerate() [][]byte { +func (tr termRanges) Enumerate(filter filterFunc) [][]byte { var rv [][]byte for _, tri := range tr { - trie := tri.Enumerate() + trie := tri.Enumerate(filter) rv = append(rv, trie...) } return rv diff --git a/vendor/github.com/blevesearch/bleve/search/sort.go b/vendor/github.com/blevesearch/bleve/search/sort.go index e17f707879..6e4ed80fa2 100644 --- a/vendor/github.com/blevesearch/bleve/search/sort.go +++ b/vendor/github.com/blevesearch/bleve/search/sort.go @@ -38,6 +38,8 @@ type SearchSort interface { RequiresScoring() bool RequiresFields() []string + Reverse() + Copy() SearchSort } @@ -293,6 +295,12 @@ func (so SortOrder) CacheDescending() []bool { return rv } +func (so SortOrder) Reverse() { + for _, soi := range so { + soi.Reverse() + } +} + // SortFieldType lets you control some internal sort behavior // normally leaving this to the zero-value of SortFieldAuto is fine type SortFieldType int @@ -492,6 +500,15 @@ func (s *SortField) Copy() SearchSort { return &rv } +func (s *SortField) Reverse() { + s.Desc = !s.Desc + if s.Missing == SortFieldMissingFirst { + s.Missing = SortFieldMissingLast + } else { + s.Missing = SortFieldMissingFirst + } +} + // SortDocID will sort results by the document identifier type SortDocID struct { Desc bool @@ -533,6 +550,10 @@ func (s *SortDocID) Copy() SearchSort { return &rv } +func (s *SortDocID) Reverse() { + s.Desc = !s.Desc +} + // SortScore will sort results by the document match score type SortScore struct { Desc bool @@ -574,6 +595,10 @@ func (s *SortScore) Copy() SearchSort { return &rv } +func (s *SortScore) Reverse() { + s.Desc = !s.Desc +} + var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0)) // NewSortGeoDistance creates SearchSort instance for sorting documents by @@ -705,6 +730,10 @@ func (s *SortGeoDistance) Copy() SearchSort { return &rv } +func (s *SortGeoDistance) Reverse() { + s.Desc = !s.Desc +} + type BytesSlice [][]byte func (p BytesSlice) Len() int { return len(p) } diff --git a/vendor/github.com/blevesearch/segment/.travis.yml b/vendor/github.com/blevesearch/segment/.travis.yml index d032f234ef..b9d58e7c18 100644 --- a/vendor/github.com/blevesearch/segment/.travis.yml +++ b/vendor/github.com/blevesearch/segment/.travis.yml @@ -1,10 +1,9 @@ language: go go: - - 1.4 + - 1.7 script: - - go get golang.org/x/tools/cmd/vet - go get golang.org/x/tools/cmd/cover - go get github.com/mattn/goveralls - go test -v -covermode=count -coverprofile=profile.out diff --git a/vendor/github.com/couchbase/vellum/fst_iterator.go b/vendor/github.com/couchbase/vellum/fst_iterator.go index eb731395b2..d04ad63fb1 100644 --- a/vendor/github.com/couchbase/vellum/fst_iterator.go +++ b/vendor/github.com/couchbase/vellum/fst_iterator.go @@ -18,7 +18,7 @@ import ( "bytes" ) -// Iterator represents a means of visity key/value pairs in order. +// Iterator represents a means of visiting key/value pairs in order. type Iterator interface { // Current() returns the key/value pair currently pointed to. @@ -186,20 +186,29 @@ func (i *FSTIterator) Next() error { } func (i *FSTIterator) next(lastOffset int) error { - // remember where we started + // remember where we started with keysStack in this next() call i.nextStart = append(i.nextStart[:0], i.keysStack...) nextOffset := lastOffset + 1 + allowCompare := false OUTER: for true { curr := i.statesStack[len(i.statesStack)-1] autCurr := i.autStatesStack[len(i.autStatesStack)-1] - if curr.Final() && i.aut.IsMatch(autCurr) && - bytes.Compare(i.keysStack, i.nextStart) > 0 { - // in final state greater than start key - return nil + if curr.Final() && i.aut.IsMatch(autCurr) && allowCompare { + // check to see if new keystack might have gone too far + if i.endKeyExclusive != nil && + bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 { + return ErrIteratorDone + } + + cmp := bytes.Compare(i.keysStack, i.nextStart) + if cmp > 0 { + // in final state greater than start key + return nil + } } numTrans := curr.NumTransitions() @@ -207,8 +216,12 @@ OUTER: INNER: for nextOffset < numTrans { t := curr.TransitionAt(nextOffset) + autNext := i.aut.Accept(autCurr, t) if !i.aut.CanMatch(autNext) { + // TODO: potential optimization to skip nextOffset + // forwards more directly to something that the + // automaton likes rather than a linear scan? nextOffset += 1 continue INNER } @@ -234,30 +247,41 @@ OUTER: i.valsStack = append(i.valsStack, v) i.autStatesStack = append(i.autStatesStack, autNext) - // check to see if new keystack might have gone too far - if i.endKeyExclusive != nil && - bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 { - return ErrIteratorDone - } - nextOffset = 0 + allowCompare = true + continue OUTER } + // no more transitions, so need to backtrack and stack pop if len(i.statesStack) <= 1 { // stack len is 1 (root), can't go back further, we're done break } - // no transitions, and still room to pop - i.statesStack = i.statesStack[:len(i.statesStack)-1] - i.keysStack = i.keysStack[:len(i.keysStack)-1] + // if the top of the stack represents a linear chain of states + // (i.e., a suffix of nodes linked by single transitions), + // then optimize by popping the suffix in one shot without + // going back all the way to the OUTER loop + var popNum int + for j := len(i.statesStack) - 1; j > 0; j-- { + if i.statesStack[j].NumTransitions() != 1 { + popNum = len(i.statesStack) - 1 - j + break + } + } + if popNum < 1 { // always pop at least 1 entry from the stacks + popNum = 1 + } - nextOffset = i.keysPosStack[len(i.keysPosStack)-1] + 1 + nextOffset = i.keysPosStack[len(i.keysPosStack)-popNum] + 1 + allowCompare = false - i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-1] - i.valsStack = i.valsStack[:len(i.valsStack)-1] - i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1] + i.statesStack = i.statesStack[:len(i.statesStack)-popNum] + i.keysStack = i.keysStack[:len(i.keysStack)-popNum] + i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-popNum] + i.valsStack = i.valsStack[:len(i.valsStack)-popNum] + i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-popNum] } return ErrIteratorDone diff --git a/vendor/github.com/couchbase/vellum/go.mod b/vendor/github.com/couchbase/vellum/go.mod new file mode 100644 index 0000000000..0e304159d4 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/go.mod @@ -0,0 +1,10 @@ +module github.com/couchbase/vellum + +go 1.12 + +require ( + github.com/edsrzf/mmap-go v1.0.0 + github.com/spf13/cobra v0.0.5 + github.com/willf/bitset v1.1.10 + golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a // indirect +) diff --git a/vendor/github.com/couchbase/vellum/go.sum b/vendor/github.com/couchbase/vellum/go.sum new file mode 100644 index 0000000000..f14998530d --- /dev/null +++ b/vendor/github.com/couchbase/vellum/go.sum @@ -0,0 +1,39 @@ +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= +github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= +github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= +github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= +github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw= +github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= +github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= +github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= +github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s= +github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= +github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= +github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= +github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= +github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc= +github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= +github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a h1:aYOabOQFp6Vj6W1F80affTUvO9UxmJRx8K0gsfABByQ= +golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/LICENSE b/vendor/github.com/couchbase/vellum/levenshtein/LICENSE index 6b0b1270ff..6b0b1270ff 100644 --- a/vendor/github.com/couchbase/vellum/levenshtein2/LICENSE +++ b/vendor/github.com/couchbase/vellum/levenshtein/LICENSE diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/README.md b/vendor/github.com/couchbase/vellum/levenshtein/README.md index 582b69c77e..582b69c77e 100644 --- a/vendor/github.com/couchbase/vellum/levenshtein2/README.md +++ b/vendor/github.com/couchbase/vellum/levenshtein/README.md diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go b/vendor/github.com/couchbase/vellum/levenshtein/alphabet.go index 4bf64fef2e..ec285129ca 100644 --- a/vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go +++ b/vendor/github.com/couchbase/vellum/levenshtein/alphabet.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package levenshtein2 +package levenshtein import ( "fmt" diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/dfa.go b/vendor/github.com/couchbase/vellum/levenshtein/dfa.go index e82a780a52..d0e43cac24 100644 --- a/vendor/github.com/couchbase/vellum/levenshtein2/dfa.go +++ b/vendor/github.com/couchbase/vellum/levenshtein/dfa.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package levenshtein2 +package levenshtein import ( "fmt" diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/levenshtein.go b/vendor/github.com/couchbase/vellum/levenshtein/levenshtein.go index 1ca0aaa65b..aa652df844 100644 --- a/vendor/github.com/couchbase/vellum/levenshtein2/levenshtein.go +++ b/vendor/github.com/couchbase/vellum/levenshtein/levenshtein.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package levenshtein2 +package levenshtein import "fmt" diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/levenshtein_nfa.go b/vendor/github.com/couchbase/vellum/levenshtein/levenshtein_nfa.go index bed9b99d56..68db5d191c 100644 --- a/vendor/github.com/couchbase/vellum/levenshtein2/levenshtein_nfa.go +++ b/vendor/github.com/couchbase/vellum/levenshtein/levenshtein_nfa.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package levenshtein2 +package levenshtein import ( "math" diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/parametric_dfa.go b/vendor/github.com/couchbase/vellum/levenshtein/parametric_dfa.go index ebd9311959..d08e5da639 100644 --- a/vendor/github.com/couchbase/vellum/levenshtein2/parametric_dfa.go +++ b/vendor/github.com/couchbase/vellum/levenshtein/parametric_dfa.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package levenshtein2 +package levenshtein import ( "crypto/md5" diff --git a/vendor/github.com/couchbase/vellum/regexp/compile.go b/vendor/github.com/couchbase/vellum/regexp/compile.go index 55280164c7..92284d0a87 100644 --- a/vendor/github.com/couchbase/vellum/regexp/compile.go +++ b/vendor/github.com/couchbase/vellum/regexp/compile.go @@ -75,15 +75,23 @@ func (c *compiler) c(ast *syntax.Regexp) (err error) { Rune0: [2]rune{r, r}, } next.Rune = next.Rune0[0:2] - return c.c(&next) - } - c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc( - r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes) - if err != nil { - return err - } - for _, seq := range c.sequences { - c.compileUtf8Ranges(seq) + // try to find more folded runes + for r1 := unicode.SimpleFold(r); r1 != r; r1 = unicode.SimpleFold(r1) { + next.Rune = append(next.Rune, r1, r1) + } + err = c.c(&next) + if err != nil { + return err + } + } else { + c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc( + r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes) + if err != nil { + return err + } + for _, seq := range c.sequences { + c.compileUtf8Ranges(seq) + } } } case syntax.OpAnyChar: diff --git a/vendor/github.com/etcd-io/bbolt/bolt_riscv64.go b/vendor/github.com/etcd-io/bbolt/bolt_riscv64.go new file mode 100644 index 0000000000..07b4b47cdb --- /dev/null +++ b/vendor/github.com/etcd-io/bbolt/bolt_riscv64.go @@ -0,0 +1,12 @@ +// +build riscv64 + +package bbolt + +// maxMapSize represents the largest mmap size supported by Bolt. +const maxMapSize = 0xFFFFFFFFFFFF // 256TB + +// maxAllocSize is the size used when creating array pointers. +const maxAllocSize = 0x7FFFFFFF + +// Are unaligned load/stores broken on this arch? +var brokenUnaligned = true diff --git a/vendor/github.com/etcd-io/bbolt/db.go b/vendor/github.com/etcd-io/bbolt/db.go index 962248c99f..870c8b1cc9 100644 --- a/vendor/github.com/etcd-io/bbolt/db.go +++ b/vendor/github.com/etcd-io/bbolt/db.go @@ -121,6 +121,7 @@ type DB struct { AllocSize int path string + openFile func(string, int, os.FileMode) (*os.File, error) file *os.File dataref []byte // mmap'ed readonly, write throws SEGV data *[maxMapSize]byte @@ -199,10 +200,15 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { db.readOnly = true } + db.openFile = options.OpenFile + if db.openFile == nil { + db.openFile = os.OpenFile + } + // Open data file and separate sync handler for metadata writes. db.path = path var err error - if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil { + if db.file, err = db.openFile(db.path, flag|os.O_CREATE, mode); err != nil { _ = db.close() return nil, err } @@ -1054,6 +1060,10 @@ type Options struct { // set directly on the DB itself when returned from Open(), but this option // is useful in APIs which expose Options but not the underlying DB. NoSync bool + + // OpenFile is used to open files. It defaults to os.OpenFile. This option + // is useful for writing hermetic tests. + OpenFile func(string, int, os.FileMode) (*os.File, error) } // DefaultOptions represent the options used if nil options are passed into Open(). diff --git a/vendor/github.com/etcd-io/bbolt/freelist.go b/vendor/github.com/etcd-io/bbolt/freelist.go index 93fd85d504..587b8cc02d 100644 --- a/vendor/github.com/etcd-io/bbolt/freelist.go +++ b/vendor/github.com/etcd-io/bbolt/freelist.go @@ -349,6 +349,28 @@ func (f *freelist) reload(p *page) { f.readIDs(a) } +// noSyncReload reads the freelist from pgids and filters out pending items. +func (f *freelist) noSyncReload(pgids []pgid) { + // Build a cache of only pending pages. + pcache := make(map[pgid]bool) + for _, txp := range f.pending { + for _, pendingID := range txp.ids { + pcache[pendingID] = true + } + } + + // Check each page in the freelist and build a new available freelist + // with any pages not in the pending lists. + var a []pgid + for _, id := range pgids { + if !pcache[id] { + a = append(a, id) + } + } + + f.readIDs(a) +} + // reindex rebuilds the free cache based on available and pending free lists. func (f *freelist) reindex() { ids := f.getFreePageIDs() diff --git a/vendor/github.com/etcd-io/bbolt/tx.go b/vendor/github.com/etcd-io/bbolt/tx.go index f508641427..2df7688c2f 100644 --- a/vendor/github.com/etcd-io/bbolt/tx.go +++ b/vendor/github.com/etcd-io/bbolt/tx.go @@ -254,17 +254,36 @@ func (tx *Tx) Rollback() error { if tx.db == nil { return ErrTxClosed } - tx.rollback() + tx.nonPhysicalRollback() return nil } +// nonPhysicalRollback is called when user calls Rollback directly, in this case we do not need to reload the free pages from disk. +func (tx *Tx) nonPhysicalRollback() { + if tx.db == nil { + return + } + if tx.writable { + tx.db.freelist.rollback(tx.meta.txid) + } + tx.close() +} + +// rollback needs to reload the free pages from disk in case some system error happens like fsync error. func (tx *Tx) rollback() { if tx.db == nil { return } if tx.writable { tx.db.freelist.rollback(tx.meta.txid) - tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist)) + if !tx.db.hasSyncedFreelist() { + // Reconstruct free page list by scanning the DB to get the whole free page list. + // Note: scaning the whole db is heavy if your db size is large in NoSyncFreeList mode. + tx.db.freelist.noSyncReload(tx.db.freepages()) + } else { + // Read free page list from freelist page. + tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist)) + } } tx.close() } @@ -315,7 +334,7 @@ func (tx *Tx) Copy(w io.Writer) error { // If err == nil then exactly tx.Size() bytes will be written into the writer. func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { // Attempt to open reader with WriteFlag - f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0) + f, err := tx.db.openFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0) if err != nil { return 0, err } @@ -369,7 +388,7 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { // A reader transaction is maintained during the copy so it is safe to continue // using the database while a copy is in progress. func (tx *Tx) CopyFile(path string, mode os.FileMode) error { - f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode) + f, err := tx.db.openFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode) if err != nil { return err } diff --git a/vendor/github.com/glycerine/go-unsnap-stream/LICENSE b/vendor/github.com/glycerine/go-unsnap-stream/LICENSE index 31671ea603..a441b993be 100644 --- a/vendor/github.com/glycerine/go-unsnap-stream/LICENSE +++ b/vendor/github.com/glycerine/go-unsnap-stream/LICENSE @@ -1,3 +1,5 @@ +The MIT license. + Copyright (c) 2014 the go-unsnap-stream authors. Permission is hereby granted, free of charge, to any person obtaining a copy of @@ -7,6 +9,9 @@ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR @@ -14,5 +19,3 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -Permission is explicitly granted to relicense this material under new terms of -your choice when integrating this library with another library or project. diff --git a/vendor/github.com/glycerine/go-unsnap-stream/README.md b/vendor/github.com/glycerine/go-unsnap-stream/README.md index b1b8c74751..932291f74d 100644 --- a/vendor/github.com/glycerine/go-unsnap-stream/README.md +++ b/vendor/github.com/glycerine/go-unsnap-stream/README.md @@ -7,7 +7,9 @@ Note that the *streaming or framing format* for snappy is different from snappy Strangely, though the streaming format was first proposed in Go[1][2], it was never upated, and I could not locate any other library for Go that would handle the streaming/framed snappy format. Hence this implementation of the spec. There is a command line tool[3] that has a C implementation, but this is the only Go implementation that I am aware of. The reference for the framing/streaming spec seems to be the python implementation[4]. -For binary compatibility with the python implementation, one could use the C-snappy compressor/decompressor code directly; using github.com/dgryski/go-csnappy. In fact we did this for a while to verify byte-for-byte compatiblity, as the native Go implementation produces slightly different binary compression (still conformant with the standard of course), which made test-diffs harder, and some have complained about it being slower than the C. +Update to the previous paragraph: Horray! Good news: Thanks to @nigeltao, we have since learned that the [github.com/golang/snappy](https://github.com/golang/snappy) package now provides the snappy streaming format too. Even though the type level descriptions are a little misleading because they don't mention that they are for the stream format, the [snappy package header documentation](https://godoc.org/github.com/golang/snappy) points out that the [snappy.Reader](https://godoc.org/github.com/golang/snappy#Reader) and [snappy.Writer](https://godoc.org/github.com/golang/snappy#Writer) types do indeed provide stream (vs block) handling. Although I have not benchmarked, you should probably prefer that package as it will likely be maintained more than I have time to devote, and also perhaps better integrated with the underlying snappy as they share the same repo. + +For binary compatibility with the [python implementation](https://pypi.python.org/pypi/python-snappy) in [4], one could use the C-snappy compressor/decompressor code directly; using github.com/dgryski/go-csnappy. In fact we did this for a while to verify byte-for-byte compatiblity, as the native Go implementation produces slightly different binary compression (still conformant with the standard of course), which made test-diffs harder, and some have complained about it being slower than the C. However, while the c-snappy was useful for checking compatibility, it introduced dependencies on external C libraries (both the c-snappy library and the C standard library). Our go binary executable that used the go-unsnap-stream library was no longer standalone, and deployment was painful if not impossible if the target had a different C standard library. So we've gone back to using the snappy-go implementation (entirely in Go) for ease of deployment. See the comments at the top of unsnap.go if you wish to use c-snappy instead. @@ -17,4 +19,4 @@ However, while the c-snappy was useful for checking compatibility, it introduced [3] https://github.com/kubo/snzip -[4] https://pypi.python.org/pypi/python-snappy
\ No newline at end of file +[4] https://pypi.python.org/pypi/python-snappy diff --git a/vendor/github.com/glycerine/go-unsnap-stream/unsnap.go b/vendor/github.com/glycerine/go-unsnap-stream/unsnap.go index 8789445c9a..0d33949e96 100644 --- a/vendor/github.com/glycerine/go-unsnap-stream/unsnap.go +++ b/vendor/github.com/glycerine/go-unsnap-stream/unsnap.go @@ -7,6 +7,7 @@ import ( "io" "io/ioutil" "os" + "strings" "hash/crc32" @@ -189,7 +190,12 @@ func UnsnapOneFrame(r io.Reader, encBuf *FixedSizeRingBuf, outDecodedBuf *FixedS err = nil } } else { - panic(err) + // may be an odd already closed... don't panic on that + if strings.Contains(err.Error(), "file already closed") { + err = nil + } else { + panic(err) + } } } diff --git a/vendor/github.com/tinylib/msgp/msgp/errors.go b/vendor/github.com/tinylib/msgp/msgp/errors.go index 8f197267e4..cc78a980c6 100644 --- a/vendor/github.com/tinylib/msgp/msgp/errors.go +++ b/vendor/github.com/tinylib/msgp/msgp/errors.go @@ -5,6 +5,8 @@ import ( "reflect" ) +const resumableDefault = false + var ( // ErrShortBytes is returned when the // slice being decoded is too short to @@ -26,99 +28,240 @@ type Error interface { // Resumable returns whether // or not the error means that // the stream of data is malformed - // and the information is unrecoverable. + // and the information is unrecoverable. Resumable() bool } +// contextError allows msgp Error instances to be enhanced with additional +// context about their origin. +type contextError interface { + Error + + // withContext must not modify the error instance - it must clone and + // return a new error with the context added. + withContext(ctx string) error +} + +// Cause returns the underlying cause of an error that has been wrapped +// with additional context. +func Cause(e error) error { + out := e + if e, ok := e.(errWrapped); ok && e.cause != nil { + out = e.cause + } + return out +} + +// Resumable returns whether or not the error means that the stream of data is +// malformed and the information is unrecoverable. +func Resumable(e error) bool { + if e, ok := e.(Error); ok { + return e.Resumable() + } + return resumableDefault +} + +// WrapError wraps an error with additional context that allows the part of the +// serialized type that caused the problem to be identified. Underlying errors +// can be retrieved using Cause() +// +// The input error is not modified - a new error should be returned. +// +// ErrShortBytes is not wrapped with any context due to backward compatibility +// issues with the public API. +// +func WrapError(err error, ctx ...interface{}) error { + switch e := err.(type) { + case errShort: + return e + case contextError: + return e.withContext(ctxString(ctx)) + default: + return errWrapped{cause: err, ctx: ctxString(ctx)} + } +} + +// ctxString converts the incoming interface{} slice into a single string. +func ctxString(ctx []interface{}) string { + out := "" + for idx, cv := range ctx { + if idx > 0 { + out += "/" + } + out += fmt.Sprintf("%v", cv) + } + return out +} + +func addCtx(ctx, add string) string { + if ctx != "" { + return add + "/" + ctx + } else { + return add + } +} + +// errWrapped allows arbitrary errors passed to WrapError to be enhanced with +// context and unwrapped with Cause() +type errWrapped struct { + cause error + ctx string +} + +func (e errWrapped) Error() string { + if e.ctx != "" { + return fmt.Sprintf("%s at %s", e.cause, e.ctx) + } else { + return e.cause.Error() + } +} + +func (e errWrapped) Resumable() bool { + if e, ok := e.cause.(Error); ok { + return e.Resumable() + } + return resumableDefault +} + type errShort struct{} func (e errShort) Error() string { return "msgp: too few bytes left to read object" } func (e errShort) Resumable() bool { return false } -type errFatal struct{} +type errFatal struct { + ctx string +} + +func (f errFatal) Error() string { + out := "msgp: fatal decoding error (unreachable code)" + if f.ctx != "" { + out += " at " + f.ctx + } + return out +} -func (f errFatal) Error() string { return "msgp: fatal decoding error (unreachable code)" } func (f errFatal) Resumable() bool { return false } +func (f errFatal) withContext(ctx string) error { f.ctx = addCtx(f.ctx, ctx); return f } + // ArrayError is an error returned // when decoding a fix-sized array // of the wrong size type ArrayError struct { Wanted uint32 Got uint32 + ctx string } // Error implements the error interface func (a ArrayError) Error() string { - return fmt.Sprintf("msgp: wanted array of size %d; got %d", a.Wanted, a.Got) + out := fmt.Sprintf("msgp: wanted array of size %d; got %d", a.Wanted, a.Got) + if a.ctx != "" { + out += " at " + a.ctx + } + return out } // Resumable is always 'true' for ArrayErrors func (a ArrayError) Resumable() bool { return true } +func (a ArrayError) withContext(ctx string) error { a.ctx = addCtx(a.ctx, ctx); return a } + // IntOverflow is returned when a call // would downcast an integer to a type // with too few bits to hold its value. type IntOverflow struct { Value int64 // the value of the integer FailedBitsize int // the bit size that the int64 could not fit into + ctx string } // Error implements the error interface func (i IntOverflow) Error() string { - return fmt.Sprintf("msgp: %d overflows int%d", i.Value, i.FailedBitsize) + str := fmt.Sprintf("msgp: %d overflows int%d", i.Value, i.FailedBitsize) + if i.ctx != "" { + str += " at " + i.ctx + } + return str } // Resumable is always 'true' for overflows func (i IntOverflow) Resumable() bool { return true } +func (i IntOverflow) withContext(ctx string) error { i.ctx = addCtx(i.ctx, ctx); return i } + // UintOverflow is returned when a call // would downcast an unsigned integer to a type // with too few bits to hold its value type UintOverflow struct { Value uint64 // value of the uint FailedBitsize int // the bit size that couldn't fit the value + ctx string } // Error implements the error interface func (u UintOverflow) Error() string { - return fmt.Sprintf("msgp: %d overflows uint%d", u.Value, u.FailedBitsize) + str := fmt.Sprintf("msgp: %d overflows uint%d", u.Value, u.FailedBitsize) + if u.ctx != "" { + str += " at " + u.ctx + } + return str } // Resumable is always 'true' for overflows func (u UintOverflow) Resumable() bool { return true } +func (u UintOverflow) withContext(ctx string) error { u.ctx = addCtx(u.ctx, ctx); return u } + // UintBelowZero is returned when a call // would cast a signed integer below zero // to an unsigned integer. type UintBelowZero struct { Value int64 // value of the incoming int + ctx string } // Error implements the error interface func (u UintBelowZero) Error() string { - return fmt.Sprintf("msgp: attempted to cast int %d to unsigned", u.Value) + str := fmt.Sprintf("msgp: attempted to cast int %d to unsigned", u.Value) + if u.ctx != "" { + str += " at " + u.ctx + } + return str } // Resumable is always 'true' for overflows func (u UintBelowZero) Resumable() bool { return true } +func (u UintBelowZero) withContext(ctx string) error { + u.ctx = ctx + return u +} + // A TypeError is returned when a particular // decoding method is unsuitable for decoding // a particular MessagePack value. type TypeError struct { Method Type // Type expected by method Encoded Type // Type actually encoded + + ctx string } // Error implements the error interface func (t TypeError) Error() string { - return fmt.Sprintf("msgp: attempted to decode type %q with method for %q", t.Encoded, t.Method) + out := fmt.Sprintf("msgp: attempted to decode type %q with method for %q", t.Encoded, t.Method) + if t.ctx != "" { + out += " at " + t.ctx + } + return out } // Resumable returns 'true' for TypeErrors func (t TypeError) Resumable() bool { return true } +func (t TypeError) withContext(ctx string) error { t.ctx = addCtx(t.ctx, ctx); return t } + // returns either InvalidPrefixError or // TypeError depending on whether or not // the prefix is recognized @@ -148,10 +291,24 @@ func (i InvalidPrefixError) Resumable() bool { return false } // to a function that takes `interface{}`. type ErrUnsupportedType struct { T reflect.Type + + ctx string } // Error implements error -func (e *ErrUnsupportedType) Error() string { return fmt.Sprintf("msgp: type %q not supported", e.T) } +func (e *ErrUnsupportedType) Error() string { + out := fmt.Sprintf("msgp: type %q not supported", e.T) + if e.ctx != "" { + out += " at " + e.ctx + } + return out +} // Resumable returns 'true' for ErrUnsupportedType func (e *ErrUnsupportedType) Resumable() bool { return true } + +func (e *ErrUnsupportedType) withContext(ctx string) error { + o := *e + o.ctx = addCtx(o.ctx, ctx) + return &o +} diff --git a/vendor/github.com/tinylib/msgp/msgp/write.go b/vendor/github.com/tinylib/msgp/msgp/write.go index da9099c2e9..fb1947c574 100644 --- a/vendor/github.com/tinylib/msgp/msgp/write.go +++ b/vendor/github.com/tinylib/msgp/msgp/write.go @@ -685,7 +685,7 @@ func (mw *Writer) WriteIntf(v interface{}) error { case reflect.Map: return mw.writeMap(val) } - return &ErrUnsupportedType{val.Type()} + return &ErrUnsupportedType{T: val.Type()} } func (mw *Writer) writeMap(v reflect.Value) (err error) { diff --git a/vendor/github.com/willf/bitset/.travis.yml b/vendor/github.com/willf/bitset/.travis.yml index 9413dc497a..094aa5ce07 100644 --- a/vendor/github.com/willf/bitset/.travis.yml +++ b/vendor/github.com/willf/bitset/.travis.yml @@ -12,8 +12,7 @@ branches: - travis go: - - 1.8 - - 1.9 + - "1.11.x" - tip matrix: @@ -35,4 +34,4 @@ after_failure: - cat ./target/test/report.xml after_success: - - if [ "$TRAVIS_GO_VERSION" = "1.8" ]; then $HOME/gopath/bin/goveralls -covermode=count -coverprofile=target/report/coverage.out -service=travis-ci; fi; + - if [ "$TRAVIS_GO_VERSION" = "1.11.1" ]; then $HOME/gopath/bin/goveralls -covermode=count -coverprofile=target/report/coverage.out -service=travis-ci; fi; diff --git a/vendor/github.com/willf/bitset/Makefile b/vendor/github.com/willf/bitset/Makefile index e3fd5479d2..ad71f6a4aa 100644 --- a/vendor/github.com/willf/bitset/Makefile +++ b/vendor/github.com/willf/bitset/Makefile @@ -158,7 +158,7 @@ gosimple: # AST scanner astscan: @mkdir -p target/report - GOPATH=$(GOPATH) gas .//*.go | tee target/report/astscan.txt ; test $${PIPESTATUS[0]} -eq 0 + GOPATH=$(GOPATH) gosec . | tee target/report/astscan.txt ; test $${PIPESTATUS[0]} -eq 0 || true # Generate source docs docs: @@ -168,14 +168,14 @@ docs: @echo '<html><head><meta http-equiv="refresh" content="0;./127.0.0.1:6060/pkg/'${CVSPATH}'/'${PROJECT}'/index.html"/></head><a href="./127.0.0.1:6060/pkg/'${CVSPATH}'/'${PROJECT}'/index.html">'${PKGNAME}' Documentation ...</a></html>' > target/docs/index.html # Alias to run all quality-assurance checks -qa: fmtcheck test vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple +qa: fmtcheck test vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple astscan # --- INSTALL --- # Get the dependencies deps: GOPATH=$(GOPATH) go get ./... - GOPATH=$(GOPATH) go get github.com/golang/lint/golint + GOPATH=$(GOPATH) go get golang.org/x/lint/golint GOPATH=$(GOPATH) go get github.com/jstemmer/go-junit-report GOPATH=$(GOPATH) go get github.com/axw/gocov/gocov GOPATH=$(GOPATH) go get github.com/fzipp/gocyclo @@ -185,7 +185,7 @@ deps: GOPATH=$(GOPATH) go get github.com/opennota/check/cmd/varcheck GOPATH=$(GOPATH) go get github.com/kisielk/errcheck GOPATH=$(GOPATH) go get honnef.co/go/tools/cmd/gosimple - GOPATH=$(GOPATH) go get github.com/GoASTScanner/gas + GOPATH=$(GOPATH) go get github.com/securego/gosec/cmd/gosec/... # Remove any build artifact clean: diff --git a/vendor/github.com/willf/bitset/VERSION b/vendor/github.com/willf/bitset/VERSION deleted file mode 100644 index 781dcb07cd..0000000000 --- a/vendor/github.com/willf/bitset/VERSION +++ /dev/null @@ -1 +0,0 @@ -1.1.3 diff --git a/vendor/github.com/willf/bitset/bitset.go b/vendor/github.com/willf/bitset/bitset.go index 65ef6851d1..32044f5c83 100644 --- a/vendor/github.com/willf/bitset/bitset.go +++ b/vendor/github.com/willf/bitset/bitset.go @@ -58,6 +58,18 @@ const log2WordSize = uint(6) // allBits has every bit set const allBits uint64 = 0xffffffffffffffff +// default binary BigEndian +var binaryOrder binary.ByteOrder = binary.BigEndian + +// default json encoding base64.URLEncoding +var base64Encoding = base64.URLEncoding + +// Base64StdEncoding Marshal/Unmarshal BitSet with base64.StdEncoding(Default: base64.URLEncoding) +func Base64StdEncoding() { base64Encoding = base64.StdEncoding } + +// LittleEndian Marshal/Unmarshal Binary as Little Endian(Default: binary.BigEndian) +func LittleEndian() { binaryOrder = binary.LittleEndian } + // A BitSet is a set of bits. The zero value of a BitSet is an empty set of length 0. type BitSet struct { length uint @@ -180,6 +192,70 @@ func (b *BitSet) Flip(i uint) *BitSet { return b } +// Shrink shrinks BitSet to desired length in bits. It clears all bits > length +// and reduces the size and length of the set. +// +// A new slice is allocated to store the new bits, so you may see an increase in +// memory usage until the GC runs. Normally this should not be a problem, but if you +// have an extremely large BitSet its important to understand that the old BitSet will +// remain in memory until the GC frees it. +func (b *BitSet) Shrink(length uint) *BitSet { + idx := wordsNeeded(length + 1) + if idx > len(b.set) { + return b + } + shrunk := make([]uint64, idx) + copy(shrunk, b.set[:idx]) + b.set = shrunk + b.length = length + 1 + b.set[idx-1] &= (allBits >> (uint64(64) - uint64(length&(wordSize-1)) - 1)) + return b +} + +// InsertAt takes an index which indicates where a bit should be +// inserted. Then it shifts all the bits in the set to the left by 1, starting +// from the given index position, and sets the index position to 0. +// +// Depending on the size of your BitSet, and where you are inserting the new entry, +// this method could be extremely slow and in some cases might cause the entire BitSet +// to be recopied. +func (b *BitSet) InsertAt(idx uint) *BitSet { + insertAtElement := (idx >> log2WordSize) + + // if length of set is a multiple of wordSize we need to allocate more space first + if b.isLenExactMultiple() { + b.set = append(b.set, uint64(0)) + } + + var i uint + for i = uint(len(b.set) - 1); i > insertAtElement; i-- { + // all elements above the position where we want to insert can simply by shifted + b.set[i] <<= 1 + + // we take the most significant bit of the previous element and set it as + // the least significant bit of the current element + b.set[i] |= (b.set[i-1] & 0x8000000000000000) >> 63 + } + + // generate a mask to extract the data that we need to shift left + // within the element where we insert a bit + dataMask := ^(uint64(1)<<uint64(idx&(wordSize-1)) - 1) + + // extract that data that we'll shift + data := b.set[i] & dataMask + + // set the positions of the data mask to 0 in the element where we insert + b.set[i] &= ^dataMask + + // shift data mask to the left and insert its data to the slice element + b.set[i] |= data << 1 + + // add 1 to length of BitSet + b.length++ + + return b +} + // String creates a string representation of the Bitmap func (b *BitSet) String() string { // follows code from https://github.com/RoaringBitmap/roaring @@ -205,6 +281,43 @@ func (b *BitSet) String() string { return buffer.String() } +// DeleteAt deletes the bit at the given index position from +// within the bitset +// All the bits residing on the left of the deleted bit get +// shifted right by 1 +// The running time of this operation may potentially be +// relatively slow, O(length) +func (b *BitSet) DeleteAt(i uint) *BitSet { + // the index of the slice element where we'll delete a bit + deleteAtElement := i >> log2WordSize + + // generate a mask for the data that needs to be shifted right + // within that slice element that gets modified + dataMask := ^((uint64(1) << (i & (wordSize - 1))) - 1) + + // extract the data that we'll shift right from the slice element + data := b.set[deleteAtElement] & dataMask + + // set the masked area to 0 while leaving the rest as it is + b.set[deleteAtElement] &= ^dataMask + + // shift the previously extracted data to the right and then + // set it in the previously masked area + b.set[deleteAtElement] |= (data >> 1) & dataMask + + // loop over all the consecutive slice elements to copy each + // lowest bit into the highest position of the previous element, + // then shift the entire content to the right by 1 + for i := int(deleteAtElement) + 1; i < len(b.set); i++ { + b.set[i-1] |= (b.set[i] & 1) << 63 + b.set[i] >>= 1 + } + + b.length = b.length - 1 + + return b +} + // NextSet returns the next bit set from the specified index, // including possibly the current index // along with an error code (true = valid, false = no set bit found) @@ -234,7 +347,7 @@ func (b *BitSet) NextSet(i uint) (uint, bool) { // including possibly the current index and up to cap(buffer). // If the returned slice has len zero, then no more set bits were found // -// buffer := make([]uint, 256) +// buffer := make([]uint, 256) // this should be reused // j := uint(0) // j, buffer = bitmap.NextSetMany(j, buffer) // for ; len(buffer) > 0; j, buffer = bitmap.NextSetMany(j,buffer) { @@ -245,39 +358,44 @@ func (b *BitSet) NextSet(i uint) (uint, bool) { // } // func (b *BitSet) NextSetMany(i uint, buffer []uint) (uint, []uint) { - myanswer := buffer[:0] - + myanswer := buffer + capacity := cap(buffer) x := int(i >> log2WordSize) - if x >= len(b.set) { - return 0, myanswer + if x >= len(b.set) || capacity == 0 { + return 0, myanswer[:0] + } + skip := i & (wordSize - 1) + word := b.set[x] >> skip + myanswer = myanswer[:capacity] + size := int(0) + for word != 0 { + r := trailingZeroes64(word) + t := word & ((^word) + 1) + myanswer[size] = r + i + size++ + if size == capacity { + goto End + } + word = word ^ t } - w := b.set[x] - w = w >> (i & (wordSize - 1)) - base := uint(x << 6) - capacity := cap(buffer) - for len(myanswer) < capacity { - for w != 0 { - t := w & ((^w) + 1) - r := trailingZeroes64(w) - myanswer = append(myanswer, r+base) - if len(myanswer) == capacity { + x++ + for idx, word := range b.set[x:] { + for word != 0 { + r := trailingZeroes64(word) + t := word & ((^word) + 1) + myanswer[size] = r + (uint(x+idx) << 6) + size++ + if size == capacity { goto End } - w = w ^ t - } - x += 1 - if x == len(b.set) { - break + word = word ^ t } - base += 64 - w = b.set[x] } End: - if len(myanswer) > 0 { - return myanswer[len(myanswer)-1], myanswer - } else { - return 0, myanswer + if size > 0 { + return myanswer[size-1], myanswer[:size] } + return 0, myanswer[:0] } // NextClear returns the next clear bit from the specified index, @@ -654,7 +772,7 @@ func (b *BitSet) DumpAsBits() string { for ; i >= 0; i-- { fmt.Fprintf(buffer, "%064b.", b.set[i]) } - return string(buffer.Bytes()) + return buffer.String() } // BinaryStorageSize returns the binary storage requirements @@ -667,13 +785,13 @@ func (b *BitSet) WriteTo(stream io.Writer) (int64, error) { length := uint64(b.length) // Write length - err := binary.Write(stream, binary.BigEndian, length) + err := binary.Write(stream, binaryOrder, length) if err != nil { return 0, err } // Write set - err = binary.Write(stream, binary.BigEndian, b.set) + err = binary.Write(stream, binaryOrder, b.set) return int64(b.BinaryStorageSize()), err } @@ -682,7 +800,7 @@ func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) { var length uint64 // Read length first - err := binary.Read(stream, binary.BigEndian, &length) + err := binary.Read(stream, binaryOrder, &length) if err != nil { return 0, err } @@ -693,7 +811,7 @@ func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) { } // Read remaining bytes as set - err = binary.Read(stream, binary.BigEndian, newset.set) + err = binary.Read(stream, binaryOrder, newset.set) if err != nil { return 0, err } @@ -736,7 +854,7 @@ func (b *BitSet) MarshalJSON() ([]byte, error) { } // URLEncode all bytes - return json.Marshal(base64.URLEncoding.EncodeToString(buffer.Bytes())) + return json.Marshal(base64Encoding.EncodeToString(buffer.Bytes())) } // UnmarshalJSON unmarshals a BitSet from JSON created using MarshalJSON @@ -749,7 +867,7 @@ func (b *BitSet) UnmarshalJSON(data []byte) error { } // URLDecode string - buf, err := base64.URLEncoding.DecodeString(s) + buf, err := base64Encoding.DecodeString(s) if err != nil { return err } |