diff options
Diffstat (limited to 'vendor/github.com')
183 files changed, 39402 insertions, 43 deletions
diff --git a/vendor/github.com/RoaringBitmap/roaring/AUTHORS b/vendor/github.com/RoaringBitmap/roaring/AUTHORS new file mode 100644 index 0000000000..08c074047f --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/AUTHORS @@ -0,0 +1,10 @@ +# This is the official list of roaring authors for copyright purposes. + +Todd Gruben (@tgruben), +Daniel Lemire (@lemire), +Elliot Murphy (@statik), +Bob Potter (@bpot), +Tyson Maly (@tvmaly), +Will Glynn (@willglynn), +Brent Pedersen (@brentp) +Maciej Biłas (@maciej) diff --git a/vendor/github.com/RoaringBitmap/roaring/CONTRIBUTORS b/vendor/github.com/RoaringBitmap/roaring/CONTRIBUTORS new file mode 100644 index 0000000000..70b4735dad --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/CONTRIBUTORS @@ -0,0 +1,12 @@ +# This is the official list of roaring contributors + +Todd Gruben (@tgruben), +Daniel Lemire (@lemire), +Elliot Murphy (@statik), +Bob Potter (@bpot), +Tyson Maly (@tvmaly), +Will Glynn (@willglynn), +Brent Pedersen (@brentp), +Jason E. Aten (@glycerine), +Vali Malinoiu (@0x4139), +Forud Ghafouri (@fzerorubigd)
\ No newline at end of file diff --git a/vendor/github.com/RoaringBitmap/roaring/LICENSE b/vendor/github.com/RoaringBitmap/roaring/LICENSE new file mode 100644 index 0000000000..aff5f9999b --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2016 by the authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/RoaringBitmap/roaring/LICENSE-2.0.txt b/vendor/github.com/RoaringBitmap/roaring/LICENSE-2.0.txt new file mode 100644 index 0000000000..aff5f9999b --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/LICENSE-2.0.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2016 by the authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/RoaringBitmap/roaring/Makefile b/vendor/github.com/RoaringBitmap/roaring/Makefile new file mode 100644 index 0000000000..d5259b4c9e --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/Makefile @@ -0,0 +1,121 @@ +.PHONY: help all test format fmtcheck vet lint qa deps clean nuke rle backrle ser fetch-real-roaring-datasets + + + + + + + + +# Display general help about this command +help: + @echo "" + @echo "The following commands are available:" + @echo "" + @echo " make qa : Run all the tests" + @echo " make test : Run the unit tests" + @echo "" + @echo " make format : Format the source code" + @echo " make fmtcheck : Check if the source code has been formatted" + @echo " make vet : Check for suspicious constructs" + @echo " make lint : Check for style errors" + @echo "" + @echo " make deps : Get the dependencies" + @echo " make clean : Remove any build artifact" + @echo " make nuke : Deletes any intermediate file" + @echo "" + @echo " make fuzz-smat : Fuzzy testing with smat" + @echo " make fuzz-stream : Fuzzy testing with stream deserialization" + @echo " make fuzz-buffer : Fuzzy testing with buffer deserialization" + @echo "" + +# Alias for help target +all: help +test: + go test + go test -race -run TestConcurrent* +# Format the source code +format: + @find ./ -type f -name "*.go" -exec gofmt -w {} \; + +# Check if the source code has been formatted +fmtcheck: + @mkdir -p target + @find ./ -type f -name "*.go" -exec gofmt -d {} \; | tee target/format.diff + @test ! -s target/format.diff || { echo "ERROR: the source code has not been formatted - please use 'make format' or 'gofmt'"; exit 1; } + +# Check for syntax errors +vet: + GOPATH=$(GOPATH) go vet ./... + +# Check for style errors +lint: + GOPATH=$(GOPATH) PATH=$(GOPATH)/bin:$(PATH) golint ./... + + + + + +# Alias to run all quality-assurance checks +qa: fmtcheck test vet lint + +# --- INSTALL --- + +# Get the dependencies +deps: + GOPATH=$(GOPATH) go get github.com/smartystreets/goconvey/convey + GOPATH=$(GOPATH) go get github.com/willf/bitset + GOPATH=$(GOPATH) go get github.com/golang/lint/golint + GOPATH=$(GOPATH) go get github.com/mschoch/smat + GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz + GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz-build + GOPATH=$(GOPATH) go get github.com/glycerine/go-unsnap-stream + GOPATH=$(GOPATH) go get github.com/philhofer/fwd + GOPATH=$(GOPATH) go get github.com/jtolds/gls + +fuzz-smat: + go test -tags=gofuzz -run=TestGenerateSmatCorpus + go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring + go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200 + + +fuzz-stream: + go-fuzz-build -func FuzzSerializationStream github.com/RoaringBitmap/roaring + go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200 + + +fuzz-buffer: + go-fuzz-build -func FuzzSerializationBuffer github.com/RoaringBitmap/roaring + go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200 + +# Remove any build artifact +clean: + GOPATH=$(GOPATH) go clean ./... + +# Deletes any intermediate file +nuke: + rm -rf ./target + GOPATH=$(GOPATH) go clean -i ./... + +rle: + cp rle.go rle16.go + perl -pi -e 's/32/16/g' rle16.go + cp rle_test.go rle16_test.go + perl -pi -e 's/32/16/g' rle16_test.go + +backrle: + cp rle16.go rle.go + perl -pi -e 's/16/32/g' rle.go + perl -pi -e 's/2032/2016/g' rle.go + +ser: rle + go generate + +cover: + go test -coverprofile=coverage.out + go tool cover -html=coverage.out + +fetch-real-roaring-datasets: + # pull github.com/RoaringBitmap/real-roaring-datasets -> testdata/real-roaring-datasets + git submodule init + git submodule update diff --git a/vendor/github.com/RoaringBitmap/roaring/README.md b/vendor/github.com/RoaringBitmap/roaring/README.md new file mode 100644 index 0000000000..2c096ce8e6 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/README.md @@ -0,0 +1,246 @@ +roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https://travis-ci.org/RoaringBitmap/roaring) [![Coverage Status](https://coveralls.io/repos/github/RoaringBitmap/roaring/badge.svg?branch=master)](https://coveralls.io/github/RoaringBitmap/roaring?branch=master) [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring) +============= + +This is a go version of the Roaring bitmap data structure. + + + +Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and +[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. + +[lucene]: https://lucene.apache.org/ +[solr]: https://lucene.apache.org/solr/ +[elasticsearch]: https://www.elastic.co/products/elasticsearch +[druid]: http://druid.io/ +[spark]: https://spark.apache.org/ +[opensearchserver]: http://www.opensearchserver.com +[cloudtorrent]: https://github.com/jpillora/cloud-torrent +[whoosh]: https://bitbucket.org/mchaput/whoosh/wiki/Home +[pilosa]: https://www.pilosa.com/ +[kylin]: http://kylin.apache.org/ +[pinot]: http://github.com/linkedin/pinot/wiki +[vsts]: https://www.visualstudio.com/team-services/ +[atlas]: https://github.com/Netflix/atlas + +Roaring bitmaps are found to work well in many important applications: + +> Use Roaring for bitmap compression whenever possible. Do not use other bitmap compression methods ([Wang et al., SIGMOD 2017](http://db.ucsd.edu/wp-content/uploads/2017/03/sidm338-wangA.pdf)) + + +The ``roaring`` Go library is used by +* [Cloud Torrent](https://github.com/jpillora/cloud-torrent): a self-hosted remote torrent client +* [runv](https://github.com/hyperhq/runv): an Hypervisor-based runtime for the Open Containers Initiative +* [InfluxDB](https://www.influxdata.com) +* [Pilosa](https://www.pilosa.com/) +* [Bleve](http://www.blevesearch.com) + +This library is used in production in several systems, it is part of the [Awesome Go collection](https://awesome-go.com). + + +There are also [Java](https://github.com/RoaringBitmap/RoaringBitmap) and [C/C++](https://github.com/RoaringBitmap/CRoaring) versions. The Java, C, C++ and Go version are binary compatible: e.g, you can save bitmaps +from a Java program and load them back in Go, and vice versa. We have a [format specification](https://github.com/RoaringBitmap/RoaringFormatSpec). + + +This code is licensed under Apache License, Version 2.0 (ASL2.0). + +Copyright 2016-... by the authors. + + +### References + +- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience 48 (4), 2018 [arXiv:1709.07821](https://arxiv.org/abs/1709.07821) +- Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin, +Better bitmap performance with Roaring bitmaps, +Software: Practice and Experience 46 (5), 2016. +http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/realroaring2014.html +- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience 46 (11), 2016. http://arxiv.org/abs/1603.06549 + + +### Dependencies + +Dependencies are fetched automatically by giving the `-t` flag to `go get`. + +they include + - github.com/smartystreets/goconvey/convey + - github.com/willf/bitset + - github.com/mschoch/smat + - github.com/glycerine/go-unsnap-stream + - github.com/philhofer/fwd + - github.com/jtolds/gls + +Note that the smat library requires Go 1.6 or better. + +#### Installation + + - go get -t github.com/RoaringBitmap/roaring + + +### Example + +Here is a simplified but complete example: + +```go +package main + +import ( + "fmt" + "github.com/RoaringBitmap/roaring" + "bytes" +) + + +func main() { + // example inspired by https://github.com/fzandona/goroar + fmt.Println("==roaring==") + rb1 := roaring.BitmapOf(1, 2, 3, 4, 5, 100, 1000) + fmt.Println(rb1.String()) + + rb2 := roaring.BitmapOf(3, 4, 1000) + fmt.Println(rb2.String()) + + rb3 := roaring.New() + fmt.Println(rb3.String()) + + fmt.Println("Cardinality: ", rb1.GetCardinality()) + + fmt.Println("Contains 3? ", rb1.Contains(3)) + + rb1.And(rb2) + + rb3.Add(1) + rb3.Add(5) + + rb3.Or(rb1) + + // computes union of the three bitmaps in parallel using 4 workers + roaring.ParOr(4, rb1, rb2, rb3) + // computes intersection of the three bitmaps in parallel using 4 workers + roaring.ParAnd(4, rb1, rb2, rb3) + + + // prints 1, 3, 4, 5, 1000 + i := rb3.Iterator() + for i.HasNext() { + fmt.Println(i.Next()) + } + fmt.Println() + + // next we include an example of serialization + buf := new(bytes.Buffer) + rb1.WriteTo(buf) // we omit error handling + newrb:= roaring.New() + newrb.ReadFrom(buf) + if rb1.Equals(newrb) { + fmt.Println("I wrote the content to a byte stream and read it back.") + } +} +``` + +If you wish to use serialization and handle errors, you might want to +consider the following sample of code: + +```go + rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000) + buf := new(bytes.Buffer) + size,err:=rb.WriteTo(buf) + if err != nil { + t.Errorf("Failed writing") + } + newrb:= New() + size,err=newrb.ReadFrom(buf) + if err != nil { + t.Errorf("Failed reading") + } + if ! rb.Equals(newrb) { + t.Errorf("Cannot retrieve serialized version") + } +``` + +Given N integers in [0,x), then the serialized size in bytes of +a Roaring bitmap should never exceed this bound: + +`` 8 + 9 * ((long)x+65535)/65536 + 2 * N `` + +That is, given a fixed overhead for the universe size (x), Roaring +bitmaps never use more than 2 bytes per integer. You can call +``BoundSerializedSizeInBytes`` for a more precise estimate. + + +### Documentation + +Current documentation is available at http://godoc.org/github.com/RoaringBitmap/roaring + +### Goroutine safety + +In general, it should not generally be considered safe to access +the same bitmaps using different goroutines--they are left +unsynchronized for performance. Should you want to access +a Bitmap from more than one goroutine, you should +provide synchronization. Typically this is done by using channels to pass +the *Bitmap around (in Go style; so there is only ever one owner), +or by using `sync.Mutex` to serialize operations on Bitmaps. + +### Coverage + +We test our software. For a report on our test coverage, see + +https://coveralls.io/github/RoaringBitmap/roaring?branch=master + +### Benchmark + +Type + + go test -bench Benchmark -run - + +To run benchmarks on [Real Roaring Datasets](https://github.com/RoaringBitmap/real-roaring-datasets) +run the following: + +```sh +go get github.com/RoaringBitmap/real-roaring-datasets +BENCH_REAL_DATA=1 go test -bench BenchmarkRealData -run - +``` + +### Iterative use + +You can use roaring with gore: + +- go get -u github.com/motemen/gore +- Make sure that ``$GOPATH/bin`` is in your ``$PATH``. +- go get github/RoaringBitmap/roaring + +```go +$ gore +gore version 0.2.6 :help for help +gore> :import github.com/RoaringBitmap/roaring +gore> x:=roaring.New() +gore> x.Add(1) +gore> x.String() +"{1}" +``` + + +### Fuzzy testing + +You can help us test further the library with fuzzy testing: + + go get github.com/dvyukov/go-fuzz/go-fuzz + go get github.com/dvyukov/go-fuzz/go-fuzz-build + go test -tags=gofuzz -run=TestGenerateSmatCorpus + go-fuzz-build github.com/RoaringBitmap/roaring + go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200 + +Let it run, and if the # of crashers is > 0, check out the reports in +the workdir where you should be able to find the panic goroutine stack +traces. + +### Alternative in Go + +There is a Go version wrapping the C/C++ implementation https://github.com/RoaringBitmap/gocroaring + +For an alternative implementation in Go, see https://github.com/fzandona/goroar +The two versions were written independently. + + +### Mailing list/discussion group + +https://groups.google.com/forum/#!forum/roaring-bitmaps diff --git a/vendor/github.com/RoaringBitmap/roaring/arraycontainer.go b/vendor/github.com/RoaringBitmap/roaring/arraycontainer.go new file mode 100644 index 0000000000..c395868210 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/arraycontainer.go @@ -0,0 +1,960 @@ +package roaring + +import ( + "fmt" +) + +//go:generate msgp -unexported + +type arrayContainer struct { + content []uint16 +} + +func (ac *arrayContainer) String() string { + s := "{" + for it := ac.getShortIterator(); it.hasNext(); { + s += fmt.Sprintf("%v, ", it.next()) + } + return s + "}" +} + +func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uint32) { + for k := 0; k < len(ac.content); k++ { + x[k+i] = uint32(ac.content[k]) | mask + } +} + +func (ac *arrayContainer) getShortIterator() shortIterable { + return &shortIterator{ac.content, 0} +} + +func (ac *arrayContainer) getManyIterator() manyIterable { + return &manyIterator{ac.content, 0} +} + +func (ac *arrayContainer) minimum() uint16 { + return ac.content[0] // assume not empty +} + +func (ac *arrayContainer) maximum() uint16 { + return ac.content[len(ac.content)-1] // assume not empty +} + +func (ac *arrayContainer) getSizeInBytes() int { + return ac.getCardinality() * 2 +} + +func (ac *arrayContainer) serializedSizeInBytes() int { + return ac.getCardinality() * 2 +} + +func arrayContainerSizeInBytes(card int) int { + return card * 2 +} + +// add the values in the range [firstOfRange,endx) +func (ac *arrayContainer) iaddRange(firstOfRange, endx int) container { + if firstOfRange >= endx { + return ac + } + indexstart := binarySearch(ac.content, uint16(firstOfRange)) + if indexstart < 0 { + indexstart = -indexstart - 1 + } + indexend := binarySearch(ac.content, uint16(endx-1)) + if indexend < 0 { + indexend = -indexend - 1 + } else { + indexend++ + } + rangelength := endx - firstOfRange + newcardinality := indexstart + (ac.getCardinality() - indexend) + rangelength + if newcardinality > arrayDefaultMaxSize { + a := ac.toBitmapContainer() + return a.iaddRange(firstOfRange, endx) + } + if cap(ac.content) < newcardinality { + tmp := make([]uint16, newcardinality, newcardinality) + copy(tmp[:indexstart], ac.content[:indexstart]) + copy(tmp[indexstart+rangelength:], ac.content[indexend:]) + + ac.content = tmp + } else { + ac.content = ac.content[:newcardinality] + copy(ac.content[indexstart+rangelength:], ac.content[indexend:]) + + } + for k := 0; k < rangelength; k++ { + ac.content[k+indexstart] = uint16(firstOfRange + k) + } + return ac +} + +// remove the values in the range [firstOfRange,endx) +func (ac *arrayContainer) iremoveRange(firstOfRange, endx int) container { + if firstOfRange >= endx { + return ac + } + indexstart := binarySearch(ac.content, uint16(firstOfRange)) + if indexstart < 0 { + indexstart = -indexstart - 1 + } + indexend := binarySearch(ac.content, uint16(endx-1)) + if indexend < 0 { + indexend = -indexend - 1 + } else { + indexend++ + } + rangelength := indexend - indexstart + answer := ac + copy(answer.content[indexstart:], ac.content[indexstart+rangelength:]) + answer.content = answer.content[:ac.getCardinality()-rangelength] + return answer +} + +// flip the values in the range [firstOfRange,endx) +func (ac *arrayContainer) not(firstOfRange, endx int) container { + if firstOfRange >= endx { + //p("arrayContainer.not(): exiting early with ac.clone()") + return ac.clone() + } + return ac.notClose(firstOfRange, endx-1) // remove everything in [firstOfRange,endx-1] +} + +// flip the values in the range [firstOfRange,lastOfRange] +func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container { + if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange] + //p("arrayContainer.notClose(): exiting early with ac.clone()") + return ac.clone() + } + + // determine the span of array indices to be affected^M + startIndex := binarySearch(ac.content, uint16(firstOfRange)) + //p("startIndex=%v", startIndex) + if startIndex < 0 { + startIndex = -startIndex - 1 + } + lastIndex := binarySearch(ac.content, uint16(lastOfRange)) + //p("lastIndex=%v", lastIndex) + if lastIndex < 0 { + lastIndex = -lastIndex - 2 + } + currentValuesInRange := lastIndex - startIndex + 1 + spanToBeFlipped := lastOfRange - firstOfRange + 1 + newValuesInRange := spanToBeFlipped - currentValuesInRange + cardinalityChange := newValuesInRange - currentValuesInRange + newCardinality := len(ac.content) + cardinalityChange + //p("new card is %v", newCardinality) + if newCardinality > arrayDefaultMaxSize { + //p("new card over arrayDefaultMaxSize, so returning bitmap") + return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1) + } + answer := newArrayContainer() + answer.content = make([]uint16, newCardinality, newCardinality) //a hack for sure + + copy(answer.content, ac.content[:startIndex]) + outPos := startIndex + inPos := startIndex + valInRange := firstOfRange + for ; valInRange <= lastOfRange && inPos <= lastIndex; valInRange++ { + if uint16(valInRange) != ac.content[inPos] { + answer.content[outPos] = uint16(valInRange) + outPos++ + } else { + inPos++ + } + } + + for ; valInRange <= lastOfRange; valInRange++ { + answer.content[outPos] = uint16(valInRange) + outPos++ + } + + for i := lastIndex + 1; i < len(ac.content); i++ { + answer.content[outPos] = ac.content[i] + outPos++ + } + answer.content = answer.content[:newCardinality] + return answer + +} + +func (ac *arrayContainer) equals(o container) bool { + + srb, ok := o.(*arrayContainer) + if ok { + // Check if the containers are the same object. + if ac == srb { + return true + } + + if len(srb.content) != len(ac.content) { + return false + } + + for i, v := range ac.content { + if v != srb.content[i] { + return false + } + } + return true + } + + // use generic comparison + bCard := o.getCardinality() + aCard := ac.getCardinality() + if bCard != aCard { + return false + } + + ait := ac.getShortIterator() + bit := o.getShortIterator() + for ait.hasNext() { + if bit.next() != ait.next() { + return false + } + } + return true +} + +func (ac *arrayContainer) toBitmapContainer() *bitmapContainer { + bc := newBitmapContainer() + bc.loadData(ac) + return bc + +} +func (ac *arrayContainer) iadd(x uint16) (wasNew bool) { + // Special case adding to the end of the container. + l := len(ac.content) + if l > 0 && l < arrayDefaultMaxSize && ac.content[l-1] < x { + ac.content = append(ac.content, x) + return true + } + + loc := binarySearch(ac.content, x) + + if loc < 0 { + s := ac.content + i := -loc - 1 + s = append(s, 0) + copy(s[i+1:], s[i:]) + s[i] = x + ac.content = s + return true + } + return false +} + +func (ac *arrayContainer) iaddReturnMinimized(x uint16) container { + // Special case adding to the end of the container. + l := len(ac.content) + if l > 0 && l < arrayDefaultMaxSize && ac.content[l-1] < x { + ac.content = append(ac.content, x) + return ac + } + + loc := binarySearch(ac.content, x) + + if loc < 0 { + if len(ac.content) >= arrayDefaultMaxSize { + a := ac.toBitmapContainer() + a.iadd(x) + return a + } + s := ac.content + i := -loc - 1 + s = append(s, 0) + copy(s[i+1:], s[i:]) + s[i] = x + ac.content = s + } + return ac +} + +// iremoveReturnMinimized is allowed to change the return type to minimize storage. +func (ac *arrayContainer) iremoveReturnMinimized(x uint16) container { + ac.iremove(x) + return ac +} + +func (ac *arrayContainer) iremove(x uint16) bool { + loc := binarySearch(ac.content, x) + if loc >= 0 { + s := ac.content + s = append(s[:loc], s[loc+1:]...) + ac.content = s + return true + } + return false +} + +func (ac *arrayContainer) remove(x uint16) container { + out := &arrayContainer{make([]uint16, len(ac.content))} + copy(out.content, ac.content[:]) + + loc := binarySearch(out.content, x) + if loc >= 0 { + s := out.content + s = append(s[:loc], s[loc+1:]...) + out.content = s + } + return out +} + +func (ac *arrayContainer) or(a container) container { + switch x := a.(type) { + case *arrayContainer: + return ac.orArray(x) + case *bitmapContainer: + return x.orArray(ac) + case *runContainer16: + if x.isFull() { + return x.clone() + } + return x.orArray(ac) + } + panic("unsupported container type") +} + +func (ac *arrayContainer) orCardinality(a container) int { + switch x := a.(type) { + case *arrayContainer: + return ac.orArrayCardinality(x) + case *bitmapContainer: + return x.orArrayCardinality(ac) + case *runContainer16: + return x.orArrayCardinality(ac) + } + panic("unsupported container type") +} + +func (ac *arrayContainer) ior(a container) container { + switch x := a.(type) { + case *arrayContainer: + return ac.iorArray(x) + case *bitmapContainer: + return a.(*bitmapContainer).orArray(ac) + //return ac.iorBitmap(x) // note: this does not make sense + case *runContainer16: + if x.isFull() { + return x.clone() + } + return ac.iorRun16(x) + } + panic("unsupported container type") +} + +func (ac *arrayContainer) iorArray(value2 *arrayContainer) container { + value1 := ac + len1 := value1.getCardinality() + len2 := value2.getCardinality() + maxPossibleCardinality := len1 + len2 + if maxPossibleCardinality > arrayDefaultMaxSize { // it could be a bitmap! + bc := newBitmapContainer() + for k := 0; k < len(value2.content); k++ { + v := value2.content[k] + i := uint(v) >> 6 + mask := uint64(1) << (v % 64) + bc.bitmap[i] |= mask + } + for k := 0; k < len(ac.content); k++ { + v := ac.content[k] + i := uint(v) >> 6 + mask := uint64(1) << (v % 64) + bc.bitmap[i] |= mask + } + bc.cardinality = int(popcntSlice(bc.bitmap)) + if bc.cardinality <= arrayDefaultMaxSize { + return bc.toArrayContainer() + } + return bc + } + if maxPossibleCardinality > cap(value1.content) { + newcontent := make([]uint16, 0, maxPossibleCardinality) + copy(newcontent[len2:maxPossibleCardinality], ac.content[0:len1]) + ac.content = newcontent + } else { + copy(ac.content[len2:maxPossibleCardinality], ac.content[0:len1]) + } + nl := union2by2(value1.content[len2:maxPossibleCardinality], value2.content, ac.content) + ac.content = ac.content[:nl] // reslice to match actual used capacity + return ac +} + +// Note: such code does not make practical sense, except for lazy evaluations +func (ac *arrayContainer) iorBitmap(bc2 *bitmapContainer) container { + bc1 := ac.toBitmapContainer() + bc1.iorBitmap(bc2) + *ac = *newArrayContainerFromBitmap(bc1) + return ac +} + +func (ac *arrayContainer) iorRun16(rc *runContainer16) container { + bc1 := ac.toBitmapContainer() + bc2 := rc.toBitmapContainer() + bc1.iorBitmap(bc2) + *ac = *newArrayContainerFromBitmap(bc1) + return ac +} + +func (ac *arrayContainer) lazyIOR(a container) container { + switch x := a.(type) { + case *arrayContainer: + return ac.lazyIorArray(x) + case *bitmapContainer: + return ac.lazyIorBitmap(x) + case *runContainer16: + if x.isFull() { + return x.clone() + } + return ac.lazyIorRun16(x) + + } + panic("unsupported container type") +} + +func (ac *arrayContainer) lazyIorArray(ac2 *arrayContainer) container { + // TODO actually make this lazy + return ac.iorArray(ac2) +} + +func (ac *arrayContainer) lazyIorBitmap(bc *bitmapContainer) container { + // TODO actually make this lazy + return ac.iorBitmap(bc) +} + +func (ac *arrayContainer) lazyIorRun16(rc *runContainer16) container { + // TODO actually make this lazy + return ac.iorRun16(rc) +} + +func (ac *arrayContainer) lazyOR(a container) container { + switch x := a.(type) { + case *arrayContainer: + return ac.lazyorArray(x) + case *bitmapContainer: + return a.lazyOR(ac) + case *runContainer16: + if x.isFull() { + return x.clone() + } + return x.orArray(ac) + } + panic("unsupported container type") +} + +func (ac *arrayContainer) orArray(value2 *arrayContainer) container { + value1 := ac + maxPossibleCardinality := value1.getCardinality() + value2.getCardinality() + if maxPossibleCardinality > arrayDefaultMaxSize { // it could be a bitmap! + bc := newBitmapContainer() + for k := 0; k < len(value2.content); k++ { + v := value2.content[k] + i := uint(v) >> 6 + mask := uint64(1) << (v % 64) + bc.bitmap[i] |= mask + } + for k := 0; k < len(ac.content); k++ { + v := ac.content[k] + i := uint(v) >> 6 + mask := uint64(1) << (v % 64) + bc.bitmap[i] |= mask + } + bc.cardinality = int(popcntSlice(bc.bitmap)) + if bc.cardinality <= arrayDefaultMaxSize { + return bc.toArrayContainer() + } + return bc + } + answer := newArrayContainerCapacity(maxPossibleCardinality) + nl := union2by2(value1.content, value2.content, answer.content) + answer.content = answer.content[:nl] // reslice to match actual used capacity + return answer +} + +func (ac *arrayContainer) orArrayCardinality(value2 *arrayContainer) int { + return union2by2Cardinality(ac.content, value2.content) +} + +func (ac *arrayContainer) lazyorArray(value2 *arrayContainer) container { + value1 := ac + maxPossibleCardinality := value1.getCardinality() + value2.getCardinality() + if maxPossibleCardinality > arrayLazyLowerBound { // it could be a bitmap!^M + bc := newBitmapContainer() + for k := 0; k < len(value2.content); k++ { + v := value2.content[k] + i := uint(v) >> 6 + mask := uint64(1) << (v % 64) + bc.bitmap[i] |= mask + } + for k := 0; k < len(ac.content); k++ { + v := ac.content[k] + i := uint(v) >> 6 + mask := uint64(1) << (v % 64) + bc.bitmap[i] |= mask + } + bc.cardinality = invalidCardinality + return bc + } + answer := newArrayContainerCapacity(maxPossibleCardinality) + nl := union2by2(value1.content, value2.content, answer.content) + answer.content = answer.content[:nl] // reslice to match actual used capacity + return answer +} + +func (ac *arrayContainer) and(a container) container { + //p("ac.and() called") + switch x := a.(type) { + case *arrayContainer: + return ac.andArray(x) + case *bitmapContainer: + return x.and(ac) + case *runContainer16: + if x.isFull() { + return ac.clone() + } + return x.andArray(ac) + } + panic("unsupported container type") +} + +func (ac *arrayContainer) andCardinality(a container) int { + switch x := a.(type) { + case *arrayContainer: + return ac.andArrayCardinality(x) + case *bitmapContainer: + return x.andCardinality(ac) + case *runContainer16: + return x.andArrayCardinality(ac) + } + panic("unsupported container type") +} + +func (ac *arrayContainer) intersects(a container) bool { + switch x := a.(type) { + case *arrayContainer: + return ac.intersectsArray(x) + case *bitmapContainer: + return x.intersects(ac) + case *runContainer16: + return x.intersects(ac) + } + panic("unsupported container type") +} + +func (ac *arrayContainer) iand(a container) container { + switch x := a.(type) { + case *arrayContainer: + return ac.iandArray(x) + case *bitmapContainer: + return ac.iandBitmap(x) + case *runContainer16: + if x.isFull() { + return ac.clone() + } + return x.andArray(ac) + } + panic("unsupported container type") +} + +func (ac *arrayContainer) iandBitmap(bc *bitmapContainer) container { + pos := 0 + c := ac.getCardinality() + for k := 0; k < c; k++ { + // branchless + v := ac.content[k] + ac.content[pos] = v + pos += int(bc.bitValue(v)) + } + ac.content = ac.content[:pos] + return ac + +} + +func (ac *arrayContainer) xor(a container) container { + switch x := a.(type) { + case *arrayContainer: + return ac.xorArray(x) + case *bitmapContainer: + return a.xor(ac) + case *runContainer16: + return x.xorArray(ac) + } + panic("unsupported container type") +} + +func (ac *arrayContainer) xorArray(value2 *arrayContainer) container { + value1 := ac + totalCardinality := value1.getCardinality() + value2.getCardinality() + if totalCardinality > arrayDefaultMaxSize { // it could be a bitmap! + bc := newBitmapContainer() + for k := 0; k < len(value2.content); k++ { + v := value2.content[k] + i := uint(v) >> 6 + bc.bitmap[i] ^= (uint64(1) << (v % 64)) + } + for k := 0; k < len(ac.content); k++ { + v := ac.content[k] + i := uint(v) >> 6 + bc.bitmap[i] ^= (uint64(1) << (v % 64)) + } + bc.computeCardinality() + if bc.cardinality <= arrayDefaultMaxSize { + return bc.toArrayContainer() + } + return bc + } + desiredCapacity := totalCardinality + answer := newArrayContainerCapacity(desiredCapacity) + length := exclusiveUnion2by2(value1.content, value2.content, answer.content) + answer.content = answer.content[:length] + return answer + +} + +func (ac *arrayContainer) andNot(a container) container { + switch x := a.(type) { + case *arrayContainer: + return ac.andNotArray(x) + case *bitmapContainer: + return ac.andNotBitmap(x) + case *runContainer16: + return ac.andNotRun16(x) + } + panic("unsupported container type") +} + +func (ac *arrayContainer) andNotRun16(rc *runContainer16) container { + acb := ac.toBitmapContainer() + rcb := rc.toBitmapContainer() + return acb.andNotBitmap(rcb) +} + +func (ac *arrayContainer) iandNot(a container) container { + switch x := a.(type) { + case *arrayContainer: + return ac.iandNotArray(x) + case *bitmapContainer: + return ac.iandNotBitmap(x) + case *runContainer16: + return ac.iandNotRun16(x) + } + panic("unsupported container type") +} + +func (ac *arrayContainer) iandNotRun16(rc *runContainer16) container { + rcb := rc.toBitmapContainer() + acb := ac.toBitmapContainer() + acb.iandNotBitmapSurely(rcb) + *ac = *(acb.toArrayContainer()) + return ac +} + +func (ac *arrayContainer) andNotArray(value2 *arrayContainer) container { + value1 := ac + desiredcapacity := value1.getCardinality() + answer := newArrayContainerCapacity(desiredcapacity) + length := difference(value1.content, value2.content, answer.content) + answer.content = answer.content[:length] + return answer +} + +func (ac *arrayContainer) iandNotArray(value2 *arrayContainer) container { + length := difference(ac.content, value2.content, ac.content) + ac.content = ac.content[:length] + return ac +} + +func (ac *arrayContainer) andNotBitmap(value2 *bitmapContainer) container { + desiredcapacity := ac.getCardinality() + answer := newArrayContainerCapacity(desiredcapacity) + answer.content = answer.content[:desiredcapacity] + pos := 0 + for _, v := range ac.content { + answer.content[pos] = v + pos += 1 - int(value2.bitValue(v)) + } + answer.content = answer.content[:pos] + return answer +} + +func (ac *arrayContainer) andBitmap(value2 *bitmapContainer) container { + desiredcapacity := ac.getCardinality() + answer := newArrayContainerCapacity(desiredcapacity) + answer.content = answer.content[:desiredcapacity] + pos := 0 + for _, v := range ac.content { + answer.content[pos] = v + pos += int(value2.bitValue(v)) + } + answer.content = answer.content[:pos] + return answer +} + +func (ac *arrayContainer) iandNotBitmap(value2 *bitmapContainer) container { + pos := 0 + for _, v := range ac.content { + ac.content[pos] = v + pos += 1 - int(value2.bitValue(v)) + } + ac.content = ac.content[:pos] + return ac +} + +func copyOf(array []uint16, size int) []uint16 { + result := make([]uint16, size) + for i, x := range array { + if i == size { + break + } + result[i] = x + } + return result +} + +// flip the values in the range [firstOfRange,endx) +func (ac *arrayContainer) inot(firstOfRange, endx int) container { + if firstOfRange >= endx { + return ac + } + return ac.inotClose(firstOfRange, endx-1) // remove everything in [firstOfRange,endx-1] +} + +// flip the values in the range [firstOfRange,lastOfRange] +func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container { + //p("ac.inotClose() starting") + if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange] + return ac + } + // determine the span of array indices to be affected + startIndex := binarySearch(ac.content, uint16(firstOfRange)) + if startIndex < 0 { + startIndex = -startIndex - 1 + } + lastIndex := binarySearch(ac.content, uint16(lastOfRange)) + if lastIndex < 0 { + lastIndex = -lastIndex - 1 - 1 + } + currentValuesInRange := lastIndex - startIndex + 1 + spanToBeFlipped := lastOfRange - firstOfRange + 1 + + newValuesInRange := spanToBeFlipped - currentValuesInRange + buffer := make([]uint16, newValuesInRange) + cardinalityChange := newValuesInRange - currentValuesInRange + newCardinality := len(ac.content) + cardinalityChange + if cardinalityChange > 0 { + if newCardinality > len(ac.content) { + if newCardinality > arrayDefaultMaxSize { + //p("ac.inotClose() converting to bitmap and doing inot there") + bcRet := ac.toBitmapContainer() + bcRet.inot(firstOfRange, lastOfRange+1) + *ac = *bcRet.toArrayContainer() + return bcRet + } + ac.content = copyOf(ac.content, newCardinality) + } + base := lastIndex + 1 + copy(ac.content[lastIndex+1+cardinalityChange:], ac.content[base:base+len(ac.content)-1-lastIndex]) + ac.negateRange(buffer, startIndex, lastIndex, firstOfRange, lastOfRange+1) + } else { // no expansion needed + ac.negateRange(buffer, startIndex, lastIndex, firstOfRange, lastOfRange+1) + if cardinalityChange < 0 { + + for i := startIndex + newValuesInRange; i < newCardinality; i++ { + ac.content[i] = ac.content[i-cardinalityChange] + } + } + } + ac.content = ac.content[:newCardinality] + //p("bottom of ac.inotClose(): returning ac") + return ac +} + +func (ac *arrayContainer) negateRange(buffer []uint16, startIndex, lastIndex, startRange, lastRange int) { + // compute the negation into buffer + outPos := 0 + inPos := startIndex // value here always >= valInRange, + // until it is exhausted + // n.b., we can start initially exhausted. + + valInRange := startRange + for ; valInRange < lastRange && inPos <= lastIndex; valInRange++ { + if uint16(valInRange) != ac.content[inPos] { + buffer[outPos] = uint16(valInRange) + outPos++ + } else { + inPos++ + } + } + + // if there are extra items (greater than the biggest + // pre-existing one in range), buffer them + for ; valInRange < lastRange; valInRange++ { + buffer[outPos] = uint16(valInRange) + outPos++ + } + + if outPos != len(buffer) { + panic("negateRange: internal bug") + } + + for i, item := range buffer { + ac.content[i+startIndex] = item + } +} + +func (ac *arrayContainer) isFull() bool { + return false +} + +func (ac *arrayContainer) andArray(value2 *arrayContainer) container { + desiredcapacity := minOfInt(ac.getCardinality(), value2.getCardinality()) + answer := newArrayContainerCapacity(desiredcapacity) + length := intersection2by2( + ac.content, + value2.content, + answer.content) + answer.content = answer.content[:length] + return answer +} + +func (ac *arrayContainer) andArrayCardinality(value2 *arrayContainer) int { + return intersection2by2Cardinality( + ac.content, + value2.content) +} + +func (ac *arrayContainer) intersectsArray(value2 *arrayContainer) bool { + return intersects2by2( + ac.content, + value2.content) +} + +func (ac *arrayContainer) iandArray(value2 *arrayContainer) container { + length := intersection2by2( + ac.content, + value2.content, + ac.content) + ac.content = ac.content[:length] + return ac +} + +func (ac *arrayContainer) getCardinality() int { + return len(ac.content) +} + +func (ac *arrayContainer) rank(x uint16) int { + answer := binarySearch(ac.content, x) + if answer >= 0 { + return answer + 1 + } + return -answer - 1 + +} + +func (ac *arrayContainer) selectInt(x uint16) int { + return int(ac.content[x]) +} + +func (ac *arrayContainer) clone() container { + ptr := arrayContainer{make([]uint16, len(ac.content))} + copy(ptr.content, ac.content[:]) + return &ptr +} + +func (ac *arrayContainer) contains(x uint16) bool { + return binarySearch(ac.content, x) >= 0 +} + +func (ac *arrayContainer) loadData(bitmapContainer *bitmapContainer) { + ac.content = make([]uint16, bitmapContainer.cardinality, bitmapContainer.cardinality) + bitmapContainer.fillArray(ac.content) +} +func newArrayContainer() *arrayContainer { + p := new(arrayContainer) + return p +} + +func newArrayContainerFromBitmap(bc *bitmapContainer) *arrayContainer { + ac := &arrayContainer{} + ac.loadData(bc) + return ac +} + +func newArrayContainerCapacity(size int) *arrayContainer { + p := new(arrayContainer) + p.content = make([]uint16, 0, size) + return p +} + +func newArrayContainerSize(size int) *arrayContainer { + p := new(arrayContainer) + p.content = make([]uint16, size, size) + return p +} + +func newArrayContainerRange(firstOfRun, lastOfRun int) *arrayContainer { + valuesInRange := lastOfRun - firstOfRun + 1 + this := newArrayContainerCapacity(valuesInRange) + for i := 0; i < valuesInRange; i++ { + this.content = append(this.content, uint16(firstOfRun+i)) + } + return this +} + +func (ac *arrayContainer) numberOfRuns() (nr int) { + n := len(ac.content) + var runlen uint16 + var cur, prev uint16 + + switch n { + case 0: + return 0 + case 1: + return 1 + default: + for i := 1; i < n; i++ { + prev = ac.content[i-1] + cur = ac.content[i] + + if cur == prev+1 { + runlen++ + } else { + if cur < prev { + panic("then fundamental arrayContainer assumption of sorted ac.content was broken") + } + if cur == prev { + panic("then fundamental arrayContainer assumption of deduplicated content was broken") + } else { + nr++ + runlen = 0 + } + } + } + nr++ + } + return +} + +// convert to run or array *if needed* +func (ac *arrayContainer) toEfficientContainer() container { + + numRuns := ac.numberOfRuns() + + sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns) + sizeAsBitmapContainer := bitmapContainerSizeInBytes() + card := ac.getCardinality() + sizeAsArrayContainer := arrayContainerSizeInBytes(card) + + if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) { + return newRunContainer16FromArray(ac) + } + if card <= arrayDefaultMaxSize { + return ac + } + return ac.toBitmapContainer() +} + +func (ac *arrayContainer) containerType() contype { + return arrayContype +} diff --git a/vendor/github.com/RoaringBitmap/roaring/arraycontainer_gen.go b/vendor/github.com/RoaringBitmap/roaring/arraycontainer_gen.go new file mode 100644 index 0000000000..cba6e53e30 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/arraycontainer_gen.go @@ -0,0 +1,134 @@ +package roaring + +// NOTE: THIS FILE WAS PRODUCED BY THE +// MSGP CODE GENERATION TOOL (github.com/tinylib/msgp) +// DO NOT EDIT + +import "github.com/tinylib/msgp/msgp" + +// DecodeMsg implements msgp.Decodable +func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) { + var field []byte + _ = field + var zbzg uint32 + zbzg, err = dc.ReadMapHeader() + if err != nil { + return + } + for zbzg > 0 { + zbzg-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "content": + var zbai uint32 + zbai, err = dc.ReadArrayHeader() + if err != nil { + return + } + if cap(z.content) >= int(zbai) { + z.content = (z.content)[:zbai] + } else { + z.content = make([]uint16, zbai) + } + for zxvk := range z.content { + z.content[zxvk], err = dc.ReadUint16() + if err != nil { + return + } + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) { + // map header, size 1 + // write "content" + err = en.Append(0x81, 0xa7, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74) + if err != nil { + return err + } + err = en.WriteArrayHeader(uint32(len(z.content))) + if err != nil { + return + } + for zxvk := range z.content { + err = en.WriteUint16(z.content[zxvk]) + if err != nil { + return + } + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 1 + // string "content" + o = append(o, 0x81, 0xa7, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74) + o = msgp.AppendArrayHeader(o, uint32(len(z.content))) + for zxvk := range z.content { + o = msgp.AppendUint16(o, z.content[zxvk]) + } + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zcmr uint32 + zcmr, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zcmr > 0 { + zcmr-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "content": + var zajw uint32 + zajw, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + return + } + if cap(z.content) >= int(zajw) { + z.content = (z.content)[:zajw] + } else { + z.content = make([]uint16, zajw) + } + for zxvk := range z.content { + z.content[zxvk], bts, err = msgp.ReadUint16Bytes(bts) + if err != nil { + return + } + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *arrayContainer) Msgsize() (s int) { + s = 1 + 8 + msgp.ArrayHeaderSize + (len(z.content) * (msgp.Uint16Size)) + return +} diff --git a/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go b/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go new file mode 100644 index 0000000000..5e58b31f2b --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer.go @@ -0,0 +1,982 @@ +package roaring + +import ( + "fmt" + "unsafe" +) + +//go:generate msgp -unexported + +type bitmapContainer struct { + cardinality int + bitmap []uint64 +} + +func (bc bitmapContainer) String() string { + var s string + for it := bc.getShortIterator(); it.hasNext(); { + s += fmt.Sprintf("%v, ", it.next()) + } + return s +} + +func newBitmapContainer() *bitmapContainer { + p := new(bitmapContainer) + size := (1 << 16) / 64 + p.bitmap = make([]uint64, size, size) + return p +} + +func newBitmapContainerwithRange(firstOfRun, lastOfRun int) *bitmapContainer { + bc := newBitmapContainer() + bc.cardinality = lastOfRun - firstOfRun + 1 + if bc.cardinality == maxCapacity { + fill(bc.bitmap, uint64(0xffffffffffffffff)) + } else { + firstWord := firstOfRun / 64 + lastWord := lastOfRun / 64 + zeroPrefixLength := uint64(firstOfRun & 63) + zeroSuffixLength := uint64(63 - (lastOfRun & 63)) + + fillRange(bc.bitmap, firstWord, lastWord+1, uint64(0xffffffffffffffff)) + bc.bitmap[firstWord] ^= ((uint64(1) << zeroPrefixLength) - 1) + blockOfOnes := (uint64(1) << zeroSuffixLength) - 1 + maskOnLeft := blockOfOnes << (uint64(64) - zeroSuffixLength) + bc.bitmap[lastWord] ^= maskOnLeft + } + return bc +} + +func (bc *bitmapContainer) minimum() uint16 { + for i := 0; i < len(bc.bitmap); i++ { + w := bc.bitmap[i] + if w != 0 { + r := countTrailingZeros(w) + return uint16(r + i*64) + } + } + return MaxUint16 +} + +// i should be non-zero +func clz(i uint64) int { + n := 1 + x := uint32(i >> 32) + if x == 0 { + n += 32 + x = uint32(i) + } + if x>>16 == 0 { + n += 16 + x = x << 16 + } + if x>>24 == 0 { + n += 8 + x = x << 8 + } + if x>>28 == 0 { + n += 4 + x = x << 4 + } + if x>>30 == 0 { + n += 2 + x = x << 2 + } + return n - int(x>>31) +} + +func (bc *bitmapContainer) maximum() uint16 { + for i := len(bc.bitmap); i > 0; i-- { + w := bc.bitmap[i-1] + if w != 0 { + r := clz(w) + return uint16((i-1)*64 + 63 - r) + } + } + return uint16(0) +} + +type bitmapContainerShortIterator struct { + ptr *bitmapContainer + i int +} + +func (bcsi *bitmapContainerShortIterator) next() uint16 { + j := bcsi.i + bcsi.i = bcsi.ptr.NextSetBit(bcsi.i + 1) + return uint16(j) +} +func (bcsi *bitmapContainerShortIterator) hasNext() bool { + return bcsi.i >= 0 +} + +func newBitmapContainerShortIterator(a *bitmapContainer) *bitmapContainerShortIterator { + return &bitmapContainerShortIterator{a, a.NextSetBit(0)} +} + +func (bc *bitmapContainer) getShortIterator() shortIterable { + return newBitmapContainerShortIterator(bc) +} + +type bitmapContainerManyIterator struct { + ptr *bitmapContainer + base int + bitset uint64 +} + +func (bcmi *bitmapContainerManyIterator) nextMany(hs uint32, buf []uint32) int { + n := 0 + base := bcmi.base + bitset := bcmi.bitset + + for n < len(buf) { + if bitset == 0 { + base += 1 + if base >= len(bcmi.ptr.bitmap) { + bcmi.base = base + bcmi.bitset = bitset + return n + } + bitset = bcmi.ptr.bitmap[base] + continue + } + t := bitset & -bitset + buf[n] = uint32(((base * 64) + int(popcount(t-1)))) | hs + n = n + 1 + bitset ^= t + } + + bcmi.base = base + bcmi.bitset = bitset + return n +} + +func newBitmapContainerManyIterator(a *bitmapContainer) *bitmapContainerManyIterator { + return &bitmapContainerManyIterator{a, -1, 0} +} + +func (bc *bitmapContainer) getManyIterator() manyIterable { + return newBitmapContainerManyIterator(bc) +} + +func (bc *bitmapContainer) getSizeInBytes() int { + return len(bc.bitmap) * 8 // + bcBaseBytes +} + +func (bc *bitmapContainer) serializedSizeInBytes() int { + //return bc.Msgsize()// NOO! This breaks GetSerializedSizeInBytes + return len(bc.bitmap) * 8 +} + +const bcBaseBytes = int(unsafe.Sizeof(bitmapContainer{})) + +// bitmapContainer doesn't depend on card, always fully allocated +func bitmapContainerSizeInBytes() int { + return bcBaseBytes + (1<<16)/8 +} + +func bitmapEquals(a, b []uint64) bool { + if len(a) != len(b) { + //p("bitmaps differ on length. len(a)=%v; len(b)=%v", len(a), len(b)) + return false + } + for i, v := range a { + if v != b[i] { + //p("bitmaps differ on element i=%v", i) + return false + } + } + //p("bitmapEquals returning true") + return true +} + +func (bc *bitmapContainer) fillLeastSignificant16bits(x []uint32, i int, mask uint32) { + // TODO: should be written as optimized assembly + pos := i + base := mask + for k := 0; k < len(bc.bitmap); k++ { + bitset := bc.bitmap[k] + for bitset != 0 { + t := bitset & -bitset + x[pos] = base + uint32(popcount(t-1)) + pos++ + bitset ^= t + } + base += 64 + } +} + +func (bc *bitmapContainer) equals(o container) bool { + srb, ok := o.(*bitmapContainer) + if ok { + //p("bitmapContainers.equals: both are bitmapContainers") + if srb.cardinality != bc.cardinality { + //p("bitmapContainers.equals: card differs: %v vs %v", srb.cardinality, bc.cardinality) + return false + } + return bitmapEquals(bc.bitmap, srb.bitmap) + } + + // use generic comparison + if bc.getCardinality() != o.getCardinality() { + return false + } + ait := o.getShortIterator() + bit := bc.getShortIterator() + + for ait.hasNext() { + if bit.next() != ait.next() { + return false + } + } + return true +} + +func (bc *bitmapContainer) iaddReturnMinimized(i uint16) container { + bc.iadd(i) + if bc.isFull() { + return newRunContainer16Range(0, MaxUint16) + } + return bc +} + +func (bc *bitmapContainer) iadd(i uint16) bool { + x := int(i) + previous := bc.bitmap[x/64] + mask := uint64(1) << (uint(x) % 64) + newb := previous | mask + bc.bitmap[x/64] = newb + bc.cardinality += int((previous ^ newb) >> (uint(x) % 64)) + return newb != previous +} + +func (bc *bitmapContainer) iremoveReturnMinimized(i uint16) container { + if bc.iremove(i) { + if bc.cardinality == arrayDefaultMaxSize { + return bc.toArrayContainer() + } + } + return bc +} + +// iremove returns true if i was found. +func (bc *bitmapContainer) iremove(i uint16) bool { + /* branchless code + w := bc.bitmap[i>>6] + mask := uint64(1) << (i % 64) + neww := w &^ mask + bc.cardinality -= int((w ^ neww) >> (i % 64)) + bc.bitmap[i>>6] = neww */ + if bc.contains(i) { + bc.cardinality-- + bc.bitmap[i/64] &^= (uint64(1) << (i % 64)) + return true + } + return false +} + +func (bc *bitmapContainer) isFull() bool { + return bc.cardinality == int(MaxUint16)+1 +} + +func (bc *bitmapContainer) getCardinality() int { + return bc.cardinality +} + +func (bc *bitmapContainer) clone() container { + ptr := bitmapContainer{bc.cardinality, make([]uint64, len(bc.bitmap))} + copy(ptr.bitmap, bc.bitmap[:]) + return &ptr +} + +// add all values in range [firstOfRange,lastOfRange) +func (bc *bitmapContainer) iaddRange(firstOfRange, lastOfRange int) container { + bc.cardinality += setBitmapRangeAndCardinalityChange(bc.bitmap, firstOfRange, lastOfRange) + return bc +} + +// remove all values in range [firstOfRange,lastOfRange) +func (bc *bitmapContainer) iremoveRange(firstOfRange, lastOfRange int) container { + bc.cardinality += resetBitmapRangeAndCardinalityChange(bc.bitmap, firstOfRange, lastOfRange) + if bc.getCardinality() <= arrayDefaultMaxSize { + return bc.toArrayContainer() + } + return bc +} + +// flip all values in range [firstOfRange,endx) +func (bc *bitmapContainer) inot(firstOfRange, endx int) container { + p("bc.inot() called with [%v, %v)", firstOfRange, endx) + if endx-firstOfRange == maxCapacity { + //p("endx-firstOfRange == maxCapacity") + flipBitmapRange(bc.bitmap, firstOfRange, endx) + bc.cardinality = maxCapacity - bc.cardinality + //p("bc.cardinality is now %v", bc.cardinality) + } else if endx-firstOfRange > maxCapacity/2 { + //p("endx-firstOfRange > maxCapacity/2") + flipBitmapRange(bc.bitmap, firstOfRange, endx) + bc.computeCardinality() + } else { + bc.cardinality += flipBitmapRangeAndCardinalityChange(bc.bitmap, firstOfRange, endx) + } + if bc.getCardinality() <= arrayDefaultMaxSize { + return bc.toArrayContainer() + } + return bc +} + +// flip all values in range [firstOfRange,endx) +func (bc *bitmapContainer) not(firstOfRange, endx int) container { + answer := bc.clone() + return answer.inot(firstOfRange, endx) +} + +func (bc *bitmapContainer) or(a container) container { + switch x := a.(type) { + case *arrayContainer: + return bc.orArray(x) + case *bitmapContainer: + return bc.orBitmap(x) + case *runContainer16: + if x.isFull() { + return x.clone() + } + return x.orBitmapContainer(bc) + } + panic("unsupported container type") +} + +func (bc *bitmapContainer) orCardinality(a container) int { + switch x := a.(type) { + case *arrayContainer: + return bc.orArrayCardinality(x) + case *bitmapContainer: + return bc.orBitmapCardinality(x) + case *runContainer16: + return x.orBitmapContainerCardinality(bc) + } + panic("unsupported container type") +} + +func (bc *bitmapContainer) ior(a container) container { + switch x := a.(type) { + case *arrayContainer: + return bc.iorArray(x) + case *bitmapContainer: + return bc.iorBitmap(x) + case *runContainer16: + if x.isFull() { + return x.clone() + } + for i := range x.iv { + bc.iaddRange(int(x.iv[i].start), int(x.iv[i].last())+1) + } + if bc.isFull() { + return newRunContainer16Range(0, MaxUint16) + } + //bc.computeCardinality() + return bc + } + panic(fmt.Errorf("unsupported container type %T", a)) +} + +func (bc *bitmapContainer) lazyIOR(a container) container { + switch x := a.(type) { + case *arrayContainer: + return bc.lazyIORArray(x) + case *bitmapContainer: + return bc.lazyIORBitmap(x) + case *runContainer16: + if x.isFull() { + return x.clone() + } + + // Manually inlined setBitmapRange function + bitmap := bc.bitmap + for _, iv := range x.iv { + start := int(iv.start) + end := int(iv.last()) + 1 + if start >= end { + continue + } + firstword := start / 64 + endword := (end - 1) / 64 + if firstword == endword { + bitmap[firstword] |= (^uint64(0) << uint(start%64)) & (^uint64(0) >> (uint(-end) % 64)) + continue + } + bitmap[firstword] |= ^uint64(0) << uint(start%64) + for i := firstword + 1; i < endword; i++ { + bitmap[i] = ^uint64(0) + } + bitmap[endword] |= ^uint64(0) >> (uint(-end) % 64) + } + bc.cardinality = invalidCardinality + return bc + } + panic("unsupported container type") +} + +func (bc *bitmapContainer) lazyOR(a container) container { + switch x := a.(type) { + case *arrayContainer: + return bc.lazyORArray(x) + case *bitmapContainer: + return bc.lazyORBitmap(x) + case *runContainer16: + if x.isFull() { + return x.clone() + } + // TODO: implement lazy OR + return x.orBitmapContainer(bc) + + } + panic("unsupported container type") +} + +func (bc *bitmapContainer) orArray(value2 *arrayContainer) container { + answer := bc.clone().(*bitmapContainer) + c := value2.getCardinality() + for k := 0; k < c; k++ { + v := value2.content[k] + i := uint(v) >> 6 + bef := answer.bitmap[i] + aft := bef | (uint64(1) << (v % 64)) + answer.bitmap[i] = aft + answer.cardinality += int((bef - aft) >> 63) + } + return answer +} + +func (bc *bitmapContainer) orArrayCardinality(value2 *arrayContainer) int { + answer := 0 + c := value2.getCardinality() + for k := 0; k < c; k++ { + // branchless: + v := value2.content[k] + i := uint(v) >> 6 + bef := bc.bitmap[i] + aft := bef | (uint64(1) << (v % 64)) + answer += int((bef - aft) >> 63) + } + return answer +} + +func (bc *bitmapContainer) orBitmap(value2 *bitmapContainer) container { + answer := newBitmapContainer() + for k := 0; k < len(answer.bitmap); k++ { + answer.bitmap[k] = bc.bitmap[k] | value2.bitmap[k] + } + answer.computeCardinality() + if answer.isFull() { + return newRunContainer16Range(0, MaxUint16) + } + return answer +} + +func (bc *bitmapContainer) orBitmapCardinality(value2 *bitmapContainer) int { + return int(popcntOrSlice(bc.bitmap, value2.bitmap)) +} + +func (bc *bitmapContainer) andBitmapCardinality(value2 *bitmapContainer) int { + return int(popcntAndSlice(bc.bitmap, value2.bitmap)) +} + +func (bc *bitmapContainer) computeCardinality() { + bc.cardinality = int(popcntSlice(bc.bitmap)) +} + +func (bc *bitmapContainer) iorArray(ac *arrayContainer) container { + for k := range ac.content { + vc := ac.content[k] + i := uint(vc) >> 6 + bef := bc.bitmap[i] + aft := bef | (uint64(1) << (vc % 64)) + bc.bitmap[i] = aft + bc.cardinality += int((bef - aft) >> 63) + } + if bc.isFull() { + return newRunContainer16Range(0, MaxUint16) + } + return bc +} + +func (bc *bitmapContainer) iorBitmap(value2 *bitmapContainer) container { + answer := bc + answer.cardinality = 0 + for k := 0; k < len(answer.bitmap); k++ { + answer.bitmap[k] = bc.bitmap[k] | value2.bitmap[k] + } + answer.computeCardinality() + if bc.isFull() { + return newRunContainer16Range(0, MaxUint16) + } + return answer +} + +func (bc *bitmapContainer) lazyIORArray(value2 *arrayContainer) container { + answer := bc + c := value2.getCardinality() + for k := 0; k < c; k++ { + vc := value2.content[k] + i := uint(vc) >> 6 + answer.bitmap[i] = answer.bitmap[i] | (uint64(1) << (vc % 64)) + } + answer.cardinality = invalidCardinality + return answer +} + +func (bc *bitmapContainer) lazyORArray(value2 *arrayContainer) container { + answer := bc.clone().(*bitmapContainer) + return answer.lazyIORArray(value2) +} + +func (bc *bitmapContainer) lazyIORBitmap(value2 *bitmapContainer) container { + answer := bc + for k := 0; k < len(answer.bitmap); k++ { + answer.bitmap[k] = bc.bitmap[k] | value2.bitmap[k] + } + bc.cardinality = invalidCardinality + return answer +} + +func (bc *bitmapContainer) lazyORBitmap(value2 *bitmapContainer) container { + answer := bc.clone().(*bitmapContainer) + return answer.lazyIORBitmap(value2) +} + +func (bc *bitmapContainer) xor(a container) container { + switch x := a.(type) { + case *arrayContainer: + return bc.xorArray(x) + case *bitmapContainer: + return bc.xorBitmap(x) + case *runContainer16: + return x.xorBitmap(bc) + } + panic("unsupported container type") +} + +func (bc *bitmapContainer) xorArray(value2 *arrayContainer) container { + answer := bc.clone().(*bitmapContainer) + c := value2.getCardinality() + for k := 0; k < c; k++ { + vc := value2.content[k] + index := uint(vc) >> 6 + abi := answer.bitmap[index] + mask := uint64(1) << (vc % 64) + answer.cardinality += 1 - 2*int((abi&mask)>>(vc%64)) + answer.bitmap[index] = abi ^ mask + } + if answer.cardinality <= arrayDefaultMaxSize { + return answer.toArrayContainer() + } + return answer +} + +func (bc *bitmapContainer) rank(x uint16) int { + // TODO: rewrite in assembly + leftover := (uint(x) + 1) & 63 + if leftover == 0 { + return int(popcntSlice(bc.bitmap[:(uint(x)+1)/64])) + } + return int(popcntSlice(bc.bitmap[:(uint(x)+1)/64]) + popcount(bc.bitmap[(uint(x)+1)/64]<<(64-leftover))) +} + +func (bc *bitmapContainer) selectInt(x uint16) int { + remaining := x + for k := 0; k < len(bc.bitmap); k++ { + w := popcount(bc.bitmap[k]) + if uint16(w) > remaining { + return k*64 + selectBitPosition(bc.bitmap[k], int(remaining)) + } + remaining -= uint16(w) + } + return -1 +} + +func (bc *bitmapContainer) xorBitmap(value2 *bitmapContainer) container { + newCardinality := int(popcntXorSlice(bc.bitmap, value2.bitmap)) + + if newCardinality > arrayDefaultMaxSize { + answer := newBitmapContainer() + for k := 0; k < len(answer.bitmap); k++ { + answer.bitmap[k] = bc.bitmap[k] ^ value2.bitmap[k] + } + answer.cardinality = newCardinality + if answer.isFull() { + return newRunContainer16Range(0, MaxUint16) + } + return answer + } + ac := newArrayContainerSize(newCardinality) + fillArrayXOR(ac.content, bc.bitmap, value2.bitmap) + ac.content = ac.content[:newCardinality] + return ac +} + +func (bc *bitmapContainer) and(a container) container { + switch x := a.(type) { + case *arrayContainer: + return bc.andArray(x) + case *bitmapContainer: + return bc.andBitmap(x) + case *runContainer16: + if x.isFull() { + return bc.clone() + } + return x.andBitmapContainer(bc) + } + panic("unsupported container type") +} + +func (bc *bitmapContainer) andCardinality(a container) int { + switch x := a.(type) { + case *arrayContainer: + return bc.andArrayCardinality(x) + case *bitmapContainer: + return bc.andBitmapCardinality(x) + case *runContainer16: + return x.andBitmapContainerCardinality(bc) + } + panic("unsupported container type") +} + +func (bc *bitmapContainer) intersects(a container) bool { + switch x := a.(type) { + case *arrayContainer: + return bc.intersectsArray(x) + case *bitmapContainer: + return bc.intersectsBitmap(x) + case *runContainer16: + return x.intersects(bc) + + } + panic("unsupported container type") +} + +func (bc *bitmapContainer) iand(a container) container { + switch x := a.(type) { + case *arrayContainer: + return bc.iandArray(x) + case *bitmapContainer: + return bc.iandBitmap(x) + case *runContainer16: + if x.isFull() { + return bc.clone() + } + return bc.iandRun16(x) + } + panic("unsupported container type") +} + +func (bc *bitmapContainer) iandRun16(rc *runContainer16) container { + rcb := newBitmapContainerFromRun(rc) + return bc.iandBitmap(rcb) +} + +func (bc *bitmapContainer) iandArray(ac *arrayContainer) container { + acb := ac.toBitmapContainer() + return bc.iandBitmap(acb) +} + +func (bc *bitmapContainer) andArray(value2 *arrayContainer) *arrayContainer { + answer := newArrayContainerCapacity(len(value2.content)) + answer.content = answer.content[:cap(answer.content)] + c := value2.getCardinality() + pos := 0 + for k := 0; k < c; k++ { + v := value2.content[k] + answer.content[pos] = v + pos += int(bc.bitValue(v)) + } + answer.content = answer.content[:pos] + return answer +} + +func (bc *bitmapContainer) andArrayCardinality(value2 *arrayContainer) int { + c := value2.getCardinality() + pos := 0 + for k := 0; k < c; k++ { + v := value2.content[k] + pos += int(bc.bitValue(v)) + } + return pos +} + +func (bc *bitmapContainer) getCardinalityInRange(start, end uint) int { + if start >= end { + return 0 + } + firstword := start / 64 + endword := (end - 1) / 64 + const allones = ^uint64(0) + if firstword == endword { + return int(popcount(bc.bitmap[firstword] & ((allones << (start % 64)) & (allones >> ((64 - end) & 63))))) + } + answer := popcount(bc.bitmap[firstword] & (allones << (start % 64))) + answer += popcntSlice(bc.bitmap[firstword+1 : endword]) + answer += popcount(bc.bitmap[endword] & (allones >> ((64 - end) & 63))) + return int(answer) +} + +func (bc *bitmapContainer) andBitmap(value2 *bitmapContainer) container { + newcardinality := int(popcntAndSlice(bc.bitmap, value2.bitmap)) + if newcardinality > arrayDefaultMaxSize { + answer := newBitmapContainer() + for k := 0; k < len(answer.bitmap); k++ { + answer.bitmap[k] = bc.bitmap[k] & value2.bitmap[k] + } + answer.cardinality = newcardinality + return answer + } + ac := newArrayContainerSize(newcardinality) + fillArrayAND(ac.content, bc.bitmap, value2.bitmap) + ac.content = ac.content[:newcardinality] //not sure why i need this + return ac + +} + +func (bc *bitmapContainer) intersectsArray(value2 *arrayContainer) bool { + c := value2.getCardinality() + for k := 0; k < c; k++ { + v := value2.content[k] + if bc.contains(v) { + return true + } + } + return false +} + +func (bc *bitmapContainer) intersectsBitmap(value2 *bitmapContainer) bool { + for k := 0; k < len(bc.bitmap); k++ { + if (bc.bitmap[k] & value2.bitmap[k]) != 0 { + return true + } + } + return false + +} + +func (bc *bitmapContainer) iandBitmap(value2 *bitmapContainer) container { + newcardinality := int(popcntAndSlice(bc.bitmap, value2.bitmap)) + for k := 0; k < len(bc.bitmap); k++ { + bc.bitmap[k] = bc.bitmap[k] & value2.bitmap[k] + } + bc.cardinality = newcardinality + + if newcardinality <= arrayDefaultMaxSize { + return newArrayContainerFromBitmap(bc) + } + return bc +} + +func (bc *bitmapContainer) andNot(a container) container { + switch x := a.(type) { + case *arrayContainer: + return bc.andNotArray(x) + case *bitmapContainer: + return bc.andNotBitmap(x) + case *runContainer16: + return bc.andNotRun16(x) + } + panic("unsupported container type") +} + +func (bc *bitmapContainer) andNotRun16(rc *runContainer16) container { + rcb := rc.toBitmapContainer() + return bc.andNotBitmap(rcb) +} + +func (bc *bitmapContainer) iandNot(a container) container { + //p("bitmapContainer.iandNot() starting") + + switch x := a.(type) { + case *arrayContainer: + return bc.iandNotArray(x) + case *bitmapContainer: + return bc.iandNotBitmapSurely(x) + case *runContainer16: + return bc.iandNotRun16(x) + } + panic("unsupported container type") +} + +func (bc *bitmapContainer) iandNotArray(ac *arrayContainer) container { + acb := ac.toBitmapContainer() + return bc.iandNotBitmapSurely(acb) +} + +func (bc *bitmapContainer) iandNotRun16(rc *runContainer16) container { + rcb := rc.toBitmapContainer() + return bc.iandNotBitmapSurely(rcb) +} + +func (bc *bitmapContainer) andNotArray(value2 *arrayContainer) container { + answer := bc.clone().(*bitmapContainer) + c := value2.getCardinality() + for k := 0; k < c; k++ { + vc := value2.content[k] + i := uint(vc) >> 6 + oldv := answer.bitmap[i] + newv := oldv &^ (uint64(1) << (vc % 64)) + answer.bitmap[i] = newv + answer.cardinality -= int((oldv ^ newv) >> (vc % 64)) + } + if answer.cardinality <= arrayDefaultMaxSize { + return answer.toArrayContainer() + } + return answer +} + +func (bc *bitmapContainer) andNotBitmap(value2 *bitmapContainer) container { + newCardinality := int(popcntMaskSlice(bc.bitmap, value2.bitmap)) + if newCardinality > arrayDefaultMaxSize { + answer := newBitmapContainer() + for k := 0; k < len(answer.bitmap); k++ { + answer.bitmap[k] = bc.bitmap[k] &^ value2.bitmap[k] + } + answer.cardinality = newCardinality + return answer + } + ac := newArrayContainerSize(newCardinality) + fillArrayANDNOT(ac.content, bc.bitmap, value2.bitmap) + return ac +} + +func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) *bitmapContainer { + newCardinality := int(popcntMaskSlice(bc.bitmap, value2.bitmap)) + for k := 0; k < len(bc.bitmap); k++ { + bc.bitmap[k] = bc.bitmap[k] &^ value2.bitmap[k] + } + bc.cardinality = newCardinality + return bc +} + +func (bc *bitmapContainer) contains(i uint16) bool { //testbit + x := uint(i) + w := bc.bitmap[x>>6] + mask := uint64(1) << (x & 63) + return (w & mask) != 0 +} + +func (bc *bitmapContainer) bitValue(i uint16) uint64 { + x := uint(i) + w := bc.bitmap[x>>6] + return (w >> (x & 63)) & 1 +} + +func (bc *bitmapContainer) loadData(arrayContainer *arrayContainer) { + bc.cardinality = arrayContainer.getCardinality() + c := arrayContainer.getCardinality() + for k := 0; k < c; k++ { + x := arrayContainer.content[k] + i := int(x) / 64 + bc.bitmap[i] |= (uint64(1) << uint(x%64)) + } +} + +func (bc *bitmapContainer) toArrayContainer() *arrayContainer { + ac := &arrayContainer{} + ac.loadData(bc) + return ac +} + +func (bc *bitmapContainer) fillArray(container []uint16) { + //TODO: rewrite in assembly + pos := 0 + base := 0 + for k := 0; k < len(bc.bitmap); k++ { + bitset := bc.bitmap[k] + for bitset != 0 { + t := bitset & -bitset + container[pos] = uint16((base + int(popcount(t-1)))) + pos = pos + 1 + bitset ^= t + } + base += 64 + } +} + +func (bc *bitmapContainer) NextSetBit(i int) int { + x := i / 64 + if x >= len(bc.bitmap) { + return -1 + } + w := bc.bitmap[x] + w = w >> uint(i%64) + if w != 0 { + return i + countTrailingZeros(w) + } + x++ + for ; x < len(bc.bitmap); x++ { + if bc.bitmap[x] != 0 { + return (x * 64) + countTrailingZeros(bc.bitmap[x]) + } + } + return -1 +} + +// reference the java implementation +// https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/BitmapContainer.java#L875-L892 +// +func (bc *bitmapContainer) numberOfRuns() int { + if bc.cardinality == 0 { + return 0 + } + + var numRuns uint64 + nextWord := bc.bitmap[0] + + for i := 0; i < len(bc.bitmap)-1; i++ { + word := nextWord + nextWord = bc.bitmap[i+1] + numRuns += popcount((^word)&(word<<1)) + ((word >> 63) &^ nextWord) + } + + word := nextWord + numRuns += popcount((^word) & (word << 1)) + if (word & 0x8000000000000000) != 0 { + numRuns++ + } + + return int(numRuns) +} + +// convert to run or array *if needed* +func (bc *bitmapContainer) toEfficientContainer() container { + + numRuns := bc.numberOfRuns() + + sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns) + sizeAsBitmapContainer := bitmapContainerSizeInBytes() + card := bc.getCardinality() + sizeAsArrayContainer := arrayContainerSizeInBytes(card) + + if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) { + return newRunContainer16FromBitmapContainer(bc) + } + if card <= arrayDefaultMaxSize { + return bc.toArrayContainer() + } + return bc +} + +func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer { + + if len(rc.iv) == 1 { + return newBitmapContainerwithRange(int(rc.iv[0].start), int(rc.iv[0].last())) + } + + bc := newBitmapContainer() + for i := range rc.iv { + setBitmapRange(bc.bitmap, int(rc.iv[i].start), int(rc.iv[i].last())+1) + bc.cardinality += int(rc.iv[i].last()) + 1 - int(rc.iv[i].start) + } + //bc.computeCardinality() + return bc +} + +func (bc *bitmapContainer) containerType() contype { + return bitmapContype +} diff --git a/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer_gen.go b/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer_gen.go new file mode 100644 index 0000000000..f6c053e650 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/bitmapcontainer_gen.go @@ -0,0 +1,415 @@ +package roaring + +// NOTE: THIS FILE WAS PRODUCED BY THE +// MSGP CODE GENERATION TOOL (github.com/tinylib/msgp) +// DO NOT EDIT + +import "github.com/tinylib/msgp/msgp" + +// DecodeMsg implements msgp.Decodable +func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) { + var field []byte + _ = field + var zbzg uint32 + zbzg, err = dc.ReadMapHeader() + if err != nil { + return + } + for zbzg > 0 { + zbzg-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "cardinality": + z.cardinality, err = dc.ReadInt() + if err != nil { + return + } + case "bitmap": + var zbai uint32 + zbai, err = dc.ReadArrayHeader() + if err != nil { + return + } + if cap(z.bitmap) >= int(zbai) { + z.bitmap = (z.bitmap)[:zbai] + } else { + z.bitmap = make([]uint64, zbai) + } + for zxvk := range z.bitmap { + z.bitmap[zxvk], err = dc.ReadUint64() + if err != nil { + return + } + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) { + // map header, size 2 + // write "cardinality" + err = en.Append(0x82, 0xab, 0x63, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0x69, 0x74, 0x79) + if err != nil { + return err + } + err = en.WriteInt(z.cardinality) + if err != nil { + return + } + // write "bitmap" + err = en.Append(0xa6, 0x62, 0x69, 0x74, 0x6d, 0x61, 0x70) + if err != nil { + return err + } + err = en.WriteArrayHeader(uint32(len(z.bitmap))) + if err != nil { + return + } + for zxvk := range z.bitmap { + err = en.WriteUint64(z.bitmap[zxvk]) + if err != nil { + return + } + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 2 + // string "cardinality" + o = append(o, 0x82, 0xab, 0x63, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0x69, 0x74, 0x79) + o = msgp.AppendInt(o, z.cardinality) + // string "bitmap" + o = append(o, 0xa6, 0x62, 0x69, 0x74, 0x6d, 0x61, 0x70) + o = msgp.AppendArrayHeader(o, uint32(len(z.bitmap))) + for zxvk := range z.bitmap { + o = msgp.AppendUint64(o, z.bitmap[zxvk]) + } + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zcmr uint32 + zcmr, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zcmr > 0 { + zcmr-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "cardinality": + z.cardinality, bts, err = msgp.ReadIntBytes(bts) + if err != nil { + return + } + case "bitmap": + var zajw uint32 + zajw, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + return + } + if cap(z.bitmap) >= int(zajw) { + z.bitmap = (z.bitmap)[:zajw] + } else { + z.bitmap = make([]uint64, zajw) + } + for zxvk := range z.bitmap { + z.bitmap[zxvk], bts, err = msgp.ReadUint64Bytes(bts) + if err != nil { + return + } + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *bitmapContainer) Msgsize() (s int) { + s = 1 + 12 + msgp.IntSize + 7 + msgp.ArrayHeaderSize + (len(z.bitmap) * (msgp.Uint64Size)) + return +} + +// DecodeMsg implements msgp.Decodable +func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) { + var field []byte + _ = field + var zhct uint32 + zhct, err = dc.ReadMapHeader() + if err != nil { + return + } + for zhct > 0 { + zhct-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "ptr": + if dc.IsNil() { + err = dc.ReadNil() + if err != nil { + return + } + z.ptr = nil + } else { + if z.ptr == nil { + z.ptr = new(bitmapContainer) + } + var zcua uint32 + zcua, err = dc.ReadMapHeader() + if err != nil { + return + } + for zcua > 0 { + zcua-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "cardinality": + z.ptr.cardinality, err = dc.ReadInt() + if err != nil { + return + } + case "bitmap": + var zxhx uint32 + zxhx, err = dc.ReadArrayHeader() + if err != nil { + return + } + if cap(z.ptr.bitmap) >= int(zxhx) { + z.ptr.bitmap = (z.ptr.bitmap)[:zxhx] + } else { + z.ptr.bitmap = make([]uint64, zxhx) + } + for zwht := range z.ptr.bitmap { + z.ptr.bitmap[zwht], err = dc.ReadUint64() + if err != nil { + return + } + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + } + case "i": + z.i, err = dc.ReadInt() + if err != nil { + return + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) { + // map header, size 2 + // write "ptr" + err = en.Append(0x82, 0xa3, 0x70, 0x74, 0x72) + if err != nil { + return err + } + if z.ptr == nil { + err = en.WriteNil() + if err != nil { + return + } + } else { + // map header, size 2 + // write "cardinality" + err = en.Append(0x82, 0xab, 0x63, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0x69, 0x74, 0x79) + if err != nil { + return err + } + err = en.WriteInt(z.ptr.cardinality) + if err != nil { + return + } + // write "bitmap" + err = en.Append(0xa6, 0x62, 0x69, 0x74, 0x6d, 0x61, 0x70) + if err != nil { + return err + } + err = en.WriteArrayHeader(uint32(len(z.ptr.bitmap))) + if err != nil { + return + } + for zwht := range z.ptr.bitmap { + err = en.WriteUint64(z.ptr.bitmap[zwht]) + if err != nil { + return + } + } + } + // write "i" + err = en.Append(0xa1, 0x69) + if err != nil { + return err + } + err = en.WriteInt(z.i) + if err != nil { + return + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 2 + // string "ptr" + o = append(o, 0x82, 0xa3, 0x70, 0x74, 0x72) + if z.ptr == nil { + o = msgp.AppendNil(o) + } else { + // map header, size 2 + // string "cardinality" + o = append(o, 0x82, 0xab, 0x63, 0x61, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0x69, 0x74, 0x79) + o = msgp.AppendInt(o, z.ptr.cardinality) + // string "bitmap" + o = append(o, 0xa6, 0x62, 0x69, 0x74, 0x6d, 0x61, 0x70) + o = msgp.AppendArrayHeader(o, uint32(len(z.ptr.bitmap))) + for zwht := range z.ptr.bitmap { + o = msgp.AppendUint64(o, z.ptr.bitmap[zwht]) + } + } + // string "i" + o = append(o, 0xa1, 0x69) + o = msgp.AppendInt(o, z.i) + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zlqf uint32 + zlqf, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zlqf > 0 { + zlqf-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "ptr": + if msgp.IsNil(bts) { + bts, err = msgp.ReadNilBytes(bts) + if err != nil { + return + } + z.ptr = nil + } else { + if z.ptr == nil { + z.ptr = new(bitmapContainer) + } + var zdaf uint32 + zdaf, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zdaf > 0 { + zdaf-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "cardinality": + z.ptr.cardinality, bts, err = msgp.ReadIntBytes(bts) + if err != nil { + return + } + case "bitmap": + var zpks uint32 + zpks, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + return + } + if cap(z.ptr.bitmap) >= int(zpks) { + z.ptr.bitmap = (z.ptr.bitmap)[:zpks] + } else { + z.ptr.bitmap = make([]uint64, zpks) + } + for zwht := range z.ptr.bitmap { + z.ptr.bitmap[zwht], bts, err = msgp.ReadUint64Bytes(bts) + if err != nil { + return + } + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + } + case "i": + z.i, bts, err = msgp.ReadIntBytes(bts) + if err != nil { + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *bitmapContainerShortIterator) Msgsize() (s int) { + s = 1 + 4 + if z.ptr == nil { + s += msgp.NilSize + } else { + s += 1 + 12 + msgp.IntSize + 7 + msgp.ArrayHeaderSize + (len(z.ptr.bitmap) * (msgp.Uint64Size)) + } + s += 2 + msgp.IntSize + return +} diff --git a/vendor/github.com/RoaringBitmap/roaring/ctz.go b/vendor/github.com/RoaringBitmap/roaring/ctz.go new file mode 100644 index 0000000000..e399dddebd --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/ctz.go @@ -0,0 +1,11 @@ +// +build go1.9 +// "go1.9", from Go version 1.9 onward +// See https://golang.org/pkg/go/build/#hdr-Build_Constraints + +package roaring + +import "math/bits" + +func countTrailingZeros(x uint64) int { + return bits.TrailingZeros64(x) +} diff --git a/vendor/github.com/RoaringBitmap/roaring/ctz_compat.go b/vendor/github.com/RoaringBitmap/roaring/ctz_compat.go new file mode 100644 index 0000000000..80220e6bee --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/ctz_compat.go @@ -0,0 +1,71 @@ +// +build !go1.9 + +package roaring + +// Reuse of portions of go/src/math/big standard lib code +// under this license: +/* +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +const deBruijn32 = 0x077CB531 + +var deBruijn32Lookup = []byte{ + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9, +} + +const deBruijn64 = 0x03f79d71b4ca8b09 + +var deBruijn64Lookup = []byte{ + 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4, + 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5, + 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11, + 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6, +} + +// trailingZeroBits returns the number of consecutive least significant zero +// bits of x. +func countTrailingZeros(x uint64) int { + // x & -x leaves only the right-most bit set in the word. Let k be the + // index of that bit. Since only a single bit is set, the value is two + // to the power of k. Multiplying by a power of two is equivalent to + // left shifting, in this case by k bits. The de Bruijn constant is + // such that all six bit, consecutive substrings are distinct. + // Therefore, if we have a left shifted version of this constant we can + // find by how many bits it was shifted by looking at which six bit + // substring ended up at the top of the word. + // (Knuth, volume 4, section 7.3.1) + if x == 0 { + // We have to special case 0; the fomula + // below doesn't work for 0. + return 64 + } + return int(deBruijn64Lookup[((x&-x)*(deBruijn64))>>58]) +} diff --git a/vendor/github.com/RoaringBitmap/roaring/fastaggregation.go b/vendor/github.com/RoaringBitmap/roaring/fastaggregation.go new file mode 100644 index 0000000000..762e500ed8 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/fastaggregation.go @@ -0,0 +1,215 @@ +package roaring + +import ( + "container/heap" +) + +// Or function that requires repairAfterLazy +func lazyOR(x1, x2 *Bitmap) *Bitmap { + answer := NewBitmap() + pos1 := 0 + pos2 := 0 + length1 := x1.highlowcontainer.size() + length2 := x2.highlowcontainer.size() +main: + for (pos1 < length1) && (pos2 < length2) { + s1 := x1.highlowcontainer.getKeyAtIndex(pos1) + s2 := x2.highlowcontainer.getKeyAtIndex(pos2) + + for { + if s1 < s2 { + answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1) + pos1++ + if pos1 == length1 { + break main + } + s1 = x1.highlowcontainer.getKeyAtIndex(pos1) + } else if s1 > s2 { + answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2) + pos2++ + if pos2 == length2 { + break main + } + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } else { + c1 := x1.highlowcontainer.getContainerAtIndex(pos1) + switch t := c1.(type) { + case *arrayContainer: + c1 = t.toBitmapContainer() + case *runContainer16: + if !t.isFull() { + c1 = t.toBitmapContainer() + } + } + + answer.highlowcontainer.appendContainer(s1, c1.lazyOR(x2.highlowcontainer.getContainerAtIndex(pos2)), false) + pos1++ + pos2++ + if (pos1 == length1) || (pos2 == length2) { + break main + } + s1 = x1.highlowcontainer.getKeyAtIndex(pos1) + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } + } + } + if pos1 == length1 { + answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2) + } else if pos2 == length2 { + answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1) + } + return answer +} + +// In-place Or function that requires repairAfterLazy +func (x1 *Bitmap) lazyOR(x2 *Bitmap) *Bitmap { + pos1 := 0 + pos2 := 0 + length1 := x1.highlowcontainer.size() + length2 := x2.highlowcontainer.size() +main: + for (pos1 < length1) && (pos2 < length2) { + s1 := x1.highlowcontainer.getKeyAtIndex(pos1) + s2 := x2.highlowcontainer.getKeyAtIndex(pos2) + + for { + if s1 < s2 { + pos1++ + if pos1 == length1 { + break main + } + s1 = x1.highlowcontainer.getKeyAtIndex(pos1) + } else if s1 > s2 { + x1.highlowcontainer.insertNewKeyValueAt(pos1, s2, x2.highlowcontainer.getContainerAtIndex(pos2).clone()) + pos2++ + pos1++ + length1++ + if pos2 == length2 { + break main + } + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } else { + c1 := x1.highlowcontainer.getContainerAtIndex(pos1) + switch t := c1.(type) { + case *arrayContainer: + c1 = t.toBitmapContainer() + case *runContainer16: + if !t.isFull() { + c1 = t.toBitmapContainer() + } + case *bitmapContainer: + c1 = x1.highlowcontainer.getWritableContainerAtIndex(pos1) + } + + x1.highlowcontainer.containers[pos1] = c1.lazyIOR(x2.highlowcontainer.getContainerAtIndex(pos2)) + x1.highlowcontainer.needCopyOnWrite[pos1] = false + pos1++ + pos2++ + if (pos1 == length1) || (pos2 == length2) { + break main + } + s1 = x1.highlowcontainer.getKeyAtIndex(pos1) + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } + } + } + if pos1 == length1 { + x1.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2) + } + return x1 +} + +// to be called after lazy aggregates +func (x1 *Bitmap) repairAfterLazy() { + for pos := 0; pos < x1.highlowcontainer.size(); pos++ { + c := x1.highlowcontainer.getContainerAtIndex(pos) + switch c.(type) { + case *bitmapContainer: + if c.(*bitmapContainer).cardinality == invalidCardinality { + c = x1.highlowcontainer.getWritableContainerAtIndex(pos) + c.(*bitmapContainer).computeCardinality() + if c.(*bitmapContainer).getCardinality() <= arrayDefaultMaxSize { + x1.highlowcontainer.setContainerAtIndex(pos, c.(*bitmapContainer).toArrayContainer()) + } else if c.(*bitmapContainer).isFull() { + x1.highlowcontainer.setContainerAtIndex(pos, newRunContainer16Range(0, MaxUint16)) + } + } + } + } +} + +// FastAnd computes the intersection between many bitmaps quickly +// Compared to the And function, it can take many bitmaps as input, thus saving the trouble +// of manually calling "And" many times. +func FastAnd(bitmaps ...*Bitmap) *Bitmap { + if len(bitmaps) == 0 { + return NewBitmap() + } else if len(bitmaps) == 1 { + return bitmaps[0].Clone() + } + answer := And(bitmaps[0], bitmaps[1]) + for _, bm := range bitmaps[2:] { + answer.And(bm) + } + return answer +} + +// FastOr computes the union between many bitmaps quickly, as opposed to having to call Or repeatedly. +// It might also be faster than calling Or repeatedly. +func FastOr(bitmaps ...*Bitmap) *Bitmap { + if len(bitmaps) == 0 { + return NewBitmap() + } else if len(bitmaps) == 1 { + return bitmaps[0].Clone() + } + answer := lazyOR(bitmaps[0], bitmaps[1]) + for _, bm := range bitmaps[2:] { + answer = answer.lazyOR(bm) + } + // here is where repairAfterLazy is called. + answer.repairAfterLazy() + return answer +} + +// HeapOr computes the union between many bitmaps quickly using a heap. +// It might be faster than calling Or repeatedly. +func HeapOr(bitmaps ...*Bitmap) *Bitmap { + if len(bitmaps) == 0 { + return NewBitmap() + } + // TODO: for better speed, we could do the operation lazily, see Java implementation + pq := make(priorityQueue, len(bitmaps)) + for i, bm := range bitmaps { + pq[i] = &item{bm, i} + } + heap.Init(&pq) + + for pq.Len() > 1 { + x1 := heap.Pop(&pq).(*item) + x2 := heap.Pop(&pq).(*item) + heap.Push(&pq, &item{Or(x1.value, x2.value), 0}) + } + return heap.Pop(&pq).(*item).value +} + +// HeapXor computes the symmetric difference between many bitmaps quickly (as opposed to calling Xor repeated). +// Internally, this function uses a heap. +// It might be faster than calling Xor repeatedly. +func HeapXor(bitmaps ...*Bitmap) *Bitmap { + if len(bitmaps) == 0 { + return NewBitmap() + } + + pq := make(priorityQueue, len(bitmaps)) + for i, bm := range bitmaps { + pq[i] = &item{bm, i} + } + heap.Init(&pq) + + for pq.Len() > 1 { + x1 := heap.Pop(&pq).(*item) + x2 := heap.Pop(&pq).(*item) + heap.Push(&pq, &item{Xor(x1.value, x2.value), 0}) + } + return heap.Pop(&pq).(*item).value +} diff --git a/vendor/github.com/RoaringBitmap/roaring/manyiterator.go b/vendor/github.com/RoaringBitmap/roaring/manyiterator.go new file mode 100644 index 0000000000..b4f630a7b4 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/manyiterator.go @@ -0,0 +1,23 @@ +package roaring + +type manyIterable interface { + nextMany(hs uint32, buf []uint32) int +} + +type manyIterator struct { + slice []uint16 + loc int +} + +func (si *manyIterator) nextMany(hs uint32, buf []uint32) int { + n := 0 + l := si.loc + s := si.slice + for n < len(buf) && l < len(s) { + buf[n] = uint32(s[l]) | hs + l++ + n++ + } + si.loc = l + return n +} diff --git a/vendor/github.com/RoaringBitmap/roaring/parallel.go b/vendor/github.com/RoaringBitmap/roaring/parallel.go new file mode 100644 index 0000000000..09f94fe83c --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/parallel.go @@ -0,0 +1,613 @@ +package roaring + +import ( + "container/heap" + "fmt" + "runtime" + "sync" +) + +var defaultWorkerCount = runtime.NumCPU() + +type bitmapContainerKey struct { + key uint16 + idx int + bitmap *Bitmap +} + +type multipleContainers struct { + key uint16 + containers []container + idx int +} + +type keyedContainer struct { + key uint16 + container container + idx int +} + +type bitmapContainerHeap []bitmapContainerKey + +func (h bitmapContainerHeap) Len() int { return len(h) } +func (h bitmapContainerHeap) Less(i, j int) bool { return h[i].key < h[j].key } +func (h bitmapContainerHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } + +func (h *bitmapContainerHeap) Push(x interface{}) { + // Push and Pop use pointer receivers because they modify the slice's length, + // not just its contents. + *h = append(*h, x.(bitmapContainerKey)) +} + +func (h *bitmapContainerHeap) Pop() interface{} { + old := *h + n := len(old) + x := old[n-1] + *h = old[0 : n-1] + return x +} + +func (h bitmapContainerHeap) Peek() bitmapContainerKey { + return h[0] +} + +func (h *bitmapContainerHeap) popIncrementing() (key uint16, container container) { + k := h.Peek() + key = k.key + container = k.bitmap.highlowcontainer.containers[k.idx] + + newIdx := k.idx + 1 + if newIdx < k.bitmap.highlowcontainer.size() { + k = bitmapContainerKey{ + k.bitmap.highlowcontainer.keys[newIdx], + newIdx, + k.bitmap, + } + (*h)[0] = k + heap.Fix(h, 0) + } else { + heap.Pop(h) + } + + return +} + +func (h *bitmapContainerHeap) Next(containers []container) multipleContainers { + if h.Len() == 0 { + return multipleContainers{} + } + + key, container := h.popIncrementing() + containers = append(containers, container) + + for h.Len() > 0 && key == h.Peek().key { + _, container = h.popIncrementing() + containers = append(containers, container) + } + + return multipleContainers{ + key, + containers, + -1, + } +} + +func newBitmapContainerHeap(bitmaps ...*Bitmap) bitmapContainerHeap { + // Initialize heap + var h bitmapContainerHeap = make([]bitmapContainerKey, 0, len(bitmaps)) + for _, bitmap := range bitmaps { + if !bitmap.IsEmpty() { + key := bitmapContainerKey{ + bitmap.highlowcontainer.keys[0], + 0, + bitmap, + } + h = append(h, key) + } + } + + heap.Init(&h) + + return h +} + +func repairAfterLazy(c container) container { + switch t := c.(type) { + case *bitmapContainer: + if t.cardinality == invalidCardinality { + t.computeCardinality() + } + + if t.getCardinality() <= arrayDefaultMaxSize { + return t.toArrayContainer() + } else if c.(*bitmapContainer).isFull() { + return newRunContainer16Range(0, MaxUint16) + } + } + + return c +} + +func toBitmapContainer(c container) container { + switch t := c.(type) { + case *arrayContainer: + return t.toBitmapContainer() + case *runContainer16: + if !t.isFull() { + return t.toBitmapContainer() + } + } + return c +} + +func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) { + expectedKeys := -1 + appendedKeys := 0 + keys := make([]uint16, 0) + containers := make([]container, 0) + for appendedKeys != expectedKeys { + select { + case item := <-resultChan: + if len(keys) <= item.idx { + keys = append(keys, make([]uint16, item.idx-len(keys)+1)...) + containers = append(containers, make([]container, item.idx-len(containers)+1)...) + } + keys[item.idx] = item.key + containers[item.idx] = item.container + + appendedKeys++ + case msg := <-expectedKeysChan: + expectedKeys = msg + } + } + answer := &Bitmap{ + roaringArray{ + make([]uint16, 0, expectedKeys), + make([]container, 0, expectedKeys), + make([]bool, 0, expectedKeys), + false, + nil, + }, + } + for i := range keys { + if containers[i] != nil { // in case a resulting container was empty, see ParAnd function + answer.highlowcontainer.appendContainer(keys[i], containers[i], false) + } + } + + bitmapChan <- answer +} + +// ParHeapOr computes the union (OR) of all provided bitmaps in parallel, +// where the parameter "parallelism" determines how many workers are to be used +// (if it is set to 0, a default number of workers is chosen) +// ParHeapOr uses a heap to compute the union. For rare cases it might be faster than ParOr +func ParHeapOr(parallelism int, bitmaps ...*Bitmap) *Bitmap { + + bitmapCount := len(bitmaps) + if bitmapCount == 0 { + return NewBitmap() + } else if bitmapCount == 1 { + return bitmaps[0].Clone() + } + + if parallelism == 0 { + parallelism = defaultWorkerCount + } + + h := newBitmapContainerHeap(bitmaps...) + + bitmapChan := make(chan *Bitmap) + inputChan := make(chan multipleContainers, 128) + resultChan := make(chan keyedContainer, 32) + expectedKeysChan := make(chan int) + + pool := sync.Pool{ + New: func() interface{} { + return make([]container, 0, len(bitmaps)) + }, + } + + orFunc := func() { + // Assumes only structs with >=2 containers are passed + for input := range inputChan { + c := toBitmapContainer(input.containers[0]).lazyOR(input.containers[1]) + for _, next := range input.containers[2:] { + c = c.lazyIOR(next) + } + c = repairAfterLazy(c) + kx := keyedContainer{ + input.key, + c, + input.idx, + } + resultChan <- kx + pool.Put(input.containers[:0]) + } + } + + go appenderRoutine(bitmapChan, resultChan, expectedKeysChan) + + for i := 0; i < parallelism; i++ { + go orFunc() + } + + idx := 0 + for h.Len() > 0 { + ck := h.Next(pool.Get().([]container)) + if len(ck.containers) == 1 { + resultChan <- keyedContainer{ + ck.key, + ck.containers[0], + idx, + } + pool.Put(ck.containers[:0]) + } else { + ck.idx = idx + inputChan <- ck + } + idx++ + } + expectedKeysChan <- idx + + bitmap := <-bitmapChan + + close(inputChan) + close(resultChan) + close(expectedKeysChan) + + return bitmap +} + +// ParAnd computes the intersection (AND) of all provided bitmaps in parallel, +// where the parameter "parallelism" determines how many workers are to be used +// (if it is set to 0, a default number of workers is chosen) +func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap { + bitmapCount := len(bitmaps) + if bitmapCount == 0 { + return NewBitmap() + } else if bitmapCount == 1 { + return bitmaps[0].Clone() + } + + if parallelism == 0 { + parallelism = defaultWorkerCount + } + + h := newBitmapContainerHeap(bitmaps...) + + bitmapChan := make(chan *Bitmap) + inputChan := make(chan multipleContainers, 128) + resultChan := make(chan keyedContainer, 32) + expectedKeysChan := make(chan int) + + andFunc := func() { + // Assumes only structs with >=2 containers are passed + for input := range inputChan { + c := input.containers[0].and(input.containers[1]) + for _, next := range input.containers[2:] { + if c.getCardinality() == 0 { + break + } + c = c.iand(next) + } + + // Send a nil explicitly if the result of the intersection is an empty container + if c.getCardinality() == 0 { + c = nil + } + + kx := keyedContainer{ + input.key, + c, + input.idx, + } + resultChan <- kx + } + } + + go appenderRoutine(bitmapChan, resultChan, expectedKeysChan) + + for i := 0; i < parallelism; i++ { + go andFunc() + } + + idx := 0 + for h.Len() > 0 { + ck := h.Next(make([]container, 0, 4)) + if len(ck.containers) == bitmapCount { + ck.idx = idx + inputChan <- ck + idx++ + } + } + expectedKeysChan <- idx + + bitmap := <-bitmapChan + + close(inputChan) + close(resultChan) + close(expectedKeysChan) + + return bitmap +} + +// ParOr computes the union (OR) of all provided bitmaps in parallel, +// where the parameter "parallelism" determines how many workers are to be used +// (if it is set to 0, a default number of workers is chosen) +func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap { + var lKey uint16 = MaxUint16 + var hKey uint16 = 0 + + bitmapsFiltered := bitmaps[:0] + for _, b := range bitmaps { + if !b.IsEmpty() { + bitmapsFiltered = append(bitmapsFiltered, b) + } + } + bitmaps = bitmapsFiltered + + for _, b := range bitmaps { + lKey = minOfUint16(lKey, b.highlowcontainer.keys[0]) + hKey = maxOfUint16(hKey, b.highlowcontainer.keys[b.highlowcontainer.size()-1]) + } + + if lKey == MaxUint16 && hKey == 0 { + return New() + } else if len(bitmaps) == 1 { + return bitmaps[0] + } + + keyRange := hKey - lKey + 1 + if keyRange == 1 { + // revert to FastOr. Since the key range is 0 + // no container-level aggregation parallelism is achievable + return FastOr(bitmaps...) + } + + if parallelism == 0 { + parallelism = defaultWorkerCount + } + + var chunkSize int + var chunkCount int + if parallelism*4 > int(keyRange) { + chunkSize = 1 + chunkCount = int(keyRange) + } else { + chunkCount = parallelism * 4 + chunkSize = (int(keyRange) + chunkCount - 1) / chunkCount + } + + if chunkCount*chunkSize < int(keyRange) { + // it's fine to panic to indicate an implementation error + panic(fmt.Sprintf("invariant check failed: chunkCount * chunkSize < keyRange, %d * %d < %d", chunkCount, chunkSize, keyRange)) + } + + chunks := make([]*roaringArray, chunkCount) + + chunkSpecChan := make(chan parChunkSpec, minOfInt(maxOfInt(64, 2*parallelism), int(chunkCount))) + chunkChan := make(chan parChunk, minOfInt(32, int(chunkCount))) + + orFunc := func() { + for spec := range chunkSpecChan { + ra := lazyOrOnRange(&bitmaps[0].highlowcontainer, &bitmaps[1].highlowcontainer, spec.start, spec.end) + for _, b := range bitmaps[2:] { + ra = lazyIOrOnRange(ra, &b.highlowcontainer, spec.start, spec.end) + } + + for i, c := range ra.containers { + ra.containers[i] = repairAfterLazy(c) + } + + chunkChan <- parChunk{ra, spec.idx} + } + } + + for i := 0; i < parallelism; i++ { + go orFunc() + } + + go func() { + for i := 0; i < chunkCount; i++ { + spec := parChunkSpec{ + start: uint16(int(lKey) + i*chunkSize), + end: uint16(minOfInt(int(lKey)+(i+1)*chunkSize-1, int(hKey))), + idx: int(i), + } + chunkSpecChan <- spec + } + }() + + chunksRemaining := chunkCount + for chunk := range chunkChan { + chunks[chunk.idx] = chunk.ra + chunksRemaining-- + if chunksRemaining == 0 { + break + } + } + close(chunkChan) + close(chunkSpecChan) + + containerCount := 0 + for _, chunk := range chunks { + containerCount += chunk.size() + } + + result := Bitmap{ + roaringArray{ + containers: make([]container, containerCount), + keys: make([]uint16, containerCount), + needCopyOnWrite: make([]bool, containerCount), + }, + } + + resultOffset := 0 + for _, chunk := range chunks { + copy(result.highlowcontainer.containers[resultOffset:], chunk.containers) + copy(result.highlowcontainer.keys[resultOffset:], chunk.keys) + copy(result.highlowcontainer.needCopyOnWrite[resultOffset:], chunk.needCopyOnWrite) + resultOffset += chunk.size() + } + + return &result +} + +type parChunkSpec struct { + start uint16 + end uint16 + idx int +} + +type parChunk struct { + ra *roaringArray + idx int +} + +func (c parChunk) size() int { + return c.ra.size() +} + +func parNaiveStartAt(ra *roaringArray, start uint16, last uint16) int { + for idx, key := range ra.keys { + if key >= start && key <= last { + return idx + } else if key > last { + break + } + } + return ra.size() +} + +func lazyOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray { + answer := newRoaringArray() + length1 := ra1.size() + length2 := ra2.size() + + idx1 := parNaiveStartAt(ra1, start, last) + idx2 := parNaiveStartAt(ra2, start, last) + + var key1 uint16 + var key2 uint16 + if idx1 < length1 && idx2 < length2 { + key1 = ra1.getKeyAtIndex(idx1) + key2 = ra2.getKeyAtIndex(idx2) + + for key1 <= last && key2 <= last { + + if key1 < key2 { + answer.appendCopy(*ra1, idx1) + idx1++ + if idx1 == length1 { + break + } + key1 = ra1.getKeyAtIndex(idx1) + } else if key1 > key2 { + answer.appendCopy(*ra2, idx2) + idx2++ + if idx2 == length2 { + break + } + key2 = ra2.getKeyAtIndex(idx2) + } else { + c1 := ra1.getFastContainerAtIndex(idx1, false) + + answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false) + idx1++ + idx2++ + if idx1 == length1 || idx2 == length2 { + break + } + + key1 = ra1.getKeyAtIndex(idx1) + key2 = ra2.getKeyAtIndex(idx2) + } + } + } + + if idx2 < length2 { + key2 = ra2.getKeyAtIndex(idx2) + for key2 <= last { + answer.appendCopy(*ra2, idx2) + idx2++ + if idx2 == length2 { + break + } + key2 = ra2.getKeyAtIndex(idx2) + } + } + + if idx1 < length1 { + key1 = ra1.getKeyAtIndex(idx1) + for key1 <= last { + answer.appendCopy(*ra1, idx1) + idx1++ + if idx1 == length1 { + break + } + key1 = ra1.getKeyAtIndex(idx1) + } + } + return answer +} + +func lazyIOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray { + length1 := ra1.size() + length2 := ra2.size() + + idx1 := 0 + idx2 := parNaiveStartAt(ra2, start, last) + + var key1 uint16 + var key2 uint16 + if idx1 < length1 && idx2 < length2 { + key1 = ra1.getKeyAtIndex(idx1) + key2 = ra2.getKeyAtIndex(idx2) + + for key1 <= last && key2 <= last { + if key1 < key2 { + idx1++ + if idx1 >= length1 { + break + } + key1 = ra1.getKeyAtIndex(idx1) + } else if key1 > key2 { + ra1.insertNewKeyValueAt(idx1, key2, ra2.getContainerAtIndex(idx2)) + ra1.needCopyOnWrite[idx1] = true + idx2++ + idx1++ + length1++ + if idx2 >= length2 { + break + } + key2 = ra2.getKeyAtIndex(idx2) + } else { + c1 := ra1.getFastContainerAtIndex(idx1, true) + + ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2)) + ra1.needCopyOnWrite[idx1] = false + idx1++ + idx2++ + if idx1 >= length1 || idx2 >= length2 { + break + } + + key1 = ra1.getKeyAtIndex(idx1) + key2 = ra2.getKeyAtIndex(idx2) + } + } + } + if idx2 < length2 { + key2 = ra2.getKeyAtIndex(idx2) + for key2 <= last { + ra1.appendCopy(*ra2, idx2) + idx2++ + if idx2 >= length2 { + break + } + key2 = ra2.getKeyAtIndex(idx2) + } + } + return ra1 +} diff --git a/vendor/github.com/RoaringBitmap/roaring/popcnt.go b/vendor/github.com/RoaringBitmap/roaring/popcnt.go new file mode 100644 index 0000000000..9d99508ce0 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/popcnt.go @@ -0,0 +1,11 @@ +// +build go1.9 +// "go1.9", from Go version 1.9 onward +// See https://golang.org/pkg/go/build/#hdr-Build_Constraints + +package roaring + +import "math/bits" + +func popcount(x uint64) uint64 { + return uint64(bits.OnesCount64(x)) +} diff --git a/vendor/github.com/RoaringBitmap/roaring/popcnt_amd64.s b/vendor/github.com/RoaringBitmap/roaring/popcnt_amd64.s new file mode 100644 index 0000000000..1f13fa2eca --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/popcnt_amd64.s @@ -0,0 +1,103 @@ +// +build amd64,!appengine,!go1.9 + +TEXT ·hasAsm(SB),4,$0-1 +MOVQ $1, AX +CPUID +SHRQ $23, CX +ANDQ $1, CX +MOVB CX, ret+0(FP) +RET + +#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2 + +TEXT ·popcntSliceAsm(SB),4,$0-32 +XORQ AX, AX +MOVQ s+0(FP), SI +MOVQ s_len+8(FP), CX +TESTQ CX, CX +JZ popcntSliceEnd +popcntSliceLoop: +BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX +ADDQ DX, AX +ADDQ $8, SI +LOOP popcntSliceLoop +popcntSliceEnd: +MOVQ AX, ret+24(FP) +RET + +TEXT ·popcntMaskSliceAsm(SB),4,$0-56 +XORQ AX, AX +MOVQ s+0(FP), SI +MOVQ s_len+8(FP), CX +TESTQ CX, CX +JZ popcntMaskSliceEnd +MOVQ m+24(FP), DI +popcntMaskSliceLoop: +MOVQ (DI), DX +NOTQ DX +ANDQ (SI), DX +POPCNTQ_DX_DX +ADDQ DX, AX +ADDQ $8, SI +ADDQ $8, DI +LOOP popcntMaskSliceLoop +popcntMaskSliceEnd: +MOVQ AX, ret+48(FP) +RET + +TEXT ·popcntAndSliceAsm(SB),4,$0-56 +XORQ AX, AX +MOVQ s+0(FP), SI +MOVQ s_len+8(FP), CX +TESTQ CX, CX +JZ popcntAndSliceEnd +MOVQ m+24(FP), DI +popcntAndSliceLoop: +MOVQ (DI), DX +ANDQ (SI), DX +POPCNTQ_DX_DX +ADDQ DX, AX +ADDQ $8, SI +ADDQ $8, DI +LOOP popcntAndSliceLoop +popcntAndSliceEnd: +MOVQ AX, ret+48(FP) +RET + +TEXT ·popcntOrSliceAsm(SB),4,$0-56 +XORQ AX, AX +MOVQ s+0(FP), SI +MOVQ s_len+8(FP), CX +TESTQ CX, CX +JZ popcntOrSliceEnd +MOVQ m+24(FP), DI +popcntOrSliceLoop: +MOVQ (DI), DX +ORQ (SI), DX +POPCNTQ_DX_DX +ADDQ DX, AX +ADDQ $8, SI +ADDQ $8, DI +LOOP popcntOrSliceLoop +popcntOrSliceEnd: +MOVQ AX, ret+48(FP) +RET + +TEXT ·popcntXorSliceAsm(SB),4,$0-56 +XORQ AX, AX +MOVQ s+0(FP), SI +MOVQ s_len+8(FP), CX +TESTQ CX, CX +JZ popcntXorSliceEnd +MOVQ m+24(FP), DI +popcntXorSliceLoop: +MOVQ (DI), DX +XORQ (SI), DX +POPCNTQ_DX_DX +ADDQ DX, AX +ADDQ $8, SI +ADDQ $8, DI +LOOP popcntXorSliceLoop +popcntXorSliceEnd: +MOVQ AX, ret+48(FP) +RET diff --git a/vendor/github.com/RoaringBitmap/roaring/popcnt_asm.go b/vendor/github.com/RoaringBitmap/roaring/popcnt_asm.go new file mode 100644 index 0000000000..882d7f4ecf --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/popcnt_asm.go @@ -0,0 +1,67 @@ +// +build amd64,!appengine,!go1.9 + +package roaring + +// *** the following functions are defined in popcnt_amd64.s + +//go:noescape + +func hasAsm() bool + +// useAsm is a flag used to select the GO or ASM implementation of the popcnt function +var useAsm = hasAsm() + +//go:noescape + +func popcntSliceAsm(s []uint64) uint64 + +//go:noescape + +func popcntMaskSliceAsm(s, m []uint64) uint64 + +//go:noescape + +func popcntAndSliceAsm(s, m []uint64) uint64 + +//go:noescape + +func popcntOrSliceAsm(s, m []uint64) uint64 + +//go:noescape + +func popcntXorSliceAsm(s, m []uint64) uint64 + +func popcntSlice(s []uint64) uint64 { + if useAsm { + return popcntSliceAsm(s) + } + return popcntSliceGo(s) +} + +func popcntMaskSlice(s, m []uint64) uint64 { + if useAsm { + return popcntMaskSliceAsm(s, m) + } + return popcntMaskSliceGo(s, m) +} + +func popcntAndSlice(s, m []uint64) uint64 { + if useAsm { + return popcntAndSliceAsm(s, m) + } + return popcntAndSliceGo(s, m) +} + +func popcntOrSlice(s, m []uint64) uint64 { + if useAsm { + return popcntOrSliceAsm(s, m) + } + return popcntOrSliceGo(s, m) +} + +func popcntXorSlice(s, m []uint64) uint64 { + if useAsm { + return popcntXorSliceAsm(s, m) + } + return popcntXorSliceGo(s, m) +} diff --git a/vendor/github.com/RoaringBitmap/roaring/popcnt_compat.go b/vendor/github.com/RoaringBitmap/roaring/popcnt_compat.go new file mode 100644 index 0000000000..7ae82d4c83 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/popcnt_compat.go @@ -0,0 +1,17 @@ +// +build !go1.9 + +package roaring + +// bit population count, take from +// https://code.google.com/p/go/issues/detail?id=4988#c11 +// credit: https://code.google.com/u/arnehormann/ +// credit: https://play.golang.org/p/U7SogJ7psJ +// credit: http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel +func popcount(x uint64) uint64 { + x -= (x >> 1) & 0x5555555555555555 + x = (x>>2)&0x3333333333333333 + x&0x3333333333333333 + x += x >> 4 + x &= 0x0f0f0f0f0f0f0f0f + x *= 0x0101010101010101 + return x >> 56 +} diff --git a/vendor/github.com/RoaringBitmap/roaring/popcnt_generic.go b/vendor/github.com/RoaringBitmap/roaring/popcnt_generic.go new file mode 100644 index 0000000000..edf2083f19 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/popcnt_generic.go @@ -0,0 +1,23 @@ +// +build !amd64 appengine go1.9 + +package roaring + +func popcntSlice(s []uint64) uint64 { + return popcntSliceGo(s) +} + +func popcntMaskSlice(s, m []uint64) uint64 { + return popcntMaskSliceGo(s, m) +} + +func popcntAndSlice(s, m []uint64) uint64 { + return popcntAndSliceGo(s, m) +} + +func popcntOrSlice(s, m []uint64) uint64 { + return popcntOrSliceGo(s, m) +} + +func popcntXorSlice(s, m []uint64) uint64 { + return popcntXorSliceGo(s, m) +} diff --git a/vendor/github.com/RoaringBitmap/roaring/popcnt_slices.go b/vendor/github.com/RoaringBitmap/roaring/popcnt_slices.go new file mode 100644 index 0000000000..d27c5f383d --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/popcnt_slices.go @@ -0,0 +1,41 @@ +package roaring + +func popcntSliceGo(s []uint64) uint64 { + cnt := uint64(0) + for _, x := range s { + cnt += popcount(x) + } + return cnt +} + +func popcntMaskSliceGo(s, m []uint64) uint64 { + cnt := uint64(0) + for i := range s { + cnt += popcount(s[i] &^ m[i]) + } + return cnt +} + +func popcntAndSliceGo(s, m []uint64) uint64 { + cnt := uint64(0) + for i := range s { + cnt += popcount(s[i] & m[i]) + } + return cnt +} + +func popcntOrSliceGo(s, m []uint64) uint64 { + cnt := uint64(0) + for i := range s { + cnt += popcount(s[i] | m[i]) + } + return cnt +} + +func popcntXorSliceGo(s, m []uint64) uint64 { + cnt := uint64(0) + for i := range s { + cnt += popcount(s[i] ^ m[i]) + } + return cnt +} diff --git a/vendor/github.com/RoaringBitmap/roaring/priorityqueue.go b/vendor/github.com/RoaringBitmap/roaring/priorityqueue.go new file mode 100644 index 0000000000..9259a68163 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/priorityqueue.go @@ -0,0 +1,101 @@ +package roaring + +import "container/heap" + +///////////// +// The priorityQueue is used to keep Bitmaps sorted. +//////////// + +type item struct { + value *Bitmap + index int +} + +type priorityQueue []*item + +func (pq priorityQueue) Len() int { return len(pq) } + +func (pq priorityQueue) Less(i, j int) bool { + return pq[i].value.GetSizeInBytes() < pq[j].value.GetSizeInBytes() +} + +func (pq priorityQueue) Swap(i, j int) { + pq[i], pq[j] = pq[j], pq[i] + pq[i].index = i + pq[j].index = j +} + +func (pq *priorityQueue) Push(x interface{}) { + n := len(*pq) + item := x.(*item) + item.index = n + *pq = append(*pq, item) +} + +func (pq *priorityQueue) Pop() interface{} { + old := *pq + n := len(old) + item := old[n-1] + item.index = -1 // for safety + *pq = old[0 : n-1] + return item +} + +func (pq *priorityQueue) update(item *item, value *Bitmap) { + item.value = value + heap.Fix(pq, item.index) +} + +///////////// +// The containerPriorityQueue is used to keep the containers of various Bitmaps sorted. +//////////// + +type containeritem struct { + value *Bitmap + keyindex int + index int +} + +type containerPriorityQueue []*containeritem + +func (pq containerPriorityQueue) Len() int { return len(pq) } + +func (pq containerPriorityQueue) Less(i, j int) bool { + k1 := pq[i].value.highlowcontainer.getKeyAtIndex(pq[i].keyindex) + k2 := pq[j].value.highlowcontainer.getKeyAtIndex(pq[j].keyindex) + if k1 != k2 { + return k1 < k2 + } + c1 := pq[i].value.highlowcontainer.getContainerAtIndex(pq[i].keyindex) + c2 := pq[j].value.highlowcontainer.getContainerAtIndex(pq[j].keyindex) + + return c1.getCardinality() > c2.getCardinality() +} + +func (pq containerPriorityQueue) Swap(i, j int) { + pq[i], pq[j] = pq[j], pq[i] + pq[i].index = i + pq[j].index = j +} + +func (pq *containerPriorityQueue) Push(x interface{}) { + n := len(*pq) + item := x.(*containeritem) + item.index = n + *pq = append(*pq, item) +} + +func (pq *containerPriorityQueue) Pop() interface{} { + old := *pq + n := len(old) + item := old[n-1] + item.index = -1 // for safety + *pq = old[0 : n-1] + return item +} + +//func (pq *containerPriorityQueue) update(item *containeritem, value *Bitmap, keyindex int) { +// item.value = value +// item.keyindex = keyindex +// heap.Fix(pq, item.index) +//} diff --git a/vendor/github.com/RoaringBitmap/roaring/rle.go b/vendor/github.com/RoaringBitmap/roaring/rle.go new file mode 100644 index 0000000000..8f3d4edd68 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/rle.go @@ -0,0 +1,1667 @@ +package roaring + +// +// Copyright (c) 2016 by the roaring authors. +// Licensed under the Apache License, Version 2.0. +// +// We derive a few lines of code from the sort.Search +// function in the golang standard library. That function +// is Copyright 2009 The Go Authors, and licensed +// under the following BSD-style license. +/* +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +import ( + "fmt" + "sort" + "unsafe" +) + +//go:generate msgp -unexported + +// runContainer32 does run-length encoding of sets of +// uint32 integers. +type runContainer32 struct { + iv []interval32 + card int64 + + // avoid allocation during search + myOpts searchOptions `msg:"-"` +} + +// interval32 is the internal to runContainer32 +// structure that maintains the individual [Start, last] +// closed intervals. +type interval32 struct { + start uint32 + last uint32 +} + +// runlen returns the count of integers in the interval. +func (iv interval32) runlen() int64 { + return 1 + int64(iv.last) - int64(iv.start) +} + +// String produces a human viewable string of the contents. +func (iv interval32) String() string { + return fmt.Sprintf("[%d, %d]", iv.start, iv.last) +} + +func ivalString32(iv []interval32) string { + var s string + var j int + var p interval32 + for j, p = range iv { + s += fmt.Sprintf("%v:[%d, %d], ", j, p.start, p.last) + } + return s +} + +// String produces a human viewable string of the contents. +func (rc *runContainer32) String() string { + if len(rc.iv) == 0 { + return "runContainer32{}" + } + is := ivalString32(rc.iv) + return `runContainer32{` + is + `}` +} + +// uint32Slice is a sort.Sort convenience method +type uint32Slice []uint32 + +// Len returns the length of p. +func (p uint32Slice) Len() int { return len(p) } + +// Less returns p[i] < p[j] +func (p uint32Slice) Less(i, j int) bool { return p[i] < p[j] } + +// Swap swaps elements i and j. +func (p uint32Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +//msgp:ignore addHelper + +// addHelper helps build a runContainer32. +type addHelper32 struct { + runstart uint32 + runlen uint32 + actuallyAdded uint32 + m []interval32 + rc *runContainer32 +} + +func (ah *addHelper32) storeIval(runstart, runlen uint32) { + mi := interval32{start: runstart, last: runstart + runlen} + ah.m = append(ah.m, mi) +} + +func (ah *addHelper32) add(cur, prev uint32, i int) { + if cur == prev+1 { + ah.runlen++ + ah.actuallyAdded++ + } else { + if cur < prev { + panic(fmt.Sprintf("newRunContainer32FromVals sees "+ + "unsorted vals; vals[%v]=cur=%v < prev=%v. Sort your vals"+ + " before calling us with alreadySorted == true.", i, cur, prev)) + } + if cur == prev { + // ignore duplicates + } else { + ah.actuallyAdded++ + ah.storeIval(ah.runstart, ah.runlen) + ah.runstart = cur + ah.runlen = 0 + } + } +} + +// newRunContainerRange makes a new container made of just the specified closed interval [rangestart,rangelast] +func newRunContainer32Range(rangestart uint32, rangelast uint32) *runContainer32 { + rc := &runContainer32{} + rc.iv = append(rc.iv, interval32{start: rangestart, last: rangelast}) + return rc +} + +// newRunContainer32FromVals makes a new container from vals. +// +// For efficiency, vals should be sorted in ascending order. +// Ideally vals should not contain duplicates, but we detect and +// ignore them. If vals is already sorted in ascending order, then +// pass alreadySorted = true. Otherwise, for !alreadySorted, +// we will sort vals before creating a runContainer32 of them. +// We sort the original vals, so this will change what the +// caller sees in vals as a side effect. +func newRunContainer32FromVals(alreadySorted bool, vals ...uint32) *runContainer32 { + // keep this in sync with newRunContainer32FromArray below + + rc := &runContainer32{} + ah := addHelper32{rc: rc} + + if !alreadySorted { + sort.Sort(uint32Slice(vals)) + } + n := len(vals) + var cur, prev uint32 + switch { + case n == 0: + // nothing more + case n == 1: + ah.m = append(ah.m, interval32{start: vals[0], last: vals[0]}) + ah.actuallyAdded++ + default: + ah.runstart = vals[0] + ah.actuallyAdded++ + for i := 1; i < n; i++ { + prev = vals[i-1] + cur = vals[i] + ah.add(cur, prev, i) + } + ah.storeIval(ah.runstart, ah.runlen) + } + rc.iv = ah.m + rc.card = int64(ah.actuallyAdded) + return rc +} + +// newRunContainer32FromBitmapContainer makes a new run container from bc, +// somewhat efficiently. For reference, see the Java +// https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/RunContainer.java#L145-L192 +func newRunContainer32FromBitmapContainer(bc *bitmapContainer) *runContainer32 { + + rc := &runContainer32{} + nbrRuns := bc.numberOfRuns() + if nbrRuns == 0 { + return rc + } + rc.iv = make([]interval32, nbrRuns) + + longCtr := 0 // index of current long in bitmap + curWord := bc.bitmap[0] // its value + runCount := 0 + for { + // potentially multiword advance to first 1 bit + for curWord == 0 && longCtr < len(bc.bitmap)-1 { + longCtr++ + curWord = bc.bitmap[longCtr] + } + + if curWord == 0 { + // wrap up, no more runs + return rc + } + localRunStart := countTrailingZeros(curWord) + runStart := localRunStart + 64*longCtr + // stuff 1s into number's LSBs + curWordWith1s := curWord | (curWord - 1) + + // find the next 0, potentially in a later word + runEnd := 0 + for curWordWith1s == maxWord && longCtr < len(bc.bitmap)-1 { + longCtr++ + curWordWith1s = bc.bitmap[longCtr] + } + + if curWordWith1s == maxWord { + // a final unterminated run of 1s + runEnd = wordSizeInBits + longCtr*64 + rc.iv[runCount].start = uint32(runStart) + rc.iv[runCount].last = uint32(runEnd) - 1 + return rc + } + localRunEnd := countTrailingZeros(^curWordWith1s) + runEnd = localRunEnd + longCtr*64 + rc.iv[runCount].start = uint32(runStart) + rc.iv[runCount].last = uint32(runEnd) - 1 + runCount++ + // now, zero out everything right of runEnd. + curWord = curWordWith1s & (curWordWith1s + 1) + // We've lathered and rinsed, so repeat... + } + +} + +// +// newRunContainer32FromArray populates a new +// runContainer32 from the contents of arr. +// +func newRunContainer32FromArray(arr *arrayContainer) *runContainer32 { + // keep this in sync with newRunContainer32FromVals above + + rc := &runContainer32{} + ah := addHelper32{rc: rc} + + n := arr.getCardinality() + var cur, prev uint32 + switch { + case n == 0: + // nothing more + case n == 1: + ah.m = append(ah.m, interval32{start: uint32(arr.content[0]), last: uint32(arr.content[0])}) + ah.actuallyAdded++ + default: + ah.runstart = uint32(arr.content[0]) + ah.actuallyAdded++ + for i := 1; i < n; i++ { + prev = uint32(arr.content[i-1]) + cur = uint32(arr.content[i]) + ah.add(cur, prev, i) + } + ah.storeIval(ah.runstart, ah.runlen) + } + rc.iv = ah.m + rc.card = int64(ah.actuallyAdded) + return rc +} + +// set adds the integers in vals to the set. Vals +// must be sorted in increasing order; if not, you should set +// alreadySorted to false, and we will sort them in place for you. +// (Be aware of this side effect -- it will affect the callers +// view of vals). +// +// If you have a small number of additions to an already +// big runContainer32, calling Add() may be faster. +func (rc *runContainer32) set(alreadySorted bool, vals ...uint32) { + + rc2 := newRunContainer32FromVals(alreadySorted, vals...) + un := rc.union(rc2) + rc.iv = un.iv + rc.card = 0 +} + +// canMerge returns true if the intervals +// a and b either overlap or they are +// contiguous and so can be merged into +// a single interval. +func canMerge32(a, b interval32) bool { + if int64(a.last)+1 < int64(b.start) { + return false + } + return int64(b.last)+1 >= int64(a.start) +} + +// haveOverlap differs from canMerge in that +// it tells you if the intersection of a +// and b would contain an element (otherwise +// it would be the empty set, and we return +// false). +func haveOverlap32(a, b interval32) bool { + if int64(a.last)+1 <= int64(b.start) { + return false + } + return int64(b.last)+1 > int64(a.start) +} + +// mergeInterval32s joins a and b into a +// new interval, and panics if it cannot. +func mergeInterval32s(a, b interval32) (res interval32) { + if !canMerge32(a, b) { + panic(fmt.Sprintf("cannot merge %#v and %#v", a, b)) + } + if b.start < a.start { + res.start = b.start + } else { + res.start = a.start + } + if b.last > a.last { + res.last = b.last + } else { + res.last = a.last + } + return +} + +// intersectInterval32s returns the intersection +// of a and b. The isEmpty flag will be true if +// a and b were disjoint. +func intersectInterval32s(a, b interval32) (res interval32, isEmpty bool) { + if !haveOverlap32(a, b) { + isEmpty = true + return + } + if b.start > a.start { + res.start = b.start + } else { + res.start = a.start + } + if b.last < a.last { + res.last = b.last + } else { + res.last = a.last + } + return +} + +// union merges two runContainer32s, producing +// a new runContainer32 with the union of rc and b. +func (rc *runContainer32) union(b *runContainer32) *runContainer32 { + + // rc is also known as 'a' here, but golint insisted we + // call it rc for consistency with the rest of the methods. + + var m []interval32 + + alim := int64(len(rc.iv)) + blim := int64(len(b.iv)) + + var na int64 // next from a + var nb int64 // next from b + + // merged holds the current merge output, which might + // get additional merges before being appended to m. + var merged interval32 + var mergedUsed bool // is merged being used at the moment? + + var cura interval32 // currently considering this interval32 from a + var curb interval32 // currently considering this interval32 from b + + pass := 0 + for na < alim && nb < blim { + pass++ + cura = rc.iv[na] + curb = b.iv[nb] + + if mergedUsed { + mergedUpdated := false + if canMerge32(cura, merged) { + merged = mergeInterval32s(cura, merged) + na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1) + mergedUpdated = true + } + if canMerge32(curb, merged) { + merged = mergeInterval32s(curb, merged) + nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1) + mergedUpdated = true + } + if !mergedUpdated { + // we know that merged is disjoint from cura and curb + m = append(m, merged) + mergedUsed = false + } + continue + + } else { + // !mergedUsed + if !canMerge32(cura, curb) { + if cura.start < curb.start { + m = append(m, cura) + na++ + } else { + m = append(m, curb) + nb++ + } + } else { + merged = mergeInterval32s(cura, curb) + mergedUsed = true + na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1) + nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1) + } + } + } + var aDone, bDone bool + if na >= alim { + aDone = true + } + if nb >= blim { + bDone = true + } + // finish by merging anything remaining into merged we can: + if mergedUsed { + if !aDone { + aAdds: + for na < alim { + cura = rc.iv[na] + if canMerge32(cura, merged) { + merged = mergeInterval32s(cura, merged) + na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1) + } else { + break aAdds + } + } + + } + + if !bDone { + bAdds: + for nb < blim { + curb = b.iv[nb] + if canMerge32(curb, merged) { + merged = mergeInterval32s(curb, merged) + nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1) + } else { + break bAdds + } + } + + } + + m = append(m, merged) + } + if na < alim { + m = append(m, rc.iv[na:]...) + } + if nb < blim { + m = append(m, b.iv[nb:]...) + } + + res := &runContainer32{iv: m} + return res +} + +// unionCardinality returns the cardinality of the merger of two runContainer32s, the union of rc and b. +func (rc *runContainer32) unionCardinality(b *runContainer32) uint64 { + + // rc is also known as 'a' here, but golint insisted we + // call it rc for consistency with the rest of the methods. + answer := uint64(0) + + alim := int64(len(rc.iv)) + blim := int64(len(b.iv)) + + var na int64 // next from a + var nb int64 // next from b + + // merged holds the current merge output, which might + // get additional merges before being appended to m. + var merged interval32 + var mergedUsed bool // is merged being used at the moment? + + var cura interval32 // currently considering this interval32 from a + var curb interval32 // currently considering this interval32 from b + + pass := 0 + for na < alim && nb < blim { + pass++ + cura = rc.iv[na] + curb = b.iv[nb] + + if mergedUsed { + mergedUpdated := false + if canMerge32(cura, merged) { + merged = mergeInterval32s(cura, merged) + na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1) + mergedUpdated = true + } + if canMerge32(curb, merged) { + merged = mergeInterval32s(curb, merged) + nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1) + mergedUpdated = true + } + if !mergedUpdated { + // we know that merged is disjoint from cura and curb + //m = append(m, merged) + answer += uint64(merged.last) - uint64(merged.start) + 1 + mergedUsed = false + } + continue + + } else { + // !mergedUsed + if !canMerge32(cura, curb) { + if cura.start < curb.start { + answer += uint64(cura.last) - uint64(cura.start) + 1 + //m = append(m, cura) + na++ + } else { + answer += uint64(curb.last) - uint64(curb.start) + 1 + //m = append(m, curb) + nb++ + } + } else { + merged = mergeInterval32s(cura, curb) + mergedUsed = true + na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1) + nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1) + } + } + } + var aDone, bDone bool + if na >= alim { + aDone = true + } + if nb >= blim { + bDone = true + } + // finish by merging anything remaining into merged we can: + if mergedUsed { + if !aDone { + aAdds: + for na < alim { + cura = rc.iv[na] + if canMerge32(cura, merged) { + merged = mergeInterval32s(cura, merged) + na = rc.indexOfIntervalAtOrAfter(int64(merged.last)+1, na+1) + } else { + break aAdds + } + } + + } + + if !bDone { + bAdds: + for nb < blim { + curb = b.iv[nb] + if canMerge32(curb, merged) { + merged = mergeInterval32s(curb, merged) + nb = b.indexOfIntervalAtOrAfter(int64(merged.last)+1, nb+1) + } else { + break bAdds + } + } + + } + + //m = append(m, merged) + answer += uint64(merged.last) - uint64(merged.start) + 1 + } + for _, r := range rc.iv[na:] { + answer += uint64(r.last) - uint64(r.start) + 1 + } + for _, r := range b.iv[nb:] { + answer += uint64(r.last) - uint64(r.start) + 1 + } + return answer +} + +// indexOfIntervalAtOrAfter is a helper for union. +func (rc *runContainer32) indexOfIntervalAtOrAfter(key int64, startIndex int64) int64 { + rc.myOpts.startIndex = startIndex + rc.myOpts.endxIndex = 0 + + w, already, _ := rc.search(key, &rc.myOpts) + if already { + return w + } + return w + 1 +} + +// intersect returns a new runContainer32 holding the +// intersection of rc (also known as 'a') and b. +func (rc *runContainer32) intersect(b *runContainer32) *runContainer32 { + + a := rc + numa := int64(len(a.iv)) + numb := int64(len(b.iv)) + res := &runContainer32{} + if numa == 0 || numb == 0 { + return res + } + + if numa == 1 && numb == 1 { + if !haveOverlap32(a.iv[0], b.iv[0]) { + return res + } + } + + var output []interval32 + + var acuri int64 + var bcuri int64 + + astart := int64(a.iv[acuri].start) + bstart := int64(b.iv[bcuri].start) + + var intersection interval32 + var leftoverstart int64 + var isOverlap, isLeftoverA, isLeftoverB bool + var done bool + pass := 0 +toploop: + for acuri < numa && bcuri < numb { + pass++ + + isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = intersectWithLeftover32(astart, int64(a.iv[acuri].last), bstart, int64(b.iv[bcuri].last)) + + if !isOverlap { + switch { + case astart < bstart: + acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart) + if done { + break toploop + } + astart = int64(a.iv[acuri].start) + + case astart > bstart: + bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart) + if done { + break toploop + } + bstart = int64(b.iv[bcuri].start) + + //default: + // panic("impossible that astart == bstart, since !isOverlap") + } + + } else { + // isOverlap + output = append(output, intersection) + switch { + case isLeftoverA: + // note that we change astart without advancing acuri, + // since we need to capture any 2ndary intersections with a.iv[acuri] + astart = leftoverstart + bcuri++ + if bcuri >= numb { + break toploop + } + bstart = int64(b.iv[bcuri].start) + case isLeftoverB: + // note that we change bstart without advancing bcuri, + // since we need to capture any 2ndary intersections with b.iv[bcuri] + bstart = leftoverstart + acuri++ + if acuri >= numa { + break toploop + } + astart = int64(a.iv[acuri].start) + default: + // neither had leftover, both completely consumed + // optionally, assert for sanity: + //if a.iv[acuri].endx != b.iv[bcuri].endx { + // panic("huh? should only be possible that endx agree now!") + //} + + // advance to next a interval + acuri++ + if acuri >= numa { + break toploop + } + astart = int64(a.iv[acuri].start) + + // advance to next b interval + bcuri++ + if bcuri >= numb { + break toploop + } + bstart = int64(b.iv[bcuri].start) + } + } + } // end for toploop + + if len(output) == 0 { + return res + } + + res.iv = output + return res +} + +// intersectCardinality returns the cardinality of the +// intersection of rc (also known as 'a') and b. +func (rc *runContainer32) intersectCardinality(b *runContainer32) int64 { + answer := int64(0) + + a := rc + numa := int64(len(a.iv)) + numb := int64(len(b.iv)) + if numa == 0 || numb == 0 { + return 0 + } + + if numa == 1 && numb == 1 { + if !haveOverlap32(a.iv[0], b.iv[0]) { + return 0 + } + } + + var acuri int64 + var bcuri int64 + + astart := int64(a.iv[acuri].start) + bstart := int64(b.iv[bcuri].start) + + var intersection interval32 + var leftoverstart int64 + var isOverlap, isLeftoverA, isLeftoverB bool + var done bool + pass := 0 +toploop: + for acuri < numa && bcuri < numb { + pass++ + + isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = intersectWithLeftover32(astart, int64(a.iv[acuri].last), bstart, int64(b.iv[bcuri].last)) + + if !isOverlap { + switch { + case astart < bstart: + acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart) + if done { + break toploop + } + astart = int64(a.iv[acuri].start) + + case astart > bstart: + bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart) + if done { + break toploop + } + bstart = int64(b.iv[bcuri].start) + + //default: + // panic("impossible that astart == bstart, since !isOverlap") + } + + } else { + // isOverlap + answer += int64(intersection.last) - int64(intersection.start) + 1 + switch { + case isLeftoverA: + // note that we change astart without advancing acuri, + // since we need to capture any 2ndary intersections with a.iv[acuri] + astart = leftoverstart + bcuri++ + if bcuri >= numb { + break toploop + } + bstart = int64(b.iv[bcuri].start) + case isLeftoverB: + // note that we change bstart without advancing bcuri, + // since we need to capture any 2ndary intersections with b.iv[bcuri] + bstart = leftoverstart + acuri++ + if acuri >= numa { + break toploop + } + astart = int64(a.iv[acuri].start) + default: + // neither had leftover, both completely consumed + // optionally, assert for sanity: + //if a.iv[acuri].endx != b.iv[bcuri].endx { + // panic("huh? should only be possible that endx agree now!") + //} + + // advance to next a interval + acuri++ + if acuri >= numa { + break toploop + } + astart = int64(a.iv[acuri].start) + + // advance to next b interval + bcuri++ + if bcuri >= numb { + break toploop + } + bstart = int64(b.iv[bcuri].start) + } + } + } // end for toploop + + return answer +} + +// get returns true if key is in the container. +func (rc *runContainer32) contains(key uint32) bool { + _, in, _ := rc.search(int64(key), nil) + return in +} + +// numIntervals returns the count of intervals in the container. +func (rc *runContainer32) numIntervals() int { + return len(rc.iv) +} + +// search returns alreadyPresent to indicate if the +// key is already in one of our interval32s. +// +// If key is alreadyPresent, then whichInterval32 tells +// you where. +// +// If key is not already present, then whichInterval32 is +// set as follows: +// +// a) whichInterval32 == len(rc.iv)-1 if key is beyond our +// last interval32 in rc.iv; +// +// b) whichInterval32 == -1 if key is before our first +// interval32 in rc.iv; +// +// c) whichInterval32 is set to the minimum index of rc.iv +// which comes strictly before the key; +// so rc.iv[whichInterval32].last < key, +// and if whichInterval32+1 exists, then key < rc.iv[whichInterval32+1].start +// (Note that whichInterval32+1 won't exist when +// whichInterval32 is the last interval.) +// +// runContainer32.search always returns whichInterval32 < len(rc.iv). +// +// If not nil, opts can be used to further restrict +// the search space. +// +func (rc *runContainer32) search(key int64, opts *searchOptions) (whichInterval32 int64, alreadyPresent bool, numCompares int) { + n := int64(len(rc.iv)) + if n == 0 { + return -1, false, 0 + } + + startIndex := int64(0) + endxIndex := n + if opts != nil { + startIndex = opts.startIndex + + // let endxIndex == 0 mean no effect + if opts.endxIndex > 0 { + endxIndex = opts.endxIndex + } + } + + // sort.Search returns the smallest index i + // in [0, n) at which f(i) is true, assuming that on the range [0, n), + // f(i) == true implies f(i+1) == true. + // If there is no such index, Search returns n. + + // For correctness, this began as verbatim snippet from + // sort.Search in the Go standard lib. + // We inline our comparison function for speed, and + // annotate with numCompares + // to observe and test that extra bounds are utilized. + i, j := startIndex, endxIndex + for i < j { + h := i + (j-i)/2 // avoid overflow when computing h as the bisector + // i <= h < j + numCompares++ + if !(key < int64(rc.iv[h].start)) { + i = h + 1 + } else { + j = h + } + } + below := i + // end std lib snippet. + + // The above is a simple in-lining and annotation of: + /* below := sort.Search(n, + func(i int) bool { + return key < rc.iv[i].start + }) + */ + whichInterval32 = below - 1 + + if below == n { + // all falses => key is >= start of all interval32s + // ... so does it belong to the last interval32? + if key < int64(rc.iv[n-1].last)+1 { + // yes, it belongs to the last interval32 + alreadyPresent = true + return + } + // no, it is beyond the last interval32. + // leave alreadyPreset = false + return + } + + // INVAR: key is below rc.iv[below] + if below == 0 { + // key is before the first first interval32. + // leave alreadyPresent = false + return + } + + // INVAR: key is >= rc.iv[below-1].start and + // key is < rc.iv[below].start + + // is key in below-1 interval32? + if key >= int64(rc.iv[below-1].start) && key < int64(rc.iv[below-1].last)+1 { + // yes, it is. key is in below-1 interval32. + alreadyPresent = true + return + } + + // INVAR: key >= rc.iv[below-1].endx && key < rc.iv[below].start + // leave alreadyPresent = false + return +} + +// cardinality returns the count of the integers stored in the +// runContainer32. +func (rc *runContainer32) cardinality() int64 { + if len(rc.iv) == 0 { + rc.card = 0 + return 0 + } + if rc.card > 0 { + return rc.card // already cached + } + // have to compute it + var n int64 + for _, p := range rc.iv { + n += p.runlen() + } + rc.card = n // cache it + return n +} + +// AsSlice decompresses the contents into a []uint32 slice. +func (rc *runContainer32) AsSlice() []uint32 { + s := make([]uint32, rc.cardinality()) + j := 0 + for _, p := range rc.iv { + for i := p.start; i <= p.last; i++ { + s[j] = i + j++ + } + } + return s +} + +// newRunContainer32 creates an empty run container. +func newRunContainer32() *runContainer32 { + return &runContainer32{} +} + +// newRunContainer32CopyIv creates a run container, initializing +// with a copy of the supplied iv slice. +// +func newRunContainer32CopyIv(iv []interval32) *runContainer32 { + rc := &runContainer32{ + iv: make([]interval32, len(iv)), + } + copy(rc.iv, iv) + return rc +} + +func (rc *runContainer32) Clone() *runContainer32 { + rc2 := newRunContainer32CopyIv(rc.iv) + return rc2 +} + +// newRunContainer32TakeOwnership returns a new runContainer32 +// backed by the provided iv slice, which we will +// assume exclusive control over from now on. +// +func newRunContainer32TakeOwnership(iv []interval32) *runContainer32 { + rc := &runContainer32{ + iv: iv, + } + return rc +} + +const baseRc32Size = int(unsafe.Sizeof(runContainer32{})) +const perIntervalRc32Size = int(unsafe.Sizeof(interval32{})) + +const baseDiskRc32Size = int(unsafe.Sizeof(uint32(0))) + +// see also runContainer32SerializedSizeInBytes(numRuns int) int + +// getSizeInBytes returns the number of bytes of memory +// required by this runContainer32. +func (rc *runContainer32) getSizeInBytes() int { + return perIntervalRc32Size*len(rc.iv) + baseRc32Size +} + +// runContainer32SerializedSizeInBytes returns the number of bytes of disk +// required to hold numRuns in a runContainer32. +func runContainer32SerializedSizeInBytes(numRuns int) int { + return perIntervalRc32Size*numRuns + baseDiskRc32Size +} + +// Add adds a single value k to the set. +func (rc *runContainer32) Add(k uint32) (wasNew bool) { + // TODO comment from runContainer32.java: + // it might be better and simpler to do return + // toBitmapOrArrayContainer(getCardinality()).add(k) + // but note that some unit tests use this method to build up test + // runcontainers without calling runOptimize + + k64 := int64(k) + + index, present, _ := rc.search(k64, nil) + if present { + return // already there + } + wasNew = true + + // increment card if it is cached already + if rc.card > 0 { + rc.card++ + } + n := int64(len(rc.iv)) + if index == -1 { + // we may need to extend the first run + if n > 0 { + if rc.iv[0].start == k+1 { + rc.iv[0].start = k + return + } + } + // nope, k stands alone, starting the new first interval32. + rc.iv = append([]interval32{{start: k, last: k}}, rc.iv...) + return + } + + // are we off the end? handle both index == n and index == n-1: + if index >= n-1 { + if int64(rc.iv[n-1].last)+1 == k64 { + rc.iv[n-1].last++ + return + } + rc.iv = append(rc.iv, interval32{start: k, last: k}) + return + } + + // INVAR: index and index+1 both exist, and k goes between them. + // + // Now: add k into the middle, + // possibly fusing with index or index+1 interval32 + // and possibly resulting in fusing of two interval32s + // that had a one integer gap. + + left := index + right := index + 1 + + // are we fusing left and right by adding k? + if int64(rc.iv[left].last)+1 == k64 && int64(rc.iv[right].start) == k64+1 { + // fuse into left + rc.iv[left].last = rc.iv[right].last + // remove redundant right + rc.iv = append(rc.iv[:left+1], rc.iv[right+1:]...) + return + } + + // are we an addition to left? + if int64(rc.iv[left].last)+1 == k64 { + // yes + rc.iv[left].last++ + return + } + + // are we an addition to right? + if int64(rc.iv[right].start) == k64+1 { + // yes + rc.iv[right].start = k + return + } + + // k makes a standalone new interval32, inserted in the middle + tail := append([]interval32{{start: k, last: k}}, rc.iv[right:]...) + rc.iv = append(rc.iv[:left+1], tail...) + return +} + +//msgp:ignore runIterator + +// runIterator32 advice: you must call Next() at least once +// before calling Cur(); and you should call HasNext() +// before calling Next() to insure there are contents. +type runIterator32 struct { + rc *runContainer32 + curIndex int64 + curPosInIndex uint32 + curSeq int64 +} + +// newRunIterator32 returns a new empty run container. +func (rc *runContainer32) newRunIterator32() *runIterator32 { + return &runIterator32{rc: rc, curIndex: -1} +} + +// HasNext returns false if calling Next will panic. It +// returns true when there is at least one more value +// available in the iteration sequence. +func (ri *runIterator32) hasNext() bool { + if len(ri.rc.iv) == 0 { + return false + } + if ri.curIndex == -1 { + return true + } + return ri.curSeq+1 < ri.rc.cardinality() +} + +// cur returns the current value pointed to by the iterator. +func (ri *runIterator32) cur() uint32 { + return ri.rc.iv[ri.curIndex].start + ri.curPosInIndex +} + +// Next returns the next value in the iteration sequence. +func (ri *runIterator32) next() uint32 { + if !ri.hasNext() { + panic("no Next available") + } + if ri.curIndex >= int64(len(ri.rc.iv)) { + panic("runIterator.Next() going beyond what is available") + } + if ri.curIndex == -1 { + // first time is special + ri.curIndex = 0 + } else { + ri.curPosInIndex++ + if int64(ri.rc.iv[ri.curIndex].start)+int64(ri.curPosInIndex) == int64(ri.rc.iv[ri.curIndex].last)+1 { + ri.curPosInIndex = 0 + ri.curIndex++ + } + ri.curSeq++ + } + return ri.cur() +} + +// remove removes the element that the iterator +// is on from the run container. You can use +// Cur if you want to double check what is about +// to be deleted. +func (ri *runIterator32) remove() uint32 { + n := ri.rc.cardinality() + if n == 0 { + panic("runIterator.Remove called on empty runContainer32") + } + cur := ri.cur() + + ri.rc.deleteAt(&ri.curIndex, &ri.curPosInIndex, &ri.curSeq) + return cur +} + +// remove removes key from the container. +func (rc *runContainer32) removeKey(key uint32) (wasPresent bool) { + + var index int64 + var curSeq int64 + index, wasPresent, _ = rc.search(int64(key), nil) + if !wasPresent { + return // already removed, nothing to do. + } + pos := key - rc.iv[index].start + rc.deleteAt(&index, &pos, &curSeq) + return +} + +// internal helper functions + +func (rc *runContainer32) deleteAt(curIndex *int64, curPosInIndex *uint32, curSeq *int64) { + rc.card-- + (*curSeq)-- + ci := *curIndex + pos := *curPosInIndex + + // are we first, last, or in the middle of our interval32? + switch { + case pos == 0: + if int64(rc.iv[ci].start) == int64(rc.iv[ci].last) { + // our interval disappears + rc.iv = append(rc.iv[:ci], rc.iv[ci+1:]...) + // curIndex stays the same, since the delete did + // the advance for us. + *curPosInIndex = 0 + } else { + rc.iv[ci].start++ // no longer overflowable + } + case int64(pos) == rc.iv[ci].runlen()-1: + // last + rc.iv[ci].last-- + // our interval32 cannot disappear, else we would have been pos == 0, case first above. + (*curPosInIndex)-- + // if we leave *curIndex alone, then Next() will work properly even after the delete. + default: + //middle + // split into two, adding an interval32 + new0 := interval32{ + start: rc.iv[ci].start, + last: rc.iv[ci].start + *curPosInIndex - 1} + + new1start := int64(rc.iv[ci].start) + int64(*curPosInIndex) + 1 + if new1start > int64(MaxUint32) { + panic("overflow?!?!") + } + new1 := interval32{ + start: uint32(new1start), + last: rc.iv[ci].last} + tail := append([]interval32{new0, new1}, rc.iv[ci+1:]...) + rc.iv = append(rc.iv[:ci], tail...) + // update curIndex and curPosInIndex + (*curIndex)++ + *curPosInIndex = 0 + } + +} + +func have4Overlap32(astart, alast, bstart, blast int64) bool { + if alast+1 <= bstart { + return false + } + return blast+1 > astart +} + +func intersectWithLeftover32(astart, alast, bstart, blast int64) (isOverlap, isLeftoverA, isLeftoverB bool, leftoverstart int64, intersection interval32) { + if !have4Overlap32(astart, alast, bstart, blast) { + return + } + isOverlap = true + + // do the intersection: + if bstart > astart { + intersection.start = uint32(bstart) + } else { + intersection.start = uint32(astart) + } + switch { + case blast < alast: + isLeftoverA = true + leftoverstart = blast + 1 + intersection.last = uint32(blast) + case alast < blast: + isLeftoverB = true + leftoverstart = alast + 1 + intersection.last = uint32(alast) + default: + // alast == blast + intersection.last = uint32(alast) + } + + return +} + +func (rc *runContainer32) findNextIntervalThatIntersectsStartingFrom(startIndex int64, key int64) (index int64, done bool) { + + rc.myOpts.startIndex = startIndex + rc.myOpts.endxIndex = 0 + + w, _, _ := rc.search(key, &rc.myOpts) + // rc.search always returns w < len(rc.iv) + if w < startIndex { + // not found and comes before lower bound startIndex, + // so just use the lower bound. + if startIndex == int64(len(rc.iv)) { + // also this bump up means that we are done + return startIndex, true + } + return startIndex, false + } + + return w, false +} + +func sliceToString32(m []interval32) string { + s := "" + for i := range m { + s += fmt.Sprintf("%v: %s, ", i, m[i]) + } + return s +} + +// selectInt32 returns the j-th value in the container. +// We panic of j is out of bounds. +func (rc *runContainer32) selectInt32(j uint32) int { + n := rc.cardinality() + if int64(j) > n { + panic(fmt.Sprintf("Cannot select %v since Cardinality is %v", j, n)) + } + + var offset int64 + for k := range rc.iv { + nextOffset := offset + rc.iv[k].runlen() + 1 + if nextOffset > int64(j) { + return int(int64(rc.iv[k].start) + (int64(j) - offset)) + } + offset = nextOffset + } + panic(fmt.Sprintf("Cannot select %v since Cardinality is %v", j, n)) +} + +// helper for invert +func (rc *runContainer32) invertlastInterval(origin uint32, lastIdx int) []interval32 { + cur := rc.iv[lastIdx] + if cur.last == MaxUint32 { + if cur.start == origin { + return nil // empty container + } + return []interval32{{start: origin, last: cur.start - 1}} + } + if cur.start == origin { + return []interval32{{start: cur.last + 1, last: MaxUint32}} + } + // invert splits + return []interval32{ + {start: origin, last: cur.start - 1}, + {start: cur.last + 1, last: MaxUint32}, + } +} + +// invert returns a new container (not inplace), that is +// the inversion of rc. For each bit b in rc, the +// returned value has !b +func (rc *runContainer32) invert() *runContainer32 { + ni := len(rc.iv) + var m []interval32 + switch ni { + case 0: + return &runContainer32{iv: []interval32{{0, MaxUint32}}} + case 1: + return &runContainer32{iv: rc.invertlastInterval(0, 0)} + } + var invstart int64 + ult := ni - 1 + for i, cur := range rc.iv { + if i == ult { + // invertlastInteval will add both intervals (b) and (c) in + // diagram below. + m = append(m, rc.invertlastInterval(uint32(invstart), i)...) + break + } + // INVAR: i and cur are not the last interval, there is a next at i+1 + // + // ........[cur.start, cur.last] ...... [next.start, next.last].... + // ^ ^ ^ + // (a) (b) (c) + // + // Now: we add interval (a); but if (a) is empty, for cur.start==0, we skip it. + if cur.start > 0 { + m = append(m, interval32{start: uint32(invstart), last: cur.start - 1}) + } + invstart = int64(cur.last + 1) + } + return &runContainer32{iv: m} +} + +func (iv interval32) equal(b interval32) bool { + if iv.start == b.start { + return iv.last == b.last + } + return false +} + +func (iv interval32) isSuperSetOf(b interval32) bool { + return iv.start <= b.start && b.last <= iv.last +} + +func (iv interval32) subtractInterval(del interval32) (left []interval32, delcount int64) { + isect, isEmpty := intersectInterval32s(iv, del) + + if isEmpty { + return nil, 0 + } + if del.isSuperSetOf(iv) { + return nil, iv.runlen() + } + + switch { + case isect.start > iv.start && isect.last < iv.last: + new0 := interval32{start: iv.start, last: isect.start - 1} + new1 := interval32{start: isect.last + 1, last: iv.last} + return []interval32{new0, new1}, isect.runlen() + case isect.start == iv.start: + return []interval32{{start: isect.last + 1, last: iv.last}}, isect.runlen() + default: + return []interval32{{start: iv.start, last: isect.start - 1}}, isect.runlen() + } +} + +func (rc *runContainer32) isubtract(del interval32) { + origiv := make([]interval32, len(rc.iv)) + copy(origiv, rc.iv) + n := int64(len(rc.iv)) + if n == 0 { + return // already done. + } + + _, isEmpty := intersectInterval32s( + interval32{ + start: rc.iv[0].start, + last: rc.iv[n-1].last, + }, del) + if isEmpty { + return // done + } + // INVAR there is some intersection between rc and del + istart, startAlready, _ := rc.search(int64(del.start), nil) + ilast, lastAlready, _ := rc.search(int64(del.last), nil) + rc.card = -1 + if istart == -1 { + if ilast == n-1 && !lastAlready { + rc.iv = nil + return + } + } + // some intervals will remain + switch { + case startAlready && lastAlready: + res0, _ := rc.iv[istart].subtractInterval(del) + + // would overwrite values in iv b/c res0 can have len 2. so + // write to origiv instead. + lost := 1 + ilast - istart + changeSize := int64(len(res0)) - lost + newSize := int64(len(rc.iv)) + changeSize + + // rc.iv = append(pre, caboose...) + // return + + if ilast != istart { + res1, _ := rc.iv[ilast].subtractInterval(del) + res0 = append(res0, res1...) + changeSize = int64(len(res0)) - lost + newSize = int64(len(rc.iv)) + changeSize + } + switch { + case changeSize < 0: + // shrink + copy(rc.iv[istart+int64(len(res0)):], rc.iv[ilast+1:]) + copy(rc.iv[istart:istart+int64(len(res0))], res0) + rc.iv = rc.iv[:newSize] + return + case changeSize == 0: + // stay the same + copy(rc.iv[istart:istart+int64(len(res0))], res0) + return + default: + // changeSize > 0 is only possible when ilast == istart. + // Hence we now know: changeSize == 1 and len(res0) == 2 + rc.iv = append(rc.iv, interval32{}) + // len(rc.iv) is correct now, no need to rc.iv = rc.iv[:newSize] + + // copy the tail into place + copy(rc.iv[ilast+2:], rc.iv[ilast+1:]) + // copy the new item(s) into place + copy(rc.iv[istart:istart+2], res0) + return + } + + case !startAlready && !lastAlready: + // we get to discard whole intervals + + // from the search() definition: + + // if del.start is not present, then istart is + // set as follows: + // + // a) istart == n-1 if del.start is beyond our + // last interval32 in rc.iv; + // + // b) istart == -1 if del.start is before our first + // interval32 in rc.iv; + // + // c) istart is set to the minimum index of rc.iv + // which comes strictly before the del.start; + // so del.start > rc.iv[istart].last, + // and if istart+1 exists, then del.start < rc.iv[istart+1].startx + + // if del.last is not present, then ilast is + // set as follows: + // + // a) ilast == n-1 if del.last is beyond our + // last interval32 in rc.iv; + // + // b) ilast == -1 if del.last is before our first + // interval32 in rc.iv; + // + // c) ilast is set to the minimum index of rc.iv + // which comes strictly before the del.last; + // so del.last > rc.iv[ilast].last, + // and if ilast+1 exists, then del.last < rc.iv[ilast+1].start + + // INVAR: istart >= 0 + pre := rc.iv[:istart+1] + if ilast == n-1 { + rc.iv = pre + return + } + // INVAR: ilast < n-1 + lost := ilast - istart + changeSize := -lost + newSize := int64(len(rc.iv)) + changeSize + if changeSize != 0 { + copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:]) + } + rc.iv = rc.iv[:newSize] + return + + case startAlready && !lastAlready: + // we can only shrink or stay the same size + // i.e. we either eliminate the whole interval, + // or just cut off the right side. + res0, _ := rc.iv[istart].subtractInterval(del) + if len(res0) > 0 { + // len(res) must be 1 + rc.iv[istart] = res0[0] + } + lost := 1 + (ilast - istart) + changeSize := int64(len(res0)) - lost + newSize := int64(len(rc.iv)) + changeSize + if changeSize != 0 { + copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:]) + } + rc.iv = rc.iv[:newSize] + return + + case !startAlready && lastAlready: + // we can only shrink or stay the same size + res1, _ := rc.iv[ilast].subtractInterval(del) + lost := ilast - istart + changeSize := int64(len(res1)) - lost + newSize := int64(len(rc.iv)) + changeSize + if changeSize != 0 { + // move the tail first to make room for res1 + copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:]) + } + copy(rc.iv[istart+1:], res1) + rc.iv = rc.iv[:newSize] + return + } +} + +// compute rc minus b, and return the result as a new value (not inplace). +// port of run_container_andnot from CRoaring... +// https://github.com/RoaringBitmap/CRoaring/blob/master/src/containers/run.c#L435-L496 +func (rc *runContainer32) AndNotRunContainer32(b *runContainer32) *runContainer32 { + + if len(b.iv) == 0 || len(rc.iv) == 0 { + return rc + } + + dst := newRunContainer32() + apos := 0 + bpos := 0 + + a := rc + + astart := a.iv[apos].start + alast := a.iv[apos].last + bstart := b.iv[bpos].start + blast := b.iv[bpos].last + + alen := len(a.iv) + blen := len(b.iv) + + for apos < alen && bpos < blen { + switch { + case alast < bstart: + // output the first run + dst.iv = append(dst.iv, interval32{start: astart, last: alast}) + apos++ + if apos < alen { + astart = a.iv[apos].start + alast = a.iv[apos].last + } + case blast < astart: + // exit the second run + bpos++ + if bpos < blen { + bstart = b.iv[bpos].start + blast = b.iv[bpos].last + } + default: + // a: [ ] + // b: [ ] + // alast >= bstart + // blast >= astart + if astart < bstart { + dst.iv = append(dst.iv, interval32{start: astart, last: bstart - 1}) + } + if alast > blast { + astart = blast + 1 + } else { + apos++ + if apos < alen { + astart = a.iv[apos].start + alast = a.iv[apos].last + } + } + } + } + if apos < alen { + dst.iv = append(dst.iv, interval32{start: astart, last: alast}) + apos++ + if apos < alen { + dst.iv = append(dst.iv, a.iv[apos:]...) + } + } + + return dst +} + +func (rc *runContainer32) numberOfRuns() (nr int) { + return len(rc.iv) +} + +func (rc *runContainer32) containerType() contype { + return run32Contype +} + +func (rc *runContainer32) equals32(srb *runContainer32) bool { + //p("both rc32") + // Check if the containers are the same object. + if rc == srb { + //p("same object") + return true + } + + if len(srb.iv) != len(rc.iv) { + //p("iv len differ") + return false + } + + for i, v := range rc.iv { + if v != srb.iv[i] { + //p("differ at iv i=%v, srb.iv[i]=%v, rc.iv[i]=%v", i, srb.iv[i], rc.iv[i]) + return false + } + } + //p("all intervals same, returning true") + return true +} diff --git a/vendor/github.com/RoaringBitmap/roaring/rle16.go b/vendor/github.com/RoaringBitmap/roaring/rle16.go new file mode 100644 index 0000000000..951af65f3f --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/rle16.go @@ -0,0 +1,1747 @@ +package roaring + +// +// Copyright (c) 2016 by the roaring authors. +// Licensed under the Apache License, Version 2.0. +// +// We derive a few lines of code from the sort.Search +// function in the golang standard library. That function +// is Copyright 2009 The Go Authors, and licensed +// under the following BSD-style license. +/* +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +import ( + "fmt" + "sort" + "unsafe" +) + +//go:generate msgp -unexported + +// runContainer16 does run-length encoding of sets of +// uint16 integers. +type runContainer16 struct { + iv []interval16 + card int64 + + // avoid allocation during search + myOpts searchOptions `msg:"-"` +} + +// interval16 is the internal to runContainer16 +// structure that maintains the individual [start, last] +// closed intervals. +type interval16 struct { + start uint16 + length uint16 // length minus 1 +} + +func newInterval16Range(start, last uint16) interval16 { + if last < start { + panic(fmt.Sprintf("last (%d) cannot be smaller than start (%d)", last, start)) + } + + return interval16{ + start, + last - start, + } +} + +// runlen returns the count of integers in the interval. +func (iv interval16) runlen() int64 { + return int64(iv.length) + 1 +} + +func (iv interval16) last() uint16 { + return iv.start + iv.length +} + +// String produces a human viewable string of the contents. +func (iv interval16) String() string { + return fmt.Sprintf("[%d, %d]", iv.start, iv.length) +} + +func ivalString16(iv []interval16) string { + var s string + var j int + var p interval16 + for j, p = range iv { + s += fmt.Sprintf("%v:[%d, %d], ", j, p.start, p.last()) + } + return s +} + +// String produces a human viewable string of the contents. +func (rc *runContainer16) String() string { + if len(rc.iv) == 0 { + return "runContainer16{}" + } + is := ivalString16(rc.iv) + return `runContainer16{` + is + `}` +} + +// uint16Slice is a sort.Sort convenience method +type uint16Slice []uint16 + +// Len returns the length of p. +func (p uint16Slice) Len() int { return len(p) } + +// Less returns p[i] < p[j] +func (p uint16Slice) Less(i, j int) bool { return p[i] < p[j] } + +// Swap swaps elements i and j. +func (p uint16Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +//msgp:ignore addHelper + +// addHelper helps build a runContainer16. +type addHelper16 struct { + runstart uint16 + runlen uint16 + actuallyAdded uint16 + m []interval16 + rc *runContainer16 +} + +func (ah *addHelper16) storeIval(runstart, runlen uint16) { + mi := interval16{start: runstart, length: runlen} + ah.m = append(ah.m, mi) +} + +func (ah *addHelper16) add(cur, prev uint16, i int) { + if cur == prev+1 { + ah.runlen++ + ah.actuallyAdded++ + } else { + if cur < prev { + panic(fmt.Sprintf("newRunContainer16FromVals sees "+ + "unsorted vals; vals[%v]=cur=%v < prev=%v. Sort your vals"+ + " before calling us with alreadySorted == true.", i, cur, prev)) + } + if cur == prev { + // ignore duplicates + } else { + ah.actuallyAdded++ + ah.storeIval(ah.runstart, ah.runlen) + ah.runstart = cur + ah.runlen = 0 + } + } +} + +// newRunContainerRange makes a new container made of just the specified closed interval [rangestart,rangelast] +func newRunContainer16Range(rangestart uint16, rangelast uint16) *runContainer16 { + rc := &runContainer16{} + rc.iv = append(rc.iv, newInterval16Range(rangestart, rangelast)) + return rc +} + +// newRunContainer16FromVals makes a new container from vals. +// +// For efficiency, vals should be sorted in ascending order. +// Ideally vals should not contain duplicates, but we detect and +// ignore them. If vals is already sorted in ascending order, then +// pass alreadySorted = true. Otherwise, for !alreadySorted, +// we will sort vals before creating a runContainer16 of them. +// We sort the original vals, so this will change what the +// caller sees in vals as a side effect. +func newRunContainer16FromVals(alreadySorted bool, vals ...uint16) *runContainer16 { + // keep this in sync with newRunContainer16FromArray below + + rc := &runContainer16{} + ah := addHelper16{rc: rc} + + if !alreadySorted { + sort.Sort(uint16Slice(vals)) + } + n := len(vals) + var cur, prev uint16 + switch { + case n == 0: + // nothing more + case n == 1: + ah.m = append(ah.m, newInterval16Range(vals[0], vals[0])) + ah.actuallyAdded++ + default: + ah.runstart = vals[0] + ah.actuallyAdded++ + for i := 1; i < n; i++ { + prev = vals[i-1] + cur = vals[i] + ah.add(cur, prev, i) + } + ah.storeIval(ah.runstart, ah.runlen) + } + rc.iv = ah.m + rc.card = int64(ah.actuallyAdded) + return rc +} + +// newRunContainer16FromBitmapContainer makes a new run container from bc, +// somewhat efficiently. For reference, see the Java +// https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/RunContainer.java#L145-L192 +func newRunContainer16FromBitmapContainer(bc *bitmapContainer) *runContainer16 { + + rc := &runContainer16{} + nbrRuns := bc.numberOfRuns() + if nbrRuns == 0 { + return rc + } + rc.iv = make([]interval16, nbrRuns) + + longCtr := 0 // index of current long in bitmap + curWord := bc.bitmap[0] // its value + runCount := 0 + for { + // potentially multiword advance to first 1 bit + for curWord == 0 && longCtr < len(bc.bitmap)-1 { + longCtr++ + curWord = bc.bitmap[longCtr] + } + + if curWord == 0 { + // wrap up, no more runs + return rc + } + localRunStart := countTrailingZeros(curWord) + runStart := localRunStart + 64*longCtr + // stuff 1s into number's LSBs + curWordWith1s := curWord | (curWord - 1) + + // find the next 0, potentially in a later word + runEnd := 0 + for curWordWith1s == maxWord && longCtr < len(bc.bitmap)-1 { + longCtr++ + curWordWith1s = bc.bitmap[longCtr] + } + + if curWordWith1s == maxWord { + // a final unterminated run of 1s + runEnd = wordSizeInBits + longCtr*64 + rc.iv[runCount].start = uint16(runStart) + rc.iv[runCount].length = uint16(runEnd) - uint16(runStart) - 1 + return rc + } + localRunEnd := countTrailingZeros(^curWordWith1s) + runEnd = localRunEnd + longCtr*64 + rc.iv[runCount].start = uint16(runStart) + rc.iv[runCount].length = uint16(runEnd) - 1 - uint16(runStart) + runCount++ + // now, zero out everything right of runEnd. + curWord = curWordWith1s & (curWordWith1s + 1) + // We've lathered and rinsed, so repeat... + } + +} + +// +// newRunContainer16FromArray populates a new +// runContainer16 from the contents of arr. +// +func newRunContainer16FromArray(arr *arrayContainer) *runContainer16 { + // keep this in sync with newRunContainer16FromVals above + + rc := &runContainer16{} + ah := addHelper16{rc: rc} + + n := arr.getCardinality() + var cur, prev uint16 + switch { + case n == 0: + // nothing more + case n == 1: + ah.m = append(ah.m, newInterval16Range(arr.content[0], arr.content[0])) + ah.actuallyAdded++ + default: + ah.runstart = arr.content[0] + ah.actuallyAdded++ + for i := 1; i < n; i++ { + prev = arr.content[i-1] + cur = arr.content[i] + ah.add(cur, prev, i) + } + ah.storeIval(ah.runstart, ah.runlen) + } + rc.iv = ah.m + rc.card = int64(ah.actuallyAdded) + return rc +} + +// set adds the integers in vals to the set. Vals +// must be sorted in increasing order; if not, you should set +// alreadySorted to false, and we will sort them in place for you. +// (Be aware of this side effect -- it will affect the callers +// view of vals). +// +// If you have a small number of additions to an already +// big runContainer16, calling Add() may be faster. +func (rc *runContainer16) set(alreadySorted bool, vals ...uint16) { + + rc2 := newRunContainer16FromVals(alreadySorted, vals...) + un := rc.union(rc2) + rc.iv = un.iv + rc.card = 0 +} + +// canMerge returns true iff the intervals +// a and b either overlap or they are +// contiguous and so can be merged into +// a single interval. +func canMerge16(a, b interval16) bool { + if int64(a.last())+1 < int64(b.start) { + return false + } + return int64(b.last())+1 >= int64(a.start) +} + +// haveOverlap differs from canMerge in that +// it tells you if the intersection of a +// and b would contain an element (otherwise +// it would be the empty set, and we return +// false). +func haveOverlap16(a, b interval16) bool { + if int64(a.last())+1 <= int64(b.start) { + return false + } + return int64(b.last())+1 > int64(a.start) +} + +// mergeInterval16s joins a and b into a +// new interval, and panics if it cannot. +func mergeInterval16s(a, b interval16) (res interval16) { + if !canMerge16(a, b) { + panic(fmt.Sprintf("cannot merge %#v and %#v", a, b)) + } + + if b.start < a.start { + res.start = b.start + } else { + res.start = a.start + } + + if b.last() > a.last() { + res.length = b.last() - res.start + } else { + res.length = a.last() - res.start + } + + return +} + +// intersectInterval16s returns the intersection +// of a and b. The isEmpty flag will be true if +// a and b were disjoint. +func intersectInterval16s(a, b interval16) (res interval16, isEmpty bool) { + if !haveOverlap16(a, b) { + isEmpty = true + return + } + if b.start > a.start { + res.start = b.start + } else { + res.start = a.start + } + + bEnd := b.last() + aEnd := a.last() + var resEnd uint16 + + if bEnd < aEnd { + resEnd = bEnd + } else { + resEnd = aEnd + } + res.length = resEnd - res.start + return +} + +// union merges two runContainer16s, producing +// a new runContainer16 with the union of rc and b. +func (rc *runContainer16) union(b *runContainer16) *runContainer16 { + + // rc is also known as 'a' here, but golint insisted we + // call it rc for consistency with the rest of the methods. + + var m []interval16 + + alim := int64(len(rc.iv)) + blim := int64(len(b.iv)) + + var na int64 // next from a + var nb int64 // next from b + + // merged holds the current merge output, which might + // get additional merges before being appended to m. + var merged interval16 + var mergedUsed bool // is merged being used at the moment? + + var cura interval16 // currently considering this interval16 from a + var curb interval16 // currently considering this interval16 from b + + pass := 0 + for na < alim && nb < blim { + pass++ + cura = rc.iv[na] + curb = b.iv[nb] + + if mergedUsed { + mergedUpdated := false + if canMerge16(cura, merged) { + merged = mergeInterval16s(cura, merged) + na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1) + mergedUpdated = true + } + if canMerge16(curb, merged) { + merged = mergeInterval16s(curb, merged) + nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1) + mergedUpdated = true + } + if !mergedUpdated { + // we know that merged is disjoint from cura and curb + m = append(m, merged) + mergedUsed = false + } + continue + + } else { + // !mergedUsed + if !canMerge16(cura, curb) { + if cura.start < curb.start { + m = append(m, cura) + na++ + } else { + m = append(m, curb) + nb++ + } + } else { + merged = mergeInterval16s(cura, curb) + mergedUsed = true + na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1) + nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1) + } + } + } + var aDone, bDone bool + if na >= alim { + aDone = true + } + if nb >= blim { + bDone = true + } + // finish by merging anything remaining into merged we can: + if mergedUsed { + if !aDone { + aAdds: + for na < alim { + cura = rc.iv[na] + if canMerge16(cura, merged) { + merged = mergeInterval16s(cura, merged) + na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1) + } else { + break aAdds + } + } + + } + + if !bDone { + bAdds: + for nb < blim { + curb = b.iv[nb] + if canMerge16(curb, merged) { + merged = mergeInterval16s(curb, merged) + nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1) + } else { + break bAdds + } + } + + } + + m = append(m, merged) + } + if na < alim { + m = append(m, rc.iv[na:]...) + } + if nb < blim { + m = append(m, b.iv[nb:]...) + } + + res := &runContainer16{iv: m} + return res +} + +// unionCardinality returns the cardinality of the merger of two runContainer16s, the union of rc and b. +func (rc *runContainer16) unionCardinality(b *runContainer16) uint64 { + + // rc is also known as 'a' here, but golint insisted we + // call it rc for consistency with the rest of the methods. + answer := uint64(0) + + alim := int64(len(rc.iv)) + blim := int64(len(b.iv)) + + var na int64 // next from a + var nb int64 // next from b + + // merged holds the current merge output, which might + // get additional merges before being appended to m. + var merged interval16 + var mergedUsed bool // is merged being used at the moment? + + var cura interval16 // currently considering this interval16 from a + var curb interval16 // currently considering this interval16 from b + + pass := 0 + for na < alim && nb < blim { + pass++ + cura = rc.iv[na] + curb = b.iv[nb] + + if mergedUsed { + mergedUpdated := false + if canMerge16(cura, merged) { + merged = mergeInterval16s(cura, merged) + na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1) + mergedUpdated = true + } + if canMerge16(curb, merged) { + merged = mergeInterval16s(curb, merged) + nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1) + mergedUpdated = true + } + if !mergedUpdated { + // we know that merged is disjoint from cura and curb + //m = append(m, merged) + answer += uint64(merged.last()) - uint64(merged.start) + 1 + mergedUsed = false + } + continue + + } else { + // !mergedUsed + if !canMerge16(cura, curb) { + if cura.start < curb.start { + answer += uint64(cura.last()) - uint64(cura.start) + 1 + //m = append(m, cura) + na++ + } else { + answer += uint64(curb.last()) - uint64(curb.start) + 1 + //m = append(m, curb) + nb++ + } + } else { + merged = mergeInterval16s(cura, curb) + mergedUsed = true + na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1) + nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1) + } + } + } + var aDone, bDone bool + if na >= alim { + aDone = true + } + if nb >= blim { + bDone = true + } + // finish by merging anything remaining into merged we can: + if mergedUsed { + if !aDone { + aAdds: + for na < alim { + cura = rc.iv[na] + if canMerge16(cura, merged) { + merged = mergeInterval16s(cura, merged) + na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1) + } else { + break aAdds + } + } + + } + + if !bDone { + bAdds: + for nb < blim { + curb = b.iv[nb] + if canMerge16(curb, merged) { + merged = mergeInterval16s(curb, merged) + nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1) + } else { + break bAdds + } + } + + } + + //m = append(m, merged) + answer += uint64(merged.last()) - uint64(merged.start) + 1 + } + for _, r := range rc.iv[na:] { + answer += uint64(r.last()) - uint64(r.start) + 1 + } + for _, r := range b.iv[nb:] { + answer += uint64(r.last()) - uint64(r.start) + 1 + } + return answer +} + +// indexOfIntervalAtOrAfter is a helper for union. +func (rc *runContainer16) indexOfIntervalAtOrAfter(key int64, startIndex int64) int64 { + rc.myOpts.startIndex = startIndex + rc.myOpts.endxIndex = 0 + + w, already, _ := rc.search(key, &rc.myOpts) + if already { + return w + } + return w + 1 +} + +// intersect returns a new runContainer16 holding the +// intersection of rc (also known as 'a') and b. +func (rc *runContainer16) intersect(b *runContainer16) *runContainer16 { + + a := rc + numa := int64(len(a.iv)) + numb := int64(len(b.iv)) + res := &runContainer16{} + if numa == 0 || numb == 0 { + return res + } + + if numa == 1 && numb == 1 { + if !haveOverlap16(a.iv[0], b.iv[0]) { + return res + } + } + + var output []interval16 + + var acuri int64 + var bcuri int64 + + astart := int64(a.iv[acuri].start) + bstart := int64(b.iv[bcuri].start) + + var intersection interval16 + var leftoverstart int64 + var isOverlap, isLeftoverA, isLeftoverB bool + var done bool +toploop: + for acuri < numa && bcuri < numb { + + isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = + intersectWithLeftover16(astart, int64(a.iv[acuri].last()), bstart, int64(b.iv[bcuri].last())) + + if !isOverlap { + switch { + case astart < bstart: + acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart) + if done { + break toploop + } + astart = int64(a.iv[acuri].start) + + case astart > bstart: + bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart) + if done { + break toploop + } + bstart = int64(b.iv[bcuri].start) + + //default: + // panic("impossible that astart == bstart, since !isOverlap") + } + + } else { + // isOverlap + output = append(output, intersection) + switch { + case isLeftoverA: + // note that we change astart without advancing acuri, + // since we need to capture any 2ndary intersections with a.iv[acuri] + astart = leftoverstart + bcuri++ + if bcuri >= numb { + break toploop + } + bstart = int64(b.iv[bcuri].start) + case isLeftoverB: + // note that we change bstart without advancing bcuri, + // since we need to capture any 2ndary intersections with b.iv[bcuri] + bstart = leftoverstart + acuri++ + if acuri >= numa { + break toploop + } + astart = int64(a.iv[acuri].start) + default: + // neither had leftover, both completely consumed + // optionally, assert for sanity: + //if a.iv[acuri].endx != b.iv[bcuri].endx { + // panic("huh? should only be possible that endx agree now!") + //} + + // advance to next a interval + acuri++ + if acuri >= numa { + break toploop + } + astart = int64(a.iv[acuri].start) + + // advance to next b interval + bcuri++ + if bcuri >= numb { + break toploop + } + bstart = int64(b.iv[bcuri].start) + } + } + } // end for toploop + + if len(output) == 0 { + return res + } + + res.iv = output + return res +} + +// intersectCardinality returns the cardinality of the +// intersection of rc (also known as 'a') and b. +func (rc *runContainer16) intersectCardinality(b *runContainer16) int64 { + answer := int64(0) + + a := rc + numa := int64(len(a.iv)) + numb := int64(len(b.iv)) + if numa == 0 || numb == 0 { + return 0 + } + + if numa == 1 && numb == 1 { + if !haveOverlap16(a.iv[0], b.iv[0]) { + return 0 + } + } + + var acuri int64 + var bcuri int64 + + astart := int64(a.iv[acuri].start) + bstart := int64(b.iv[bcuri].start) + + var intersection interval16 + var leftoverstart int64 + var isOverlap, isLeftoverA, isLeftoverB bool + var done bool + pass := 0 +toploop: + for acuri < numa && bcuri < numb { + pass++ + + isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = + intersectWithLeftover16(astart, int64(a.iv[acuri].last()), bstart, int64(b.iv[bcuri].last())) + + if !isOverlap { + switch { + case astart < bstart: + acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart) + if done { + break toploop + } + astart = int64(a.iv[acuri].start) + + case astart > bstart: + bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart) + if done { + break toploop + } + bstart = int64(b.iv[bcuri].start) + + //default: + // panic("impossible that astart == bstart, since !isOverlap") + } + + } else { + // isOverlap + answer += int64(intersection.last()) - int64(intersection.start) + 1 + switch { + case isLeftoverA: + // note that we change astart without advancing acuri, + // since we need to capture any 2ndary intersections with a.iv[acuri] + astart = leftoverstart + bcuri++ + if bcuri >= numb { + break toploop + } + bstart = int64(b.iv[bcuri].start) + case isLeftoverB: + // note that we change bstart without advancing bcuri, + // since we need to capture any 2ndary intersections with b.iv[bcuri] + bstart = leftoverstart + acuri++ + if acuri >= numa { + break toploop + } + astart = int64(a.iv[acuri].start) + default: + // neither had leftover, both completely consumed + // optionally, assert for sanity: + //if a.iv[acuri].endx != b.iv[bcuri].endx { + // panic("huh? should only be possible that endx agree now!") + //} + + // advance to next a interval + acuri++ + if acuri >= numa { + break toploop + } + astart = int64(a.iv[acuri].start) + + // advance to next b interval + bcuri++ + if bcuri >= numb { + break toploop + } + bstart = int64(b.iv[bcuri].start) + } + } + } // end for toploop + + return answer +} + +// get returns true iff key is in the container. +func (rc *runContainer16) contains(key uint16) bool { + _, in, _ := rc.search(int64(key), nil) + return in +} + +// numIntervals returns the count of intervals in the container. +func (rc *runContainer16) numIntervals() int { + return len(rc.iv) +} + +// search returns alreadyPresent to indicate if the +// key is already in one of our interval16s. +// +// If key is alreadyPresent, then whichInterval16 tells +// you where. +// +// If key is not already present, then whichInterval16 is +// set as follows: +// +// a) whichInterval16 == len(rc.iv)-1 if key is beyond our +// last interval16 in rc.iv; +// +// b) whichInterval16 == -1 if key is before our first +// interval16 in rc.iv; +// +// c) whichInterval16 is set to the minimum index of rc.iv +// which comes strictly before the key; +// so rc.iv[whichInterval16].last < key, +// and if whichInterval16+1 exists, then key < rc.iv[whichInterval16+1].start +// (Note that whichInterval16+1 won't exist when +// whichInterval16 is the last interval.) +// +// runContainer16.search always returns whichInterval16 < len(rc.iv). +// +// If not nil, opts can be used to further restrict +// the search space. +// +func (rc *runContainer16) search(key int64, opts *searchOptions) (whichInterval16 int64, alreadyPresent bool, numCompares int) { + n := int64(len(rc.iv)) + if n == 0 { + return -1, false, 0 + } + + startIndex := int64(0) + endxIndex := n + if opts != nil { + startIndex = opts.startIndex + + // let endxIndex == 0 mean no effect + if opts.endxIndex > 0 { + endxIndex = opts.endxIndex + } + } + + // sort.Search returns the smallest index i + // in [0, n) at which f(i) is true, assuming that on the range [0, n), + // f(i) == true implies f(i+1) == true. + // If there is no such index, Search returns n. + + // For correctness, this began as verbatim snippet from + // sort.Search in the Go standard lib. + // We inline our comparison function for speed, and + // annotate with numCompares + // to observe and test that extra bounds are utilized. + i, j := startIndex, endxIndex + for i < j { + h := i + (j-i)/2 // avoid overflow when computing h as the bisector + // i <= h < j + numCompares++ + if !(key < int64(rc.iv[h].start)) { + i = h + 1 + } else { + j = h + } + } + below := i + // end std lib snippet. + + // The above is a simple in-lining and annotation of: + /* below := sort.Search(n, + func(i int) bool { + return key < rc.iv[i].start + }) + */ + whichInterval16 = below - 1 + + if below == n { + // all falses => key is >= start of all interval16s + // ... so does it belong to the last interval16? + if key < int64(rc.iv[n-1].last())+1 { + // yes, it belongs to the last interval16 + alreadyPresent = true + return + } + // no, it is beyond the last interval16. + // leave alreadyPreset = false + return + } + + // INVAR: key is below rc.iv[below] + if below == 0 { + // key is before the first first interval16. + // leave alreadyPresent = false + return + } + + // INVAR: key is >= rc.iv[below-1].start and + // key is < rc.iv[below].start + + // is key in below-1 interval16? + if key >= int64(rc.iv[below-1].start) && key < int64(rc.iv[below-1].last())+1 { + // yes, it is. key is in below-1 interval16. + alreadyPresent = true + return + } + + // INVAR: key >= rc.iv[below-1].endx && key < rc.iv[below].start + // leave alreadyPresent = false + return +} + +// cardinality returns the count of the integers stored in the +// runContainer16. +func (rc *runContainer16) cardinality() int64 { + if len(rc.iv) == 0 { + rc.card = 0 + return 0 + } + if rc.card > 0 { + return rc.card // already cached + } + // have to compute it + var n int64 + for _, p := range rc.iv { + n += p.runlen() + } + rc.card = n // cache it + return n +} + +// AsSlice decompresses the contents into a []uint16 slice. +func (rc *runContainer16) AsSlice() []uint16 { + s := make([]uint16, rc.cardinality()) + j := 0 + for _, p := range rc.iv { + for i := p.start; i <= p.last(); i++ { + s[j] = i + j++ + } + } + return s +} + +// newRunContainer16 creates an empty run container. +func newRunContainer16() *runContainer16 { + return &runContainer16{} +} + +// newRunContainer16CopyIv creates a run container, initializing +// with a copy of the supplied iv slice. +// +func newRunContainer16CopyIv(iv []interval16) *runContainer16 { + rc := &runContainer16{ + iv: make([]interval16, len(iv)), + } + copy(rc.iv, iv) + return rc +} + +func (rc *runContainer16) Clone() *runContainer16 { + rc2 := newRunContainer16CopyIv(rc.iv) + return rc2 +} + +// newRunContainer16TakeOwnership returns a new runContainer16 +// backed by the provided iv slice, which we will +// assume exclusive control over from now on. +// +func newRunContainer16TakeOwnership(iv []interval16) *runContainer16 { + rc := &runContainer16{ + iv: iv, + } + return rc +} + +const baseRc16Size = int(unsafe.Sizeof(runContainer16{})) +const perIntervalRc16Size = int(unsafe.Sizeof(interval16{})) + +const baseDiskRc16Size = int(unsafe.Sizeof(uint16(0))) + +// see also runContainer16SerializedSizeInBytes(numRuns int) int + +// getSizeInBytes returns the number of bytes of memory +// required by this runContainer16. +func (rc *runContainer16) getSizeInBytes() int { + return perIntervalRc16Size*len(rc.iv) + baseRc16Size +} + +// runContainer16SerializedSizeInBytes returns the number of bytes of disk +// required to hold numRuns in a runContainer16. +func runContainer16SerializedSizeInBytes(numRuns int) int { + return perIntervalRc16Size*numRuns + baseDiskRc16Size +} + +// Add adds a single value k to the set. +func (rc *runContainer16) Add(k uint16) (wasNew bool) { + // TODO comment from runContainer16.java: + // it might be better and simpler to do return + // toBitmapOrArrayContainer(getCardinality()).add(k) + // but note that some unit tests use this method to build up test + // runcontainers without calling runOptimize + + k64 := int64(k) + + index, present, _ := rc.search(k64, nil) + if present { + return // already there + } + wasNew = true + + // increment card if it is cached already + if rc.card > 0 { + rc.card++ + } + n := int64(len(rc.iv)) + if index == -1 { + // we may need to extend the first run + if n > 0 { + if rc.iv[0].start == k+1 { + rc.iv[0].start = k + rc.iv[0].length++ + return + } + } + // nope, k stands alone, starting the new first interval16. + rc.iv = append([]interval16{newInterval16Range(k, k)}, rc.iv...) + return + } + + // are we off the end? handle both index == n and index == n-1: + if index >= n-1 { + if int64(rc.iv[n-1].last())+1 == k64 { + rc.iv[n-1].length++ + return + } + rc.iv = append(rc.iv, newInterval16Range(k, k)) + return + } + + // INVAR: index and index+1 both exist, and k goes between them. + // + // Now: add k into the middle, + // possibly fusing with index or index+1 interval16 + // and possibly resulting in fusing of two interval16s + // that had a one integer gap. + + left := index + right := index + 1 + + // are we fusing left and right by adding k? + if int64(rc.iv[left].last())+1 == k64 && int64(rc.iv[right].start) == k64+1 { + // fuse into left + rc.iv[left].length = rc.iv[right].last() - rc.iv[left].start + // remove redundant right + rc.iv = append(rc.iv[:left+1], rc.iv[right+1:]...) + return + } + + // are we an addition to left? + if int64(rc.iv[left].last())+1 == k64 { + // yes + rc.iv[left].length++ + return + } + + // are we an addition to right? + if int64(rc.iv[right].start) == k64+1 { + // yes + rc.iv[right].start = k + rc.iv[right].length++ + return + } + + // k makes a standalone new interval16, inserted in the middle + tail := append([]interval16{newInterval16Range(k, k)}, rc.iv[right:]...) + rc.iv = append(rc.iv[:left+1], tail...) + return +} + +//msgp:ignore runIterator + +// runIterator16 advice: you must call Next() at least once +// before calling Cur(); and you should call HasNext() +// before calling Next() to insure there are contents. +type runIterator16 struct { + rc *runContainer16 + curIndex int64 + curPosInIndex uint16 + curSeq int64 +} + +// newRunIterator16 returns a new empty run container. +func (rc *runContainer16) newRunIterator16() *runIterator16 { + return &runIterator16{rc: rc, curIndex: -1} +} + +// HasNext returns false if calling Next will panic. It +// returns true when there is at least one more value +// available in the iteration sequence. +func (ri *runIterator16) hasNext() bool { + if len(ri.rc.iv) == 0 { + return false + } + if ri.curIndex == -1 { + return true + } + return ri.curSeq+1 < ri.rc.cardinality() +} + +// cur returns the current value pointed to by the iterator. +func (ri *runIterator16) cur() uint16 { + return ri.rc.iv[ri.curIndex].start + ri.curPosInIndex +} + +// Next returns the next value in the iteration sequence. +func (ri *runIterator16) next() uint16 { + if !ri.hasNext() { + panic("no Next available") + } + if ri.curIndex >= int64(len(ri.rc.iv)) { + panic("runIterator.Next() going beyond what is available") + } + if ri.curIndex == -1 { + // first time is special + ri.curIndex = 0 + } else { + ri.curPosInIndex++ + if int64(ri.rc.iv[ri.curIndex].start)+int64(ri.curPosInIndex) == int64(ri.rc.iv[ri.curIndex].last())+1 { + ri.curPosInIndex = 0 + ri.curIndex++ + } + ri.curSeq++ + } + return ri.cur() +} + +// remove removes the element that the iterator +// is on from the run container. You can use +// Cur if you want to double check what is about +// to be deleted. +func (ri *runIterator16) remove() uint16 { + n := ri.rc.cardinality() + if n == 0 { + panic("runIterator.Remove called on empty runContainer16") + } + cur := ri.cur() + + ri.rc.deleteAt(&ri.curIndex, &ri.curPosInIndex, &ri.curSeq) + return cur +} + +type manyRunIterator16 struct { + rc *runContainer16 + curIndex int64 + curPosInIndex uint16 + curSeq int64 +} + +func (rc *runContainer16) newManyRunIterator16() *manyRunIterator16 { + return &manyRunIterator16{rc: rc, curIndex: -1} +} + +func (ri *manyRunIterator16) hasNext() bool { + if len(ri.rc.iv) == 0 { + return false + } + if ri.curIndex == -1 { + return true + } + return ri.curSeq+1 < ri.rc.cardinality() +} + +// hs are the high bits to include to avoid needing to reiterate over the buffer in NextMany +func (ri *manyRunIterator16) nextMany(hs uint32, buf []uint32) int { + n := 0 + if !ri.hasNext() { + return n + } + // start and end are inclusive + for n < len(buf) { + if ri.curIndex == -1 || int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex) <= 0 { + ri.curPosInIndex = 0 + ri.curIndex++ + if ri.curIndex == int64(len(ri.rc.iv)) { + break + } + buf[n] = uint32(ri.rc.iv[ri.curIndex].start) | hs + if ri.curIndex != 0 { + ri.curSeq += 1 + } + n += 1 + // not strictly necessarily due to len(buf)-n min check, but saves some work + continue + } + // add as many as you can from this seq + moreVals := minOfInt(int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex), len(buf)-n) + + base := uint32(ri.rc.iv[ri.curIndex].start+ri.curPosInIndex+1) | hs + + // allows BCE + buf2 := buf[n : n+moreVals] + for i := range buf2 { + buf2[i] = base + uint32(i) + } + + // update values + ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16 + ri.curSeq += int64(moreVals) + n += moreVals + } + return n +} + +// remove removes key from the container. +func (rc *runContainer16) removeKey(key uint16) (wasPresent bool) { + + var index int64 + var curSeq int64 + index, wasPresent, _ = rc.search(int64(key), nil) + if !wasPresent { + return // already removed, nothing to do. + } + pos := key - rc.iv[index].start + rc.deleteAt(&index, &pos, &curSeq) + return +} + +// internal helper functions + +func (rc *runContainer16) deleteAt(curIndex *int64, curPosInIndex *uint16, curSeq *int64) { + rc.card-- + *curSeq-- + ci := *curIndex + pos := *curPosInIndex + + // are we first, last, or in the middle of our interval16? + switch { + case pos == 0: + if int64(rc.iv[ci].length) == 0 { + // our interval disappears + rc.iv = append(rc.iv[:ci], rc.iv[ci+1:]...) + // curIndex stays the same, since the delete did + // the advance for us. + *curPosInIndex = 0 + } else { + rc.iv[ci].start++ // no longer overflowable + rc.iv[ci].length-- + } + case pos == rc.iv[ci].length: + // length + rc.iv[ci].length-- + // our interval16 cannot disappear, else we would have been pos == 0, case first above. + *curPosInIndex-- + // if we leave *curIndex alone, then Next() will work properly even after the delete. + default: + //middle + // split into two, adding an interval16 + new0 := newInterval16Range(rc.iv[ci].start, rc.iv[ci].start+*curPosInIndex-1) + + new1start := int64(rc.iv[ci].start+*curPosInIndex) + 1 + if new1start > int64(MaxUint16) { + panic("overflow?!?!") + } + new1 := newInterval16Range(uint16(new1start), rc.iv[ci].last()) + tail := append([]interval16{new0, new1}, rc.iv[ci+1:]...) + rc.iv = append(rc.iv[:ci], tail...) + // update curIndex and curPosInIndex + *curIndex++ + *curPosInIndex = 0 + } + +} + +func have4Overlap16(astart, alast, bstart, blast int64) bool { + if alast+1 <= bstart { + return false + } + return blast+1 > astart +} + +func intersectWithLeftover16(astart, alast, bstart, blast int64) (isOverlap, isLeftoverA, isLeftoverB bool, leftoverstart int64, intersection interval16) { + if !have4Overlap16(astart, alast, bstart, blast) { + return + } + isOverlap = true + + // do the intersection: + if bstart > astart { + intersection.start = uint16(bstart) + } else { + intersection.start = uint16(astart) + } + + switch { + case blast < alast: + isLeftoverA = true + leftoverstart = blast + 1 + intersection.length = uint16(blast) - intersection.start + case alast < blast: + isLeftoverB = true + leftoverstart = alast + 1 + intersection.length = uint16(alast) - intersection.start + default: + // alast == blast + intersection.length = uint16(alast) - intersection.start + } + + return +} + +func (rc *runContainer16) findNextIntervalThatIntersectsStartingFrom(startIndex int64, key int64) (index int64, done bool) { + + rc.myOpts.startIndex = startIndex + rc.myOpts.endxIndex = 0 + + w, _, _ := rc.search(key, &rc.myOpts) + // rc.search always returns w < len(rc.iv) + if w < startIndex { + // not found and comes before lower bound startIndex, + // so just use the lower bound. + if startIndex == int64(len(rc.iv)) { + // also this bump up means that we are done + return startIndex, true + } + return startIndex, false + } + + return w, false +} + +func sliceToString16(m []interval16) string { + s := "" + for i := range m { + s += fmt.Sprintf("%v: %s, ", i, m[i]) + } + return s +} + +// selectInt16 returns the j-th value in the container. +// We panic of j is out of bounds. +func (rc *runContainer16) selectInt16(j uint16) int { + n := rc.cardinality() + if int64(j) > n { + panic(fmt.Sprintf("Cannot select %v since Cardinality is %v", j, n)) + } + + var offset int64 + for k := range rc.iv { + nextOffset := offset + rc.iv[k].runlen() + 1 + if nextOffset > int64(j) { + return int(int64(rc.iv[k].start) + (int64(j) - offset)) + } + offset = nextOffset + } + panic(fmt.Sprintf("Cannot select %v since Cardinality is %v", j, n)) +} + +// helper for invert +func (rc *runContainer16) invertlastInterval(origin uint16, lastIdx int) []interval16 { + cur := rc.iv[lastIdx] + if cur.last() == MaxUint16 { + if cur.start == origin { + return nil // empty container + } + return []interval16{newInterval16Range(origin, cur.start-1)} + } + if cur.start == origin { + return []interval16{newInterval16Range(cur.last()+1, MaxUint16)} + } + // invert splits + return []interval16{ + newInterval16Range(origin, cur.start-1), + newInterval16Range(cur.last()+1, MaxUint16), + } +} + +// invert returns a new container (not inplace), that is +// the inversion of rc. For each bit b in rc, the +// returned value has !b +func (rc *runContainer16) invert() *runContainer16 { + ni := len(rc.iv) + var m []interval16 + switch ni { + case 0: + return &runContainer16{iv: []interval16{newInterval16Range(0, MaxUint16)}} + case 1: + return &runContainer16{iv: rc.invertlastInterval(0, 0)} + } + var invstart int64 + ult := ni - 1 + for i, cur := range rc.iv { + if i == ult { + // invertlastInteval will add both intervals (b) and (c) in + // diagram below. + m = append(m, rc.invertlastInterval(uint16(invstart), i)...) + break + } + // INVAR: i and cur are not the last interval, there is a next at i+1 + // + // ........[cur.start, cur.last] ...... [next.start, next.last].... + // ^ ^ ^ + // (a) (b) (c) + // + // Now: we add interval (a); but if (a) is empty, for cur.start==0, we skip it. + if cur.start > 0 { + m = append(m, newInterval16Range(uint16(invstart), cur.start-1)) + } + invstart = int64(cur.last() + 1) + } + return &runContainer16{iv: m} +} + +func (iv interval16) equal(b interval16) bool { + return iv.start == b.start && iv.length == b.length +} + +func (iv interval16) isSuperSetOf(b interval16) bool { + return iv.start <= b.start && b.last() <= iv.last() +} + +func (iv interval16) subtractInterval(del interval16) (left []interval16, delcount int64) { + isect, isEmpty := intersectInterval16s(iv, del) + + if isEmpty { + return nil, 0 + } + if del.isSuperSetOf(iv) { + return nil, iv.runlen() + } + + switch { + case isect.start > iv.start && isect.last() < iv.last(): + new0 := newInterval16Range(iv.start, isect.start-1) + new1 := newInterval16Range(isect.last()+1, iv.last()) + return []interval16{new0, new1}, isect.runlen() + case isect.start == iv.start: + return []interval16{newInterval16Range(isect.last()+1, iv.last())}, isect.runlen() + default: + return []interval16{newInterval16Range(iv.start, isect.start-1)}, isect.runlen() + } +} + +func (rc *runContainer16) isubtract(del interval16) { + origiv := make([]interval16, len(rc.iv)) + copy(origiv, rc.iv) + n := int64(len(rc.iv)) + if n == 0 { + return // already done. + } + + _, isEmpty := intersectInterval16s(newInterval16Range(rc.iv[0].start, rc.iv[n-1].last()), del) + if isEmpty { + return // done + } + + // INVAR there is some intersection between rc and del + istart, startAlready, _ := rc.search(int64(del.start), nil) + ilast, lastAlready, _ := rc.search(int64(del.last()), nil) + rc.card = -1 + if istart == -1 { + if ilast == n-1 && !lastAlready { + rc.iv = nil + return + } + } + // some intervals will remain + switch { + case startAlready && lastAlready: + res0, _ := rc.iv[istart].subtractInterval(del) + + // would overwrite values in iv b/c res0 can have len 2. so + // write to origiv instead. + lost := 1 + ilast - istart + changeSize := int64(len(res0)) - lost + newSize := int64(len(rc.iv)) + changeSize + + // rc.iv = append(pre, caboose...) + // return + + if ilast != istart { + res1, _ := rc.iv[ilast].subtractInterval(del) + res0 = append(res0, res1...) + changeSize = int64(len(res0)) - lost + newSize = int64(len(rc.iv)) + changeSize + } + switch { + case changeSize < 0: + // shrink + copy(rc.iv[istart+int64(len(res0)):], rc.iv[ilast+1:]) + copy(rc.iv[istart:istart+int64(len(res0))], res0) + rc.iv = rc.iv[:newSize] + return + case changeSize == 0: + // stay the same + copy(rc.iv[istart:istart+int64(len(res0))], res0) + return + default: + // changeSize > 0 is only possible when ilast == istart. + // Hence we now know: changeSize == 1 and len(res0) == 2 + rc.iv = append(rc.iv, interval16{}) + // len(rc.iv) is correct now, no need to rc.iv = rc.iv[:newSize] + + // copy the tail into place + copy(rc.iv[ilast+2:], rc.iv[ilast+1:]) + // copy the new item(s) into place + copy(rc.iv[istart:istart+2], res0) + return + } + + case !startAlready && !lastAlready: + // we get to discard whole intervals + + // from the search() definition: + + // if del.start is not present, then istart is + // set as follows: + // + // a) istart == n-1 if del.start is beyond our + // last interval16 in rc.iv; + // + // b) istart == -1 if del.start is before our first + // interval16 in rc.iv; + // + // c) istart is set to the minimum index of rc.iv + // which comes strictly before the del.start; + // so del.start > rc.iv[istart].last, + // and if istart+1 exists, then del.start < rc.iv[istart+1].startx + + // if del.last is not present, then ilast is + // set as follows: + // + // a) ilast == n-1 if del.last is beyond our + // last interval16 in rc.iv; + // + // b) ilast == -1 if del.last is before our first + // interval16 in rc.iv; + // + // c) ilast is set to the minimum index of rc.iv + // which comes strictly before the del.last; + // so del.last > rc.iv[ilast].last, + // and if ilast+1 exists, then del.last < rc.iv[ilast+1].start + + // INVAR: istart >= 0 + pre := rc.iv[:istart+1] + if ilast == n-1 { + rc.iv = pre + return + } + // INVAR: ilast < n-1 + lost := ilast - istart + changeSize := -lost + newSize := int64(len(rc.iv)) + changeSize + if changeSize != 0 { + copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:]) + } + rc.iv = rc.iv[:newSize] + return + + case startAlready && !lastAlready: + // we can only shrink or stay the same size + // i.e. we either eliminate the whole interval, + // or just cut off the right side. + res0, _ := rc.iv[istart].subtractInterval(del) + if len(res0) > 0 { + // len(res) must be 1 + rc.iv[istart] = res0[0] + } + lost := 1 + (ilast - istart) + changeSize := int64(len(res0)) - lost + newSize := int64(len(rc.iv)) + changeSize + if changeSize != 0 { + copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:]) + } + rc.iv = rc.iv[:newSize] + return + + case !startAlready && lastAlready: + // we can only shrink or stay the same size + res1, _ := rc.iv[ilast].subtractInterval(del) + lost := ilast - istart + changeSize := int64(len(res1)) - lost + newSize := int64(len(rc.iv)) + changeSize + if changeSize != 0 { + // move the tail first to make room for res1 + copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:]) + } + copy(rc.iv[istart+1:], res1) + rc.iv = rc.iv[:newSize] + return + } +} + +// compute rc minus b, and return the result as a new value (not inplace). +// port of run_container_andnot from CRoaring... +// https://github.com/RoaringBitmap/CRoaring/blob/master/src/containers/run.c#L435-L496 +func (rc *runContainer16) AndNotRunContainer16(b *runContainer16) *runContainer16 { + + if len(b.iv) == 0 || len(rc.iv) == 0 { + return rc + } + + dst := newRunContainer16() + apos := 0 + bpos := 0 + + a := rc + + astart := a.iv[apos].start + alast := a.iv[apos].last() + bstart := b.iv[bpos].start + blast := b.iv[bpos].last() + + alen := len(a.iv) + blen := len(b.iv) + + for apos < alen && bpos < blen { + switch { + case alast < bstart: + // output the first run + dst.iv = append(dst.iv, newInterval16Range(astart, alast)) + apos++ + if apos < alen { + astart = a.iv[apos].start + alast = a.iv[apos].last() + } + case blast < astart: + // exit the second run + bpos++ + if bpos < blen { + bstart = b.iv[bpos].start + blast = b.iv[bpos].last() + } + default: + // a: [ ] + // b: [ ] + // alast >= bstart + // blast >= astart + if astart < bstart { + dst.iv = append(dst.iv, newInterval16Range(astart, bstart-1)) + } + if alast > blast { + astart = blast + 1 + } else { + apos++ + if apos < alen { + astart = a.iv[apos].start + alast = a.iv[apos].last() + } + } + } + } + if apos < alen { + dst.iv = append(dst.iv, newInterval16Range(astart, alast)) + apos++ + if apos < alen { + dst.iv = append(dst.iv, a.iv[apos:]...) + } + } + + return dst +} + +func (rc *runContainer16) numberOfRuns() (nr int) { + return len(rc.iv) +} + +func (rc *runContainer16) containerType() contype { + return run16Contype +} + +func (rc *runContainer16) equals16(srb *runContainer16) bool { + //p("both rc16") + // Check if the containers are the same object. + if rc == srb { + //p("same object") + return true + } + + if len(srb.iv) != len(rc.iv) { + //p("iv len differ") + return false + } + + for i, v := range rc.iv { + if v != srb.iv[i] { + //p("differ at iv i=%v, srb.iv[i]=%v, rc.iv[i]=%v", i, srb.iv[i], rc.iv[i]) + return false + } + } + //p("all intervals same, returning true") + return true +} diff --git a/vendor/github.com/RoaringBitmap/roaring/rle16_gen.go b/vendor/github.com/RoaringBitmap/roaring/rle16_gen.go new file mode 100644 index 0000000000..05bf4463f1 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/rle16_gen.go @@ -0,0 +1,1126 @@ +package roaring + +// NOTE: THIS FILE WAS PRODUCED BY THE +// MSGP CODE GENERATION TOOL (github.com/tinylib/msgp) +// DO NOT EDIT + +import "github.com/tinylib/msgp/msgp" + +// DecodeMsg implements msgp.Decodable +func (z *addHelper16) DecodeMsg(dc *msgp.Reader) (err error) { + var field []byte + _ = field + var zbai uint32 + zbai, err = dc.ReadMapHeader() + if err != nil { + return + } + for zbai > 0 { + zbai-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "runstart": + z.runstart, err = dc.ReadUint16() + if err != nil { + return + } + case "runlen": + z.runlen, err = dc.ReadUint16() + if err != nil { + return + } + case "actuallyAdded": + z.actuallyAdded, err = dc.ReadUint16() + if err != nil { + return + } + case "m": + var zcmr uint32 + zcmr, err = dc.ReadArrayHeader() + if err != nil { + return + } + if cap(z.m) >= int(zcmr) { + z.m = (z.m)[:zcmr] + } else { + z.m = make([]interval16, zcmr) + } + for zxvk := range z.m { + var zajw uint32 + zajw, err = dc.ReadMapHeader() + if err != nil { + return + } + for zajw > 0 { + zajw-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "start": + z.m[zxvk].start, err = dc.ReadUint16() + if err != nil { + return + } + case "last": + z.m[zxvk].length, err = dc.ReadUint16() + z.m[zxvk].length -= z.m[zxvk].start + if err != nil { + return + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + } + case "rc": + if dc.IsNil() { + err = dc.ReadNil() + if err != nil { + return + } + z.rc = nil + } else { + if z.rc == nil { + z.rc = new(runContainer16) + } + var zwht uint32 + zwht, err = dc.ReadMapHeader() + if err != nil { + return + } + for zwht > 0 { + zwht-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "iv": + var zhct uint32 + zhct, err = dc.ReadArrayHeader() + if err != nil { + return + } + if cap(z.rc.iv) >= int(zhct) { + z.rc.iv = (z.rc.iv)[:zhct] + } else { + z.rc.iv = make([]interval16, zhct) + } + for zbzg := range z.rc.iv { + var zcua uint32 + zcua, err = dc.ReadMapHeader() + if err != nil { + return + } + for zcua > 0 { + zcua-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "start": + z.rc.iv[zbzg].start, err = dc.ReadUint16() + if err != nil { + return + } + case "last": + z.rc.iv[zbzg].length, err = dc.ReadUint16() + z.rc.iv[zbzg].length -= z.rc.iv[zbzg].start + if err != nil { + return + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + } + case "card": + z.rc.card, err = dc.ReadInt64() + if err != nil { + return + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z *addHelper16) EncodeMsg(en *msgp.Writer) (err error) { + // map header, size 5 + // write "runstart" + err = en.Append(0x85, 0xa8, 0x72, 0x75, 0x6e, 0x73, 0x74, 0x61, 0x72, 0x74) + if err != nil { + return err + } + err = en.WriteUint16(z.runstart) + if err != nil { + return + } + // write "runlen" + err = en.Append(0xa6, 0x72, 0x75, 0x6e, 0x6c, 0x65, 0x6e) + if err != nil { + return err + } + err = en.WriteUint16(z.runlen) + if err != nil { + return + } + // write "actuallyAdded" + err = en.Append(0xad, 0x61, 0x63, 0x74, 0x75, 0x61, 0x6c, 0x6c, 0x79, 0x41, 0x64, 0x64, 0x65, 0x64) + if err != nil { + return err + } + err = en.WriteUint16(z.actuallyAdded) + if err != nil { + return + } + // write "m" + err = en.Append(0xa1, 0x6d) + if err != nil { + return err + } + err = en.WriteArrayHeader(uint32(len(z.m))) + if err != nil { + return + } + for zxvk := range z.m { + // map header, size 2 + // write "start" + err = en.Append(0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) + if err != nil { + return err + } + err = en.WriteUint16(z.m[zxvk].start) + if err != nil { + return + } + // write "last" + err = en.Append(0xa4, 0x6c, 0x61, 0x73, 0x74) + if err != nil { + return err + } + err = en.WriteUint16(z.m[zxvk].last()) + if err != nil { + return + } + } + // write "rc" + err = en.Append(0xa2, 0x72, 0x63) + if err != nil { + return err + } + if z.rc == nil { + err = en.WriteNil() + if err != nil { + return + } + } else { + // map header, size 2 + // write "iv" + err = en.Append(0x82, 0xa2, 0x69, 0x76) + if err != nil { + return err + } + err = en.WriteArrayHeader(uint32(len(z.rc.iv))) + if err != nil { + return + } + for zbzg := range z.rc.iv { + // map header, size 2 + // write "start" + err = en.Append(0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) + if err != nil { + return err + } + err = en.WriteUint16(z.rc.iv[zbzg].start) + if err != nil { + return + } + // write "last" + err = en.Append(0xa4, 0x6c, 0x61, 0x73, 0x74) + if err != nil { + return err + } + err = en.WriteUint16(z.rc.iv[zbzg].last()) + if err != nil { + return + } + } + // write "card" + err = en.Append(0xa4, 0x63, 0x61, 0x72, 0x64) + if err != nil { + return err + } + err = en.WriteInt64(z.rc.card) + if err != nil { + return + } + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z *addHelper16) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 5 + // string "runstart" + o = append(o, 0x85, 0xa8, 0x72, 0x75, 0x6e, 0x73, 0x74, 0x61, 0x72, 0x74) + o = msgp.AppendUint16(o, z.runstart) + // string "runlen" + o = append(o, 0xa6, 0x72, 0x75, 0x6e, 0x6c, 0x65, 0x6e) + o = msgp.AppendUint16(o, z.runlen) + // string "actuallyAdded" + o = append(o, 0xad, 0x61, 0x63, 0x74, 0x75, 0x61, 0x6c, 0x6c, 0x79, 0x41, 0x64, 0x64, 0x65, 0x64) + o = msgp.AppendUint16(o, z.actuallyAdded) + // string "m" + o = append(o, 0xa1, 0x6d) + o = msgp.AppendArrayHeader(o, uint32(len(z.m))) + for zxvk := range z.m { + // map header, size 2 + // string "start" + o = append(o, 0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) + o = msgp.AppendUint16(o, z.m[zxvk].start) + // string "last" + o = append(o, 0xa4, 0x6c, 0x61, 0x73, 0x74) + o = msgp.AppendUint16(o, z.m[zxvk].last()) + } + // string "rc" + o = append(o, 0xa2, 0x72, 0x63) + if z.rc == nil { + o = msgp.AppendNil(o) + } else { + // map header, size 2 + // string "iv" + o = append(o, 0x82, 0xa2, 0x69, 0x76) + o = msgp.AppendArrayHeader(o, uint32(len(z.rc.iv))) + for zbzg := range z.rc.iv { + // map header, size 2 + // string "start" + o = append(o, 0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) + o = msgp.AppendUint16(o, z.rc.iv[zbzg].start) + // string "last" + o = append(o, 0xa4, 0x6c, 0x61, 0x73, 0x74) + o = msgp.AppendUint16(o, z.rc.iv[zbzg].last()) + } + // string "card" + o = append(o, 0xa4, 0x63, 0x61, 0x72, 0x64) + o = msgp.AppendInt64(o, z.rc.card) + } + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *addHelper16) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zxhx uint32 + zxhx, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zxhx > 0 { + zxhx-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "runstart": + z.runstart, bts, err = msgp.ReadUint16Bytes(bts) + if err != nil { + return + } + case "runlen": + z.runlen, bts, err = msgp.ReadUint16Bytes(bts) + if err != nil { + return + } + case "actuallyAdded": + z.actuallyAdded, bts, err = msgp.ReadUint16Bytes(bts) + if err != nil { + return + } + case "m": + var zlqf uint32 + zlqf, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + return + } + if cap(z.m) >= int(zlqf) { + z.m = (z.m)[:zlqf] + } else { + z.m = make([]interval16, zlqf) + } + for zxvk := range z.m { + var zdaf uint32 + zdaf, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zdaf > 0 { + zdaf-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "start": + z.m[zxvk].start, bts, err = msgp.ReadUint16Bytes(bts) + if err != nil { + return + } + case "last": + z.m[zxvk].length, bts, err = msgp.ReadUint16Bytes(bts) + z.m[zxvk].length -= z.m[zxvk].start + if err != nil { + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + } + case "rc": + if msgp.IsNil(bts) { + bts, err = msgp.ReadNilBytes(bts) + if err != nil { + return + } + z.rc = nil + } else { + if z.rc == nil { + z.rc = new(runContainer16) + } + var zpks uint32 + zpks, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zpks > 0 { + zpks-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "iv": + var zjfb uint32 + zjfb, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + return + } + if cap(z.rc.iv) >= int(zjfb) { + z.rc.iv = (z.rc.iv)[:zjfb] + } else { + z.rc.iv = make([]interval16, zjfb) + } + for zbzg := range z.rc.iv { + var zcxo uint32 + zcxo, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zcxo > 0 { + zcxo-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "start": + z.rc.iv[zbzg].start, bts, err = msgp.ReadUint16Bytes(bts) + if err != nil { + return + } + case "last": + z.rc.iv[zbzg].length, bts, err = msgp.ReadUint16Bytes(bts) + z.rc.iv[zbzg].length -= z.rc.iv[zbzg].start + if err != nil { + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + } + case "card": + z.rc.card, bts, err = msgp.ReadInt64Bytes(bts) + if err != nil { + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *addHelper16) Msgsize() (s int) { + s = 1 + 9 + msgp.Uint16Size + 7 + msgp.Uint16Size + 14 + msgp.Uint16Size + 2 + msgp.ArrayHeaderSize + (len(z.m) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 3 + if z.rc == nil { + s += msgp.NilSize + } else { + s += 1 + 3 + msgp.ArrayHeaderSize + (len(z.rc.iv) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 5 + msgp.Int64Size + } + return +} + +// DecodeMsg implements msgp.Decodable +func (z *interval16) DecodeMsg(dc *msgp.Reader) (err error) { + var field []byte + _ = field + var zeff uint32 + zeff, err = dc.ReadMapHeader() + if err != nil { + return + } + for zeff > 0 { + zeff-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "start": + z.start, err = dc.ReadUint16() + if err != nil { + return + } + case "last": + z.length, err = dc.ReadUint16() + z.length = -z.start + if err != nil { + return + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z interval16) EncodeMsg(en *msgp.Writer) (err error) { + // map header, size 2 + // write "start" + err = en.Append(0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) + if err != nil { + return err + } + err = en.WriteUint16(z.start) + if err != nil { + return + } + // write "last" + err = en.Append(0xa4, 0x6c, 0x61, 0x73, 0x74) + if err != nil { + return err + } + err = en.WriteUint16(z.last()) + if err != nil { + return + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z interval16) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 2 + // string "start" + o = append(o, 0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) + o = msgp.AppendUint16(o, z.start) + // string "last" + o = append(o, 0xa4, 0x6c, 0x61, 0x73, 0x74) + o = msgp.AppendUint16(o, z.last()) + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *interval16) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zrsw uint32 + zrsw, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zrsw > 0 { + zrsw-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "start": + z.start, bts, err = msgp.ReadUint16Bytes(bts) + if err != nil { + return + } + case "last": + z.length, bts, err = msgp.ReadUint16Bytes(bts) + z.length -= z.start + if err != nil { + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z interval16) Msgsize() (s int) { + s = 1 + 6 + msgp.Uint16Size + 5 + msgp.Uint16Size + return +} + +// DecodeMsg implements msgp.Decodable +func (z *runContainer16) DecodeMsg(dc *msgp.Reader) (err error) { + var field []byte + _ = field + var zdnj uint32 + zdnj, err = dc.ReadMapHeader() + if err != nil { + return + } + for zdnj > 0 { + zdnj-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "iv": + var zobc uint32 + zobc, err = dc.ReadArrayHeader() + if err != nil { + return + } + if cap(z.iv) >= int(zobc) { + z.iv = (z.iv)[:zobc] + } else { + z.iv = make([]interval16, zobc) + } + for zxpk := range z.iv { + var zsnv uint32 + zsnv, err = dc.ReadMapHeader() + if err != nil { + return + } + for zsnv > 0 { + zsnv-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "start": + z.iv[zxpk].start, err = dc.ReadUint16() + if err != nil { + return + } + case "last": + z.iv[zxpk].length, err = dc.ReadUint16() + z.iv[zxpk].length -= z.iv[zxpk].start + if err != nil { + return + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + } + case "card": + z.card, err = dc.ReadInt64() + if err != nil { + return + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z *runContainer16) EncodeMsg(en *msgp.Writer) (err error) { + // map header, size 2 + // write "iv" + err = en.Append(0x82, 0xa2, 0x69, 0x76) + if err != nil { + return err + } + err = en.WriteArrayHeader(uint32(len(z.iv))) + if err != nil { + return + } + for zxpk := range z.iv { + // map header, size 2 + // write "start" + err = en.Append(0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) + if err != nil { + return err + } + err = en.WriteUint16(z.iv[zxpk].start) + if err != nil { + return + } + // write "last" + err = en.Append(0xa4, 0x6c, 0x61, 0x73, 0x74) + if err != nil { + return err + } + err = en.WriteUint16(z.iv[zxpk].last()) + if err != nil { + return + } + } + // write "card" + err = en.Append(0xa4, 0x63, 0x61, 0x72, 0x64) + if err != nil { + return err + } + err = en.WriteInt64(z.card) + if err != nil { + return + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z *runContainer16) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 2 + // string "iv" + o = append(o, 0x82, 0xa2, 0x69, 0x76) + o = msgp.AppendArrayHeader(o, uint32(len(z.iv))) + for zxpk := range z.iv { + // map header, size 2 + // string "start" + o = append(o, 0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) + o = msgp.AppendUint16(o, z.iv[zxpk].start) + // string "last" + o = append(o, 0xa4, 0x6c, 0x61, 0x73, 0x74) + o = msgp.AppendUint16(o, z.iv[zxpk].last()) + } + // string "card" + o = append(o, 0xa4, 0x63, 0x61, 0x72, 0x64) + o = msgp.AppendInt64(o, z.card) + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *runContainer16) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zkgt uint32 + zkgt, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zkgt > 0 { + zkgt-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "iv": + var zema uint32 + zema, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + return + } + if cap(z.iv) >= int(zema) { + z.iv = (z.iv)[:zema] + } else { + z.iv = make([]interval16, zema) + } + for zxpk := range z.iv { + var zpez uint32 + zpez, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zpez > 0 { + zpez-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "start": + z.iv[zxpk].start, bts, err = msgp.ReadUint16Bytes(bts) + if err != nil { + return + } + case "last": + z.iv[zxpk].length, bts, err = msgp.ReadUint16Bytes(bts) + z.iv[zxpk].length -= z.iv[zxpk].start + if err != nil { + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + } + case "card": + z.card, bts, err = msgp.ReadInt64Bytes(bts) + if err != nil { + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *runContainer16) Msgsize() (s int) { + s = 1 + 3 + msgp.ArrayHeaderSize + (len(z.iv) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 5 + msgp.Int64Size + return +} + +// DecodeMsg implements msgp.Decodable +func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) { + var field []byte + _ = field + var zqke uint32 + zqke, err = dc.ReadMapHeader() + if err != nil { + return + } + for zqke > 0 { + zqke-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "rc": + if dc.IsNil() { + err = dc.ReadNil() + if err != nil { + return + } + z.rc = nil + } else { + if z.rc == nil { + z.rc = new(runContainer16) + } + err = z.rc.DecodeMsg(dc) + if err != nil { + return + } + } + case "curIndex": + z.curIndex, err = dc.ReadInt64() + if err != nil { + return + } + case "curPosInIndex": + z.curPosInIndex, err = dc.ReadUint16() + if err != nil { + return + } + case "curSeq": + z.curSeq, err = dc.ReadInt64() + if err != nil { + return + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) { + // map header, size 4 + // write "rc" + err = en.Append(0x84, 0xa2, 0x72, 0x63) + if err != nil { + return err + } + if z.rc == nil { + err = en.WriteNil() + if err != nil { + return + } + } else { + err = z.rc.EncodeMsg(en) + if err != nil { + return + } + } + // write "curIndex" + err = en.Append(0xa8, 0x63, 0x75, 0x72, 0x49, 0x6e, 0x64, 0x65, 0x78) + if err != nil { + return err + } + err = en.WriteInt64(z.curIndex) + if err != nil { + return + } + // write "curPosInIndex" + err = en.Append(0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78) + if err != nil { + return err + } + err = en.WriteUint16(z.curPosInIndex) + if err != nil { + return + } + // write "curSeq" + err = en.Append(0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71) + if err != nil { + return err + } + err = en.WriteInt64(z.curSeq) + if err != nil { + return + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 4 + // string "rc" + o = append(o, 0x84, 0xa2, 0x72, 0x63) + if z.rc == nil { + o = msgp.AppendNil(o) + } else { + o, err = z.rc.MarshalMsg(o) + if err != nil { + return + } + } + // string "curIndex" + o = append(o, 0xa8, 0x63, 0x75, 0x72, 0x49, 0x6e, 0x64, 0x65, 0x78) + o = msgp.AppendInt64(o, z.curIndex) + // string "curPosInIndex" + o = append(o, 0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78) + o = msgp.AppendUint16(o, z.curPosInIndex) + // string "curSeq" + o = append(o, 0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71) + o = msgp.AppendInt64(o, z.curSeq) + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zqyh uint32 + zqyh, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zqyh > 0 { + zqyh-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "rc": + if msgp.IsNil(bts) { + bts, err = msgp.ReadNilBytes(bts) + if err != nil { + return + } + z.rc = nil + } else { + if z.rc == nil { + z.rc = new(runContainer16) + } + bts, err = z.rc.UnmarshalMsg(bts) + if err != nil { + return + } + } + case "curIndex": + z.curIndex, bts, err = msgp.ReadInt64Bytes(bts) + if err != nil { + return + } + case "curPosInIndex": + z.curPosInIndex, bts, err = msgp.ReadUint16Bytes(bts) + if err != nil { + return + } + case "curSeq": + z.curSeq, bts, err = msgp.ReadInt64Bytes(bts) + if err != nil { + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *runIterator16) Msgsize() (s int) { + s = 1 + 3 + if z.rc == nil { + s += msgp.NilSize + } else { + s += z.rc.Msgsize() + } + s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size + 7 + msgp.Int64Size + return +} + +// DecodeMsg implements msgp.Decodable +func (z *uint16Slice) DecodeMsg(dc *msgp.Reader) (err error) { + var zjpj uint32 + zjpj, err = dc.ReadArrayHeader() + if err != nil { + return + } + if cap((*z)) >= int(zjpj) { + (*z) = (*z)[:zjpj] + } else { + (*z) = make(uint16Slice, zjpj) + } + for zywj := range *z { + (*z)[zywj], err = dc.ReadUint16() + if err != nil { + return + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z uint16Slice) EncodeMsg(en *msgp.Writer) (err error) { + err = en.WriteArrayHeader(uint32(len(z))) + if err != nil { + return + } + for zzpf := range z { + err = en.WriteUint16(z[zzpf]) + if err != nil { + return + } + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z uint16Slice) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + o = msgp.AppendArrayHeader(o, uint32(len(z))) + for zzpf := range z { + o = msgp.AppendUint16(o, z[zzpf]) + } + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *uint16Slice) UnmarshalMsg(bts []byte) (o []byte, err error) { + var zgmo uint32 + zgmo, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + return + } + if cap((*z)) >= int(zgmo) { + (*z) = (*z)[:zgmo] + } else { + (*z) = make(uint16Slice, zgmo) + } + for zrfe := range *z { + (*z)[zrfe], bts, err = msgp.ReadUint16Bytes(bts) + if err != nil { + return + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z uint16Slice) Msgsize() (s int) { + s = msgp.ArrayHeaderSize + (len(z) * (msgp.Uint16Size)) + return +} diff --git a/vendor/github.com/RoaringBitmap/roaring/rle_gen.go b/vendor/github.com/RoaringBitmap/roaring/rle_gen.go new file mode 100644 index 0000000000..bc9da75f3a --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/rle_gen.go @@ -0,0 +1,1118 @@ +package roaring + +// NOTE: THIS FILE WAS PRODUCED BY THE +// MSGP CODE GENERATION TOOL (github.com/tinylib/msgp) +// DO NOT EDIT + +import "github.com/tinylib/msgp/msgp" + +// DecodeMsg implements msgp.Decodable +func (z *addHelper32) DecodeMsg(dc *msgp.Reader) (err error) { + var field []byte + _ = field + var zbai uint32 + zbai, err = dc.ReadMapHeader() + if err != nil { + return + } + for zbai > 0 { + zbai-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "runstart": + z.runstart, err = dc.ReadUint32() + if err != nil { + return + } + case "runlen": + z.runlen, err = dc.ReadUint32() + if err != nil { + return + } + case "actuallyAdded": + z.actuallyAdded, err = dc.ReadUint32() + if err != nil { + return + } + case "m": + var zcmr uint32 + zcmr, err = dc.ReadArrayHeader() + if err != nil { + return + } + if cap(z.m) >= int(zcmr) { + z.m = (z.m)[:zcmr] + } else { + z.m = make([]interval32, zcmr) + } + for zxvk := range z.m { + var zajw uint32 + zajw, err = dc.ReadMapHeader() + if err != nil { + return + } + for zajw > 0 { + zajw-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "start": + z.m[zxvk].start, err = dc.ReadUint32() + if err != nil { + return + } + case "last": + z.m[zxvk].last, err = dc.ReadUint32() + if err != nil { + return + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + } + case "rc": + if dc.IsNil() { + err = dc.ReadNil() + if err != nil { + return + } + z.rc = nil + } else { + if z.rc == nil { + z.rc = new(runContainer32) + } + var zwht uint32 + zwht, err = dc.ReadMapHeader() + if err != nil { + return + } + for zwht > 0 { + zwht-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "iv": + var zhct uint32 + zhct, err = dc.ReadArrayHeader() + if err != nil { + return + } + if cap(z.rc.iv) >= int(zhct) { + z.rc.iv = (z.rc.iv)[:zhct] + } else { + z.rc.iv = make([]interval32, zhct) + } + for zbzg := range z.rc.iv { + var zcua uint32 + zcua, err = dc.ReadMapHeader() + if err != nil { + return + } + for zcua > 0 { + zcua-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "start": + z.rc.iv[zbzg].start, err = dc.ReadUint32() + if err != nil { + return + } + case "last": + z.rc.iv[zbzg].last, err = dc.ReadUint32() + if err != nil { + return + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + } + case "card": + z.rc.card, err = dc.ReadInt64() + if err != nil { + return + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z *addHelper32) EncodeMsg(en *msgp.Writer) (err error) { + // map header, size 5 + // write "runstart" + err = en.Append(0x85, 0xa8, 0x72, 0x75, 0x6e, 0x73, 0x74, 0x61, 0x72, 0x74) + if err != nil { + return err + } + err = en.WriteUint32(z.runstart) + if err != nil { + return + } + // write "runlen" + err = en.Append(0xa6, 0x72, 0x75, 0x6e, 0x6c, 0x65, 0x6e) + if err != nil { + return err + } + err = en.WriteUint32(z.runlen) + if err != nil { + return + } + // write "actuallyAdded" + err = en.Append(0xad, 0x61, 0x63, 0x74, 0x75, 0x61, 0x6c, 0x6c, 0x79, 0x41, 0x64, 0x64, 0x65, 0x64) + if err != nil { + return err + } + err = en.WriteUint32(z.actuallyAdded) + if err != nil { + return + } + // write "m" + err = en.Append(0xa1, 0x6d) + if err != nil { + return err + } + err = en.WriteArrayHeader(uint32(len(z.m))) + if err != nil { + return + } + for zxvk := range z.m { + // map header, size 2 + // write "start" + err = en.Append(0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) + if err != nil { + return err + } + err = en.WriteUint32(z.m[zxvk].start) + if err != nil { + return + } + // write "last" + err = en.Append(0xa4, 0x6c, 0x61, 0x73, 0x74) + if err != nil { + return err + } + err = en.WriteUint32(z.m[zxvk].last) + if err != nil { + return + } + } + // write "rc" + err = en.Append(0xa2, 0x72, 0x63) + if err != nil { + return err + } + if z.rc == nil { + err = en.WriteNil() + if err != nil { + return + } + } else { + // map header, size 2 + // write "iv" + err = en.Append(0x82, 0xa2, 0x69, 0x76) + if err != nil { + return err + } + err = en.WriteArrayHeader(uint32(len(z.rc.iv))) + if err != nil { + return + } + for zbzg := range z.rc.iv { + // map header, size 2 + // write "start" + err = en.Append(0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) + if err != nil { + return err + } + err = en.WriteUint32(z.rc.iv[zbzg].start) + if err != nil { + return + } + // write "last" + err = en.Append(0xa4, 0x6c, 0x61, 0x73, 0x74) + if err != nil { + return err + } + err = en.WriteUint32(z.rc.iv[zbzg].last) + if err != nil { + return + } + } + // write "card" + err = en.Append(0xa4, 0x63, 0x61, 0x72, 0x64) + if err != nil { + return err + } + err = en.WriteInt64(z.rc.card) + if err != nil { + return + } + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z *addHelper32) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 5 + // string "runstart" + o = append(o, 0x85, 0xa8, 0x72, 0x75, 0x6e, 0x73, 0x74, 0x61, 0x72, 0x74) + o = msgp.AppendUint32(o, z.runstart) + // string "runlen" + o = append(o, 0xa6, 0x72, 0x75, 0x6e, 0x6c, 0x65, 0x6e) + o = msgp.AppendUint32(o, z.runlen) + // string "actuallyAdded" + o = append(o, 0xad, 0x61, 0x63, 0x74, 0x75, 0x61, 0x6c, 0x6c, 0x79, 0x41, 0x64, 0x64, 0x65, 0x64) + o = msgp.AppendUint32(o, z.actuallyAdded) + // string "m" + o = append(o, 0xa1, 0x6d) + o = msgp.AppendArrayHeader(o, uint32(len(z.m))) + for zxvk := range z.m { + // map header, size 2 + // string "start" + o = append(o, 0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) + o = msgp.AppendUint32(o, z.m[zxvk].start) + // string "last" + o = append(o, 0xa4, 0x6c, 0x61, 0x73, 0x74) + o = msgp.AppendUint32(o, z.m[zxvk].last) + } + // string "rc" + o = append(o, 0xa2, 0x72, 0x63) + if z.rc == nil { + o = msgp.AppendNil(o) + } else { + // map header, size 2 + // string "iv" + o = append(o, 0x82, 0xa2, 0x69, 0x76) + o = msgp.AppendArrayHeader(o, uint32(len(z.rc.iv))) + for zbzg := range z.rc.iv { + // map header, size 2 + // string "start" + o = append(o, 0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) + o = msgp.AppendUint32(o, z.rc.iv[zbzg].start) + // string "last" + o = append(o, 0xa4, 0x6c, 0x61, 0x73, 0x74) + o = msgp.AppendUint32(o, z.rc.iv[zbzg].last) + } + // string "card" + o = append(o, 0xa4, 0x63, 0x61, 0x72, 0x64) + o = msgp.AppendInt64(o, z.rc.card) + } + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *addHelper32) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zxhx uint32 + zxhx, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zxhx > 0 { + zxhx-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "runstart": + z.runstart, bts, err = msgp.ReadUint32Bytes(bts) + if err != nil { + return + } + case "runlen": + z.runlen, bts, err = msgp.ReadUint32Bytes(bts) + if err != nil { + return + } + case "actuallyAdded": + z.actuallyAdded, bts, err = msgp.ReadUint32Bytes(bts) + if err != nil { + return + } + case "m": + var zlqf uint32 + zlqf, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + return + } + if cap(z.m) >= int(zlqf) { + z.m = (z.m)[:zlqf] + } else { + z.m = make([]interval32, zlqf) + } + for zxvk := range z.m { + var zdaf uint32 + zdaf, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zdaf > 0 { + zdaf-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "start": + z.m[zxvk].start, bts, err = msgp.ReadUint32Bytes(bts) + if err != nil { + return + } + case "last": + z.m[zxvk].last, bts, err = msgp.ReadUint32Bytes(bts) + if err != nil { + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + } + case "rc": + if msgp.IsNil(bts) { + bts, err = msgp.ReadNilBytes(bts) + if err != nil { + return + } + z.rc = nil + } else { + if z.rc == nil { + z.rc = new(runContainer32) + } + var zpks uint32 + zpks, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zpks > 0 { + zpks-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "iv": + var zjfb uint32 + zjfb, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + return + } + if cap(z.rc.iv) >= int(zjfb) { + z.rc.iv = (z.rc.iv)[:zjfb] + } else { + z.rc.iv = make([]interval32, zjfb) + } + for zbzg := range z.rc.iv { + var zcxo uint32 + zcxo, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zcxo > 0 { + zcxo-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "start": + z.rc.iv[zbzg].start, bts, err = msgp.ReadUint32Bytes(bts) + if err != nil { + return + } + case "last": + z.rc.iv[zbzg].last, bts, err = msgp.ReadUint32Bytes(bts) + if err != nil { + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + } + case "card": + z.rc.card, bts, err = msgp.ReadInt64Bytes(bts) + if err != nil { + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *addHelper32) Msgsize() (s int) { + s = 1 + 9 + msgp.Uint32Size + 7 + msgp.Uint32Size + 14 + msgp.Uint32Size + 2 + msgp.ArrayHeaderSize + (len(z.m) * (12 + msgp.Uint32Size + msgp.Uint32Size)) + 3 + if z.rc == nil { + s += msgp.NilSize + } else { + s += 1 + 3 + msgp.ArrayHeaderSize + (len(z.rc.iv) * (12 + msgp.Uint32Size + msgp.Uint32Size)) + 5 + msgp.Int64Size + } + return +} + +// DecodeMsg implements msgp.Decodable +func (z *interval32) DecodeMsg(dc *msgp.Reader) (err error) { + var field []byte + _ = field + var zeff uint32 + zeff, err = dc.ReadMapHeader() + if err != nil { + return + } + for zeff > 0 { + zeff-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "start": + z.start, err = dc.ReadUint32() + if err != nil { + return + } + case "last": + z.last, err = dc.ReadUint32() + if err != nil { + return + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z interval32) EncodeMsg(en *msgp.Writer) (err error) { + // map header, size 2 + // write "start" + err = en.Append(0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) + if err != nil { + return err + } + err = en.WriteUint32(z.start) + if err != nil { + return + } + // write "last" + err = en.Append(0xa4, 0x6c, 0x61, 0x73, 0x74) + if err != nil { + return err + } + err = en.WriteUint32(z.last) + if err != nil { + return + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z interval32) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 2 + // string "start" + o = append(o, 0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) + o = msgp.AppendUint32(o, z.start) + // string "last" + o = append(o, 0xa4, 0x6c, 0x61, 0x73, 0x74) + o = msgp.AppendUint32(o, z.last) + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *interval32) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zrsw uint32 + zrsw, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zrsw > 0 { + zrsw-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "start": + z.start, bts, err = msgp.ReadUint32Bytes(bts) + if err != nil { + return + } + case "last": + z.last, bts, err = msgp.ReadUint32Bytes(bts) + if err != nil { + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z interval32) Msgsize() (s int) { + s = 1 + 6 + msgp.Uint32Size + 5 + msgp.Uint32Size + return +} + +// DecodeMsg implements msgp.Decodable +func (z *runContainer32) DecodeMsg(dc *msgp.Reader) (err error) { + var field []byte + _ = field + var zdnj uint32 + zdnj, err = dc.ReadMapHeader() + if err != nil { + return + } + for zdnj > 0 { + zdnj-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "iv": + var zobc uint32 + zobc, err = dc.ReadArrayHeader() + if err != nil { + return + } + if cap(z.iv) >= int(zobc) { + z.iv = (z.iv)[:zobc] + } else { + z.iv = make([]interval32, zobc) + } + for zxpk := range z.iv { + var zsnv uint32 + zsnv, err = dc.ReadMapHeader() + if err != nil { + return + } + for zsnv > 0 { + zsnv-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "start": + z.iv[zxpk].start, err = dc.ReadUint32() + if err != nil { + return + } + case "last": + z.iv[zxpk].last, err = dc.ReadUint32() + if err != nil { + return + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + } + case "card": + z.card, err = dc.ReadInt64() + if err != nil { + return + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z *runContainer32) EncodeMsg(en *msgp.Writer) (err error) { + // map header, size 2 + // write "iv" + err = en.Append(0x82, 0xa2, 0x69, 0x76) + if err != nil { + return err + } + err = en.WriteArrayHeader(uint32(len(z.iv))) + if err != nil { + return + } + for zxpk := range z.iv { + // map header, size 2 + // write "start" + err = en.Append(0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) + if err != nil { + return err + } + err = en.WriteUint32(z.iv[zxpk].start) + if err != nil { + return + } + // write "last" + err = en.Append(0xa4, 0x6c, 0x61, 0x73, 0x74) + if err != nil { + return err + } + err = en.WriteUint32(z.iv[zxpk].last) + if err != nil { + return + } + } + // write "card" + err = en.Append(0xa4, 0x63, 0x61, 0x72, 0x64) + if err != nil { + return err + } + err = en.WriteInt64(z.card) + if err != nil { + return + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z *runContainer32) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 2 + // string "iv" + o = append(o, 0x82, 0xa2, 0x69, 0x76) + o = msgp.AppendArrayHeader(o, uint32(len(z.iv))) + for zxpk := range z.iv { + // map header, size 2 + // string "start" + o = append(o, 0x82, 0xa5, 0x73, 0x74, 0x61, 0x72, 0x74) + o = msgp.AppendUint32(o, z.iv[zxpk].start) + // string "last" + o = append(o, 0xa4, 0x6c, 0x61, 0x73, 0x74) + o = msgp.AppendUint32(o, z.iv[zxpk].last) + } + // string "card" + o = append(o, 0xa4, 0x63, 0x61, 0x72, 0x64) + o = msgp.AppendInt64(o, z.card) + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *runContainer32) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zkgt uint32 + zkgt, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zkgt > 0 { + zkgt-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "iv": + var zema uint32 + zema, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + return + } + if cap(z.iv) >= int(zema) { + z.iv = (z.iv)[:zema] + } else { + z.iv = make([]interval32, zema) + } + for zxpk := range z.iv { + var zpez uint32 + zpez, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zpez > 0 { + zpez-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "start": + z.iv[zxpk].start, bts, err = msgp.ReadUint32Bytes(bts) + if err != nil { + return + } + case "last": + z.iv[zxpk].last, bts, err = msgp.ReadUint32Bytes(bts) + if err != nil { + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + } + case "card": + z.card, bts, err = msgp.ReadInt64Bytes(bts) + if err != nil { + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *runContainer32) Msgsize() (s int) { + s = 1 + 3 + msgp.ArrayHeaderSize + (len(z.iv) * (12 + msgp.Uint32Size + msgp.Uint32Size)) + 5 + msgp.Int64Size + return +} + +// DecodeMsg implements msgp.Decodable +func (z *runIterator32) DecodeMsg(dc *msgp.Reader) (err error) { + var field []byte + _ = field + var zqke uint32 + zqke, err = dc.ReadMapHeader() + if err != nil { + return + } + for zqke > 0 { + zqke-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "rc": + if dc.IsNil() { + err = dc.ReadNil() + if err != nil { + return + } + z.rc = nil + } else { + if z.rc == nil { + z.rc = new(runContainer32) + } + err = z.rc.DecodeMsg(dc) + if err != nil { + return + } + } + case "curIndex": + z.curIndex, err = dc.ReadInt64() + if err != nil { + return + } + case "curPosInIndex": + z.curPosInIndex, err = dc.ReadUint32() + if err != nil { + return + } + case "curSeq": + z.curSeq, err = dc.ReadInt64() + if err != nil { + return + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z *runIterator32) EncodeMsg(en *msgp.Writer) (err error) { + // map header, size 4 + // write "rc" + err = en.Append(0x84, 0xa2, 0x72, 0x63) + if err != nil { + return err + } + if z.rc == nil { + err = en.WriteNil() + if err != nil { + return + } + } else { + err = z.rc.EncodeMsg(en) + if err != nil { + return + } + } + // write "curIndex" + err = en.Append(0xa8, 0x63, 0x75, 0x72, 0x49, 0x6e, 0x64, 0x65, 0x78) + if err != nil { + return err + } + err = en.WriteInt64(z.curIndex) + if err != nil { + return + } + // write "curPosInIndex" + err = en.Append(0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78) + if err != nil { + return err + } + err = en.WriteUint32(z.curPosInIndex) + if err != nil { + return + } + // write "curSeq" + err = en.Append(0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71) + if err != nil { + return err + } + err = en.WriteInt64(z.curSeq) + if err != nil { + return + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z *runIterator32) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 4 + // string "rc" + o = append(o, 0x84, 0xa2, 0x72, 0x63) + if z.rc == nil { + o = msgp.AppendNil(o) + } else { + o, err = z.rc.MarshalMsg(o) + if err != nil { + return + } + } + // string "curIndex" + o = append(o, 0xa8, 0x63, 0x75, 0x72, 0x49, 0x6e, 0x64, 0x65, 0x78) + o = msgp.AppendInt64(o, z.curIndex) + // string "curPosInIndex" + o = append(o, 0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78) + o = msgp.AppendUint32(o, z.curPosInIndex) + // string "curSeq" + o = append(o, 0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71) + o = msgp.AppendInt64(o, z.curSeq) + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *runIterator32) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zqyh uint32 + zqyh, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zqyh > 0 { + zqyh-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "rc": + if msgp.IsNil(bts) { + bts, err = msgp.ReadNilBytes(bts) + if err != nil { + return + } + z.rc = nil + } else { + if z.rc == nil { + z.rc = new(runContainer32) + } + bts, err = z.rc.UnmarshalMsg(bts) + if err != nil { + return + } + } + case "curIndex": + z.curIndex, bts, err = msgp.ReadInt64Bytes(bts) + if err != nil { + return + } + case "curPosInIndex": + z.curPosInIndex, bts, err = msgp.ReadUint32Bytes(bts) + if err != nil { + return + } + case "curSeq": + z.curSeq, bts, err = msgp.ReadInt64Bytes(bts) + if err != nil { + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *runIterator32) Msgsize() (s int) { + s = 1 + 3 + if z.rc == nil { + s += msgp.NilSize + } else { + s += z.rc.Msgsize() + } + s += 9 + msgp.Int64Size + 14 + msgp.Uint32Size + 7 + msgp.Int64Size + return +} + +// DecodeMsg implements msgp.Decodable +func (z *uint32Slice) DecodeMsg(dc *msgp.Reader) (err error) { + var zjpj uint32 + zjpj, err = dc.ReadArrayHeader() + if err != nil { + return + } + if cap((*z)) >= int(zjpj) { + (*z) = (*z)[:zjpj] + } else { + (*z) = make(uint32Slice, zjpj) + } + for zywj := range *z { + (*z)[zywj], err = dc.ReadUint32() + if err != nil { + return + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z uint32Slice) EncodeMsg(en *msgp.Writer) (err error) { + err = en.WriteArrayHeader(uint32(len(z))) + if err != nil { + return + } + for zzpf := range z { + err = en.WriteUint32(z[zzpf]) + if err != nil { + return + } + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z uint32Slice) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + o = msgp.AppendArrayHeader(o, uint32(len(z))) + for zzpf := range z { + o = msgp.AppendUint32(o, z[zzpf]) + } + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *uint32Slice) UnmarshalMsg(bts []byte) (o []byte, err error) { + var zgmo uint32 + zgmo, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + return + } + if cap((*z)) >= int(zgmo) { + (*z) = (*z)[:zgmo] + } else { + (*z) = make(uint32Slice, zgmo) + } + for zrfe := range *z { + (*z)[zrfe], bts, err = msgp.ReadUint32Bytes(bts) + if err != nil { + return + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z uint32Slice) Msgsize() (s int) { + s = msgp.ArrayHeaderSize + (len(z) * (msgp.Uint32Size)) + return +} diff --git a/vendor/github.com/RoaringBitmap/roaring/rlecommon.go b/vendor/github.com/RoaringBitmap/roaring/rlecommon.go new file mode 100644 index 0000000000..133636787a --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/rlecommon.go @@ -0,0 +1,163 @@ +package roaring + +import ( + "fmt" +) + +// common to rle32.go and rle16.go + +// rleVerbose controls whether p() prints show up. +// The testing package sets this based on +// testing.Verbose(). +var rleVerbose bool + +// p is a shorthand for fmt.Printf with beginning and +// trailing newlines. p() makes it easy +// to add diagnostic print statements. +func p(format string, args ...interface{}) { + if rleVerbose { + fmt.Printf("\n"+format+"\n", args...) + } +} + +// MaxUint32 is the largest uint32 value. +const MaxUint32 = 4294967295 + +// MaxUint16 is the largest 16 bit unsigned int. +// This is the largest value an interval16 can store. +const MaxUint16 = 65535 + +// searchOptions allows us to accelerate runContainer32.search with +// prior knowledge of (mostly lower) bounds. This is used by Union +// and Intersect. +type searchOptions struct { + // start here instead of at 0 + startIndex int64 + + // upper bound instead of len(rc.iv); + // endxIndex == 0 means ignore the bound and use + // endxIndex == n ==len(rc.iv) which is also + // naturally the default for search() + // when opt = nil. + endxIndex int64 +} + +// And finds the intersection of rc and b. +func (rc *runContainer32) And(b *Bitmap) *Bitmap { + out := NewBitmap() + for _, p := range rc.iv { + for i := p.start; i <= p.last; i++ { + if b.Contains(i) { + out.Add(i) + } + } + } + return out +} + +// Xor returns the exclusive-or of rc and b. +func (rc *runContainer32) Xor(b *Bitmap) *Bitmap { + out := b.Clone() + for _, p := range rc.iv { + for v := p.start; v <= p.last; v++ { + if out.Contains(v) { + out.RemoveRange(uint64(v), uint64(v+1)) + } else { + out.Add(v) + } + } + } + return out +} + +// Or returns the union of rc and b. +func (rc *runContainer32) Or(b *Bitmap) *Bitmap { + out := b.Clone() + for _, p := range rc.iv { + for v := p.start; v <= p.last; v++ { + out.Add(v) + } + } + return out +} + +// trial is used in the randomized testing of runContainers +type trial struct { + n int + percentFill float64 + ntrial int + + // only in the union test + // only subtract test + percentDelete float64 + + // only in 067 randomized operations + // we do this + 1 passes + numRandomOpsPass int + + // allow sampling range control + // only recent tests respect this. + srang *interval16 +} + +// And finds the intersection of rc and b. +func (rc *runContainer16) And(b *Bitmap) *Bitmap { + out := NewBitmap() + for _, p := range rc.iv { + plast := p.last() + for i := p.start; i <= plast; i++ { + if b.Contains(uint32(i)) { + out.Add(uint32(i)) + } + } + } + return out +} + +// Xor returns the exclusive-or of rc and b. +func (rc *runContainer16) Xor(b *Bitmap) *Bitmap { + out := b.Clone() + for _, p := range rc.iv { + plast := p.last() + for v := p.start; v <= plast; v++ { + w := uint32(v) + if out.Contains(w) { + out.RemoveRange(uint64(w), uint64(w+1)) + } else { + out.Add(w) + } + } + } + return out +} + +// Or returns the union of rc and b. +func (rc *runContainer16) Or(b *Bitmap) *Bitmap { + out := b.Clone() + for _, p := range rc.iv { + plast := p.last() + for v := p.start; v <= plast; v++ { + out.Add(uint32(v)) + } + } + return out +} + +//func (rc *runContainer32) and(container) container { +// panic("TODO. not yet implemented") +//} + +// serializedSizeInBytes returns the number of bytes of memory +// required by this runContainer16. This is for the +// Roaring format, as specified https://github.com/RoaringBitmap/RoaringFormatSpec/ +func (rc *runContainer16) serializedSizeInBytes() int { + // number of runs in one uint16, then each run + // needs two more uint16 + return 2 + len(rc.iv)*4 +} + +// serializedSizeInBytes returns the number of bytes of memory +// required by this runContainer32. +func (rc *runContainer32) serializedSizeInBytes() int { + return 4 + len(rc.iv)*8 +} diff --git a/vendor/github.com/RoaringBitmap/roaring/rlei.go b/vendor/github.com/RoaringBitmap/roaring/rlei.go new file mode 100644 index 0000000000..a15a017e47 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/rlei.go @@ -0,0 +1,695 @@ +package roaring + +/////////////////////////////////////////////////// +// +// container interface methods for runContainer16 +// +/////////////////////////////////////////////////// + +import ( + "fmt" +) + +// compile time verify we meet interface requirements +var _ container = &runContainer16{} + +func (rc *runContainer16) clone() container { + return newRunContainer16CopyIv(rc.iv) +} + +func (rc *runContainer16) minimum() uint16 { + return rc.iv[0].start // assume not empty +} + +func (rc *runContainer16) maximum() uint16 { + return rc.iv[len(rc.iv)-1].last() // assume not empty +} + +func (rc *runContainer16) isFull() bool { + return (len(rc.iv) == 1) && ((rc.iv[0].start == 0) && (rc.iv[0].last() == MaxUint16)) +} + +func (rc *runContainer16) and(a container) container { + if rc.isFull() { + return a.clone() + } + switch c := a.(type) { + case *runContainer16: + return rc.intersect(c) + case *arrayContainer: + return rc.andArray(c) + case *bitmapContainer: + return rc.andBitmapContainer(c) + } + panic("unsupported container type") +} + +func (rc *runContainer16) andCardinality(a container) int { + switch c := a.(type) { + case *runContainer16: + return int(rc.intersectCardinality(c)) + case *arrayContainer: + return rc.andArrayCardinality(c) + case *bitmapContainer: + return rc.andBitmapContainerCardinality(c) + } + panic("unsupported container type") +} + +// andBitmapContainer finds the intersection of rc and b. +func (rc *runContainer16) andBitmapContainer(bc *bitmapContainer) container { + bc2 := newBitmapContainerFromRun(rc) + return bc2.andBitmap(bc) +} + +func (rc *runContainer16) andArrayCardinality(ac *arrayContainer) int { + pos := 0 + answer := 0 + maxpos := ac.getCardinality() + if maxpos == 0 { + return 0 // won't happen in actual code + } + v := ac.content[pos] +mainloop: + for _, p := range rc.iv { + for v < p.start { + pos++ + if pos == maxpos { + break mainloop + } + v = ac.content[pos] + } + for v <= p.last() { + answer++ + pos++ + if pos == maxpos { + break mainloop + } + v = ac.content[pos] + } + } + return answer +} + +func (rc *runContainer16) iand(a container) container { + if rc.isFull() { + return a.clone() + } + switch c := a.(type) { + case *runContainer16: + return rc.inplaceIntersect(c) + case *arrayContainer: + return rc.andArray(c) + case *bitmapContainer: + return rc.iandBitmapContainer(c) + } + panic("unsupported container type") +} + +func (rc *runContainer16) inplaceIntersect(rc2 *runContainer16) container { + // TODO: optimize by doing less allocation, possibly? + + // sect will be new + sect := rc.intersect(rc2) + *rc = *sect + return rc +} + +func (rc *runContainer16) iandBitmapContainer(bc *bitmapContainer) container { + isect := rc.andBitmapContainer(bc) + *rc = *newRunContainer16FromContainer(isect) + return rc +} + +func (rc *runContainer16) andArray(ac *arrayContainer) container { + if len(rc.iv) == 0 { + return newArrayContainer() + } + + acCardinality := ac.getCardinality() + c := newArrayContainerCapacity(acCardinality) + + for rlePos, arrayPos := 0, 0; arrayPos < acCardinality; { + iv := rc.iv[rlePos] + arrayVal := ac.content[arrayPos] + + for iv.last() < arrayVal { + rlePos++ + if rlePos == len(rc.iv) { + return c + } + iv = rc.iv[rlePos] + } + + if iv.start > arrayVal { + arrayPos = advanceUntil(ac.content, arrayPos, len(ac.content), iv.start) + } else { + c.content = append(c.content, arrayVal) + arrayPos++ + } + } + return c +} + +func (rc *runContainer16) andNot(a container) container { + switch c := a.(type) { + case *arrayContainer: + return rc.andNotArray(c) + case *bitmapContainer: + return rc.andNotBitmap(c) + case *runContainer16: + return rc.andNotRunContainer16(c) + } + panic("unsupported container type") +} + +func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uint32) { + k := 0 + var val int64 + for _, p := range rc.iv { + n := p.runlen() + for j := int64(0); j < n; j++ { + val = int64(p.start) + j + x[k+i] = uint32(val) | mask + k++ + } + } +} + +func (rc *runContainer16) getShortIterator() shortIterable { + return rc.newRunIterator16() +} + +func (rc *runContainer16) getManyIterator() manyIterable { + return rc.newManyRunIterator16() +} + +// add the values in the range [firstOfRange, endx). endx +// is still abe to express 2^16 because it is an int not an uint16. +func (rc *runContainer16) iaddRange(firstOfRange, endx int) container { + + if firstOfRange >= endx { + panic(fmt.Sprintf("invalid %v = endx >= firstOfRange", endx)) + } + addme := newRunContainer16TakeOwnership([]interval16{ + { + start: uint16(firstOfRange), + length: uint16(endx - 1 - firstOfRange), + }, + }) + *rc = *rc.union(addme) + return rc +} + +// remove the values in the range [firstOfRange,endx) +func (rc *runContainer16) iremoveRange(firstOfRange, endx int) container { + if firstOfRange >= endx { + panic(fmt.Sprintf("request to iremove empty set [%v, %v),"+ + " nothing to do.", firstOfRange, endx)) + //return rc + } + x := newInterval16Range(uint16(firstOfRange), uint16(endx-1)) + rc.isubtract(x) + return rc +} + +// not flip the values in the range [firstOfRange,endx) +func (rc *runContainer16) not(firstOfRange, endx int) container { + if firstOfRange >= endx { + panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange)) + } + + return rc.Not(firstOfRange, endx) +} + +// Not flips the values in the range [firstOfRange,endx). +// This is not inplace. Only the returned value has the flipped bits. +// +// Currently implemented as (!A intersect B) union (A minus B), +// where A is rc, and B is the supplied [firstOfRange, endx) interval. +// +// TODO(time optimization): convert this to a single pass +// algorithm by copying AndNotRunContainer16() and modifying it. +// Current routine is correct but +// makes 2 more passes through the arrays than should be +// strictly necessary. Measure both ways though--this may not matter. +// +func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 { + + if firstOfRange >= endx { + panic(fmt.Sprintf("invalid %v = endx >= firstOfRange == %v", endx, firstOfRange)) + } + + if firstOfRange >= endx { + return rc.Clone() + } + + a := rc + // algo: + // (!A intersect B) union (A minus B) + + nota := a.invert() + + bs := []interval16{newInterval16Range(uint16(firstOfRange), uint16(endx-1))} + b := newRunContainer16TakeOwnership(bs) + + notAintersectB := nota.intersect(b) + + aMinusB := a.AndNotRunContainer16(b) + + rc2 := notAintersectB.union(aMinusB) + return rc2 +} + +// equals is now logical equals; it does not require the +// same underlying container type. +func (rc *runContainer16) equals(o container) bool { + srb, ok := o.(*runContainer16) + + if !ok { + // maybe value instead of pointer + val, valok := o.(*runContainer16) + if valok { + srb = val + ok = true + } + } + if ok { + // Check if the containers are the same object. + if rc == srb { + return true + } + + if len(srb.iv) != len(rc.iv) { + return false + } + + for i, v := range rc.iv { + if v != srb.iv[i] { + return false + } + } + return true + } + + // use generic comparison + if o.getCardinality() != rc.getCardinality() { + return false + } + rit := rc.getShortIterator() + bit := o.getShortIterator() + + //k := 0 + for rit.hasNext() { + if bit.next() != rit.next() { + return false + } + //k++ + } + return true +} + +func (rc *runContainer16) iaddReturnMinimized(x uint16) container { + rc.Add(x) + return rc +} + +func (rc *runContainer16) iadd(x uint16) (wasNew bool) { + return rc.Add(x) +} + +func (rc *runContainer16) iremoveReturnMinimized(x uint16) container { + rc.removeKey(x) + return rc +} + +func (rc *runContainer16) iremove(x uint16) bool { + return rc.removeKey(x) +} + +func (rc *runContainer16) or(a container) container { + if rc.isFull() { + return rc.clone() + } + switch c := a.(type) { + case *runContainer16: + return rc.union(c) + case *arrayContainer: + return rc.orArray(c) + case *bitmapContainer: + return rc.orBitmapContainer(c) + } + panic("unsupported container type") +} + +func (rc *runContainer16) orCardinality(a container) int { + switch c := a.(type) { + case *runContainer16: + return int(rc.unionCardinality(c)) + case *arrayContainer: + return rc.orArrayCardinality(c) + case *bitmapContainer: + return rc.orBitmapContainerCardinality(c) + } + panic("unsupported container type") +} + +// orBitmapContainer finds the union of rc and bc. +func (rc *runContainer16) orBitmapContainer(bc *bitmapContainer) container { + bc2 := newBitmapContainerFromRun(rc) + return bc2.iorBitmap(bc) +} + +func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int { + answer := 0 + for i := range rc.iv { + answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1) + } + //bc.computeCardinality() + return answer +} + +func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int { + return rc.getCardinality() + bc.getCardinality() - rc.andBitmapContainerCardinality(bc) +} + +// orArray finds the union of rc and ac. +func (rc *runContainer16) orArray(ac *arrayContainer) container { + bc1 := newBitmapContainerFromRun(rc) + bc2 := ac.toBitmapContainer() + return bc1.orBitmap(bc2) +} + +// orArray finds the union of rc and ac. +func (rc *runContainer16) orArrayCardinality(ac *arrayContainer) int { + return ac.getCardinality() + rc.getCardinality() - rc.andArrayCardinality(ac) +} + +func (rc *runContainer16) ior(a container) container { + if rc.isFull() { + return rc + } + switch c := a.(type) { + case *runContainer16: + return rc.inplaceUnion(c) + case *arrayContainer: + return rc.iorArray(c) + case *bitmapContainer: + return rc.iorBitmapContainer(c) + } + panic("unsupported container type") +} + +func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container { + p("rc.inplaceUnion with len(rc2.iv)=%v", len(rc2.iv)) + for _, p := range rc2.iv { + last := int64(p.last()) + for i := int64(p.start); i <= last; i++ { + rc.Add(uint16(i)) + } + } + return rc +} + +func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container { + + it := bc.getShortIterator() + for it.hasNext() { + rc.Add(it.next()) + } + return rc +} + +func (rc *runContainer16) iorArray(ac *arrayContainer) container { + it := ac.getShortIterator() + for it.hasNext() { + rc.Add(it.next()) + } + return rc +} + +// lazyIOR is described (not yet implemented) in +// this nice note from @lemire on +// https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737 +// +// Description of lazyOR and lazyIOR from @lemire: +// +// Lazy functions are optional and can be simply +// wrapper around non-lazy functions. +// +// The idea of "laziness" is as follows. It is +// inspired by the concept of lazy evaluation +// you might be familiar with (functional programming +// and all that). So a roaring bitmap is +// such that all its containers are, in some +// sense, chosen to use as little memory as +// possible. This is nice. Also, all bitsets +// are "cardinality aware" so that you can do +// fast rank/select queries, or query the +// cardinality of the whole bitmap... very fast, +// without latency. +// +// However, imagine that you are aggregating 100 +// bitmaps together. So you OR the first two, then OR +// that with the third one and so forth. Clearly, +// intermediate bitmaps don't need to be as +// compressed as possible, right? They can be +// in a "dirty state". You only need the end +// result to be in a nice state... which you +// can achieve by calling repairAfterLazy at the end. +// +// The Java/C code does something special for +// the in-place lazy OR runs. The idea is that +// instead of taking two run containers and +// generating a new one, we actually try to +// do the computation in-place through a +// technique invented by @gssiyankai (pinging him!). +// What you do is you check whether the host +// run container has lots of extra capacity. +// If it does, you move its data at the end of +// the backing array, and then you write +// the answer at the beginning. What this +// trick does is minimize memory allocations. +// +func (rc *runContainer16) lazyIOR(a container) container { + // not lazy at the moment + // TODO: make it lazy + return rc.ior(a) + + /* + switch c := a.(type) { + case *arrayContainer: + return rc.lazyIorArray(c) + case *bitmapContainer: + return rc.lazyIorBitmap(c) + case *runContainer16: + return rc.lazyIorRun16(c) + } + panic("unsupported container type") + */ +} + +// lazyOR is described above in lazyIOR. +func (rc *runContainer16) lazyOR(a container) container { + + // not lazy at the moment + // TODO: make it lazy + return rc.or(a) + + /* + switch c := a.(type) { + case *arrayContainer: + return rc.lazyOrArray(c) + case *bitmapContainer: + return rc.lazyOrBitmap(c) + case *runContainer16: + return rc.lazyOrRunContainer16(c) + } + panic("unsupported container type") + */ +} + +func (rc *runContainer16) intersects(a container) bool { + // TODO: optimize by doing inplace/less allocation, possibly? + isect := rc.and(a) + return isect.getCardinality() > 0 +} + +func (rc *runContainer16) xor(a container) container { + switch c := a.(type) { + case *arrayContainer: + return rc.xorArray(c) + case *bitmapContainer: + return rc.xorBitmap(c) + case *runContainer16: + return rc.xorRunContainer16(c) + } + panic("unsupported container type") +} + +func (rc *runContainer16) iandNot(a container) container { + switch c := a.(type) { + case *arrayContainer: + return rc.iandNotArray(c) + case *bitmapContainer: + return rc.iandNotBitmap(c) + case *runContainer16: + return rc.iandNotRunContainer16(c) + } + panic("unsupported container type") +} + +// flip the values in the range [firstOfRange,endx) +func (rc *runContainer16) inot(firstOfRange, endx int) container { + if firstOfRange >= endx { + panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange)) + } + // TODO: minimize copies, do it all inplace; not() makes a copy. + rc = rc.Not(firstOfRange, endx) + return rc +} + +func (rc *runContainer16) getCardinality() int { + return int(rc.cardinality()) +} + +func (rc *runContainer16) rank(x uint16) int { + n := int64(len(rc.iv)) + xx := int64(x) + w, already, _ := rc.search(xx, nil) + if w < 0 { + return 0 + } + if !already && w == n-1 { + return rc.getCardinality() + } + var rnk int64 + if !already { + for i := int64(0); i <= w; i++ { + rnk += rc.iv[i].runlen() + } + return int(rnk) + } + for i := int64(0); i < w; i++ { + rnk += rc.iv[i].runlen() + } + rnk += int64(x-rc.iv[w].start) + 1 + return int(rnk) +} + +func (rc *runContainer16) selectInt(x uint16) int { + return rc.selectInt16(x) +} + +func (rc *runContainer16) andNotRunContainer16(b *runContainer16) container { + return rc.AndNotRunContainer16(b) +} + +func (rc *runContainer16) andNotArray(ac *arrayContainer) container { + rcb := rc.toBitmapContainer() + acb := ac.toBitmapContainer() + return rcb.andNotBitmap(acb) +} + +func (rc *runContainer16) andNotBitmap(bc *bitmapContainer) container { + rcb := rc.toBitmapContainer() + return rcb.andNotBitmap(bc) +} + +func (rc *runContainer16) toBitmapContainer() *bitmapContainer { + p("run16 toBitmap starting; rc has %v ranges", len(rc.iv)) + bc := newBitmapContainer() + for i := range rc.iv { + bc.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1) + } + bc.computeCardinality() + return bc +} + +func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container { + rcb := rc.toBitmapContainer() + x2b := x2.toBitmapContainer() + rcb.iandNotBitmapSurely(x2b) + // TODO: check size and optimize the return value + // TODO: is inplace modification really required? If not, elide the copy. + rc2 := newRunContainer16FromBitmapContainer(rcb) + *rc = *rc2 + return rc +} + +func (rc *runContainer16) iandNotArray(ac *arrayContainer) container { + rcb := rc.toBitmapContainer() + acb := ac.toBitmapContainer() + rcb.iandNotBitmapSurely(acb) + // TODO: check size and optimize the return value + // TODO: is inplace modification really required? If not, elide the copy. + rc2 := newRunContainer16FromBitmapContainer(rcb) + *rc = *rc2 + return rc +} + +func (rc *runContainer16) iandNotBitmap(bc *bitmapContainer) container { + rcb := rc.toBitmapContainer() + rcb.iandNotBitmapSurely(bc) + // TODO: check size and optimize the return value + // TODO: is inplace modification really required? If not, elide the copy. + rc2 := newRunContainer16FromBitmapContainer(rcb) + *rc = *rc2 + return rc +} + +func (rc *runContainer16) xorRunContainer16(x2 *runContainer16) container { + rcb := rc.toBitmapContainer() + x2b := x2.toBitmapContainer() + return rcb.xorBitmap(x2b) +} + +func (rc *runContainer16) xorArray(ac *arrayContainer) container { + rcb := rc.toBitmapContainer() + acb := ac.toBitmapContainer() + return rcb.xorBitmap(acb) +} + +func (rc *runContainer16) xorBitmap(bc *bitmapContainer) container { + rcb := rc.toBitmapContainer() + return rcb.xorBitmap(bc) +} + +// convert to bitmap or array *if needed* +func (rc *runContainer16) toEfficientContainer() container { + + // runContainer16SerializedSizeInBytes(numRuns) + sizeAsRunContainer := rc.getSizeInBytes() + sizeAsBitmapContainer := bitmapContainerSizeInBytes() + card := int(rc.cardinality()) + sizeAsArrayContainer := arrayContainerSizeInBytes(card) + if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) { + return rc + } + if card <= arrayDefaultMaxSize { + return rc.toArrayContainer() + } + bc := newBitmapContainerFromRun(rc) + return bc +} + +func (rc *runContainer16) toArrayContainer() *arrayContainer { + ac := newArrayContainer() + for i := range rc.iv { + ac.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1) + } + return ac +} + +func newRunContainer16FromContainer(c container) *runContainer16 { + + switch x := c.(type) { + case *runContainer16: + return x.Clone() + case *arrayContainer: + return newRunContainer16FromArray(x) + case *bitmapContainer: + return newRunContainer16FromBitmapContainer(x) + } + panic("unsupported container type") +} diff --git a/vendor/github.com/RoaringBitmap/roaring/roaring.go b/vendor/github.com/RoaringBitmap/roaring/roaring.go new file mode 100644 index 0000000000..5045a41933 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/roaring.go @@ -0,0 +1,1345 @@ +// Package roaring is an implementation of Roaring Bitmaps in Go. +// They provide fast compressed bitmap data structures (also called bitset). +// They are ideally suited to represent sets of integers over +// relatively small ranges. +// See http://roaringbitmap.org for details. +package roaring + +import ( + "bufio" + "bytes" + "encoding/base64" + "fmt" + "io" + "strconv" +) + +// Bitmap represents a compressed bitmap where you can add integers. +type Bitmap struct { + highlowcontainer roaringArray +} + +// ToBase64 serializes a bitmap as Base64 +func (rb *Bitmap) ToBase64() (string, error) { + buf := new(bytes.Buffer) + _, err := rb.WriteTo(buf) + return base64.StdEncoding.EncodeToString(buf.Bytes()), err + +} + +// FromBase64 deserializes a bitmap from Base64 +func (rb *Bitmap) FromBase64(str string) (int64, error) { + data, err := base64.StdEncoding.DecodeString(str) + if err != nil { + return 0, err + } + buf := bytes.NewBuffer(data) + + return rb.ReadFrom(buf) +} + +// WriteTo writes a serialized version of this bitmap to stream. +// The format is compatible with other RoaringBitmap +// implementations (Java, C) and is documented here: +// https://github.com/RoaringBitmap/RoaringFormatSpec +func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) { + return rb.highlowcontainer.writeTo(stream) +} + +// ToBytes returns an array of bytes corresponding to what is written +// when calling WriteTo +func (rb *Bitmap) ToBytes() ([]byte, error) { + return rb.highlowcontainer.toBytes() +} + +// WriteToMsgpack writes a msgpack2/snappy-streaming compressed serialized +// version of this bitmap to stream. The format is not +// compatible with the WriteTo() format, and is +// experimental: it may produce smaller on disk +// footprint and/or be faster to read, depending +// on your content. Currently only the Go roaring +// implementation supports this format. +func (rb *Bitmap) WriteToMsgpack(stream io.Writer) (int64, error) { + return 0, rb.highlowcontainer.writeToMsgpack(stream) +} + +// ReadFrom reads a serialized version of this bitmap from stream. +// The format is compatible with other RoaringBitmap +// implementations (Java, C) and is documented here: +// https://github.com/RoaringBitmap/RoaringFormatSpec +func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) { + return rb.highlowcontainer.readFrom(stream) +} + +// FromBuffer creates a bitmap from its serialized version stored in buffer +// +// The format specification is available here: +// https://github.com/RoaringBitmap/RoaringFormatSpec +// +// The provided byte array (buf) is expected to be a constant. +// The function makes the best effort attempt not to copy data. +// You should take care not to modify buff as it will +// likely result in unexpected program behavior. +// +// Resulting bitmaps are effectively immutable in the following sense: +// a copy-on-write marker is used so that when you modify the resulting +// bitmap, copies of selected data (containers) are made. +// You should *not* change the copy-on-write status of the resulting +// bitmaps (SetCopyOnWrite). +// +func (rb *Bitmap) FromBuffer(buf []byte) (int64, error) { + return rb.highlowcontainer.fromBuffer(buf) +} + +// RunOptimize attempts to further compress the runs of consecutive values found in the bitmap +func (rb *Bitmap) RunOptimize() { + rb.highlowcontainer.runOptimize() +} + +// HasRunCompression returns true if the bitmap benefits from run compression +func (rb *Bitmap) HasRunCompression() bool { + return rb.highlowcontainer.hasRunCompression() +} + +// ReadFromMsgpack reads a msgpack2/snappy-streaming serialized +// version of this bitmap from stream. The format is +// expected is that written by the WriteToMsgpack() +// call; see additional notes there. +func (rb *Bitmap) ReadFromMsgpack(stream io.Reader) (int64, error) { + return 0, rb.highlowcontainer.readFromMsgpack(stream) +} + +// MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap +func (rb *Bitmap) MarshalBinary() ([]byte, error) { + var buf bytes.Buffer + writer := bufio.NewWriter(&buf) + _, err := rb.WriteTo(writer) + if err != nil { + return nil, err + } + err = writer.Flush() + if err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface for the bitmap +func (rb *Bitmap) UnmarshalBinary(data []byte) error { + var buf bytes.Buffer + _, err := buf.Write(data) + if err != nil { + return err + } + reader := bufio.NewReader(&buf) + _, err = rb.ReadFrom(reader) + return err +} + +// NewBitmap creates a new empty Bitmap (see also New) +func NewBitmap() *Bitmap { + return &Bitmap{} +} + +// New creates a new empty Bitmap (same as NewBitmap) +func New() *Bitmap { + return &Bitmap{} +} + +// Clear resets the Bitmap to be logically empty, but may retain +// some memory allocations that may speed up future operations +func (rb *Bitmap) Clear() { + rb.highlowcontainer.clear() +} + +// ToArray creates a new slice containing all of the integers stored in the Bitmap in sorted order +func (rb *Bitmap) ToArray() []uint32 { + array := make([]uint32, rb.GetCardinality()) + pos := 0 + pos2 := 0 + + for pos < rb.highlowcontainer.size() { + hs := uint32(rb.highlowcontainer.getKeyAtIndex(pos)) << 16 + c := rb.highlowcontainer.getContainerAtIndex(pos) + pos++ + c.fillLeastSignificant16bits(array, pos2, hs) + pos2 += c.getCardinality() + } + return array +} + +// GetSizeInBytes estimates the memory usage of the Bitmap. Note that this +// might differ slightly from the amount of bytes required for persistent storage +func (rb *Bitmap) GetSizeInBytes() uint64 { + size := uint64(8) + for _, c := range rb.highlowcontainer.containers { + size += uint64(2) + uint64(c.getSizeInBytes()) + } + return size +} + +// GetSerializedSizeInBytes computes the serialized size in bytes +// of the Bitmap. It should correspond to the +// number of bytes written when invoking WriteTo. You can expect +// that this function is much cheaper computationally than WriteTo. +func (rb *Bitmap) GetSerializedSizeInBytes() uint64 { + return rb.highlowcontainer.serializedSizeInBytes() +} + +// BoundSerializedSizeInBytes returns an upper bound on the serialized size in bytes +// assuming that one wants to store "cardinality" integers in [0, universe_size) +func BoundSerializedSizeInBytes(cardinality uint64, universeSize uint64) uint64 { + contnbr := (universeSize + uint64(65535)) / uint64(65536) + if contnbr > cardinality { + contnbr = cardinality + // we can't have more containers than we have values + } + headermax := 8*contnbr + 4 + if 4 > (contnbr+7)/8 { + headermax += 4 + } else { + headermax += (contnbr + 7) / 8 + } + valsarray := uint64(arrayContainerSizeInBytes(int(cardinality))) + valsbitmap := contnbr * uint64(bitmapContainerSizeInBytes()) + valsbest := valsarray + if valsbest > valsbitmap { + valsbest = valsbitmap + } + return valsbest + headermax +} + +// IntIterable allows you to iterate over the values in a Bitmap +type IntIterable interface { + HasNext() bool + Next() uint32 +} + +type intIterator struct { + pos int + hs uint32 + iter shortIterable + highlowcontainer *roaringArray +} + +// HasNext returns true if there are more integers to iterate over +func (ii *intIterator) HasNext() bool { + return ii.pos < ii.highlowcontainer.size() +} + +func (ii *intIterator) init() { + if ii.highlowcontainer.size() > ii.pos { + ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getShortIterator() + ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16 + } +} + +// Next returns the next integer +func (ii *intIterator) Next() uint32 { + x := uint32(ii.iter.next()) | ii.hs + if !ii.iter.hasNext() { + ii.pos = ii.pos + 1 + ii.init() + } + return x +} + +func newIntIterator(a *Bitmap) *intIterator { + p := new(intIterator) + p.pos = 0 + p.highlowcontainer = &a.highlowcontainer + p.init() + return p +} + +// ManyIntIterable allows you to iterate over the values in a Bitmap +type ManyIntIterable interface { + // pass in a buffer to fill up with values, returns how many values were returned + NextMany([]uint32) int +} + +type manyIntIterator struct { + pos int + hs uint32 + iter manyIterable + highlowcontainer *roaringArray +} + +func (ii *manyIntIterator) init() { + if ii.highlowcontainer.size() > ii.pos { + ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getManyIterator() + ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16 + } else { + ii.iter = nil + } +} + +func (ii *manyIntIterator) NextMany(buf []uint32) int { + n := 0 + for n < len(buf) { + if ii.iter == nil { + break + } + moreN := ii.iter.nextMany(ii.hs, buf[n:]) + n += moreN + if moreN == 0 { + ii.pos = ii.pos + 1 + ii.init() + } + } + + return n +} + +func newManyIntIterator(a *Bitmap) *manyIntIterator { + p := new(manyIntIterator) + p.pos = 0 + p.highlowcontainer = &a.highlowcontainer + p.init() + return p +} + +// String creates a string representation of the Bitmap +func (rb *Bitmap) String() string { + // inspired by https://github.com/fzandona/goroar/ + var buffer bytes.Buffer + start := []byte("{") + buffer.Write(start) + i := rb.Iterator() + counter := 0 + if i.HasNext() { + counter = counter + 1 + buffer.WriteString(strconv.FormatInt(int64(i.Next()), 10)) + } + for i.HasNext() { + buffer.WriteString(",") + counter = counter + 1 + // to avoid exhausting the memory + if counter > 0x40000 { + buffer.WriteString("...") + break + } + buffer.WriteString(strconv.FormatInt(int64(i.Next()), 10)) + } + buffer.WriteString("}") + return buffer.String() +} + +// Iterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order +func (rb *Bitmap) Iterator() IntIterable { + return newIntIterator(rb) +} + +// Iterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order +func (rb *Bitmap) ManyIterator() ManyIntIterable { + return newManyIntIterator(rb) +} + +// Clone creates a copy of the Bitmap +func (rb *Bitmap) Clone() *Bitmap { + ptr := new(Bitmap) + ptr.highlowcontainer = *rb.highlowcontainer.clone() + return ptr +} + +// Minimum get the smallest value stored in this roaring bitmap, assumes that it is not empty +func (rb *Bitmap) Minimum() uint32 { + return uint32(rb.highlowcontainer.containers[0].minimum()) | (uint32(rb.highlowcontainer.keys[0]) << 16) +} + +// Maximum get the largest value stored in this roaring bitmap, assumes that it is not empty +func (rb *Bitmap) Maximum() uint32 { + lastindex := len(rb.highlowcontainer.containers) - 1 + return uint32(rb.highlowcontainer.containers[lastindex].maximum()) | (uint32(rb.highlowcontainer.keys[lastindex]) << 16) +} + +// Contains returns true if the integer is contained in the bitmap +func (rb *Bitmap) Contains(x uint32) bool { + hb := highbits(x) + c := rb.highlowcontainer.getContainer(hb) + return c != nil && c.contains(lowbits(x)) +} + +// ContainsInt returns true if the integer is contained in the bitmap (this is a convenience method, the parameter is casted to uint32 and Contains is called) +func (rb *Bitmap) ContainsInt(x int) bool { + return rb.Contains(uint32(x)) +} + +// Equals returns true if the two bitmaps contain the same integers +func (rb *Bitmap) Equals(o interface{}) bool { + srb, ok := o.(*Bitmap) + if ok { + return srb.highlowcontainer.equals(rb.highlowcontainer) + } + return false +} + +// Add the integer x to the bitmap +func (rb *Bitmap) Add(x uint32) { + hb := highbits(x) + ra := &rb.highlowcontainer + i := ra.getIndex(hb) + if i >= 0 { + var c container + c = ra.getWritableContainerAtIndex(i).iaddReturnMinimized(lowbits(x)) + rb.highlowcontainer.setContainerAtIndex(i, c) + } else { + newac := newArrayContainer() + rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x))) + } +} + +// add the integer x to the bitmap, return the container and its index +func (rb *Bitmap) addwithptr(x uint32) (int, container) { + hb := highbits(x) + ra := &rb.highlowcontainer + i := ra.getIndex(hb) + var c container + if i >= 0 { + c = ra.getWritableContainerAtIndex(i).iaddReturnMinimized(lowbits(x)) + rb.highlowcontainer.setContainerAtIndex(i, c) + return i, c + } + newac := newArrayContainer() + c = newac.iaddReturnMinimized(lowbits(x)) + rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, c) + return -i - 1, c +} + +// CheckedAdd adds the integer x to the bitmap and return true if it was added (false if the integer was already present) +func (rb *Bitmap) CheckedAdd(x uint32) bool { + // TODO: add unit tests for this method + hb := highbits(x) + i := rb.highlowcontainer.getIndex(hb) + if i >= 0 { + C := rb.highlowcontainer.getWritableContainerAtIndex(i) + oldcard := C.getCardinality() + C = C.iaddReturnMinimized(lowbits(x)) + rb.highlowcontainer.setContainerAtIndex(i, C) + return C.getCardinality() > oldcard + } + newac := newArrayContainer() + rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x))) + return true + +} + +// AddInt adds the integer x to the bitmap (convenience method: the parameter is casted to uint32 and we call Add) +func (rb *Bitmap) AddInt(x int) { + rb.Add(uint32(x)) +} + +// Remove the integer x from the bitmap +func (rb *Bitmap) Remove(x uint32) { + hb := highbits(x) + i := rb.highlowcontainer.getIndex(hb) + if i >= 0 { + c := rb.highlowcontainer.getWritableContainerAtIndex(i).iremoveReturnMinimized(lowbits(x)) + rb.highlowcontainer.setContainerAtIndex(i, c) + if rb.highlowcontainer.getContainerAtIndex(i).getCardinality() == 0 { + rb.highlowcontainer.removeAtIndex(i) + } + } +} + +// CheckedRemove removes the integer x from the bitmap and return true if the integer was effectively remove (and false if the integer was not present) +func (rb *Bitmap) CheckedRemove(x uint32) bool { + // TODO: add unit tests for this method + hb := highbits(x) + i := rb.highlowcontainer.getIndex(hb) + if i >= 0 { + C := rb.highlowcontainer.getWritableContainerAtIndex(i) + oldcard := C.getCardinality() + C = C.iremoveReturnMinimized(lowbits(x)) + rb.highlowcontainer.setContainerAtIndex(i, C) + if rb.highlowcontainer.getContainerAtIndex(i).getCardinality() == 0 { + rb.highlowcontainer.removeAtIndex(i) + return true + } + return C.getCardinality() < oldcard + } + return false + +} + +// IsEmpty returns true if the Bitmap is empty (it is faster than doing (GetCardinality() == 0)) +func (rb *Bitmap) IsEmpty() bool { + return rb.highlowcontainer.size() == 0 +} + +// GetCardinality returns the number of integers contained in the bitmap +func (rb *Bitmap) GetCardinality() uint64 { + size := uint64(0) + for _, c := range rb.highlowcontainer.containers { + size += uint64(c.getCardinality()) + } + return size +} + +// Rank returns the number of integers that are smaller or equal to x (Rank(infinity) would be GetCardinality()) +func (rb *Bitmap) Rank(x uint32) uint64 { + size := uint64(0) + for i := 0; i < rb.highlowcontainer.size(); i++ { + key := rb.highlowcontainer.getKeyAtIndex(i) + if key > highbits(x) { + return size + } + if key < highbits(x) { + size += uint64(rb.highlowcontainer.getContainerAtIndex(i).getCardinality()) + } else { + return size + uint64(rb.highlowcontainer.getContainerAtIndex(i).rank(lowbits(x))) + } + } + return size +} + +// Select returns the xth integer in the bitmap +func (rb *Bitmap) Select(x uint32) (uint32, error) { + if rb.GetCardinality() <= uint64(x) { + return 0, fmt.Errorf("can't find %dth integer in a bitmap with only %d items", x, rb.GetCardinality()) + } + + remaining := x + for i := 0; i < rb.highlowcontainer.size(); i++ { + c := rb.highlowcontainer.getContainerAtIndex(i) + if remaining >= uint32(c.getCardinality()) { + remaining -= uint32(c.getCardinality()) + } else { + key := rb.highlowcontainer.getKeyAtIndex(i) + return uint32(key)<<16 + uint32(c.selectInt(uint16(remaining))), nil + } + } + return 0, fmt.Errorf("can't find %dth integer in a bitmap with only %d items", x, rb.GetCardinality()) +} + +// And computes the intersection between two bitmaps and stores the result in the current bitmap +func (rb *Bitmap) And(x2 *Bitmap) { + pos1 := 0 + pos2 := 0 + intersectionsize := 0 + length1 := rb.highlowcontainer.size() + length2 := x2.highlowcontainer.size() + +main: + for { + if pos1 < length1 && pos2 < length2 { + s1 := rb.highlowcontainer.getKeyAtIndex(pos1) + s2 := x2.highlowcontainer.getKeyAtIndex(pos2) + for { + if s1 == s2 { + c1 := rb.highlowcontainer.getWritableContainerAtIndex(pos1) + c2 := x2.highlowcontainer.getContainerAtIndex(pos2) + diff := c1.iand(c2) + if diff.getCardinality() > 0 { + rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, diff, false) + intersectionsize++ + } + pos1++ + pos2++ + if (pos1 == length1) || (pos2 == length2) { + break main + } + s1 = rb.highlowcontainer.getKeyAtIndex(pos1) + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } else if s1 < s2 { + pos1 = rb.highlowcontainer.advanceUntil(s2, pos1) + if pos1 == length1 { + break main + } + s1 = rb.highlowcontainer.getKeyAtIndex(pos1) + } else { //s1 > s2 + pos2 = x2.highlowcontainer.advanceUntil(s1, pos2) + if pos2 == length2 { + break main + } + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } + } + } else { + break + } + } + rb.highlowcontainer.resize(intersectionsize) +} + +// OrCardinality returns the cardinality of the union between two bitmaps, bitmaps are not modified +func (rb *Bitmap) OrCardinality(x2 *Bitmap) uint64 { + pos1 := 0 + pos2 := 0 + length1 := rb.highlowcontainer.size() + length2 := x2.highlowcontainer.size() + answer := uint64(0) +main: + for { + if (pos1 < length1) && (pos2 < length2) { + s1 := rb.highlowcontainer.getKeyAtIndex(pos1) + s2 := x2.highlowcontainer.getKeyAtIndex(pos2) + + for { + if s1 < s2 { + answer += uint64(rb.highlowcontainer.getContainerAtIndex(pos1).getCardinality()) + pos1++ + if pos1 == length1 { + break main + } + s1 = rb.highlowcontainer.getKeyAtIndex(pos1) + } else if s1 > s2 { + answer += uint64(x2.highlowcontainer.getContainerAtIndex(pos2).getCardinality()) + pos2++ + if pos2 == length2 { + break main + } + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } else { + // TODO: could be faster if we did not have to materialize the container + answer += uint64(rb.highlowcontainer.getContainerAtIndex(pos1).or(x2.highlowcontainer.getContainerAtIndex(pos2)).getCardinality()) + pos1++ + pos2++ + if (pos1 == length1) || (pos2 == length2) { + break main + } + s1 = rb.highlowcontainer.getKeyAtIndex(pos1) + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } + } + } else { + break + } + } + for ; pos1 < length1; pos1++ { + answer += uint64(rb.highlowcontainer.getContainerAtIndex(pos1).getCardinality()) + } + for ; pos2 < length2; pos2++ { + answer += uint64(x2.highlowcontainer.getContainerAtIndex(pos2).getCardinality()) + } + return answer +} + +// AndCardinality returns the cardinality of the intersection between two bitmaps, bitmaps are not modified +func (rb *Bitmap) AndCardinality(x2 *Bitmap) uint64 { + pos1 := 0 + pos2 := 0 + answer := uint64(0) + length1 := rb.highlowcontainer.size() + length2 := x2.highlowcontainer.size() + +main: + for { + if pos1 < length1 && pos2 < length2 { + s1 := rb.highlowcontainer.getKeyAtIndex(pos1) + s2 := x2.highlowcontainer.getKeyAtIndex(pos2) + for { + if s1 == s2 { + c1 := rb.highlowcontainer.getContainerAtIndex(pos1) + c2 := x2.highlowcontainer.getContainerAtIndex(pos2) + answer += uint64(c1.andCardinality(c2)) + pos1++ + pos2++ + if (pos1 == length1) || (pos2 == length2) { + break main + } + s1 = rb.highlowcontainer.getKeyAtIndex(pos1) + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } else if s1 < s2 { + pos1 = rb.highlowcontainer.advanceUntil(s2, pos1) + if pos1 == length1 { + break main + } + s1 = rb.highlowcontainer.getKeyAtIndex(pos1) + } else { //s1 > s2 + pos2 = x2.highlowcontainer.advanceUntil(s1, pos2) + if pos2 == length2 { + break main + } + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } + } + } else { + break + } + } + return answer +} + +// Intersects checks whether two bitmap intersects, bitmaps are not modified +func (rb *Bitmap) Intersects(x2 *Bitmap) bool { + pos1 := 0 + pos2 := 0 + length1 := rb.highlowcontainer.size() + length2 := x2.highlowcontainer.size() + +main: + for { + if pos1 < length1 && pos2 < length2 { + s1 := rb.highlowcontainer.getKeyAtIndex(pos1) + s2 := x2.highlowcontainer.getKeyAtIndex(pos2) + for { + if s1 == s2 { + c1 := rb.highlowcontainer.getContainerAtIndex(pos1) + c2 := x2.highlowcontainer.getContainerAtIndex(pos2) + if c1.intersects(c2) { + return true + } + pos1++ + pos2++ + if (pos1 == length1) || (pos2 == length2) { + break main + } + s1 = rb.highlowcontainer.getKeyAtIndex(pos1) + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } else if s1 < s2 { + pos1 = rb.highlowcontainer.advanceUntil(s2, pos1) + if pos1 == length1 { + break main + } + s1 = rb.highlowcontainer.getKeyAtIndex(pos1) + } else { //s1 > s2 + pos2 = x2.highlowcontainer.advanceUntil(s1, pos2) + if pos2 == length2 { + break main + } + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } + } + } else { + break + } + } + return false +} + +// Xor computes the symmetric difference between two bitmaps and stores the result in the current bitmap +func (rb *Bitmap) Xor(x2 *Bitmap) { + pos1 := 0 + pos2 := 0 + length1 := rb.highlowcontainer.size() + length2 := x2.highlowcontainer.size() + for { + if (pos1 < length1) && (pos2 < length2) { + s1 := rb.highlowcontainer.getKeyAtIndex(pos1) + s2 := x2.highlowcontainer.getKeyAtIndex(pos2) + if s1 < s2 { + pos1 = rb.highlowcontainer.advanceUntil(s2, pos1) + if pos1 == length1 { + break + } + } else if s1 > s2 { + c := x2.highlowcontainer.getWritableContainerAtIndex(pos2) + rb.highlowcontainer.insertNewKeyValueAt(pos1, x2.highlowcontainer.getKeyAtIndex(pos2), c) + length1++ + pos1++ + pos2++ + } else { + // TODO: couple be computed in-place for reduced memory usage + c := rb.highlowcontainer.getContainerAtIndex(pos1).xor(x2.highlowcontainer.getContainerAtIndex(pos2)) + if c.getCardinality() > 0 { + rb.highlowcontainer.setContainerAtIndex(pos1, c) + pos1++ + } else { + rb.highlowcontainer.removeAtIndex(pos1) + length1-- + } + pos2++ + } + } else { + break + } + } + if pos1 == length1 { + rb.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2) + } +} + +// Or computes the union between two bitmaps and stores the result in the current bitmap +func (rb *Bitmap) Or(x2 *Bitmap) { + pos1 := 0 + pos2 := 0 + length1 := rb.highlowcontainer.size() + length2 := x2.highlowcontainer.size() +main: + for (pos1 < length1) && (pos2 < length2) { + s1 := rb.highlowcontainer.getKeyAtIndex(pos1) + s2 := x2.highlowcontainer.getKeyAtIndex(pos2) + + for { + if s1 < s2 { + pos1++ + if pos1 == length1 { + break main + } + s1 = rb.highlowcontainer.getKeyAtIndex(pos1) + } else if s1 > s2 { + rb.highlowcontainer.insertNewKeyValueAt(pos1, s2, x2.highlowcontainer.getContainerAtIndex(pos2).clone()) + pos1++ + length1++ + pos2++ + if pos2 == length2 { + break main + } + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } else { + rb.highlowcontainer.replaceKeyAndContainerAtIndex(pos1, s1, rb.highlowcontainer.getWritableContainerAtIndex(pos1).ior(x2.highlowcontainer.getContainerAtIndex(pos2)), false) + pos1++ + pos2++ + if (pos1 == length1) || (pos2 == length2) { + break main + } + s1 = rb.highlowcontainer.getKeyAtIndex(pos1) + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } + } + } + if pos1 == length1 { + rb.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2) + } +} + +/*func (rb *Bitmap) Or(x2 *Bitmap) { + results := Or(rb, x2) // Todo: could be computed in-place for reduced memory usage + rb.highlowcontainer = results.highlowcontainer +}*/ + +// AndNot computes the difference between two bitmaps and stores the result in the current bitmap +func (rb *Bitmap) AndNot(x2 *Bitmap) { + pos1 := 0 + pos2 := 0 + intersectionsize := 0 + length1 := rb.highlowcontainer.size() + length2 := x2.highlowcontainer.size() + +main: + for { + if pos1 < length1 && pos2 < length2 { + s1 := rb.highlowcontainer.getKeyAtIndex(pos1) + s2 := x2.highlowcontainer.getKeyAtIndex(pos2) + for { + if s1 == s2 { + c1 := rb.highlowcontainer.getWritableContainerAtIndex(pos1) + c2 := x2.highlowcontainer.getContainerAtIndex(pos2) + diff := c1.iandNot(c2) + if diff.getCardinality() > 0 { + rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, diff, false) + intersectionsize++ + } + pos1++ + pos2++ + if (pos1 == length1) || (pos2 == length2) { + break main + } + s1 = rb.highlowcontainer.getKeyAtIndex(pos1) + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } else if s1 < s2 { + c1 := rb.highlowcontainer.getContainerAtIndex(pos1) + mustCopyOnWrite := rb.highlowcontainer.needsCopyOnWrite(pos1) + rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, c1, mustCopyOnWrite) + intersectionsize++ + pos1++ + if pos1 == length1 { + break main + } + s1 = rb.highlowcontainer.getKeyAtIndex(pos1) + } else { //s1 > s2 + pos2 = x2.highlowcontainer.advanceUntil(s1, pos2) + if pos2 == length2 { + break main + } + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } + } + } else { + break + } + } + // TODO:implement as a copy + for pos1 < length1 { + c1 := rb.highlowcontainer.getContainerAtIndex(pos1) + s1 := rb.highlowcontainer.getKeyAtIndex(pos1) + mustCopyOnWrite := rb.highlowcontainer.needsCopyOnWrite(pos1) + rb.highlowcontainer.replaceKeyAndContainerAtIndex(intersectionsize, s1, c1, mustCopyOnWrite) + intersectionsize++ + pos1++ + } + rb.highlowcontainer.resize(intersectionsize) +} + +// Or computes the union between two bitmaps and returns the result +func Or(x1, x2 *Bitmap) *Bitmap { + answer := NewBitmap() + pos1 := 0 + pos2 := 0 + length1 := x1.highlowcontainer.size() + length2 := x2.highlowcontainer.size() +main: + for (pos1 < length1) && (pos2 < length2) { + s1 := x1.highlowcontainer.getKeyAtIndex(pos1) + s2 := x2.highlowcontainer.getKeyAtIndex(pos2) + + for { + if s1 < s2 { + answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1) + pos1++ + if pos1 == length1 { + break main + } + s1 = x1.highlowcontainer.getKeyAtIndex(pos1) + } else if s1 > s2 { + answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2) + pos2++ + if pos2 == length2 { + break main + } + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } else { + + answer.highlowcontainer.appendContainer(s1, x1.highlowcontainer.getContainerAtIndex(pos1).or(x2.highlowcontainer.getContainerAtIndex(pos2)), false) + pos1++ + pos2++ + if (pos1 == length1) || (pos2 == length2) { + break main + } + s1 = x1.highlowcontainer.getKeyAtIndex(pos1) + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } + } + } + if pos1 == length1 { + answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2) + } else if pos2 == length2 { + answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1) + } + return answer +} + +// And computes the intersection between two bitmaps and returns the result +func And(x1, x2 *Bitmap) *Bitmap { + answer := NewBitmap() + pos1 := 0 + pos2 := 0 + length1 := x1.highlowcontainer.size() + length2 := x2.highlowcontainer.size() +main: + for pos1 < length1 && pos2 < length2 { + s1 := x1.highlowcontainer.getKeyAtIndex(pos1) + s2 := x2.highlowcontainer.getKeyAtIndex(pos2) + for { + if s1 == s2 { + C := x1.highlowcontainer.getContainerAtIndex(pos1) + C = C.and(x2.highlowcontainer.getContainerAtIndex(pos2)) + + if C.getCardinality() > 0 { + answer.highlowcontainer.appendContainer(s1, C, false) + } + pos1++ + pos2++ + if (pos1 == length1) || (pos2 == length2) { + break main + } + s1 = x1.highlowcontainer.getKeyAtIndex(pos1) + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } else if s1 < s2 { + pos1 = x1.highlowcontainer.advanceUntil(s2, pos1) + if pos1 == length1 { + break main + } + s1 = x1.highlowcontainer.getKeyAtIndex(pos1) + } else { // s1 > s2 + pos2 = x2.highlowcontainer.advanceUntil(s1, pos2) + if pos2 == length2 { + break main + } + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } + } + } + return answer +} + +// Xor computes the symmetric difference between two bitmaps and returns the result +func Xor(x1, x2 *Bitmap) *Bitmap { + answer := NewBitmap() + pos1 := 0 + pos2 := 0 + length1 := x1.highlowcontainer.size() + length2 := x2.highlowcontainer.size() + for { + if (pos1 < length1) && (pos2 < length2) { + s1 := x1.highlowcontainer.getKeyAtIndex(pos1) + s2 := x2.highlowcontainer.getKeyAtIndex(pos2) + if s1 < s2 { + answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1) + pos1++ + } else if s1 > s2 { + answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2) + pos2++ + } else { + c := x1.highlowcontainer.getContainerAtIndex(pos1).xor(x2.highlowcontainer.getContainerAtIndex(pos2)) + if c.getCardinality() > 0 { + answer.highlowcontainer.appendContainer(s1, c, false) + } + pos1++ + pos2++ + } + } else { + break + } + } + if pos1 == length1 { + answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2) + } else if pos2 == length2 { + answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1) + } + return answer +} + +// AndNot computes the difference between two bitmaps and returns the result +func AndNot(x1, x2 *Bitmap) *Bitmap { + answer := NewBitmap() + pos1 := 0 + pos2 := 0 + length1 := x1.highlowcontainer.size() + length2 := x2.highlowcontainer.size() + +main: + for { + if pos1 < length1 && pos2 < length2 { + s1 := x1.highlowcontainer.getKeyAtIndex(pos1) + s2 := x2.highlowcontainer.getKeyAtIndex(pos2) + for { + if s1 < s2 { + answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1) + pos1++ + if pos1 == length1 { + break main + } + s1 = x1.highlowcontainer.getKeyAtIndex(pos1) + } else if s1 == s2 { + c1 := x1.highlowcontainer.getContainerAtIndex(pos1) + c2 := x2.highlowcontainer.getContainerAtIndex(pos2) + diff := c1.andNot(c2) + if diff.getCardinality() > 0 { + answer.highlowcontainer.appendContainer(s1, diff, false) + } + pos1++ + pos2++ + if (pos1 == length1) || (pos2 == length2) { + break main + } + s1 = x1.highlowcontainer.getKeyAtIndex(pos1) + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } else { //s1 > s2 + pos2 = x2.highlowcontainer.advanceUntil(s1, pos2) + if pos2 == length2 { + break main + } + s2 = x2.highlowcontainer.getKeyAtIndex(pos2) + } + } + } else { + break + } + } + if pos2 == length2 { + answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1) + } + return answer +} + +// AddMany add all of the values in dat +func (rb *Bitmap) AddMany(dat []uint32) { + if len(dat) == 0 { + return + } + prev := dat[0] + idx, c := rb.addwithptr(prev) + for _, i := range dat[1:] { + if highbits(prev) == highbits(i) { + c = c.iaddReturnMinimized(lowbits(i)) + rb.highlowcontainer.setContainerAtIndex(idx, c) + } else { + idx, c = rb.addwithptr(i) + } + prev = i + } +} + +// BitmapOf generates a new bitmap filled with the specified integers +func BitmapOf(dat ...uint32) *Bitmap { + ans := NewBitmap() + ans.AddMany(dat) + return ans +} + +// Flip negates the bits in the given range (i.e., [rangeStart,rangeEnd)), any integer present in this range and in the bitmap is removed, +// and any integer present in the range and not in the bitmap is added. +// The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range +// while uint64(0x100000000) cannot be represented as a 32-bit value. +func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) { + + if rangeEnd > MaxUint32+1 { + panic("rangeEnd > MaxUint32+1") + } + if rangeStart > MaxUint32+1 { + panic("rangeStart > MaxUint32+1") + } + + if rangeStart >= rangeEnd { + return + } + + hbStart := highbits(uint32(rangeStart)) + lbStart := lowbits(uint32(rangeStart)) + hbLast := highbits(uint32(rangeEnd - 1)) + lbLast := lowbits(uint32(rangeEnd - 1)) + + var max uint32 = maxLowBit + for hb := hbStart; hb <= hbLast; hb++ { + var containerStart uint32 + if hb == hbStart { + containerStart = uint32(lbStart) + } + containerLast := max + if hb == hbLast { + containerLast = uint32(lbLast) + } + + i := rb.highlowcontainer.getIndex(hb) + + if i >= 0 { + c := rb.highlowcontainer.getWritableContainerAtIndex(i).inot(int(containerStart), int(containerLast)+1) + if c.getCardinality() > 0 { + rb.highlowcontainer.setContainerAtIndex(i, c) + } else { + rb.highlowcontainer.removeAtIndex(i) + } + } else { // *think* the range of ones must never be + // empty. + rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, rangeOfOnes(int(containerStart), int(containerLast))) + } + } +} + +// FlipInt calls Flip after casting the parameters (convenience method) +func (rb *Bitmap) FlipInt(rangeStart, rangeEnd int) { + rb.Flip(uint64(rangeStart), uint64(rangeEnd)) +} + +// AddRange adds the integers in [rangeStart, rangeEnd) to the bitmap. +// The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range +// while uint64(0x100000000) cannot be represented as a 32-bit value. +func (rb *Bitmap) AddRange(rangeStart, rangeEnd uint64) { + if rangeStart >= rangeEnd { + return + } + if rangeEnd-1 > MaxUint32 { + panic("rangeEnd-1 > MaxUint32") + } + hbStart := uint32(highbits(uint32(rangeStart))) + lbStart := uint32(lowbits(uint32(rangeStart))) + hbLast := uint32(highbits(uint32(rangeEnd - 1))) + lbLast := uint32(lowbits(uint32(rangeEnd - 1))) + + var max uint32 = maxLowBit + for hb := uint16(hbStart); hb <= uint16(hbLast); hb++ { + containerStart := uint32(0) + if hb == uint16(hbStart) { + containerStart = lbStart + } + containerLast := max + if hb == uint16(hbLast) { + containerLast = lbLast + } + + i := rb.highlowcontainer.getIndex(hb) + + if i >= 0 { + c := rb.highlowcontainer.getWritableContainerAtIndex(i).iaddRange(int(containerStart), int(containerLast)+1) + rb.highlowcontainer.setContainerAtIndex(i, c) + } else { // *think* the range of ones must never be + // empty. + rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, rangeOfOnes(int(containerStart), int(containerLast))) + } + } +} + +// RemoveRange removes the integers in [rangeStart, rangeEnd) from the bitmap. +// The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range +// while uint64(0x100000000) cannot be represented as a 32-bit value. +func (rb *Bitmap) RemoveRange(rangeStart, rangeEnd uint64) { + if rangeStart >= rangeEnd { + return + } + if rangeEnd-1 > MaxUint32 { + // logically, we should assume that the user wants to + // remove all values from rangeStart to infinity + // see https://github.com/RoaringBitmap/roaring/issues/141 + rangeEnd = uint64(0x100000000) + } + hbStart := uint32(highbits(uint32(rangeStart))) + lbStart := uint32(lowbits(uint32(rangeStart))) + hbLast := uint32(highbits(uint32(rangeEnd - 1))) + lbLast := uint32(lowbits(uint32(rangeEnd - 1))) + + var max uint32 = maxLowBit + + if hbStart == hbLast { + i := rb.highlowcontainer.getIndex(uint16(hbStart)) + if i < 0 { + return + } + c := rb.highlowcontainer.getWritableContainerAtIndex(i).iremoveRange(int(lbStart), int(lbLast+1)) + if c.getCardinality() > 0 { + rb.highlowcontainer.setContainerAtIndex(i, c) + } else { + rb.highlowcontainer.removeAtIndex(i) + } + return + } + ifirst := rb.highlowcontainer.getIndex(uint16(hbStart)) + ilast := rb.highlowcontainer.getIndex(uint16(hbLast)) + + if ifirst >= 0 { + if lbStart != 0 { + c := rb.highlowcontainer.getWritableContainerAtIndex(ifirst).iremoveRange(int(lbStart), int(max+1)) + if c.getCardinality() > 0 { + rb.highlowcontainer.setContainerAtIndex(ifirst, c) + ifirst++ + } + } + } else { + ifirst = -ifirst - 1 + } + if ilast >= 0 { + if lbLast != max { + c := rb.highlowcontainer.getWritableContainerAtIndex(ilast).iremoveRange(int(0), int(lbLast+1)) + if c.getCardinality() > 0 { + rb.highlowcontainer.setContainerAtIndex(ilast, c) + } else { + ilast++ + } + } else { + ilast++ + } + } else { + ilast = -ilast - 1 + } + rb.highlowcontainer.removeIndexRange(ifirst, ilast) +} + +// Flip negates the bits in the given range (i.e., [rangeStart,rangeEnd)), any integer present in this range and in the bitmap is removed, +// and any integer present in the range and not in the bitmap is added, a new bitmap is returned leaving +// the current bitmap unchanged. +// The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range +// while uint64(0x100000000) cannot be represented as a 32-bit value. +func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap { + if rangeStart >= rangeEnd { + return bm.Clone() + } + + if rangeStart > MaxUint32 { + panic("rangeStart > MaxUint32") + } + if rangeEnd-1 > MaxUint32 { + panic("rangeEnd-1 > MaxUint32") + } + + answer := NewBitmap() + hbStart := highbits(uint32(rangeStart)) + lbStart := lowbits(uint32(rangeStart)) + hbLast := highbits(uint32(rangeEnd - 1)) + lbLast := lowbits(uint32(rangeEnd - 1)) + + // copy the containers before the active area + answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, hbStart) + + var max uint32 = maxLowBit + for hb := hbStart; hb <= hbLast; hb++ { + var containerStart uint32 + if hb == hbStart { + containerStart = uint32(lbStart) + } + containerLast := max + if hb == hbLast { + containerLast = uint32(lbLast) + } + + i := bm.highlowcontainer.getIndex(hb) + j := answer.highlowcontainer.getIndex(hb) + + if i >= 0 { + c := bm.highlowcontainer.getContainerAtIndex(i).not(int(containerStart), int(containerLast)+1) + if c.getCardinality() > 0 { + answer.highlowcontainer.insertNewKeyValueAt(-j-1, hb, c) + } + + } else { // *think* the range of ones must never be + // empty. + answer.highlowcontainer.insertNewKeyValueAt(-j-1, hb, + rangeOfOnes(int(containerStart), int(containerLast))) + } + } + // copy the containers after the active area. + answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, hbLast) + + return answer +} + +// SetCopyOnWrite sets this bitmap to use copy-on-write so that copies are fast and memory conscious +// if the parameter is true, otherwise we leave the default where hard copies are made +// (copy-on-write requires extra care in a threaded context). +// Calling SetCopyOnWrite(true) on a bitmap created with FromBuffer is unsafe. +func (rb *Bitmap) SetCopyOnWrite(val bool) { + rb.highlowcontainer.copyOnWrite = val +} + +// GetCopyOnWrite gets this bitmap's copy-on-write property +func (rb *Bitmap) GetCopyOnWrite() (val bool) { + return rb.highlowcontainer.copyOnWrite +} + +// FlipInt calls Flip after casting the parameters (convenience method) +func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap { + return Flip(bm, uint64(rangeStart), uint64(rangeEnd)) +} + +// Statistics provides details on the container types in use. +type Statistics struct { + Cardinality uint64 + Containers uint64 + + ArrayContainers uint64 + ArrayContainerBytes uint64 + ArrayContainerValues uint64 + + BitmapContainers uint64 + BitmapContainerBytes uint64 + BitmapContainerValues uint64 + + RunContainers uint64 + RunContainerBytes uint64 + RunContainerValues uint64 +} + +// Stats returns details on container type usage in a Statistics struct. +func (rb *Bitmap) Stats() Statistics { + stats := Statistics{} + stats.Containers = uint64(len(rb.highlowcontainer.containers)) + for _, c := range rb.highlowcontainer.containers { + stats.Cardinality += uint64(c.getCardinality()) + + switch c.(type) { + case *arrayContainer: + stats.ArrayContainers++ + stats.ArrayContainerBytes += uint64(c.getSizeInBytes()) + stats.ArrayContainerValues += uint64(c.getCardinality()) + case *bitmapContainer: + stats.BitmapContainers++ + stats.BitmapContainerBytes += uint64(c.getSizeInBytes()) + stats.BitmapContainerValues += uint64(c.getCardinality()) + case *runContainer16: + stats.RunContainers++ + stats.RunContainerBytes += uint64(c.getSizeInBytes()) + stats.RunContainerValues += uint64(c.getCardinality()) + } + } + return stats +} diff --git a/vendor/github.com/RoaringBitmap/roaring/roaringarray.go b/vendor/github.com/RoaringBitmap/roaring/roaringarray.go new file mode 100644 index 0000000000..d9659159d6 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/roaringarray.go @@ -0,0 +1,893 @@ +package roaring + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "io/ioutil" + + snappy "github.com/glycerine/go-unsnap-stream" + "github.com/tinylib/msgp/msgp" +) + +//go:generate msgp -unexported + +type container interface { + clone() container + and(container) container + andCardinality(container) int + iand(container) container // i stands for inplace + andNot(container) container + iandNot(container) container // i stands for inplace + getCardinality() int + // rank returns the number of integers that are + // smaller or equal to x. rank(infinity) would be getCardinality(). + rank(uint16) int + + iadd(x uint16) bool // inplace, returns true if x was new. + iaddReturnMinimized(uint16) container // may change return type to minimize storage. + + //addRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused) + iaddRange(start, endx int) container // i stands for inplace, range is [firstOfRange,endx) + + iremove(x uint16) bool // inplace, returns true if x was present. + iremoveReturnMinimized(uint16) container // may change return type to minimize storage. + + not(start, final int) container // range is [firstOfRange,lastOfRange) + inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx) + xor(r container) container + getShortIterator() shortIterable + getManyIterator() manyIterable + contains(i uint16) bool + maximum() uint16 + minimum() uint16 + + // equals is now logical equals; it does not require the + // same underlying container types, but compares across + // any of the implementations. + equals(r container) bool + + fillLeastSignificant16bits(array []uint32, i int, mask uint32) + or(r container) container + orCardinality(r container) int + isFull() bool + ior(r container) container // i stands for inplace + intersects(r container) bool // whether the two containers intersect + lazyOR(r container) container + lazyIOR(r container) container + getSizeInBytes() int + //removeRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused) + iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange) + selectInt(x uint16) int // selectInt returns the xth integer in the container + serializedSizeInBytes() int + readFrom(io.Reader) (int, error) + writeTo(io.Writer) (int, error) + + numberOfRuns() int + toEfficientContainer() container + String() string + containerType() contype +} + +type contype uint8 + +const ( + bitmapContype contype = iota + arrayContype + run16Contype + run32Contype +) + +// careful: range is [firstOfRange,lastOfRange] +func rangeOfOnes(start, last int) container { + if start > MaxUint16 { + panic("rangeOfOnes called with start > MaxUint16") + } + if last > MaxUint16 { + panic("rangeOfOnes called with last > MaxUint16") + } + if start < 0 { + panic("rangeOfOnes called with start < 0") + } + if last < 0 { + panic("rangeOfOnes called with last < 0") + } + return newRunContainer16Range(uint16(start), uint16(last)) +} + +type roaringArray struct { + keys []uint16 + containers []container `msg:"-"` // don't try to serialize directly. + needCopyOnWrite []bool + copyOnWrite bool + + // conserz is used at serialization time + // to serialize containers. Otherwise empty. + conserz []containerSerz +} + +// containerSerz facilitates serializing container (tricky to +// serialize because it is an interface) by providing a +// light wrapper with a type identifier. +type containerSerz struct { + t contype `msg:"t"` // type + r msgp.Raw `msg:"r"` // Raw msgpack of the actual container type +} + +func newRoaringArray() *roaringArray { + return &roaringArray{} +} + +// runOptimize compresses the element containers to minimize space consumed. +// Q: how does this interact with copyOnWrite and needCopyOnWrite? +// A: since we aren't changing the logical content, just the representation, +// we don't bother to check the needCopyOnWrite bits. We replace +// (possibly all) elements of ra.containers in-place with space +// optimized versions. +func (ra *roaringArray) runOptimize() { + for i := range ra.containers { + ra.containers[i] = ra.containers[i].toEfficientContainer() + } +} + +func (ra *roaringArray) appendContainer(key uint16, value container, mustCopyOnWrite bool) { + ra.keys = append(ra.keys, key) + ra.containers = append(ra.containers, value) + ra.needCopyOnWrite = append(ra.needCopyOnWrite, mustCopyOnWrite) +} + +func (ra *roaringArray) appendWithoutCopy(sa roaringArray, startingindex int) { + mustCopyOnWrite := sa.needCopyOnWrite[startingindex] + ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], mustCopyOnWrite) +} + +func (ra *roaringArray) appendCopy(sa roaringArray, startingindex int) { + // cow only if the two request it, or if we already have a lightweight copy + copyonwrite := (ra.copyOnWrite && sa.copyOnWrite) || sa.needsCopyOnWrite(startingindex) + if !copyonwrite { + // since there is no copy-on-write, we need to clone the container (this is important) + ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex].clone(), copyonwrite) + } else { + ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], copyonwrite) + if !sa.needsCopyOnWrite(startingindex) { + sa.setNeedsCopyOnWrite(startingindex) + } + } +} + +func (ra *roaringArray) appendWithoutCopyMany(sa roaringArray, startingindex, end int) { + for i := startingindex; i < end; i++ { + ra.appendWithoutCopy(sa, i) + } +} + +func (ra *roaringArray) appendCopyMany(sa roaringArray, startingindex, end int) { + for i := startingindex; i < end; i++ { + ra.appendCopy(sa, i) + } +} + +func (ra *roaringArray) appendCopiesUntil(sa roaringArray, stoppingKey uint16) { + // cow only if the two request it, or if we already have a lightweight copy + copyonwrite := ra.copyOnWrite && sa.copyOnWrite + + for i := 0; i < sa.size(); i++ { + if sa.keys[i] >= stoppingKey { + break + } + thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i) + if thiscopyonewrite { + ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite) + if !sa.needsCopyOnWrite(i) { + sa.setNeedsCopyOnWrite(i) + } + + } else { + // since there is no copy-on-write, we need to clone the container (this is important) + ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite) + + } + } +} + +func (ra *roaringArray) appendCopiesAfter(sa roaringArray, beforeStart uint16) { + // cow only if the two request it, or if we already have a lightweight copy + copyonwrite := ra.copyOnWrite && sa.copyOnWrite + + startLocation := sa.getIndex(beforeStart) + if startLocation >= 0 { + startLocation++ + } else { + startLocation = -startLocation - 1 + } + + for i := startLocation; i < sa.size(); i++ { + thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i) + if thiscopyonewrite { + ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite) + if !sa.needsCopyOnWrite(i) { + sa.setNeedsCopyOnWrite(i) + } + } else { + // since there is no copy-on-write, we need to clone the container (this is important) + ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite) + + } + } +} + +func (ra *roaringArray) removeIndexRange(begin, end int) { + if end <= begin { + return + } + + r := end - begin + + copy(ra.keys[begin:], ra.keys[end:]) + copy(ra.containers[begin:], ra.containers[end:]) + copy(ra.needCopyOnWrite[begin:], ra.needCopyOnWrite[end:]) + + ra.resize(len(ra.keys) - r) +} + +func (ra *roaringArray) resize(newsize int) { + for k := newsize; k < len(ra.containers); k++ { + ra.containers[k] = nil + } + + ra.keys = ra.keys[:newsize] + ra.containers = ra.containers[:newsize] + ra.needCopyOnWrite = ra.needCopyOnWrite[:newsize] +} + +func (ra *roaringArray) clear() { + ra.resize(0) + ra.copyOnWrite = false + ra.conserz = nil +} + +func (ra *roaringArray) clone() *roaringArray { + + sa := roaringArray{} + sa.copyOnWrite = ra.copyOnWrite + + // this is where copyOnWrite is used. + if ra.copyOnWrite { + sa.keys = make([]uint16, len(ra.keys)) + copy(sa.keys, ra.keys) + sa.containers = make([]container, len(ra.containers)) + copy(sa.containers, ra.containers) + sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite)) + + ra.markAllAsNeedingCopyOnWrite() + sa.markAllAsNeedingCopyOnWrite() + + // sa.needCopyOnWrite is shared + } else { + // make a full copy + + sa.keys = make([]uint16, len(ra.keys)) + copy(sa.keys, ra.keys) + + sa.containers = make([]container, len(ra.containers)) + for i := range sa.containers { + sa.containers[i] = ra.containers[i].clone() + } + + sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite)) + } + return &sa +} + +// unused function: +//func (ra *roaringArray) containsKey(x uint16) bool { +// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0) +//} + +func (ra *roaringArray) getContainer(x uint16) container { + i := ra.binarySearch(0, int64(len(ra.keys)), x) + if i < 0 { + return nil + } + return ra.containers[i] +} + +func (ra *roaringArray) getContainerAtIndex(i int) container { + return ra.containers[i] +} + +func (ra *roaringArray) getFastContainerAtIndex(i int, needsWriteable bool) container { + c := ra.getContainerAtIndex(i) + switch t := c.(type) { + case *arrayContainer: + c = t.toBitmapContainer() + case *runContainer16: + if !t.isFull() { + c = t.toBitmapContainer() + } + case *bitmapContainer: + if needsWriteable && ra.needCopyOnWrite[i] { + c = ra.containers[i].clone() + } + } + return c +} + +func (ra *roaringArray) getWritableContainerAtIndex(i int) container { + if ra.needCopyOnWrite[i] { + ra.containers[i] = ra.containers[i].clone() + ra.needCopyOnWrite[i] = false + } + return ra.containers[i] +} + +func (ra *roaringArray) getIndex(x uint16) int { + // before the binary search, we optimize for frequent cases + size := len(ra.keys) + if (size == 0) || (ra.keys[size-1] == x) { + return size - 1 + } + return ra.binarySearch(0, int64(size), x) +} + +func (ra *roaringArray) getKeyAtIndex(i int) uint16 { + return ra.keys[i] +} + +func (ra *roaringArray) insertNewKeyValueAt(i int, key uint16, value container) { + ra.keys = append(ra.keys, 0) + ra.containers = append(ra.containers, nil) + + copy(ra.keys[i+1:], ra.keys[i:]) + copy(ra.containers[i+1:], ra.containers[i:]) + + ra.keys[i] = key + ra.containers[i] = value + + ra.needCopyOnWrite = append(ra.needCopyOnWrite, false) + copy(ra.needCopyOnWrite[i+1:], ra.needCopyOnWrite[i:]) + ra.needCopyOnWrite[i] = false +} + +func (ra *roaringArray) remove(key uint16) bool { + i := ra.binarySearch(0, int64(len(ra.keys)), key) + if i >= 0 { // if a new key + ra.removeAtIndex(i) + return true + } + return false +} + +func (ra *roaringArray) removeAtIndex(i int) { + copy(ra.keys[i:], ra.keys[i+1:]) + copy(ra.containers[i:], ra.containers[i+1:]) + + copy(ra.needCopyOnWrite[i:], ra.needCopyOnWrite[i+1:]) + + ra.resize(len(ra.keys) - 1) +} + +func (ra *roaringArray) setContainerAtIndex(i int, c container) { + ra.containers[i] = c +} + +func (ra *roaringArray) replaceKeyAndContainerAtIndex(i int, key uint16, c container, mustCopyOnWrite bool) { + ra.keys[i] = key + ra.containers[i] = c + ra.needCopyOnWrite[i] = mustCopyOnWrite +} + +func (ra *roaringArray) size() int { + return len(ra.keys) +} + +func (ra *roaringArray) binarySearch(begin, end int64, ikey uint16) int { + low := begin + high := end - 1 + for low+16 <= high { + middleIndex := low + (high-low)/2 // avoid overflow + middleValue := ra.keys[middleIndex] + + if middleValue < ikey { + low = middleIndex + 1 + } else if middleValue > ikey { + high = middleIndex - 1 + } else { + return int(middleIndex) + } + } + for ; low <= high; low++ { + val := ra.keys[low] + if val >= ikey { + if val == ikey { + return int(low) + } + break + } + } + return -int(low + 1) +} + +func (ra *roaringArray) equals(o interface{}) bool { + srb, ok := o.(roaringArray) + if ok { + + if srb.size() != ra.size() { + return false + } + for i, k := range ra.keys { + if k != srb.keys[i] { + return false + } + } + + for i, c := range ra.containers { + if !c.equals(srb.containers[i]) { + return false + } + } + return true + } + return false +} + +func (ra *roaringArray) headerSize() uint64 { + size := uint64(len(ra.keys)) + if ra.hasRunCompression() { + if size < noOffsetThreshold { // for small bitmaps, we omit the offsets + return 4 + (size+7)/8 + 4*size + } + return 4 + (size+7)/8 + 8*size // - 4 because we pack the size with the cookie + } + return 4 + 4 + 8*size + +} + +// should be dirt cheap +func (ra *roaringArray) serializedSizeInBytes() uint64 { + answer := ra.headerSize() + for _, c := range ra.containers { + answer += uint64(c.serializedSizeInBytes()) + } + return answer +} + +// +// spec: https://github.com/RoaringBitmap/RoaringFormatSpec +// +func (ra *roaringArray) toBytes() ([]byte, error) { + stream := &bytes.Buffer{} + hasRun := ra.hasRunCompression() + isRunSizeInBytes := 0 + cookieSize := 8 + if hasRun { + cookieSize = 4 + isRunSizeInBytes = (len(ra.keys) + 7) / 8 + } + descriptiveHeaderSize := 4 * len(ra.keys) + preambleSize := cookieSize + isRunSizeInBytes + descriptiveHeaderSize + + buf := make([]byte, preambleSize+4*len(ra.keys)) + + nw := 0 + + if hasRun { + binary.LittleEndian.PutUint16(buf[0:], uint16(serialCookie)) + nw += 2 + binary.LittleEndian.PutUint16(buf[2:], uint16(len(ra.keys)-1)) + nw += 2 + + // compute isRun bitmap + var ir []byte + + isRun := newBitmapContainer() + for i, c := range ra.containers { + switch c.(type) { + case *runContainer16: + isRun.iadd(uint16(i)) + } + } + // convert to little endian + ir = isRun.asLittleEndianByteSlice()[:isRunSizeInBytes] + nw += copy(buf[nw:], ir) + } else { + binary.LittleEndian.PutUint32(buf[0:], uint32(serialCookieNoRunContainer)) + nw += 4 + binary.LittleEndian.PutUint32(buf[4:], uint32(len(ra.keys))) + nw += 4 + } + + // descriptive header + for i, key := range ra.keys { + binary.LittleEndian.PutUint16(buf[nw:], key) + nw += 2 + c := ra.containers[i] + binary.LittleEndian.PutUint16(buf[nw:], uint16(c.getCardinality()-1)) + nw += 2 + } + + startOffset := int64(preambleSize + 4*len(ra.keys)) + if !hasRun || (len(ra.keys) >= noOffsetThreshold) { + // offset header + for _, c := range ra.containers { + binary.LittleEndian.PutUint32(buf[nw:], uint32(startOffset)) + nw += 4 + switch rc := c.(type) { + case *runContainer16: + startOffset += 2 + int64(len(rc.iv))*4 + default: + startOffset += int64(getSizeInBytesFromCardinality(c.getCardinality())) + } + } + } + + _, err := stream.Write(buf[:nw]) + if err != nil { + return nil, err + } + for i, c := range ra.containers { + _ = i + _, err := c.writeTo(stream) + if err != nil { + return nil, err + } + } + return stream.Bytes(), nil +} + +// +// spec: https://github.com/RoaringBitmap/RoaringFormatSpec +// +func (ra *roaringArray) writeTo(out io.Writer) (int64, error) { + by, err := ra.toBytes() + if err != nil { + return 0, err + } + n, err := out.Write(by) + if err == nil && n < len(by) { + err = io.ErrShortWrite + } + return int64(n), err +} + +func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) { + pos := 0 + if len(buf) < 8 { + return 0, fmt.Errorf("buffer too small, expecting at least 8 bytes, was %d", len(buf)) + } + + cookie := binary.LittleEndian.Uint32(buf) + pos += 4 + var size uint32 // number of containers + haveRunContainers := false + var isRunBitmap []byte + + // cookie header + if cookie&0x0000FFFF == serialCookie { + haveRunContainers = true + size = uint32(uint16(cookie>>16) + 1) // number of containers + + // create is-run-container bitmap + isRunBitmapSize := (int(size) + 7) / 8 + if pos+isRunBitmapSize > len(buf) { + return 0, fmt.Errorf("malformed bitmap, is-run bitmap overruns buffer at %d", pos+isRunBitmapSize) + } + + isRunBitmap = buf[pos : pos+isRunBitmapSize] + pos += isRunBitmapSize + } else if cookie == serialCookieNoRunContainer { + size = binary.LittleEndian.Uint32(buf[pos:]) + pos += 4 + } else { + return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header") + } + if size > (1 << 16) { + return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.") + } + // descriptive header + // keycard - is {key, cardinality} tuple slice + if pos+2*2*int(size) > len(buf) { + return 0, fmt.Errorf("malfomred bitmap, key-cardinality slice overruns buffer at %d", pos+2*2*int(size)) + } + keycard := byteSliceAsUint16Slice(buf[pos : pos+2*2*int(size)]) + pos += 2 * 2 * int(size) + + if !haveRunContainers || size >= noOffsetThreshold { + pos += 4 * int(size) + } + + // Allocate slices upfront as number of containers is known + if cap(ra.containers) >= int(size) { + ra.containers = ra.containers[:size] + } else { + ra.containers = make([]container, size) + } + if cap(ra.keys) >= int(size) { + ra.keys = ra.keys[:size] + } else { + ra.keys = make([]uint16, size) + } + if cap(ra.needCopyOnWrite) >= int(size) { + ra.needCopyOnWrite = ra.needCopyOnWrite[:size] + } else { + ra.needCopyOnWrite = make([]bool, size) + } + + for i := uint32(0); i < size; i++ { + key := uint16(keycard[2*i]) + card := int(keycard[2*i+1]) + 1 + ra.keys[i] = key + ra.needCopyOnWrite[i] = true + + if haveRunContainers && isRunBitmap[i/8]&(1<<(i%8)) != 0 { + // run container + nr := binary.LittleEndian.Uint16(buf[pos:]) + pos += 2 + if pos+int(nr)*4 > len(buf) { + return 0, fmt.Errorf("malformed bitmap, a run container overruns buffer at %d:%d", pos, pos+int(nr)*4) + } + nb := runContainer16{ + iv: byteSliceAsInterval16Slice(buf[pos : pos+int(nr)*4]), + card: int64(card), + } + pos += int(nr) * 4 + ra.containers[i] = &nb + } else if card > arrayDefaultMaxSize { + // bitmap container + nb := bitmapContainer{ + cardinality: card, + bitmap: byteSliceAsUint64Slice(buf[pos : pos+arrayDefaultMaxSize*2]), + } + pos += arrayDefaultMaxSize * 2 + ra.containers[i] = &nb + } else { + // array container + nb := arrayContainer{ + byteSliceAsUint16Slice(buf[pos : pos+card*2]), + } + pos += card * 2 + ra.containers[i] = &nb + } + } + + return int64(pos), nil +} + +func (ra *roaringArray) readFrom(stream io.Reader) (int64, error) { + pos := 0 + var cookie uint32 + err := binary.Read(stream, binary.LittleEndian, &cookie) + if err != nil { + return 0, fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err) + } + pos += 4 + var size uint32 + haveRunContainers := false + var isRun *bitmapContainer + if cookie&0x0000FFFF == serialCookie { + haveRunContainers = true + size = uint32(uint16(cookie>>16) + 1) + bytesToRead := (int(size) + 7) / 8 + numwords := (bytesToRead + 7) / 8 + by := make([]byte, bytesToRead, numwords*8) + nr, err := io.ReadFull(stream, by) + if err != nil { + return 8 + int64(nr), fmt.Errorf("error in readFrom: could not read the "+ + "runContainer bit flags of length %v bytes: %v", bytesToRead, err) + } + pos += bytesToRead + by = by[:cap(by)] + isRun = newBitmapContainer() + for i := 0; i < numwords; i++ { + isRun.bitmap[i] = binary.LittleEndian.Uint64(by) + by = by[8:] + } + } else if cookie == serialCookieNoRunContainer { + err = binary.Read(stream, binary.LittleEndian, &size) + if err != nil { + return 0, fmt.Errorf("error in roaringArray.readFrom: when reading size, got: %s", err) + } + pos += 4 + } else { + return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header") + } + if size > (1 << 16) { + return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.") + } + // descriptive header + keycard := make([]uint16, 2*size, 2*size) + err = binary.Read(stream, binary.LittleEndian, keycard) + if err != nil { + return 0, err + } + pos += 2 * 2 * int(size) + // offset header + if !haveRunContainers || size >= noOffsetThreshold { + io.CopyN(ioutil.Discard, stream, 4*int64(size)) // we never skip ahead so this data can be ignored + pos += 4 * int(size) + } + for i := uint32(0); i < size; i++ { + key := int(keycard[2*i]) + card := int(keycard[2*i+1]) + 1 + if haveRunContainers && isRun.contains(uint16(i)) { + nb := newRunContainer16() + nr, err := nb.readFrom(stream) + if err != nil { + return 0, err + } + pos += nr + ra.appendContainer(uint16(key), nb, false) + } else if card > arrayDefaultMaxSize { + nb := newBitmapContainer() + nr, err := nb.readFrom(stream) + if err != nil { + return 0, err + } + nb.cardinality = card + pos += nr + ra.appendContainer(keycard[2*i], nb, false) + } else { + nb := newArrayContainerSize(card) + nr, err := nb.readFrom(stream) + if err != nil { + return 0, err + } + pos += nr + ra.appendContainer(keycard[2*i], nb, false) + } + } + return int64(pos), nil +} + +func (ra *roaringArray) hasRunCompression() bool { + for _, c := range ra.containers { + switch c.(type) { + case *runContainer16: + return true + } + } + return false +} + +func (ra *roaringArray) writeToMsgpack(stream io.Writer) error { + + ra.conserz = make([]containerSerz, len(ra.containers)) + for i, v := range ra.containers { + switch cn := v.(type) { + case *bitmapContainer: + bts, err := cn.MarshalMsg(nil) + if err != nil { + return err + } + ra.conserz[i].t = bitmapContype + ra.conserz[i].r = bts + case *arrayContainer: + bts, err := cn.MarshalMsg(nil) + if err != nil { + return err + } + ra.conserz[i].t = arrayContype + ra.conserz[i].r = bts + case *runContainer16: + bts, err := cn.MarshalMsg(nil) + if err != nil { + return err + } + ra.conserz[i].t = run16Contype + ra.conserz[i].r = bts + default: + panic(fmt.Errorf("Unrecognized container implementation: %T", cn)) + } + } + w := snappy.NewWriter(stream) + err := msgp.Encode(w, ra) + ra.conserz = nil + return err +} + +func (ra *roaringArray) readFromMsgpack(stream io.Reader) error { + r := snappy.NewReader(stream) + err := msgp.Decode(r, ra) + if err != nil { + return err + } + + if len(ra.containers) != len(ra.keys) { + ra.containers = make([]container, len(ra.keys)) + } + + for i, v := range ra.conserz { + switch v.t { + case bitmapContype: + c := &bitmapContainer{} + _, err = c.UnmarshalMsg(v.r) + if err != nil { + return err + } + ra.containers[i] = c + case arrayContype: + c := &arrayContainer{} + _, err = c.UnmarshalMsg(v.r) + if err != nil { + return err + } + ra.containers[i] = c + case run16Contype: + c := &runContainer16{} + _, err = c.UnmarshalMsg(v.r) + if err != nil { + return err + } + ra.containers[i] = c + default: + return fmt.Errorf("unrecognized contype serialization code: '%v'", v.t) + } + } + ra.conserz = nil + return nil +} + +func (ra *roaringArray) advanceUntil(min uint16, pos int) int { + lower := pos + 1 + + if lower >= len(ra.keys) || ra.keys[lower] >= min { + return lower + } + + spansize := 1 + + for lower+spansize < len(ra.keys) && ra.keys[lower+spansize] < min { + spansize *= 2 + } + var upper int + if lower+spansize < len(ra.keys) { + upper = lower + spansize + } else { + upper = len(ra.keys) - 1 + } + + if ra.keys[upper] == min { + return upper + } + + if ra.keys[upper] < min { + // means + // array + // has no + // item + // >= min + // pos = array.length; + return len(ra.keys) + } + + // we know that the next-smallest span was too small + lower += (spansize >> 1) + + mid := 0 + for lower+1 != upper { + mid = (lower + upper) >> 1 + if ra.keys[mid] == min { + return mid + } else if ra.keys[mid] < min { + lower = mid + } else { + upper = mid + } + } + return upper +} + +func (ra *roaringArray) markAllAsNeedingCopyOnWrite() { + for i := range ra.needCopyOnWrite { + ra.needCopyOnWrite[i] = true + } +} + +func (ra *roaringArray) needsCopyOnWrite(i int) bool { + return ra.needCopyOnWrite[i] +} + +func (ra *roaringArray) setNeedsCopyOnWrite(i int) { + ra.needCopyOnWrite[i] = true +} diff --git a/vendor/github.com/RoaringBitmap/roaring/roaringarray_gen.go b/vendor/github.com/RoaringBitmap/roaring/roaringarray_gen.go new file mode 100644 index 0000000000..99fb0f6972 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/roaringarray_gen.go @@ -0,0 +1,529 @@ +package roaring + +// NOTE: THIS FILE WAS PRODUCED BY THE +// MSGP CODE GENERATION TOOL (github.com/tinylib/msgp) +// DO NOT EDIT + +import ( + "github.com/tinylib/msgp/msgp" +) + +// DecodeMsg implements msgp.Decodable +func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) { + var field []byte + _ = field + var zxvk uint32 + zxvk, err = dc.ReadMapHeader() + if err != nil { + return + } + for zxvk > 0 { + zxvk-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "t": + { + var zbzg uint8 + zbzg, err = dc.ReadUint8() + z.t = contype(zbzg) + } + if err != nil { + return + } + case "r": + err = z.r.DecodeMsg(dc) + if err != nil { + return + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) { + // map header, size 2 + // write "t" + err = en.Append(0x82, 0xa1, 0x74) + if err != nil { + return err + } + err = en.WriteUint8(uint8(z.t)) + if err != nil { + return + } + // write "r" + err = en.Append(0xa1, 0x72) + if err != nil { + return err + } + err = z.r.EncodeMsg(en) + if err != nil { + return + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 2 + // string "t" + o = append(o, 0x82, 0xa1, 0x74) + o = msgp.AppendUint8(o, uint8(z.t)) + // string "r" + o = append(o, 0xa1, 0x72) + o, err = z.r.MarshalMsg(o) + if err != nil { + return + } + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zbai uint32 + zbai, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zbai > 0 { + zbai-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "t": + { + var zcmr uint8 + zcmr, bts, err = msgp.ReadUint8Bytes(bts) + z.t = contype(zcmr) + } + if err != nil { + return + } + case "r": + bts, err = z.r.UnmarshalMsg(bts) + if err != nil { + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *containerSerz) Msgsize() (s int) { + s = 1 + 2 + msgp.Uint8Size + 2 + z.r.Msgsize() + return +} + +// DecodeMsg implements msgp.Decodable +func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) { + { + var zajw uint8 + zajw, err = dc.ReadUint8() + (*z) = contype(zajw) + } + if err != nil { + return + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z contype) EncodeMsg(en *msgp.Writer) (err error) { + err = en.WriteUint8(uint8(z)) + if err != nil { + return + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z contype) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + o = msgp.AppendUint8(o, uint8(z)) + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) { + { + var zwht uint8 + zwht, bts, err = msgp.ReadUint8Bytes(bts) + (*z) = contype(zwht) + } + if err != nil { + return + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z contype) Msgsize() (s int) { + s = msgp.Uint8Size + return +} + +// DecodeMsg implements msgp.Decodable +func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) { + var field []byte + _ = field + var zlqf uint32 + zlqf, err = dc.ReadMapHeader() + if err != nil { + return + } + for zlqf > 0 { + zlqf-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "keys": + var zdaf uint32 + zdaf, err = dc.ReadArrayHeader() + if err != nil { + return + } + if cap(z.keys) >= int(zdaf) { + z.keys = (z.keys)[:zdaf] + } else { + z.keys = make([]uint16, zdaf) + } + for zhct := range z.keys { + z.keys[zhct], err = dc.ReadUint16() + if err != nil { + return + } + } + case "needCopyOnWrite": + var zpks uint32 + zpks, err = dc.ReadArrayHeader() + if err != nil { + return + } + if cap(z.needCopyOnWrite) >= int(zpks) { + z.needCopyOnWrite = (z.needCopyOnWrite)[:zpks] + } else { + z.needCopyOnWrite = make([]bool, zpks) + } + for zcua := range z.needCopyOnWrite { + z.needCopyOnWrite[zcua], err = dc.ReadBool() + if err != nil { + return + } + } + case "copyOnWrite": + z.copyOnWrite, err = dc.ReadBool() + if err != nil { + return + } + case "conserz": + var zjfb uint32 + zjfb, err = dc.ReadArrayHeader() + if err != nil { + return + } + if cap(z.conserz) >= int(zjfb) { + z.conserz = (z.conserz)[:zjfb] + } else { + z.conserz = make([]containerSerz, zjfb) + } + for zxhx := range z.conserz { + var zcxo uint32 + zcxo, err = dc.ReadMapHeader() + if err != nil { + return + } + for zcxo > 0 { + zcxo-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "t": + { + var zeff uint8 + zeff, err = dc.ReadUint8() + z.conserz[zxhx].t = contype(zeff) + } + if err != nil { + return + } + case "r": + err = z.conserz[zxhx].r.DecodeMsg(dc) + if err != nil { + return + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + } + default: + err = dc.Skip() + if err != nil { + return + } + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) { + // map header, size 4 + // write "keys" + err = en.Append(0x84, 0xa4, 0x6b, 0x65, 0x79, 0x73) + if err != nil { + return err + } + err = en.WriteArrayHeader(uint32(len(z.keys))) + if err != nil { + return + } + for zhct := range z.keys { + err = en.WriteUint16(z.keys[zhct]) + if err != nil { + return + } + } + // write "needCopyOnWrite" + err = en.Append(0xaf, 0x6e, 0x65, 0x65, 0x64, 0x43, 0x6f, 0x70, 0x79, 0x4f, 0x6e, 0x57, 0x72, 0x69, 0x74, 0x65) + if err != nil { + return err + } + err = en.WriteArrayHeader(uint32(len(z.needCopyOnWrite))) + if err != nil { + return + } + for zcua := range z.needCopyOnWrite { + err = en.WriteBool(z.needCopyOnWrite[zcua]) + if err != nil { + return + } + } + // write "copyOnWrite" + err = en.Append(0xab, 0x63, 0x6f, 0x70, 0x79, 0x4f, 0x6e, 0x57, 0x72, 0x69, 0x74, 0x65) + if err != nil { + return err + } + err = en.WriteBool(z.copyOnWrite) + if err != nil { + return + } + // write "conserz" + err = en.Append(0xa7, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x72, 0x7a) + if err != nil { + return err + } + err = en.WriteArrayHeader(uint32(len(z.conserz))) + if err != nil { + return + } + for zxhx := range z.conserz { + // map header, size 2 + // write "t" + err = en.Append(0x82, 0xa1, 0x74) + if err != nil { + return err + } + err = en.WriteUint8(uint8(z.conserz[zxhx].t)) + if err != nil { + return + } + // write "r" + err = en.Append(0xa1, 0x72) + if err != nil { + return err + } + err = z.conserz[zxhx].r.EncodeMsg(en) + if err != nil { + return + } + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 4 + // string "keys" + o = append(o, 0x84, 0xa4, 0x6b, 0x65, 0x79, 0x73) + o = msgp.AppendArrayHeader(o, uint32(len(z.keys))) + for zhct := range z.keys { + o = msgp.AppendUint16(o, z.keys[zhct]) + } + // string "needCopyOnWrite" + o = append(o, 0xaf, 0x6e, 0x65, 0x65, 0x64, 0x43, 0x6f, 0x70, 0x79, 0x4f, 0x6e, 0x57, 0x72, 0x69, 0x74, 0x65) + o = msgp.AppendArrayHeader(o, uint32(len(z.needCopyOnWrite))) + for zcua := range z.needCopyOnWrite { + o = msgp.AppendBool(o, z.needCopyOnWrite[zcua]) + } + // string "copyOnWrite" + o = append(o, 0xab, 0x63, 0x6f, 0x70, 0x79, 0x4f, 0x6e, 0x57, 0x72, 0x69, 0x74, 0x65) + o = msgp.AppendBool(o, z.copyOnWrite) + // string "conserz" + o = append(o, 0xa7, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x72, 0x7a) + o = msgp.AppendArrayHeader(o, uint32(len(z.conserz))) + for zxhx := range z.conserz { + // map header, size 2 + // string "t" + o = append(o, 0x82, 0xa1, 0x74) + o = msgp.AppendUint8(o, uint8(z.conserz[zxhx].t)) + // string "r" + o = append(o, 0xa1, 0x72) + o, err = z.conserz[zxhx].r.MarshalMsg(o) + if err != nil { + return + } + } + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zrsw uint32 + zrsw, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zrsw > 0 { + zrsw-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "keys": + var zxpk uint32 + zxpk, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + return + } + if cap(z.keys) >= int(zxpk) { + z.keys = (z.keys)[:zxpk] + } else { + z.keys = make([]uint16, zxpk) + } + for zhct := range z.keys { + z.keys[zhct], bts, err = msgp.ReadUint16Bytes(bts) + if err != nil { + return + } + } + case "needCopyOnWrite": + var zdnj uint32 + zdnj, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + return + } + if cap(z.needCopyOnWrite) >= int(zdnj) { + z.needCopyOnWrite = (z.needCopyOnWrite)[:zdnj] + } else { + z.needCopyOnWrite = make([]bool, zdnj) + } + for zcua := range z.needCopyOnWrite { + z.needCopyOnWrite[zcua], bts, err = msgp.ReadBoolBytes(bts) + if err != nil { + return + } + } + case "copyOnWrite": + z.copyOnWrite, bts, err = msgp.ReadBoolBytes(bts) + if err != nil { + return + } + case "conserz": + var zobc uint32 + zobc, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + return + } + if cap(z.conserz) >= int(zobc) { + z.conserz = (z.conserz)[:zobc] + } else { + z.conserz = make([]containerSerz, zobc) + } + for zxhx := range z.conserz { + var zsnv uint32 + zsnv, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + return + } + for zsnv > 0 { + zsnv-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + return + } + switch msgp.UnsafeString(field) { + case "t": + { + var zkgt uint8 + zkgt, bts, err = msgp.ReadUint8Bytes(bts) + z.conserz[zxhx].t = contype(zkgt) + } + if err != nil { + return + } + case "r": + bts, err = z.conserz[zxhx].r.UnmarshalMsg(bts) + if err != nil { + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *roaringArray) Msgsize() (s int) { + s = 1 + 5 + msgp.ArrayHeaderSize + (len(z.keys) * (msgp.Uint16Size)) + 16 + msgp.ArrayHeaderSize + (len(z.needCopyOnWrite) * (msgp.BoolSize)) + 12 + msgp.BoolSize + 8 + msgp.ArrayHeaderSize + for zxhx := range z.conserz { + s += 1 + 2 + msgp.Uint8Size + 2 + z.conserz[zxhx].r.Msgsize() + } + return +} diff --git a/vendor/github.com/RoaringBitmap/roaring/serialization.go b/vendor/github.com/RoaringBitmap/roaring/serialization.go new file mode 100644 index 0000000000..59c39a6630 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/serialization.go @@ -0,0 +1,83 @@ +package roaring + +import ( + "encoding/binary" + "errors" + "fmt" + "io" + + "github.com/tinylib/msgp/msgp" +) + +// writeTo for runContainer16 follows this +// spec: https://github.com/RoaringBitmap/RoaringFormatSpec +// +func (b *runContainer16) writeTo(stream io.Writer) (int, error) { + buf := make([]byte, 2+4*len(b.iv)) + binary.LittleEndian.PutUint16(buf[0:], uint16(len(b.iv))) + for i, v := range b.iv { + binary.LittleEndian.PutUint16(buf[2+i*4:], v.start) + binary.LittleEndian.PutUint16(buf[2+2+i*4:], v.length) + } + return stream.Write(buf) +} + +func (b *runContainer32) writeToMsgpack(stream io.Writer) (int, error) { + bts, err := b.MarshalMsg(nil) + if err != nil { + return 0, err + } + return stream.Write(bts) +} + +func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) { + bts, err := b.MarshalMsg(nil) + if err != nil { + return 0, err + } + return stream.Write(bts) +} + +func (b *runContainer32) readFromMsgpack(stream io.Reader) (int, error) { + err := msgp.Decode(stream, b) + return 0, err +} + +func (b *runContainer16) readFromMsgpack(stream io.Reader) (int, error) { + err := msgp.Decode(stream, b) + return 0, err +} + +var errCorruptedStream = errors.New("insufficient/odd number of stored bytes, corrupted stream detected") + +func (b *runContainer16) readFrom(stream io.Reader) (int, error) { + b.iv = b.iv[:0] + b.card = 0 + var numRuns uint16 + err := binary.Read(stream, binary.LittleEndian, &numRuns) + if err != nil { + return 0, err + } + nr := int(numRuns) + encRun := make([]uint16, 2*nr) + by := make([]byte, 4*nr) + err = binary.Read(stream, binary.LittleEndian, &by) + if err != nil { + return 0, err + } + for i := range encRun { + if len(by) < 2 { + return 0, errCorruptedStream + } + encRun[i] = binary.LittleEndian.Uint16(by) + by = by[2:] + } + for i := 0; i < nr; i++ { + if i > 0 && b.iv[i-1].last() >= encRun[i*2] { + return 0, fmt.Errorf("error: stored runContainer had runs that were not in sorted order!! (b.iv[i-1=%v].last = %v >= encRun[i=%v] = %v)", i-1, b.iv[i-1].last(), i, encRun[i*2]) + } + b.iv = append(b.iv, interval16{start: encRun[i*2], length: encRun[i*2+1]}) + b.card += int64(encRun[i*2+1]) + 1 + } + return 0, err +} diff --git a/vendor/github.com/RoaringBitmap/roaring/serialization_generic.go b/vendor/github.com/RoaringBitmap/roaring/serialization_generic.go new file mode 100644 index 0000000000..7fcef7691b --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/serialization_generic.go @@ -0,0 +1,118 @@ +// +build !amd64,!386 appengine + +package roaring + +import ( + "encoding/binary" + "io" +) + +func (b *arrayContainer) writeTo(stream io.Writer) (int, error) { + buf := make([]byte, 2*len(b.content)) + for i, v := range b.content { + base := i * 2 + buf[base] = byte(v) + buf[base+1] = byte(v >> 8) + } + return stream.Write(buf) +} + +func (b *arrayContainer) readFrom(stream io.Reader) (int, error) { + err := binary.Read(stream, binary.LittleEndian, b.content) + if err != nil { + return 0, err + } + return 2 * len(b.content), nil +} + +func (b *bitmapContainer) writeTo(stream io.Writer) (int, error) { + // Write set + buf := make([]byte, 8*len(b.bitmap)) + for i, v := range b.bitmap { + base := i * 8 + buf[base] = byte(v) + buf[base+1] = byte(v >> 8) + buf[base+2] = byte(v >> 16) + buf[base+3] = byte(v >> 24) + buf[base+4] = byte(v >> 32) + buf[base+5] = byte(v >> 40) + buf[base+6] = byte(v >> 48) + buf[base+7] = byte(v >> 56) + } + return stream.Write(buf) +} + +func (b *bitmapContainer) readFrom(stream io.Reader) (int, error) { + err := binary.Read(stream, binary.LittleEndian, b.bitmap) + if err != nil { + return 0, err + } + b.computeCardinality() + return 8 * len(b.bitmap), nil +} + +func (bc *bitmapContainer) asLittleEndianByteSlice() []byte { + by := make([]byte, len(bc.bitmap)*8) + for i := range bc.bitmap { + binary.LittleEndian.PutUint64(by[i*8:], bc.bitmap[i]) + } + return by +} + +func uint64SliceAsByteSlice(slice []uint64) []byte { + by := make([]byte, len(slice)*8) + + for i, v := range slice { + binary.LittleEndian.PutUint64(by[i*8:], v) + } + + return by +} + +func byteSliceAsUint16Slice(slice []byte) []uint16 { + if len(slice)%2 != 0 { + panic("Slice size should be divisible by 2") + } + + b := make([]uint16, len(slice)/2) + + for i := range b { + b[i] = binary.LittleEndian.Uint16(slice[2*i:]) + } + + return b +} + +func byteSliceAsUint64Slice(slice []byte) []uint64 { + if len(slice)%8 != 0 { + panic("Slice size should be divisible by 8") + } + + b := make([]uint64, len(slice)/8) + + for i := range b { + b[i] = binary.LittleEndian.Uint64(slice[8*i:]) + } + + return b +} + +// Converts a byte slice to a interval16 slice. +// The function assumes that the slice byte buffer is run container data +// encoded according to Roaring Format Spec +func byteSliceAsInterval16Slice(byteSlice []byte) []interval16 { + if len(byteSlice)%4 != 0 { + panic("Slice size should be divisible by 4") + } + + intervalSlice := make([]interval16, len(byteSlice)/4) + + for i := range intervalSlice { + intervalSlice[i] = interval16{ + start: binary.LittleEndian.Uint16(byteSlice[i*4:]), + length: binary.LittleEndian.Uint16(byteSlice[i*4+2:]), + } + } + + return intervalSlice +} diff --git a/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go b/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go new file mode 100644 index 0000000000..c1d3ad3046 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/serialization_littleendian.go @@ -0,0 +1,113 @@ +// +build 386 amd64,!appengine + +package roaring + +import ( + "io" + "reflect" + "unsafe" +) + +func (ac *arrayContainer) writeTo(stream io.Writer) (int, error) { + buf := uint16SliceAsByteSlice(ac.content) + return stream.Write(buf) +} + +func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) { + buf := uint64SliceAsByteSlice(bc.bitmap) + return stream.Write(buf) +} + +// readFrom reads an arrayContainer from stream. +// PRE-REQUISITE: you must size the arrayContainer correctly (allocate b.content) +// *before* you call readFrom. We can't guess the size in the stream +// by this point. +func (ac *arrayContainer) readFrom(stream io.Reader) (int, error) { + buf := uint16SliceAsByteSlice(ac.content) + return io.ReadFull(stream, buf) +} + +func (bc *bitmapContainer) readFrom(stream io.Reader) (int, error) { + buf := uint64SliceAsByteSlice(bc.bitmap) + n, err := io.ReadFull(stream, buf) + bc.computeCardinality() + return n, err +} + +func uint64SliceAsByteSlice(slice []uint64) []byte { + // make a new slice header + header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) + + // update its capacity and length + header.Len *= 8 + header.Cap *= 8 + + // return it + return *(*[]byte)(unsafe.Pointer(&header)) +} + +func uint16SliceAsByteSlice(slice []uint16) []byte { + // make a new slice header + header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) + + // update its capacity and length + header.Len *= 2 + header.Cap *= 2 + + // return it + return *(*[]byte)(unsafe.Pointer(&header)) +} + +func (bc *bitmapContainer) asLittleEndianByteSlice() []byte { + return uint64SliceAsByteSlice(bc.bitmap) +} + +// Deserialization code follows + +func byteSliceAsUint16Slice(slice []byte) []uint16 { + if len(slice)%2 != 0 { + panic("Slice size should be divisible by 2") + } + + // make a new slice header + header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) + + // update its capacity and length + header.Len /= 2 + header.Cap /= 2 + + // return it + return *(*[]uint16)(unsafe.Pointer(&header)) +} + +func byteSliceAsUint64Slice(slice []byte) []uint64 { + if len(slice)%8 != 0 { + panic("Slice size should be divisible by 8") + } + + // make a new slice header + header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) + + // update its capacity and length + header.Len /= 8 + header.Cap /= 8 + + // return it + return *(*[]uint64)(unsafe.Pointer(&header)) +} + +func byteSliceAsInterval16Slice(slice []byte) []interval16 { + if len(slice)%4 != 0 { + panic("Slice size should be divisible by 4") + } + + // make a new slice header + header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) + + // update its capacity and length + header.Len /= 4 + header.Cap /= 4 + + // return it + return *(*[]interval16)(unsafe.Pointer(&header)) +} diff --git a/vendor/github.com/RoaringBitmap/roaring/serializationfuzz.go b/vendor/github.com/RoaringBitmap/roaring/serializationfuzz.go new file mode 100644 index 0000000000..5eaa22202c --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/serializationfuzz.go @@ -0,0 +1,21 @@ +// +build gofuzz + +package roaring + +import "bytes" + +func FuzzSerializationStream(data []byte) int { + newrb := NewBitmap() + if _, err := newrb.ReadFrom(bytes.NewReader(data)); err != nil { + return 0 + } + return 1 +} + +func FuzzSerializationBuffer(data []byte) int { + newrb := NewBitmap() + if _, err := newrb.FromBuffer(data); err != nil { + return 0 + } + return 1 +} diff --git a/vendor/github.com/RoaringBitmap/roaring/setutil.go b/vendor/github.com/RoaringBitmap/roaring/setutil.go new file mode 100644 index 0000000000..3e8c01dd1f --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/setutil.go @@ -0,0 +1,609 @@ +package roaring + +func equal(a, b []uint16) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} + +func difference(set1 []uint16, set2 []uint16, buffer []uint16) int { + if 0 == len(set2) { + for k := 0; k < len(set1); k++ { + buffer[k] = set1[k] + } + return len(set1) + } + if 0 == len(set1) { + return 0 + } + pos := 0 + k1 := 0 + k2 := 0 + buffer = buffer[:cap(buffer)] + s1 := set1[k1] + s2 := set2[k2] + for { + if s1 < s2 { + buffer[pos] = s1 + pos++ + k1++ + if k1 >= len(set1) { + break + } + s1 = set1[k1] + } else if s1 == s2 { + k1++ + k2++ + if k1 >= len(set1) { + break + } + s1 = set1[k1] + if k2 >= len(set2) { + for ; k1 < len(set1); k1++ { + buffer[pos] = set1[k1] + pos++ + } + break + } + s2 = set2[k2] + } else { // if (val1>val2) + k2++ + if k2 >= len(set2) { + for ; k1 < len(set1); k1++ { + buffer[pos] = set1[k1] + pos++ + } + break + } + s2 = set2[k2] + } + } + return pos + +} + +func exclusiveUnion2by2(set1 []uint16, set2 []uint16, buffer []uint16) int { + if 0 == len(set2) { + buffer = buffer[:len(set1)] + copy(buffer, set1[:]) + return len(set1) + } + if 0 == len(set1) { + buffer = buffer[:len(set2)] + copy(buffer, set2[:]) + return len(set2) + } + pos := 0 + k1 := 0 + k2 := 0 + s1 := set1[k1] + s2 := set2[k2] + buffer = buffer[:cap(buffer)] + for { + if s1 < s2 { + buffer[pos] = s1 + pos++ + k1++ + if k1 >= len(set1) { + for ; k2 < len(set2); k2++ { + buffer[pos] = set2[k2] + pos++ + } + break + } + s1 = set1[k1] + } else if s1 == s2 { + k1++ + k2++ + if k1 >= len(set1) { + for ; k2 < len(set2); k2++ { + buffer[pos] = set2[k2] + pos++ + } + break + } + if k2 >= len(set2) { + for ; k1 < len(set1); k1++ { + buffer[pos] = set1[k1] + pos++ + } + break + } + s1 = set1[k1] + s2 = set2[k2] + } else { // if (val1>val2) + buffer[pos] = s2 + pos++ + k2++ + if k2 >= len(set2) { + for ; k1 < len(set1); k1++ { + buffer[pos] = set1[k1] + pos++ + } + break + } + s2 = set2[k2] + } + } + return pos +} + +func union2by2(set1 []uint16, set2 []uint16, buffer []uint16) int { + pos := 0 + k1 := 0 + k2 := 0 + if 0 == len(set2) { + buffer = buffer[:len(set1)] + copy(buffer, set1[:]) + return len(set1) + } + if 0 == len(set1) { + buffer = buffer[:len(set2)] + copy(buffer, set2[:]) + return len(set2) + } + s1 := set1[k1] + s2 := set2[k2] + buffer = buffer[:cap(buffer)] + for { + if s1 < s2 { + buffer[pos] = s1 + pos++ + k1++ + if k1 >= len(set1) { + copy(buffer[pos:], set2[k2:]) + pos += len(set2) - k2 + break + } + s1 = set1[k1] + } else if s1 == s2 { + buffer[pos] = s1 + pos++ + k1++ + k2++ + if k1 >= len(set1) { + copy(buffer[pos:], set2[k2:]) + pos += len(set2) - k2 + break + } + if k2 >= len(set2) { + copy(buffer[pos:], set1[k1:]) + pos += len(set1) - k1 + break + } + s1 = set1[k1] + s2 = set2[k2] + } else { // if (set1[k1]>set2[k2]) + buffer[pos] = s2 + pos++ + k2++ + if k2 >= len(set2) { + copy(buffer[pos:], set1[k1:]) + pos += len(set1) - k1 + break + } + s2 = set2[k2] + } + } + return pos +} + +func union2by2Cardinality(set1 []uint16, set2 []uint16) int { + pos := 0 + k1 := 0 + k2 := 0 + if 0 == len(set2) { + return len(set1) + } + if 0 == len(set1) { + return len(set2) + } + s1 := set1[k1] + s2 := set2[k2] + for { + if s1 < s2 { + pos++ + k1++ + if k1 >= len(set1) { + pos += len(set2) - k2 + break + } + s1 = set1[k1] + } else if s1 == s2 { + pos++ + k1++ + k2++ + if k1 >= len(set1) { + pos += len(set2) - k2 + break + } + if k2 >= len(set2) { + pos += len(set1) - k1 + break + } + s1 = set1[k1] + s2 = set2[k2] + } else { // if (set1[k1]>set2[k2]) + pos++ + k2++ + if k2 >= len(set2) { + pos += len(set1) - k1 + break + } + s2 = set2[k2] + } + } + return pos +} + +func intersection2by2( + set1 []uint16, + set2 []uint16, + buffer []uint16) int { + + if len(set1)*64 < len(set2) { + return onesidedgallopingintersect2by2(set1, set2, buffer) + } else if len(set2)*64 < len(set1) { + return onesidedgallopingintersect2by2(set2, set1, buffer) + } else { + return localintersect2by2(set1, set2, buffer) + } +} + +func intersection2by2Cardinality( + set1 []uint16, + set2 []uint16) int { + + if len(set1)*64 < len(set2) { + return onesidedgallopingintersect2by2Cardinality(set1, set2) + } else if len(set2)*64 < len(set1) { + return onesidedgallopingintersect2by2Cardinality(set2, set1) + } else { + return localintersect2by2Cardinality(set1, set2) + } +} + +func intersects2by2( + set1 []uint16, + set2 []uint16) bool { + // could be optimized if one set is much larger than the other one + if (0 == len(set1)) || (0 == len(set2)) { + return false + } + k1 := 0 + k2 := 0 + s1 := set1[k1] + s2 := set2[k2] +mainwhile: + for { + + if s2 < s1 { + for { + k2++ + if k2 == len(set2) { + break mainwhile + } + s2 = set2[k2] + if s2 >= s1 { + break + } + } + } + if s1 < s2 { + for { + k1++ + if k1 == len(set1) { + break mainwhile + } + s1 = set1[k1] + if s1 >= s2 { + break + } + } + + } else { + // (set2[k2] == set1[k1]) + return true + } + } + return false +} + +func localintersect2by2( + set1 []uint16, + set2 []uint16, + buffer []uint16) int { + + if (0 == len(set1)) || (0 == len(set2)) { + return 0 + } + k1 := 0 + k2 := 0 + pos := 0 + buffer = buffer[:cap(buffer)] + s1 := set1[k1] + s2 := set2[k2] +mainwhile: + for { + if s2 < s1 { + for { + k2++ + if k2 == len(set2) { + break mainwhile + } + s2 = set2[k2] + if s2 >= s1 { + break + } + } + } + if s1 < s2 { + for { + k1++ + if k1 == len(set1) { + break mainwhile + } + s1 = set1[k1] + if s1 >= s2 { + break + } + } + + } else { + // (set2[k2] == set1[k1]) + buffer[pos] = s1 + pos++ + k1++ + if k1 == len(set1) { + break + } + s1 = set1[k1] + k2++ + if k2 == len(set2) { + break + } + s2 = set2[k2] + } + } + return pos +} + +func localintersect2by2Cardinality( + set1 []uint16, + set2 []uint16) int { + + if (0 == len(set1)) || (0 == len(set2)) { + return 0 + } + k1 := 0 + k2 := 0 + pos := 0 + s1 := set1[k1] + s2 := set2[k2] +mainwhile: + for { + if s2 < s1 { + for { + k2++ + if k2 == len(set2) { + break mainwhile + } + s2 = set2[k2] + if s2 >= s1 { + break + } + } + } + if s1 < s2 { + for { + k1++ + if k1 == len(set1) { + break mainwhile + } + s1 = set1[k1] + if s1 >= s2 { + break + } + } + + } else { + // (set2[k2] == set1[k1]) + pos++ + k1++ + if k1 == len(set1) { + break + } + s1 = set1[k1] + k2++ + if k2 == len(set2) { + break + } + s2 = set2[k2] + } + } + return pos +} + +func advanceUntil( + array []uint16, + pos int, + length int, + min uint16) int { + lower := pos + 1 + + if lower >= length || array[lower] >= min { + return lower + } + + spansize := 1 + + for lower+spansize < length && array[lower+spansize] < min { + spansize *= 2 + } + var upper int + if lower+spansize < length { + upper = lower + spansize + } else { + upper = length - 1 + } + + if array[upper] == min { + return upper + } + + if array[upper] < min { + // means + // array + // has no + // item + // >= min + // pos = array.length; + return length + } + + // we know that the next-smallest span was too small + lower += (spansize >> 1) + + mid := 0 + for lower+1 != upper { + mid = (lower + upper) >> 1 + if array[mid] == min { + return mid + } else if array[mid] < min { + lower = mid + } else { + upper = mid + } + } + return upper + +} + +func onesidedgallopingintersect2by2( + smallset []uint16, + largeset []uint16, + buffer []uint16) int { + + if 0 == len(smallset) { + return 0 + } + buffer = buffer[:cap(buffer)] + k1 := 0 + k2 := 0 + pos := 0 + s1 := largeset[k1] + s2 := smallset[k2] +mainwhile: + + for { + if s1 < s2 { + k1 = advanceUntil(largeset, k1, len(largeset), s2) + if k1 == len(largeset) { + break mainwhile + } + s1 = largeset[k1] + } + if s2 < s1 { + k2++ + if k2 == len(smallset) { + break mainwhile + } + s2 = smallset[k2] + } else { + + buffer[pos] = s2 + pos++ + k2++ + if k2 == len(smallset) { + break + } + s2 = smallset[k2] + k1 = advanceUntil(largeset, k1, len(largeset), s2) + if k1 == len(largeset) { + break mainwhile + } + s1 = largeset[k1] + } + + } + return pos +} + +func onesidedgallopingintersect2by2Cardinality( + smallset []uint16, + largeset []uint16) int { + + if 0 == len(smallset) { + return 0 + } + k1 := 0 + k2 := 0 + pos := 0 + s1 := largeset[k1] + s2 := smallset[k2] +mainwhile: + + for { + if s1 < s2 { + k1 = advanceUntil(largeset, k1, len(largeset), s2) + if k1 == len(largeset) { + break mainwhile + } + s1 = largeset[k1] + } + if s2 < s1 { + k2++ + if k2 == len(smallset) { + break mainwhile + } + s2 = smallset[k2] + } else { + + pos++ + k2++ + if k2 == len(smallset) { + break + } + s2 = smallset[k2] + k1 = advanceUntil(largeset, k1, len(largeset), s2) + if k1 == len(largeset) { + break mainwhile + } + s1 = largeset[k1] + } + + } + return pos +} + +func binarySearch(array []uint16, ikey uint16) int { + low := 0 + high := len(array) - 1 + for low+16 <= high { + middleIndex := int(uint32(low+high) >> 1) + middleValue := array[middleIndex] + if middleValue < ikey { + low = middleIndex + 1 + } else if middleValue > ikey { + high = middleIndex - 1 + } else { + return middleIndex + } + } + for ; low <= high; low++ { + val := array[low] + if val >= ikey { + if val == ikey { + return low + } + break + } + } + return -(low + 1) +} diff --git a/vendor/github.com/RoaringBitmap/roaring/shortiterator.go b/vendor/github.com/RoaringBitmap/roaring/shortiterator.go new file mode 100644 index 0000000000..ef0acbd1ca --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/shortiterator.go @@ -0,0 +1,21 @@ +package roaring + +type shortIterable interface { + hasNext() bool + next() uint16 +} + +type shortIterator struct { + slice []uint16 + loc int +} + +func (si *shortIterator) hasNext() bool { + return si.loc < len(si.slice) +} + +func (si *shortIterator) next() uint16 { + a := si.slice[si.loc] + si.loc++ + return a +} diff --git a/vendor/github.com/RoaringBitmap/roaring/smat.go b/vendor/github.com/RoaringBitmap/roaring/smat.go new file mode 100644 index 0000000000..9da4756349 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/smat.go @@ -0,0 +1,383 @@ +// +build gofuzz + +/* +# Instructions for smat testing for roaring + +[smat](https://github.com/mschoch/smat) is a framework that provides +state machine assisted fuzz testing. + +To run the smat tests for roaring... + +## Prerequisites + + $ go get github.com/dvyukov/go-fuzz/go-fuzz + $ go get github.com/dvyukov/go-fuzz/go-fuzz-build + +## Steps + +1. Generate initial smat corpus: +``` + go test -tags=gofuzz -run=TestGenerateSmatCorpus +``` + +2. Build go-fuzz test program with instrumentation: +``` + go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring +``` + +3. Run go-fuzz: +``` + go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200 +``` + +You should see output like... +``` +2016/09/16 13:58:35 slaves: 8, corpus: 1 (3s ago), crashers: 0, restarts: 1/0, execs: 0 (0/sec), cover: 0, uptime: 3s +2016/09/16 13:58:38 slaves: 8, corpus: 1 (6s ago), crashers: 0, restarts: 1/0, execs: 0 (0/sec), cover: 0, uptime: 6s +2016/09/16 13:58:41 slaves: 8, corpus: 1 (9s ago), crashers: 0, restarts: 1/44, execs: 44 (5/sec), cover: 0, uptime: 9s +2016/09/16 13:58:44 slaves: 8, corpus: 1 (12s ago), crashers: 0, restarts: 1/45, execs: 45 (4/sec), cover: 0, uptime: 12s +2016/09/16 13:58:47 slaves: 8, corpus: 1 (15s ago), crashers: 0, restarts: 1/46, execs: 46 (3/sec), cover: 0, uptime: 15s +2016/09/16 13:58:50 slaves: 8, corpus: 1 (18s ago), crashers: 0, restarts: 1/47, execs: 47 (3/sec), cover: 0, uptime: 18s +2016/09/16 13:58:53 slaves: 8, corpus: 1 (21s ago), crashers: 0, restarts: 1/63, execs: 63 (3/sec), cover: 0, uptime: 21s +2016/09/16 13:58:56 slaves: 8, corpus: 1 (24s ago), crashers: 0, restarts: 1/65, execs: 65 (3/sec), cover: 0, uptime: 24s +2016/09/16 13:58:59 slaves: 8, corpus: 1 (27s ago), crashers: 0, restarts: 1/66, execs: 66 (2/sec), cover: 0, uptime: 27s +2016/09/16 13:59:02 slaves: 8, corpus: 1 (30s ago), crashers: 0, restarts: 1/67, execs: 67 (2/sec), cover: 0, uptime: 30s +2016/09/16 13:59:05 slaves: 8, corpus: 1 (33s ago), crashers: 0, restarts: 1/83, execs: 83 (3/sec), cover: 0, uptime: 33s +2016/09/16 13:59:08 slaves: 8, corpus: 1 (36s ago), crashers: 0, restarts: 1/84, execs: 84 (2/sec), cover: 0, uptime: 36s +2016/09/16 13:59:11 slaves: 8, corpus: 2 (0s ago), crashers: 0, restarts: 1/85, execs: 85 (2/sec), cover: 0, uptime: 39s +2016/09/16 13:59:14 slaves: 8, corpus: 17 (2s ago), crashers: 0, restarts: 1/86, execs: 86 (2/sec), cover: 480, uptime: 42s +2016/09/16 13:59:17 slaves: 8, corpus: 17 (5s ago), crashers: 0, restarts: 1/66, execs: 132 (3/sec), cover: 487, uptime: 45s +2016/09/16 13:59:20 slaves: 8, corpus: 17 (8s ago), crashers: 0, restarts: 1/440, execs: 2645 (55/sec), cover: 487, uptime: 48s + +``` + +Let it run, and if the # of crashers is > 0, check out the reports in +the workdir where you should be able to find the panic goroutine stack +traces. +*/ + +package roaring + +import ( + "fmt" + "sort" + + "github.com/mschoch/smat" + "github.com/willf/bitset" +) + +// fuzz test using state machine driven by byte stream. +func FuzzSmat(data []byte) int { + return smat.Fuzz(&smatContext{}, smat.ActionID('S'), smat.ActionID('T'), + smatActionMap, data) +} + +var smatDebug = false + +func smatLog(prefix, format string, args ...interface{}) { + if smatDebug { + fmt.Print(prefix) + fmt.Printf(format, args...) + } +} + +type smatContext struct { + pairs []*smatPair + + // Two registers, x & y. + x int + y int + + actions int +} + +type smatPair struct { + bm *Bitmap + bs *bitset.BitSet +} + +// ------------------------------------------------------------------ + +var smatActionMap = smat.ActionMap{ + smat.ActionID('X'): smatAction("x++", smatWrap(func(c *smatContext) { c.x++ })), + smat.ActionID('x'): smatAction("x--", smatWrap(func(c *smatContext) { c.x-- })), + smat.ActionID('Y'): smatAction("y++", smatWrap(func(c *smatContext) { c.y++ })), + smat.ActionID('y'): smatAction("y--", smatWrap(func(c *smatContext) { c.y-- })), + smat.ActionID('*'): smatAction("x*y", smatWrap(func(c *smatContext) { c.x = c.x * c.y })), + smat.ActionID('<'): smatAction("x<<", smatWrap(func(c *smatContext) { c.x = c.x << 1 })), + + smat.ActionID('^'): smatAction("swap", smatWrap(func(c *smatContext) { c.x, c.y = c.y, c.x })), + + smat.ActionID('['): smatAction(" pushPair", smatWrap(smatPushPair)), + smat.ActionID(']'): smatAction(" popPair", smatWrap(smatPopPair)), + + smat.ActionID('B'): smatAction(" setBit", smatWrap(smatSetBit)), + smat.ActionID('b'): smatAction(" removeBit", smatWrap(smatRemoveBit)), + + smat.ActionID('o'): smatAction(" or", smatWrap(smatOr)), + smat.ActionID('a'): smatAction(" and", smatWrap(smatAnd)), + + smat.ActionID('#'): smatAction(" cardinality", smatWrap(smatCardinality)), + + smat.ActionID('O'): smatAction(" orCardinality", smatWrap(smatOrCardinality)), + smat.ActionID('A'): smatAction(" andCardinality", smatWrap(smatAndCardinality)), + + smat.ActionID('c'): smatAction(" clear", smatWrap(smatClear)), + smat.ActionID('r'): smatAction(" runOptimize", smatWrap(smatRunOptimize)), + + smat.ActionID('e'): smatAction(" isEmpty", smatWrap(smatIsEmpty)), + + smat.ActionID('i'): smatAction(" intersects", smatWrap(smatIntersects)), + + smat.ActionID('f'): smatAction(" flip", smatWrap(smatFlip)), + + smat.ActionID('-'): smatAction(" difference", smatWrap(smatDifference)), +} + +var smatRunningPercentActions []smat.PercentAction + +func init() { + var ids []int + for actionId := range smatActionMap { + ids = append(ids, int(actionId)) + } + sort.Ints(ids) + + pct := 100 / len(smatActionMap) + for _, actionId := range ids { + smatRunningPercentActions = append(smatRunningPercentActions, + smat.PercentAction{pct, smat.ActionID(actionId)}) + } + + smatActionMap[smat.ActionID('S')] = smatAction("SETUP", smatSetupFunc) + smatActionMap[smat.ActionID('T')] = smatAction("TEARDOWN", smatTeardownFunc) +} + +// We only have one smat state: running. +func smatRunning(next byte) smat.ActionID { + return smat.PercentExecute(next, smatRunningPercentActions...) +} + +func smatAction(name string, f func(ctx smat.Context) (smat.State, error)) func(smat.Context) (smat.State, error) { + return func(ctx smat.Context) (smat.State, error) { + c := ctx.(*smatContext) + c.actions++ + + smatLog(" ", "%s\n", name) + + return f(ctx) + } +} + +// Creates an smat action func based on a simple callback. +func smatWrap(cb func(c *smatContext)) func(smat.Context) (next smat.State, err error) { + return func(ctx smat.Context) (next smat.State, err error) { + c := ctx.(*smatContext) + cb(c) + return smatRunning, nil + } +} + +// Invokes a callback function with the input v bounded to len(c.pairs). +func (c *smatContext) withPair(v int, cb func(*smatPair)) { + if len(c.pairs) > 0 { + if v < 0 { + v = -v + } + v = v % len(c.pairs) + cb(c.pairs[v]) + } +} + +// ------------------------------------------------------------------ + +func smatSetupFunc(ctx smat.Context) (next smat.State, err error) { + return smatRunning, nil +} + +func smatTeardownFunc(ctx smat.Context) (next smat.State, err error) { + return nil, err +} + +// ------------------------------------------------------------------ + +func smatPushPair(c *smatContext) { + c.pairs = append(c.pairs, &smatPair{ + bm: NewBitmap(), + bs: bitset.New(100), + }) +} + +func smatPopPair(c *smatContext) { + if len(c.pairs) > 0 { + c.pairs = c.pairs[0 : len(c.pairs)-1] + } +} + +func smatSetBit(c *smatContext) { + c.withPair(c.x, func(p *smatPair) { + y := uint32(c.y) + p.bm.AddInt(int(y)) + p.bs.Set(uint(y)) + p.checkEquals() + }) +} + +func smatRemoveBit(c *smatContext) { + c.withPair(c.x, func(p *smatPair) { + y := uint32(c.y) + p.bm.Remove(y) + p.bs.Clear(uint(y)) + p.checkEquals() + }) +} + +func smatAnd(c *smatContext) { + c.withPair(c.x, func(px *smatPair) { + c.withPair(c.y, func(py *smatPair) { + px.bm.And(py.bm) + px.bs = px.bs.Intersection(py.bs) + px.checkEquals() + py.checkEquals() + }) + }) +} + +func smatOr(c *smatContext) { + c.withPair(c.x, func(px *smatPair) { + c.withPair(c.y, func(py *smatPair) { + px.bm.Or(py.bm) + px.bs = px.bs.Union(py.bs) + px.checkEquals() + py.checkEquals() + }) + }) +} + +func smatAndCardinality(c *smatContext) { + c.withPair(c.x, func(px *smatPair) { + c.withPair(c.y, func(py *smatPair) { + c0 := px.bm.AndCardinality(py.bm) + c1 := px.bs.IntersectionCardinality(py.bs) + if c0 != uint64(c1) { + panic("expected same add cardinality") + } + px.checkEquals() + py.checkEquals() + }) + }) +} + +func smatOrCardinality(c *smatContext) { + c.withPair(c.x, func(px *smatPair) { + c.withPair(c.y, func(py *smatPair) { + c0 := px.bm.OrCardinality(py.bm) + c1 := px.bs.UnionCardinality(py.bs) + if c0 != uint64(c1) { + panic("expected same or cardinality") + } + px.checkEquals() + py.checkEquals() + }) + }) +} + +func smatRunOptimize(c *smatContext) { + c.withPair(c.x, func(px *smatPair) { + px.bm.RunOptimize() + px.checkEquals() + }) +} + +func smatClear(c *smatContext) { + c.withPair(c.x, func(px *smatPair) { + px.bm.Clear() + px.bs = px.bs.ClearAll() + px.checkEquals() + }) +} + +func smatCardinality(c *smatContext) { + c.withPair(c.x, func(px *smatPair) { + c0 := px.bm.GetCardinality() + c1 := px.bs.Count() + if c0 != uint64(c1) { + panic("expected same cardinality") + } + }) +} + +func smatIsEmpty(c *smatContext) { + c.withPair(c.x, func(px *smatPair) { + c0 := px.bm.IsEmpty() + c1 := px.bs.None() + if c0 != c1 { + panic("expected same is empty") + } + }) +} + +func smatIntersects(c *smatContext) { + c.withPair(c.x, func(px *smatPair) { + c.withPair(c.y, func(py *smatPair) { + v0 := px.bm.Intersects(py.bm) + v1 := px.bs.IntersectionCardinality(py.bs) > 0 + if v0 != v1 { + panic("intersects not equal") + } + + px.checkEquals() + py.checkEquals() + }) + }) +} + +func smatFlip(c *smatContext) { + c.withPair(c.x, func(p *smatPair) { + y := uint32(c.y) + p.bm.Flip(uint64(y), uint64(y)+1) + p.bs = p.bs.Flip(uint(y)) + p.checkEquals() + }) +} + +func smatDifference(c *smatContext) { + c.withPair(c.x, func(px *smatPair) { + c.withPair(c.y, func(py *smatPair) { + px.bm.AndNot(py.bm) + px.bs = px.bs.Difference(py.bs) + px.checkEquals() + py.checkEquals() + }) + }) +} + +func (p *smatPair) checkEquals() { + if !p.equalsBitSet(p.bs, p.bm) { + panic("bitset mismatch") + } +} + +func (p *smatPair) equalsBitSet(a *bitset.BitSet, b *Bitmap) bool { + for i, e := a.NextSet(0); e; i, e = a.NextSet(i + 1) { + if !b.ContainsInt(int(i)) { + fmt.Printf("in a bitset, not b bitmap, i: %d\n", i) + fmt.Printf(" a bitset: %s\n b bitmap: %s\n", + a.String(), b.String()) + return false + } + } + + i := b.Iterator() + for i.HasNext() { + v := i.Next() + if !a.Test(uint(v)) { + fmt.Printf("in b bitmap, not a bitset, v: %d\n", v) + fmt.Printf(" a bitset: %s\n b bitmap: %s\n", + a.String(), b.String()) + return false + } + } + + return true +} diff --git a/vendor/github.com/RoaringBitmap/roaring/util.go b/vendor/github.com/RoaringBitmap/roaring/util.go new file mode 100644 index 0000000000..d212660d58 --- /dev/null +++ b/vendor/github.com/RoaringBitmap/roaring/util.go @@ -0,0 +1,315 @@ +package roaring + +import ( + "math/rand" + "sort" +) + +const ( + arrayDefaultMaxSize = 4096 // containers with 4096 or fewer integers should be array containers. + arrayLazyLowerBound = 1024 + maxCapacity = 1 << 16 + serialCookieNoRunContainer = 12346 // only arrays and bitmaps + invalidCardinality = -1 + serialCookie = 12347 // runs, arrays, and bitmaps + noOffsetThreshold = 4 + + // Compute wordSizeInBytes, the size of a word in bytes. + _m = ^uint64(0) + _logS = _m>>8&1 + _m>>16&1 + _m>>32&1 + wordSizeInBytes = 1 << _logS + + // other constants used in ctz_generic.go + wordSizeInBits = wordSizeInBytes << 3 // word size in bits +) + +const maxWord = 1<<wordSizeInBits - 1 + +// doesn't apply to runContainers +func getSizeInBytesFromCardinality(card int) int { + if card > arrayDefaultMaxSize { + // bitmapContainer + return maxCapacity / 8 + } + // arrayContainer + return 2 * card +} + +func fill(arr []uint64, val uint64) { + for i := range arr { + arr[i] = val + } +} +func fillRange(arr []uint64, start, end int, val uint64) { + for i := start; i < end; i++ { + arr[i] = val + } +} + +func fillArrayAND(container []uint16, bitmap1, bitmap2 []uint64) { + if len(bitmap1) != len(bitmap2) { + panic("array lengths don't match") + } + // TODO: rewrite in assembly + pos := 0 + for k := range bitmap1 { + bitset := bitmap1[k] & bitmap2[k] + for bitset != 0 { + t := bitset & -bitset + container[pos] = uint16((k*64 + int(popcount(t-1)))) + pos = pos + 1 + bitset ^= t + } + } +} + +func fillArrayANDNOT(container []uint16, bitmap1, bitmap2 []uint64) { + if len(bitmap1) != len(bitmap2) { + panic("array lengths don't match") + } + // TODO: rewrite in assembly + pos := 0 + for k := range bitmap1 { + bitset := bitmap1[k] &^ bitmap2[k] + for bitset != 0 { + t := bitset & -bitset + container[pos] = uint16((k*64 + int(popcount(t-1)))) + pos = pos + 1 + bitset ^= t + } + } +} + +func fillArrayXOR(container []uint16, bitmap1, bitmap2 []uint64) { + if len(bitmap1) != len(bitmap2) { + panic("array lengths don't match") + } + // TODO: rewrite in assembly + pos := 0 + for k := 0; k < len(bitmap1); k++ { + bitset := bitmap1[k] ^ bitmap2[k] + for bitset != 0 { + t := bitset & -bitset + container[pos] = uint16((k*64 + int(popcount(t-1)))) + pos = pos + 1 + bitset ^= t + } + } +} + +func highbits(x uint32) uint16 { + return uint16(x >> 16) +} +func lowbits(x uint32) uint16 { + return uint16(x & 0xFFFF) +} + +const maxLowBit = 0xFFFF + +func flipBitmapRange(bitmap []uint64, start int, end int) { + if start >= end { + return + } + firstword := start / 64 + endword := (end - 1) / 64 + bitmap[firstword] ^= ^(^uint64(0) << uint(start%64)) + for i := firstword; i < endword; i++ { + //p("flipBitmapRange on i=%v", i) + bitmap[i] = ^bitmap[i] + } + bitmap[endword] ^= ^uint64(0) >> (uint(-end) % 64) +} + +func resetBitmapRange(bitmap []uint64, start int, end int) { + if start >= end { + return + } + firstword := start / 64 + endword := (end - 1) / 64 + if firstword == endword { + bitmap[firstword] &= ^((^uint64(0) << uint(start%64)) & (^uint64(0) >> (uint(-end) % 64))) + return + } + bitmap[firstword] &= ^(^uint64(0) << uint(start%64)) + for i := firstword + 1; i < endword; i++ { + bitmap[i] = 0 + } + bitmap[endword] &= ^(^uint64(0) >> (uint(-end) % 64)) + +} + +func setBitmapRange(bitmap []uint64, start int, end int) { + if start >= end { + return + } + firstword := start / 64 + endword := (end - 1) / 64 + if firstword == endword { + bitmap[firstword] |= (^uint64(0) << uint(start%64)) & (^uint64(0) >> (uint(-end) % 64)) + return + } + bitmap[firstword] |= ^uint64(0) << uint(start%64) + for i := firstword + 1; i < endword; i++ { + bitmap[i] = ^uint64(0) + } + bitmap[endword] |= ^uint64(0) >> (uint(-end) % 64) +} + +func flipBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int { + before := wordCardinalityForBitmapRange(bitmap, start, end) + flipBitmapRange(bitmap, start, end) + after := wordCardinalityForBitmapRange(bitmap, start, end) + return int(after - before) +} + +func resetBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int { + before := wordCardinalityForBitmapRange(bitmap, start, end) + resetBitmapRange(bitmap, start, end) + after := wordCardinalityForBitmapRange(bitmap, start, end) + return int(after - before) +} + +func setBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int { + before := wordCardinalityForBitmapRange(bitmap, start, end) + setBitmapRange(bitmap, start, end) + after := wordCardinalityForBitmapRange(bitmap, start, end) + return int(after - before) +} + +func wordCardinalityForBitmapRange(bitmap []uint64, start int, end int) uint64 { + answer := uint64(0) + if start >= end { + return answer + } + firstword := start / 64 + endword := (end - 1) / 64 + for i := firstword; i <= endword; i++ { + answer += popcount(bitmap[i]) + } + return answer +} + +func selectBitPosition(w uint64, j int) int { + seen := 0 + + // Divide 64bit + part := w & 0xFFFFFFFF + n := popcount(part) + if n <= uint64(j) { + part = w >> 32 + seen += 32 + j -= int(n) + } + w = part + + // Divide 32bit + part = w & 0xFFFF + n = popcount(part) + if n <= uint64(j) { + part = w >> 16 + seen += 16 + j -= int(n) + } + w = part + + // Divide 16bit + part = w & 0xFF + n = popcount(part) + if n <= uint64(j) { + part = w >> 8 + seen += 8 + j -= int(n) + } + w = part + + // Lookup in final byte + var counter uint + for counter = 0; counter < 8; counter++ { + j -= int((w >> counter) & 1) + if j < 0 { + break + } + } + return seen + int(counter) + +} + +func panicOn(err error) { + if err != nil { + panic(err) + } +} + +type ph struct { + orig int + rand int +} + +type pha []ph + +func (p pha) Len() int { return len(p) } +func (p pha) Less(i, j int) bool { return p[i].rand < p[j].rand } +func (p pha) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +func getRandomPermutation(n int) []int { + r := make([]ph, n) + for i := 0; i < n; i++ { + r[i].orig = i + r[i].rand = rand.Intn(1 << 29) + } + sort.Sort(pha(r)) + m := make([]int, n) + for i := range m { + m[i] = r[i].orig + } + return m +} + +func minOfInt(a, b int) int { + if a < b { + return a + } + return b +} + +func maxOfInt(a, b int) int { + if a > b { + return a + } + return b +} + +func maxOfUint16(a, b uint16) uint16 { + if a > b { + return a + } + return b +} + +func minOfUint16(a, b uint16) uint16 { + if a < b { + return a + } + return b +} + +func maxInt(a, b int) int { + if a > b { + return a + } + return b +} + +func maxUint16(a, b uint16) uint16 { + if a > b { + return a + } + return b +} + +func minUint16(a, b uint16) uint16 { + if a < b { + return a + } + return b +} diff --git a/vendor/github.com/Smerity/govarint/LICENSE b/vendor/github.com/Smerity/govarint/LICENSE new file mode 100644 index 0000000000..be09cac865 --- /dev/null +++ b/vendor/github.com/Smerity/govarint/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015 Stephen Merity + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/vendor/github.com/Smerity/govarint/README.md b/vendor/github.com/Smerity/govarint/README.md new file mode 100644 index 0000000000..5b82dccb2f --- /dev/null +++ b/vendor/github.com/Smerity/govarint/README.md @@ -0,0 +1,67 @@ +# Govarint + +This project aims to provide a simple API for the performant encoding and decoding of 32 and 64 bit integers using a variety of algorithms. + +[![](http://i.imgur.com/mpgC23U.jpg)](https://www.flickr.com/photos/tsevis/8648521649/) + +## Usage + +Each integer encoding algorithm conforms to an encoding and decoding interface. +The interfaces also specify the size of the unsigned integer, either 32 or 64 bits, and will be referred to as XX below. +To create an encoder: + + NewU32Base128Encoder(w io.Writer) + NewU64Base128Encoder(w io.Writer) + NewU32GroupVarintEncoder(w io.Writer) + +For encoders, the only two commands are `PutUXX` and `Close`. +`Close` must be called as some integer encoding algorithms write in multiples. + + var buf bytes.Buffer + enc := NewU32Base128Encoder(&buf) + enc.PutU32(117) + enc.PutU32(343) + enc.Close() + +To create a decoder: + + NewU32Base128Decoder(r io.ByteReader) + NewU64Base128Decoder(r io.ByteReader) + NewU32GroupVarintDecoder(r io.ByteReader) + +For decoders, the only command is `GetUXX`. +`GetUXX` returns the value and any potential errors. +When reading is complete, `GetUXX` will return an `EOF` (End Of File). + + dec := NewU32Base128Decoder(&buf) + x, err := dec.GetU32() + +## Use Cases + +Using fixed width integers, such as uint32 and uint64, usually waste large amounts of space, especially when encoding small values. +Optimally, smaller numbers should take less space to represent. + +Using integer encoding algorithms is especially common in specific applications, such as storing edge lists or indexes for search engines. +In these situations, you have a sorted list of numbers that you want to keep as compactly as possible in memory. +Additionally, by storing only the difference between the given number and the previous (delta encoding), the numbers are quite small, and thus compress well. + +For an explicit example, the Web Data Commons Hyperlink Graph contains 128 billion edges linking page A to page B, where each page is represented by a 32 bit integer. +By converting all these edges to 64 bit integers (32 | 32), sorting them, and then using delta encoding, memory usage can be reduced from 64 bits per edge down to only 9 bits per edge using the Base128 integer encoding algorithm. +This figure improves even further if compressed using conventional compression algorithms (3 bits per edge). + +## Encodings supported + +`govarint` supports: + ++ Base128 [32, 64] - each byte uses 7 bits for encoding the integer and 1 bit for indicating if the integer requires another byte ++ Group Varint [32] - integers are encoded in blocks of four - one byte encodes the size of the following four integers, then the values of the four integers follows + +Group Varint consistently beats Base128 in decompression speed but Base128 may offer improved compression ratios depending on the distribution of the supplied integers. + +## Tests + + go test -v -bench=. + +## License + +MIT License, as per `LICENSE` diff --git a/vendor/github.com/Smerity/govarint/govarint.go b/vendor/github.com/Smerity/govarint/govarint.go new file mode 100644 index 0000000000..61328a337b --- /dev/null +++ b/vendor/github.com/Smerity/govarint/govarint.go @@ -0,0 +1,229 @@ +package govarint + +import "encoding/binary" +import "io" + +type U32VarintEncoder interface { + PutU32(x uint32) int + Close() +} + +type U32VarintDecoder interface { + GetU32() (uint32, error) +} + +/// + +type U64VarintEncoder interface { + PutU64(x uint64) int + Close() +} + +type U64VarintDecoder interface { + GetU64() (uint64, error) +} + +/// + +type U32GroupVarintEncoder struct { + w io.Writer + index int + store [4]uint32 + temp [17]byte +} + +func NewU32GroupVarintEncoder(w io.Writer) *U32GroupVarintEncoder { return &U32GroupVarintEncoder{w: w} } + +func (b *U32GroupVarintEncoder) Flush() (int, error) { + // TODO: Is it more efficient to have a tailored version that's called only in Close()? + // If index is zero, there are no integers to flush + if b.index == 0 { + return 0, nil + } + // In the case we're flushing (the group isn't of size four), the non-values should be zero + // This ensures the unused entries are all zero in the sizeByte + for i := b.index; i < 4; i++ { + b.store[i] = 0 + } + length := 1 + // We need to reset the size byte to zero as we only bitwise OR into it, we don't overwrite it + b.temp[0] = 0 + for i, x := range b.store { + size := byte(0) + shifts := []byte{24, 16, 8, 0} + for _, shift := range shifts { + // Always writes at least one byte -- the first one (shift = 0) + // Will write more bytes until the rest of the integer is all zeroes + if (x>>shift) != 0 || shift == 0 { + size += 1 + b.temp[length] = byte(x >> shift) + length += 1 + } + } + // We store the size in two of the eight bits in the first byte (sizeByte) + // 0 means there is one byte in total, hence why we subtract one from size + b.temp[0] |= (size - 1) << (uint8(3-i) * 2) + } + // If we're flushing without a full group of four, remove the unused bytes we computed + // This enables us to realize it's a partial group on decoding thanks to EOF + if b.index != 4 { + length -= 4 - b.index + } + _, err := b.w.Write(b.temp[:length]) + return length, err +} + +func (b *U32GroupVarintEncoder) PutU32(x uint32) (int, error) { + bytesWritten := 0 + b.store[b.index] = x + b.index += 1 + if b.index == 4 { + n, err := b.Flush() + if err != nil { + return n, err + } + bytesWritten += n + b.index = 0 + } + return bytesWritten, nil +} + +func (b *U32GroupVarintEncoder) Close() { + // On Close, we flush any remaining values that might not have been in a full group + b.Flush() +} + +/// + +type U32GroupVarintDecoder struct { + r io.ByteReader + group [4]uint32 + pos int + finished bool + capacity int +} + +func NewU32GroupVarintDecoder(r io.ByteReader) *U32GroupVarintDecoder { + return &U32GroupVarintDecoder{r: r, pos: 4, capacity: 4} +} + +func (b *U32GroupVarintDecoder) getGroup() error { + // We should always receive a sizeByte if there are more values to read + sizeByte, err := b.r.ReadByte() + if err != nil { + return err + } + // Calculate the size of the four incoming 32 bit integers + // 0b00 means 1 byte to read, 0b01 = 2, etc + b.group[0] = uint32((sizeByte >> 6) & 3) + b.group[1] = uint32((sizeByte >> 4) & 3) + b.group[2] = uint32((sizeByte >> 2) & 3) + b.group[3] = uint32(sizeByte & 3) + // + for index, size := range b.group { + b.group[index] = 0 + // Any error that occurs in earlier byte reads should be repeated at the end one + // Hence we only catch and report the final ReadByte's error + var err error + switch size { + case 0: + var x byte + x, err = b.r.ReadByte() + b.group[index] = uint32(x) + case 1: + var x, y byte + x, _ = b.r.ReadByte() + y, err = b.r.ReadByte() + b.group[index] = uint32(x)<<8 | uint32(y) + case 2: + var x, y, z byte + x, _ = b.r.ReadByte() + y, _ = b.r.ReadByte() + z, err = b.r.ReadByte() + b.group[index] = uint32(x)<<16 | uint32(y)<<8 | uint32(z) + case 3: + var x, y, z, zz byte + x, _ = b.r.ReadByte() + y, _ = b.r.ReadByte() + z, _ = b.r.ReadByte() + zz, err = b.r.ReadByte() + b.group[index] = uint32(x)<<24 | uint32(y)<<16 | uint32(z)<<8 | uint32(zz) + } + if err != nil { + if err == io.EOF { + // If we hit EOF here, we have found a partial group + // We've return any valid entries we have read and return EOF once we run out + b.capacity = index + b.finished = true + break + } else { + return err + } + } + } + // Reset the pos pointer to the beginning of the read values + b.pos = 0 + return nil +} + +func (b *U32GroupVarintDecoder) GetU32() (uint32, error) { + // Check if we have any more values to give out - if not, let's get them + if b.pos == b.capacity { + // If finished is set, there is nothing else to do + if b.finished { + return 0, io.EOF + } + err := b.getGroup() + if err != nil { + return 0, err + } + } + // Increment pointer and return the value stored at that point + b.pos += 1 + return b.group[b.pos-1], nil +} + +/// + +type Base128Encoder struct { + w io.Writer + tmpBytes []byte +} + +func NewU32Base128Encoder(w io.Writer) *Base128Encoder { + return &Base128Encoder{w: w, tmpBytes: make([]byte, binary.MaxVarintLen32)} +} +func NewU64Base128Encoder(w io.Writer) *Base128Encoder { + return &Base128Encoder{w: w, tmpBytes: make([]byte, binary.MaxVarintLen64)} +} + +func (b *Base128Encoder) PutU32(x uint32) (int, error) { + writtenBytes := binary.PutUvarint(b.tmpBytes, uint64(x)) + return b.w.Write(b.tmpBytes[:writtenBytes]) +} + +func (b *Base128Encoder) PutU64(x uint64) (int, error) { + writtenBytes := binary.PutUvarint(b.tmpBytes, x) + return b.w.Write(b.tmpBytes[:writtenBytes]) +} + +func (b *Base128Encoder) Close() { +} + +/// + +type Base128Decoder struct { + r io.ByteReader +} + +func NewU32Base128Decoder(r io.ByteReader) *Base128Decoder { return &Base128Decoder{r: r} } +func NewU64Base128Decoder(r io.ByteReader) *Base128Decoder { return &Base128Decoder{r: r} } + +func (b *Base128Decoder) GetU32() (uint32, error) { + v, err := binary.ReadUvarint(b.r) + return uint32(v), err +} + +func (b *Base128Decoder) GetU64() (uint64, error) { + return binary.ReadUvarint(b.r) +} diff --git a/vendor/github.com/blevesearch/bleve/README.md b/vendor/github.com/blevesearch/bleve/README.md index fa11f906d5..7c1a7c7c46 100644 --- a/vendor/github.com/blevesearch/bleve/README.md +++ b/vendor/github.com/blevesearch/bleve/README.md @@ -1,6 +1,6 @@ # ![bleve](docs/bleve.png) bleve -[![Build Status](https://travis-ci.org/blevesearch/bleve.svg?branch=master)](https://travis-ci.org/blevesearch/bleve) [![Coverage Status](https://coveralls.io/repos/blevesearch/bleve/badge.png?branch=master)](https://coveralls.io/r/blevesearch/bleve?branch=master) [![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve) +[![Build Status](https://travis-ci.org/blevesearch/bleve.svg?branch=master)](https://travis-ci.org/blevesearch/bleve) [![Coverage Status](https://coveralls.io/repos/github/blevesearch/bleve/badge.svg?branch=master)](https://coveralls.io/github/blevesearch/bleve?branch=master) [![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve) [![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![codebeat](https://codebeat.co/badges/38a7cbc9-9cf5-41c0-a315-0746178230f4)](https://codebeat.co/projects/github-com-blevesearch-bleve) [![Go Report Card](https://goreportcard.com/badge/blevesearch/bleve)](https://goreportcard.com/report/blevesearch/bleve) diff --git a/vendor/github.com/blevesearch/bleve/config.go b/vendor/github.com/blevesearch/bleve/config.go index 74d407fdd1..482efb408f 100644 --- a/vendor/github.com/blevesearch/bleve/config.go +++ b/vendor/github.com/blevesearch/bleve/config.go @@ -25,6 +25,9 @@ import ( "github.com/blevesearch/bleve/index/upsidedown" "github.com/blevesearch/bleve/registry" "github.com/blevesearch/bleve/search/highlight/highlighter/html" + + // force import of scorch so its accessible by default + _ "github.com/blevesearch/bleve/index/scorch" ) var bleveExpVar = expvar.NewMap("bleve") diff --git a/vendor/github.com/blevesearch/bleve/config_app.go b/vendor/github.com/blevesearch/bleve/config_app.go new file mode 100644 index 0000000000..112d0b600d --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/config_app.go @@ -0,0 +1,23 @@ +// Copyright (c) 2014 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build appengine appenginevm + +package bleve + +// in the appengine environment we cannot support disk based indexes +// so we do no extra configuration in this method +func initDisk() { + +} diff --git a/vendor/github.com/blevesearch/bleve/document/document.go b/vendor/github.com/blevesearch/bleve/document/document.go index ed36b127f9..c37585c661 100644 --- a/vendor/github.com/blevesearch/bleve/document/document.go +++ b/vendor/github.com/blevesearch/bleve/document/document.go @@ -20,7 +20,6 @@ type Document struct { ID string `json:"id"` Fields []Field `json:"fields"` CompositeFields []*CompositeField - Number uint64 `json:"-"` } func NewDocument(id string) *Document { diff --git a/vendor/github.com/blevesearch/bleve/document/field_boolean.go b/vendor/github.com/blevesearch/bleve/document/field_boolean.go index 668b431a1a..c226374c07 100644 --- a/vendor/github.com/blevesearch/bleve/document/field_boolean.go +++ b/vendor/github.com/blevesearch/bleve/document/field_boolean.go @@ -20,7 +20,7 @@ import ( "github.com/blevesearch/bleve/analysis" ) -const DefaultBooleanIndexingOptions = StoreField | IndexField +const DefaultBooleanIndexingOptions = StoreField | IndexField | DocValues type BooleanField struct { name string diff --git a/vendor/github.com/blevesearch/bleve/document/field_datetime.go b/vendor/github.com/blevesearch/bleve/document/field_datetime.go index 6783d53d06..1db068c87b 100644 --- a/vendor/github.com/blevesearch/bleve/document/field_datetime.go +++ b/vendor/github.com/blevesearch/bleve/document/field_datetime.go @@ -23,7 +23,7 @@ import ( "github.com/blevesearch/bleve/numeric" ) -const DefaultDateTimeIndexingOptions = StoreField | IndexField +const DefaultDateTimeIndexingOptions = StoreField | IndexField | DocValues const DefaultDateTimePrecisionStep uint = 4 var MinTimeRepresentable = time.Unix(0, math.MinInt64) diff --git a/vendor/github.com/blevesearch/bleve/document/field_numeric.go b/vendor/github.com/blevesearch/bleve/document/field_numeric.go index 7faae2bbb8..e32993c887 100644 --- a/vendor/github.com/blevesearch/bleve/document/field_numeric.go +++ b/vendor/github.com/blevesearch/bleve/document/field_numeric.go @@ -21,7 +21,7 @@ import ( "github.com/blevesearch/bleve/numeric" ) -const DefaultNumericIndexingOptions = StoreField | IndexField +const DefaultNumericIndexingOptions = StoreField | IndexField | DocValues const DefaultPrecisionStep uint = 4 diff --git a/vendor/github.com/blevesearch/bleve/document/field_text.go b/vendor/github.com/blevesearch/bleve/document/field_text.go index 37873d36e7..5f7a3ab648 100644 --- a/vendor/github.com/blevesearch/bleve/document/field_text.go +++ b/vendor/github.com/blevesearch/bleve/document/field_text.go @@ -20,7 +20,7 @@ import ( "github.com/blevesearch/bleve/analysis" ) -const DefaultTextIndexingOptions = IndexField +const DefaultTextIndexingOptions = IndexField | DocValues type TextField struct { name string diff --git a/vendor/github.com/blevesearch/bleve/document/indexing_options.go b/vendor/github.com/blevesearch/bleve/document/indexing_options.go index 5d562c1de9..44498a8e90 100644 --- a/vendor/github.com/blevesearch/bleve/document/indexing_options.go +++ b/vendor/github.com/blevesearch/bleve/document/indexing_options.go @@ -20,6 +20,7 @@ const ( IndexField IndexingOptions = 1 << iota StoreField IncludeTermVectors + DocValues ) func (o IndexingOptions) IsIndexed() bool { @@ -34,6 +35,10 @@ func (o IndexingOptions) IncludeTermVectors() bool { return o&IncludeTermVectors != 0 } +func (o IndexingOptions) IncludeDocValues() bool { + return o&DocValues != 0 +} + func (o IndexingOptions) String() string { rv := "" if o.IsIndexed() { @@ -51,5 +56,11 @@ func (o IndexingOptions) String() string { } rv += "TV" } + if o.IncludeDocValues() { + if rv != "" { + rv += ", " + } + rv += "DV" + } return rv } diff --git a/vendor/github.com/blevesearch/bleve/index.go b/vendor/github.com/blevesearch/bleve/index.go index 293ec9877b..e85652d967 100644 --- a/vendor/github.com/blevesearch/bleve/index.go +++ b/vendor/github.com/blevesearch/bleve/index.go @@ -76,7 +76,7 @@ func (b *Batch) SetInternal(key, val []byte) { b.internal.SetInternal(key, val) } -// SetInternal adds the specified delete internal +// DeleteInternal adds the specified delete internal // operation to the batch. NOTE: the bleve Index is // not updated until the batch is executed. func (b *Batch) DeleteInternal(key []byte) { diff --git a/vendor/github.com/blevesearch/bleve/index/analysis.go b/vendor/github.com/blevesearch/bleve/index/analysis.go index b626b9f3ed..840dad97ae 100644 --- a/vendor/github.com/blevesearch/bleve/index/analysis.go +++ b/vendor/github.com/blevesearch/bleve/index/analysis.go @@ -14,7 +14,10 @@ package index -import "github.com/blevesearch/bleve/document" +import ( + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/document" +) type IndexRow interface { KeySize() int @@ -29,6 +32,11 @@ type IndexRow interface { type AnalysisResult struct { DocID string Rows []IndexRow + + // scorch + Document *document.Document + Analyzed []analysis.TokenFrequencies + Length []int } type AnalysisWork struct { diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/README.md b/vendor/github.com/blevesearch/bleve/index/scorch/README.md new file mode 100644 index 0000000000..861335a1bf --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/README.md @@ -0,0 +1,367 @@ +# scorch + +## Definitions + +Batch +- A collection of Documents to mutate in the index. + +Document +- Has a unique identifier (arbitrary bytes). +- Is comprised of a list of fields. + +Field +- Has a name (string). +- Has a type (text, number, date, geopoint). +- Has a value (depending on type). +- Can be indexed, stored, or both. +- If indexed, can be analyzed. +-m If indexed, can optionally store term vectors. + +## Scope + +Scorch *MUST* implement the bleve.index API without requiring any changes to this API. + +Scorch *MAY* introduce new interfaces, which can be discovered to allow use of new capabilities not in the current API. + +## Implementation + +The scorch implementation starts with the concept of a segmented index. + +A segment is simply a slice, subset, or portion of the entire index. A segmented index is one which is composed of one or more segments. Although segments are created in a particular order, knowing this ordering is not required to achieve correct semantics when querying. Because there is no ordering, this means that when searching an index, you can (and should) search all the segments concurrently. + +### Internal Wrapper + +In order to accommodate the existing APIs while also improving the implementation, the scorch implementation includes some wrapper functionality that must be described. + +#### \_id field + +In scorch, field 0 is prearranged to be named \_id. All documents have a value for this field, which is the documents external identifier. In this version the field *MUST* be both indexed AND stored. The scorch wrapper adds this field, as it will not be present in the Document from the calling bleve code. + +NOTE: If a document already contains a field \_id, it will be replaced. If this is problematic, the caller must ensure such a scenario does not happen. + +### Proposed Structures + +``` +type Segment interface { + + Dictionary(field string) TermDictionary + +} + +type TermDictionary interface { + + PostingsList(term string, excluding PostingsList) PostingsList + +} + +type PostingsList interface { + + Next() Posting + + And(other PostingsList) PostingsList + Or(other PostingsList) PostingsList + +} + +type Posting interface { + Number() uint64 + + Frequency() uint64 + Norm() float64 + + Locations() Locations +} + +type Locations interface { + Start() uint64 + End() uint64 + Pos() uint64 + ArrayPositions() ... +} + +type DeletedDocs { + +} + +type SegmentSnapshot struct { + segment Segment + deleted PostingsList +} + +type IndexSnapshot struct { + segment []SegmentSnapshot +} +``` +**What about errors?** +**What about memory mgmnt or context?** +**Postings List separate iterator to separate stateful from stateless** +### Mutating the Index + +The bleve.index API has methods for directly making individual mutations (Update/Delete/SetInternal/DeleteInternal), however for this first implementation, we assume that all of these calls can simply be turned into a Batch of size 1. This may be highly inefficient, but it will be correct. This decision is made based on the fact that Couchbase FTS always uses Batches. + +NOTE: As a side-effect of this decision, it should be clear that performance tuning may depend on the batch size, which may in-turn require changes in FTS. + +From this point forward, only Batch mutations will be discussed. + +Sequence of Operations: + +1. For each document in the batch, search through all existing segments. The goal is to build up a per-segment bitset which tells us which documents in that segment are obsoleted by the addition of the new segment we're currently building. NOTE: we're not ready for this change to take effect yet, so rather than this operation mutating anything, they simply return bitsets, which we can apply later. Logically, this is something like: + + ``` + foreach segment { + dict := segment.Dictionary("\_id") + postings := empty postings list + foreach docID { + postings = postings.Or(dict.PostingsList(docID, nil)) + } + } + ``` + + NOTE: it is illustrated above as nested for loops, but some or all of these could be concurrently. The end result is that for each segment, we have (possibly empty) bitset. + +2. Also concurrent with 1, the documents in the batch are analyzed. This analysis proceeds using the existing analyzer pool. + +3. (after 2 completes) Analyzed documents are fed into a function which builds a new Segment representing this information. + +4. We now have everything we need to update the state of the system to include this new snapshot. + + - Acquire a lock + - Create a new IndexSnapshot + - For each SegmentSnapshot in the IndexSnapshot, take the deleted PostingsList and OR it with the new postings list for this Segment. Construct a new SegmentSnapshot for the segment using this new deleted PostingsList. Append this SegmentSnapshot to the IndexSnapshot. + - Create a new SegmentSnapshot wrapping our new segment with nil deleted docs. + - Append the new SegmentSnapshot to the IndexSnapshot + - Release the lock + +An ASCII art example: + ``` + 0 - Empty Index + + No segments + + IndexSnapshot + segments [] + deleted [] + + + 1 - Index Batch [ A B C ] + + segment 0 + numbers [ 1 2 3 ] + \_id [ A B C ] + + IndexSnapshot + segments [ 0 ] + deleted [ nil ] + + + 2 - Index Batch [ B' ] + + segment 0 1 + numbers [ 1 2 3 ] [ 1 ] + \_id [ A B C ] [ B ] + + Compute bitset segment-0-deleted-by-1: + [ 0 1 0 ] + + OR it with previous (nil) (call it 0-1) + [ 0 1 0 ] + + IndexSnapshot + segments [ 0 1 ] + deleted [ 0-1 nil ] + + 3 - Index Batch [ C' ] + + segment 0 1 2 + numbers [ 1 2 3 ] [ 1 ] [ 1 ] + \_id [ A B C ] [ B ] [ C ] + + Compute bitset segment-0-deleted-by-2: + [ 0 0 1 ] + + OR it with previous ([ 0 1 0 ]) (call it 0-12) + [ 0 1 1 ] + + Compute bitset segment-1-deleted-by-2: + [ 0 ] + + OR it with previous (nil) + still just nil + + + IndexSnapshot + segments [ 0 1 2 ] + deleted [ 0-12 nil nil ] + ``` + +**is there opportunity to stop early when doc is found in one segment** +**also, more efficient way to find bits for long lists of ids?** + +### Searching + +In the bleve.index API all searching starts by getting an IndexReader, which represents a snapshot of the index at a point in time. + +As described in the section above, our index implementation maintains a pointer to the current IndexSnapshot. When a caller gets an IndexReader, they get a copy of this pointer, and can use it as long as they like. The IndexSnapshot contains SegmentSnapshots, which only contain pointers to immutable segments. The deleted posting lists associated with a segment change over time, but the particular deleted posting list in YOUR snapshot is immutable. This gives a stable view of the data. + +#### Term Search + +Term search is the only searching primitive exposed in today's bleve.index API. This ultimately could limit our ability to take advantage of the indexing improvements, but it also means it will be easier to get a first version of this working. + +A term search for term T in field F will look something like this: + +``` + searchResultPostings = empty + foreach segment { + dict := segment.Dictionary(F) + segmentResultPostings = dict.PostingsList(T, segmentSnapshotDeleted) + // make segmentLocal numbers into global numbers, and flip bits in searchResultPostings + } +``` + +The searchResultPostings will be a new implementation of the TermFieldReader inteface. + +As a reminder this interface is: + +``` +// TermFieldReader is the interface exposing the enumeration of documents +// containing a given term in a given field. Documents are returned in byte +// lexicographic order over their identifiers. +type TermFieldReader interface { + // Next returns the next document containing the term in this field, or nil + // when it reaches the end of the enumeration. The preAlloced TermFieldDoc + // is optional, and when non-nil, will be used instead of allocating memory. + Next(preAlloced *TermFieldDoc) (*TermFieldDoc, error) + + // Advance resets the enumeration at specified document or its immediate + // follower. + Advance(ID IndexInternalID, preAlloced *TermFieldDoc) (*TermFieldDoc, error) + + // Count returns the number of documents contains the term in this field. + Count() uint64 + Close() error +} +``` + +At first glance this appears problematic, we have no way to return documents in order of their identifiers. But it turns out the wording of this perhaps too strong, or a bit ambiguous. Originally, this referred to the external identifiers, but with the introduction of a distinction between internal/external identifiers, returning them in order of their internal identifiers is also acceptable. **ASIDE**: the reason for this is that most callers just use Next() and literally don't care what the order is, they could be in any order and it would be fine. There is only one search that cares and that is the ConjunctionSearcher, which relies on Next/Advance having very specific semantics. Later in this document we will have a proposal to split into multiple interfaces: + +- The weakest interface, only supports Next() no ordering at all. +- Ordered, supporting Advance() +- And/Or'able capable of internally efficiently doing these ops with like interfaces (if not capable then can always fall back to external walking) + +But, the good news is that we don't even have to do that for our first implementation. As long as the global numbers we use for internal identifiers are consistent within this IndexSnapshot, then Next() will be ordered by ascending document number, and Advance() will still work correctly. + +NOTE: there is another place where we rely on the ordering of these hits, and that is in the "\_id" sort order. Previously this was the natural order, and a NOOP for the collector, now it must be implemented by actually sorting on the "\_id" field. We probably should introduce at least a marker interface to detect this. + +An ASCII art example: + +``` +Let's start with the IndexSnapshot we ended with earlier: + +3 - Index Batch [ C' ] + + segment 0 1 2 + numbers [ 1 2 3 ] [ 1 ] [ 1 ] + \_id [ A B C ] [ B ] [ C ] + + Compute bitset segment-0-deleted-by-2: + [ 0 0 1 ] + + OR it with previous ([ 0 1 0 ]) (call it 0-12) + [ 0 1 1 ] + +Compute bitset segment-1-deleted-by-2: + [ 0 0 0 ] + +OR it with previous (nil) + still just nil + + + IndexSnapshot + segments [ 0 1 2 ] + deleted [ 0-12 nil nil ] + +Now let's search for the term 'cat' in the field 'desc' and let's assume that Document C (both versions) would match it. + +Concurrently: + + - Segment 0 + - Get Term Dictionary For Field 'desc' + - From it get Postings List for term 'cat' EXCLUDING 0-12 + - raw segment matches [ 0 0 1 ] but excluding [ 0 1 1 ] gives [ 0 0 0 ] + - Segment 1 + - Get Term Dictionary For Field 'desc' + - From it get Postings List for term 'cat' excluding nil + - [ 0 ] + - Segment 2 + - Get Term Dictionary For Field 'desc' + - From it get Postings List for term 'cat' excluding nil + - [ 1 ] + +Map local bitsets into global number space (global meaning cross-segment but still unique to this snapshot) + +IndexSnapshot already should have mapping something like: +0 - Offset 0 +1 - Offset 3 (because segment 0 had 3 docs) +2 - Offset 4 (becuase segment 1 had 1 doc) + +This maps to search result bitset: + +[ 0 0 0 0 1] + +Caller would call Next() and get doc number 5 (assuming 1 based indexing for now) + +Caller could then ask to get term locations, stored fields, external doc ID for document number 5. Internally in the IndexSnapshot, we can now convert that back, and realize doc number 5 comes from segment 2, 5-4=1 so we're looking for doc number 1 in segment 2. That happens to be C... + +``` + +#### Future improvements + +In the future, interfaces to detect these non-serially operating TermFieldReaders could expose their own And() and Or() up to the higher level Conjunction/Disjunction searchers. Doing this alone offers some win, but also means there would be greater burden on the Searcher code rewriting logical expressions for maximum performance. + +Another related topic is that of peak memory usage. With serially operating TermFieldReaders it was necessary to start them all at the same time and operate in unison. However, with these non-serially operating TermFieldReaders we have the option of doing a few at a time, consolidating them, dispoting the intermediaries, and then doing a few more. For very complex queries with many clauses this could reduce peak memory usage. + + +### Memory Tracking + +All segments must be able to produce two statistics, an estimate of their explicit memory usage, and their actual size on disk (if any). For in-memory segments, disk usage could be zero, and the memory usage represents the entire information content. For mmap-based disk segments, the memory could be as low as the size of tracking structure itself (say just a few pointers). + +This would allow the implementation to throttle or block incoming mutations when a threshold memory usage has (or would be) exceeded. + +### Persistence + +Obviously, we want to support (but maybe not require) asynchronous persistence of segments. My expectation is that segments are initially built in memory. At some point they are persisted to disk. This poses some interesting challenges. + +At runtime, the state of an index (it's IndexSnapshot) is not only the contents of the segments, but also the bitmasks of deleted documents. These bitmasks indirectly encode an ordering in which the segments were added. The reason is that the bitmasks encode which items have been obsoleted by other (subsequent or more future) segments. In the runtime implementation we compute bitmask deltas and then merge them at the same time we bring the new segment in. One idea is that we could take a similar approach on disk. When we persist a segment, we persist the bitmask deltas of segments known to exist at that time, and eventually these can get merged up into a base segment deleted bitmask. + +This also relates to the topic rollback, addressed next... + + +### Rollback + +One desirable property in the Couchbase ecosystem is the ability to rollback to some previous (though typically not long ago) state. One idea for keeping this property in this design is to protect some of the most recent segments from merging. Then, if necessary, they could be "undone" to reveal previous states of the system. In these scenarios "undone" has to properly undo the deleted bitmasks on the other segments. Again, the current thinking is that rather than "undo" anything, it could be work that was deferred in the first place, thus making it easier to logically undo. + +Another possibly related approach would be to tie this into our existing snapshot mechanism. Perhaps simulating a slow reader (holding onto index snapshots) for some period of time, can be the mechanism to achieve the desired end goal. + + +### Internal Storage + +The bleve.index API has support for "internal storage". The ability to store information under a separate name space. + +This is not used for high volume storage, so it is tempting to think we could just put a small k/v store alongside the rest of the index. But, the reality is that this storage is used to maintain key information related to the rollback scenario. Because of this, its crucial that ordering and overwriting of key/value pairs correspond with actual segment persistence in the index. Based on this, I believe its important to put the internal key/value pairs inside the segments themselves. But, this also means that they must follow a similar "deleted" bitmask approach to obsolete values in older segments. But, this also seems to substantially increase the complexity of the solution because of the separate name space, it would appear to require its own bitmask. Further keys aren't numeric, which then implies yet another mapping from internal key to number, etc. + +More thought is required here. + +### Merging + +The segmented index approach requires merging to prevent the number of segments from growing too large. + +Recent experience with LSMs has taught us that having the correct merge strategy can make a huge difference in the overall performance of the system. In particular, a simple merge strategy which merges segments too aggressively can lead to high write amplification and unnecessarily rendering cached data useless. + +A few simple principles have been identified. + +- Roughly we merge multiple smaller segments into a single larger one. +- The larger a segment gets the less likely we should be to ever merge it. +- Segments with large numbers of deleted/obsoleted items are good candidates as the merge will result in a space savings. +- Segments with all items deleted/obsoleted can be dropped. + +Merging of a segment should be able to proceed even if that segment is held by an ongoing snapshot, it should only delay the removal of it. diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/event.go b/vendor/github.com/blevesearch/bleve/index/scorch/event.go new file mode 100644 index 0000000000..dd79d6d066 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/event.go @@ -0,0 +1,56 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import "time" + +// RegistryAsyncErrorCallbacks should be treated as read-only after +// process init()'ialization. +var RegistryAsyncErrorCallbacks = map[string]func(error){} + +// RegistryEventCallbacks should be treated as read-only after +// process init()'ialization. +var RegistryEventCallbacks = map[string]func(Event){} + +// Event represents the information provided in an OnEvent() callback. +type Event struct { + Kind EventKind + Scorch *Scorch + Duration time.Duration +} + +// EventKind represents an event code for OnEvent() callbacks. +type EventKind int + +// EventKindCloseStart is fired when a Scorch.Close() has begun. +var EventKindCloseStart = EventKind(1) + +// EventKindClose is fired when a scorch index has been fully closed. +var EventKindClose = EventKind(2) + +// EventKindMergerProgress is fired when the merger has completed a +// round of merge processing. +var EventKindMergerProgress = EventKind(3) + +// EventKindPersisterProgress is fired when the persister has completed +// a round of persistence processing. +var EventKindPersisterProgress = EventKind(4) + +// EventKindBatchIntroductionStart is fired when Batch() is invoked which +// introduces a new segment. +var EventKindBatchIntroductionStart = EventKind(5) + +// EventKindBatchIntroduction is fired when Batch() completes. +var EventKindBatchIntroduction = EventKind(6) diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go b/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go new file mode 100644 index 0000000000..4499fa41bd --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go @@ -0,0 +1,317 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "fmt" + "sync/atomic" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index/scorch/segment" +) + +type segmentIntroduction struct { + id uint64 + data segment.Segment + obsoletes map[uint64]*roaring.Bitmap + ids []string + internal map[string][]byte + + applied chan error + persisted chan error +} + +type epochWatcher struct { + epoch uint64 + notifyCh notificationChan +} + +type snapshotReversion struct { + snapshot *IndexSnapshot + applied chan error + persisted chan error +} + +func (s *Scorch) mainLoop() { + var epochWatchers []*epochWatcher +OUTER: + for { + select { + case <-s.closeCh: + break OUTER + + case epochWatcher := <-s.introducerNotifier: + epochWatchers = append(epochWatchers, epochWatcher) + + case nextMerge := <-s.merges: + s.introduceMerge(nextMerge) + + case next := <-s.introductions: + err := s.introduceSegment(next) + if err != nil { + continue OUTER + } + + case revertTo := <-s.revertToSnapshots: + err := s.revertToSnapshot(revertTo) + if err != nil { + continue OUTER + } + } + + var epochCurr uint64 + s.rootLock.RLock() + if s.root != nil { + epochCurr = s.root.epoch + } + s.rootLock.RUnlock() + var epochWatchersNext []*epochWatcher + for _, w := range epochWatchers { + if w.epoch < epochCurr { + close(w.notifyCh) + } else { + epochWatchersNext = append(epochWatchersNext, w) + } + } + epochWatchers = epochWatchersNext + } + + s.asyncTasks.Done() +} + +func (s *Scorch) introduceSegment(next *segmentIntroduction) error { + // acquire lock + s.rootLock.Lock() + + nsegs := len(s.root.segment) + + // prepare new index snapshot + newSnapshot := &IndexSnapshot{ + parent: s, + segment: make([]*SegmentSnapshot, nsegs, nsegs+1), + offsets: make([]uint64, nsegs, nsegs+1), + internal: make(map[string][]byte, len(s.root.internal)), + epoch: s.nextSnapshotEpoch, + refs: 1, + } + s.nextSnapshotEpoch++ + + // iterate through current segments + var running uint64 + for i := range s.root.segment { + // see if optimistic work included this segment + delta, ok := next.obsoletes[s.root.segment[i].id] + if !ok { + var err error + delta, err = s.root.segment[i].segment.DocNumbers(next.ids) + if err != nil { + s.rootLock.Unlock() + next.applied <- fmt.Errorf("error computing doc numbers: %v", err) + close(next.applied) + _ = newSnapshot.DecRef() + return err + } + } + newSnapshot.segment[i] = &SegmentSnapshot{ + id: s.root.segment[i].id, + segment: s.root.segment[i].segment, + cachedDocs: s.root.segment[i].cachedDocs, + } + s.root.segment[i].segment.AddRef() + + // apply new obsoletions + if s.root.segment[i].deleted == nil { + newSnapshot.segment[i].deleted = delta + } else { + newSnapshot.segment[i].deleted = roaring.Or(s.root.segment[i].deleted, delta) + } + + newSnapshot.offsets[i] = running + running += s.root.segment[i].Count() + + } + // append new segment, if any, to end of the new index snapshot + if next.data != nil { + newSegmentSnapshot := &SegmentSnapshot{ + id: next.id, + segment: next.data, // take ownership of next.data's ref-count + cachedDocs: &cachedDocs{cache: nil}, + } + newSnapshot.segment = append(newSnapshot.segment, newSegmentSnapshot) + newSnapshot.offsets = append(newSnapshot.offsets, running) + + // increment numItemsIntroduced which tracks the number of items + // queued for persistence. + atomic.AddUint64(&s.stats.numItemsIntroduced, newSegmentSnapshot.Count()) + } + // copy old values + for key, oldVal := range s.root.internal { + newSnapshot.internal[key] = oldVal + } + // set new values and apply deletes + for key, newVal := range next.internal { + if newVal != nil { + newSnapshot.internal[key] = newVal + } else { + delete(newSnapshot.internal, key) + } + } + if next.persisted != nil { + s.rootPersisted = append(s.rootPersisted, next.persisted) + } + // swap in new index snapshot + rootPrev := s.root + s.root = newSnapshot + // release lock + s.rootLock.Unlock() + + if rootPrev != nil { + _ = rootPrev.DecRef() + } + + close(next.applied) + + return nil +} + +func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { + // acquire lock + s.rootLock.Lock() + + // prepare new index snapshot + currSize := len(s.root.segment) + newSize := currSize + 1 - len(nextMerge.old) + newSnapshot := &IndexSnapshot{ + parent: s, + segment: make([]*SegmentSnapshot, 0, newSize), + offsets: make([]uint64, 0, newSize), + internal: s.root.internal, + epoch: s.nextSnapshotEpoch, + refs: 1, + } + s.nextSnapshotEpoch++ + + // iterate through current segments + newSegmentDeleted := roaring.NewBitmap() + var running uint64 + for i := range s.root.segment { + segmentID := s.root.segment[i].id + if segSnapAtMerge, ok := nextMerge.old[segmentID]; ok { + // this segment is going away, see if anything else was deleted since we started the merge + if s.root.segment[i].deleted != nil { + // assume all these deletes are new + deletedSince := s.root.segment[i].deleted + // if we already knew about some of them, remove + if segSnapAtMerge.deleted != nil { + deletedSince = roaring.AndNot(s.root.segment[i].deleted, segSnapAtMerge.deleted) + } + deletedSinceItr := deletedSince.Iterator() + for deletedSinceItr.HasNext() { + oldDocNum := deletedSinceItr.Next() + newDocNum := nextMerge.oldNewDocNums[segmentID][oldDocNum] + newSegmentDeleted.Add(uint32(newDocNum)) + } + } + } else { + // this segment is staying + newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ + id: s.root.segment[i].id, + segment: s.root.segment[i].segment, + deleted: s.root.segment[i].deleted, + cachedDocs: s.root.segment[i].cachedDocs, + }) + s.root.segment[i].segment.AddRef() + newSnapshot.offsets = append(newSnapshot.offsets, running) + running += s.root.segment[i].Count() + } + } + + // put new segment at end + newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{ + id: nextMerge.id, + segment: nextMerge.new, // take ownership for nextMerge.new's ref-count + deleted: newSegmentDeleted, + cachedDocs: &cachedDocs{cache: nil}, + }) + newSnapshot.offsets = append(newSnapshot.offsets, running) + + // swap in new segment + rootPrev := s.root + s.root = newSnapshot + // release lock + s.rootLock.Unlock() + + if rootPrev != nil { + _ = rootPrev.DecRef() + } + + // notify merger we incorporated this + close(nextMerge.notify) +} + +func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { + if revertTo.snapshot == nil { + err := fmt.Errorf("Cannot revert to a nil snapshot") + revertTo.applied <- err + return err + } + + // acquire lock + s.rootLock.Lock() + + // prepare a new index snapshot, based on next snapshot + newSnapshot := &IndexSnapshot{ + parent: s, + segment: make([]*SegmentSnapshot, len(revertTo.snapshot.segment)), + offsets: revertTo.snapshot.offsets, + internal: revertTo.snapshot.internal, + epoch: s.nextSnapshotEpoch, + refs: 1, + } + s.nextSnapshotEpoch++ + + // iterate through segments + for i, segmentSnapshot := range revertTo.snapshot.segment { + newSnapshot.segment[i] = &SegmentSnapshot{ + id: segmentSnapshot.id, + segment: segmentSnapshot.segment, + deleted: segmentSnapshot.deleted, + cachedDocs: segmentSnapshot.cachedDocs, + } + newSnapshot.segment[i].segment.AddRef() + + // remove segment from ineligibleForRemoval map + filename := zapFileName(segmentSnapshot.id) + delete(s.ineligibleForRemoval, filename) + } + + if revertTo.persisted != nil { + s.rootPersisted = append(s.rootPersisted, revertTo.persisted) + } + + // swap in new snapshot + rootPrev := s.root + s.root = newSnapshot + // release lock + s.rootLock.Unlock() + + if rootPrev != nil { + _ = rootPrev.DecRef() + } + + close(revertTo.applied) + + return nil +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/merge.go b/vendor/github.com/blevesearch/bleve/index/scorch/merge.go new file mode 100644 index 0000000000..5ded29b5a3 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/merge.go @@ -0,0 +1,189 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "fmt" + "os" + "sync/atomic" + "time" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index/scorch/mergeplan" + "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/index/scorch/segment/zap" +) + +func (s *Scorch) mergerLoop() { + var lastEpochMergePlanned uint64 +OUTER: + for { + select { + case <-s.closeCh: + break OUTER + + default: + // check to see if there is a new snapshot to persist + s.rootLock.RLock() + ourSnapshot := s.root + ourSnapshot.AddRef() + s.rootLock.RUnlock() + + if ourSnapshot.epoch != lastEpochMergePlanned { + startTime := time.Now() + + // lets get started + err := s.planMergeAtSnapshot(ourSnapshot) + if err != nil { + s.fireAsyncError(fmt.Errorf("merging err: %v", err)) + _ = ourSnapshot.DecRef() + continue OUTER + } + lastEpochMergePlanned = ourSnapshot.epoch + + s.fireEvent(EventKindMergerProgress, time.Since(startTime)) + } + _ = ourSnapshot.DecRef() + + // tell the persister we're waiting for changes + // first make a notification chan + notifyUs := make(notificationChan) + + // give it to the persister + select { + case <-s.closeCh: + break OUTER + case s.persisterNotifier <- notifyUs: + } + + // check again + s.rootLock.RLock() + ourSnapshot = s.root + ourSnapshot.AddRef() + s.rootLock.RUnlock() + + if ourSnapshot.epoch != lastEpochMergePlanned { + startTime := time.Now() + + // lets get started + err := s.planMergeAtSnapshot(ourSnapshot) + if err != nil { + s.fireAsyncError(fmt.Errorf("merging err: %v", err)) + _ = ourSnapshot.DecRef() + continue OUTER + } + lastEpochMergePlanned = ourSnapshot.epoch + + s.fireEvent(EventKindMergerProgress, time.Since(startTime)) + } + _ = ourSnapshot.DecRef() + + // now wait for it (but also detect close) + select { + case <-s.closeCh: + break OUTER + case <-notifyUs: + // woken up, next loop should pick up work + } + } + } + s.asyncTasks.Done() +} + +func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot) error { + // build list of zap segments in this snapshot + var onlyZapSnapshots []mergeplan.Segment + for _, segmentSnapshot := range ourSnapshot.segment { + if _, ok := segmentSnapshot.segment.(*zap.Segment); ok { + onlyZapSnapshots = append(onlyZapSnapshots, segmentSnapshot) + } + } + + // give this list to the planner + resultMergePlan, err := mergeplan.Plan(onlyZapSnapshots, nil) + if err != nil { + return fmt.Errorf("merge planning err: %v", err) + } + if resultMergePlan == nil { + // nothing to do + return nil + } + + // process tasks in serial for now + var notifications []notificationChan + for _, task := range resultMergePlan.Tasks { + oldMap := make(map[uint64]*SegmentSnapshot) + newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1) + segmentsToMerge := make([]*zap.Segment, 0, len(task.Segments)) + docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments)) + for _, planSegment := range task.Segments { + if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok { + oldMap[segSnapshot.id] = segSnapshot + if zapSeg, ok := segSnapshot.segment.(*zap.Segment); ok { + segmentsToMerge = append(segmentsToMerge, zapSeg) + docsToDrop = append(docsToDrop, segSnapshot.deleted) + } + } + } + + filename := zapFileName(newSegmentID) + s.markIneligibleForRemoval(filename) + path := s.path + string(os.PathSeparator) + filename + newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, DefaultChunkFactor) + if err != nil { + s.unmarkIneligibleForRemoval(filename) + return fmt.Errorf("merging failed: %v", err) + } + segment, err := zap.Open(path) + if err != nil { + s.unmarkIneligibleForRemoval(filename) + return err + } + sm := &segmentMerge{ + id: newSegmentID, + old: oldMap, + oldNewDocNums: make(map[uint64][]uint64), + new: segment, + notify: make(notificationChan), + } + notifications = append(notifications, sm.notify) + for i, segNewDocNums := range newDocNums { + sm.oldNewDocNums[task.Segments[i].Id()] = segNewDocNums + } + + // give it to the introducer + select { + case <-s.closeCh: + return nil + case s.merges <- sm: + } + } + for _, notification := range notifications { + select { + case <-s.closeCh: + return nil + case <-notification: + } + } + return nil +} + +type segmentMerge struct { + id uint64 + old map[uint64]*SegmentSnapshot + oldNewDocNums map[uint64][]uint64 + new segment.Segment + notify notificationChan +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/merge_plan.go b/vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/merge_plan.go new file mode 100644 index 0000000000..0afc3ce5c6 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/merge_plan.go @@ -0,0 +1,369 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package mergeplan provides a segment merge planning approach that's +// inspired by Lucene's TieredMergePolicy.java and descriptions like +// http://blog.mikemccandless.com/2011/02/visualizing-lucenes-segment-merges.html +package mergeplan + +import ( + "fmt" + "math" + "sort" + "strings" +) + +// A Segment represents the information that the planner needs to +// calculate segment merging. +type Segment interface { + // Unique id of the segment -- used for sorting. + Id() uint64 + + // Full segment size (the size before any logical deletions). + FullSize() int64 + + // Size of the live data of the segment; i.e., FullSize() minus + // any logical deletions. + LiveSize() int64 +} + +// Plan() will functionally compute a merge plan. A segment will be +// assigned to at most a single MergeTask in the output MergePlan. A +// segment not assigned to any MergeTask means the segment should +// remain unmerged. +func Plan(segments []Segment, o *MergePlanOptions) (*MergePlan, error) { + return plan(segments, o) +} + +// A MergePlan is the result of the Plan() API. +// +// The planner doesn’t know how or whether these tasks are executed -- +// that’s up to a separate merge execution system, which might execute +// these tasks concurrently or not, and which might execute all the +// tasks or not. +type MergePlan struct { + Tasks []*MergeTask +} + +// A MergeTask represents several segments that should be merged +// together into a single segment. +type MergeTask struct { + Segments []Segment +} + +// The MergePlanOptions is designed to be reusable between planning calls. +type MergePlanOptions struct { + // Max # segments per logarithmic tier, or max width of any + // logarithmic “step”. Smaller values mean more merging but fewer + // segments. Should be >= SegmentsPerMergeTask, else you'll have + // too much merging. + MaxSegmentsPerTier int + + // Max size of any segment produced after merging. Actual + // merging, however, may produce segment sizes different than the + // planner’s predicted sizes. + MaxSegmentSize int64 + + // The growth factor for each tier in a staircase of idealized + // segments computed by CalcBudget(). + TierGrowth float64 + + // The number of segments in any resulting MergeTask. e.g., + // len(result.Tasks[ * ].Segments) == SegmentsPerMergeTask. + SegmentsPerMergeTask int + + // Small segments are rounded up to this size, i.e., treated as + // equal (floor) size for consideration. This is to prevent lots + // of tiny segments from resulting in a long tail in the index. + FloorSegmentSize int64 + + // Controls how aggressively merges that reclaim more deletions + // are favored. Higher values will more aggressively target + // merges that reclaim deletions, but be careful not to go so high + // that way too much merging takes place; a value of 3.0 is + // probably nearly too high. A value of 0.0 means deletions don't + // impact merge selection. + ReclaimDeletesWeight float64 + + // Optional, defaults to mergeplan.CalcBudget(). + CalcBudget func(totalSize int64, firstTierSize int64, + o *MergePlanOptions) (budgetNumSegments int) + + // Optional, defaults to mergeplan.ScoreSegments(). + ScoreSegments func(segments []Segment, o *MergePlanOptions) float64 + + // Optional. + Logger func(string) +} + +// Returns the higher of the input or FloorSegmentSize. +func (o *MergePlanOptions) RaiseToFloorSegmentSize(s int64) int64 { + if s > o.FloorSegmentSize { + return s + } + return o.FloorSegmentSize +} + +// Suggested default options. +var DefaultMergePlanOptions = MergePlanOptions{ + MaxSegmentsPerTier: 10, + MaxSegmentSize: 5000000, + TierGrowth: 10.0, + SegmentsPerMergeTask: 10, + FloorSegmentSize: 2000, + ReclaimDeletesWeight: 2.0, +} + +// ------------------------------------------- + +func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) { + if len(segmentsIn) <= 1 { + return nil, nil + } + + if o == nil { + o = &DefaultMergePlanOptions + } + + segments := append([]Segment(nil), segmentsIn...) // Copy. + + sort.Sort(byLiveSizeDescending(segments)) + + var minLiveSize int64 = math.MaxInt64 + + var eligibles []Segment + var eligiblesLiveSize int64 + + for _, segment := range segments { + if minLiveSize > segment.LiveSize() { + minLiveSize = segment.LiveSize() + } + + // Only small-enough segments are eligible. + if segment.LiveSize() < o.MaxSegmentSize/2 { + eligibles = append(eligibles, segment) + eligiblesLiveSize += segment.LiveSize() + } + } + + minLiveSize = o.RaiseToFloorSegmentSize(minLiveSize) + + calcBudget := o.CalcBudget + if calcBudget == nil { + calcBudget = CalcBudget + } + + budgetNumSegments := CalcBudget(eligiblesLiveSize, minLiveSize, o) + + scoreSegments := o.ScoreSegments + if scoreSegments == nil { + scoreSegments = ScoreSegments + } + + rv := &MergePlan{} + + var empties []Segment + for _, eligible := range eligibles { + if eligible.LiveSize() <= 0 { + empties = append(empties, eligible) + } + } + if len(empties) > 0 { + rv.Tasks = append(rv.Tasks, &MergeTask{Segments: empties}) + eligibles = removeSegments(eligibles, empties) + } + + // While we’re over budget, keep looping, which might produce + // another MergeTask. + for len(eligibles) > budgetNumSegments { + // Track a current best roster as we examine and score + // potential rosters of merges. + var bestRoster []Segment + var bestRosterScore float64 // Lower score is better. + + for startIdx := 0; startIdx < len(eligibles)-o.SegmentsPerMergeTask; startIdx++ { + var roster []Segment + var rosterLiveSize int64 + + for idx := startIdx; idx < len(eligibles) && len(roster) < o.SegmentsPerMergeTask; idx++ { + eligible := eligibles[idx] + + if rosterLiveSize+eligible.LiveSize() < o.MaxSegmentSize { + roster = append(roster, eligible) + rosterLiveSize += eligible.LiveSize() + } + } + + if len(roster) > 0 { + rosterScore := scoreSegments(roster, o) + + if len(bestRoster) <= 0 || rosterScore < bestRosterScore { + bestRoster = roster + bestRosterScore = rosterScore + } + } + } + + if len(bestRoster) <= 0 { + return rv, nil + } + + rv.Tasks = append(rv.Tasks, &MergeTask{Segments: bestRoster}) + + eligibles = removeSegments(eligibles, bestRoster) + } + + return rv, nil +} + +// Compute the number of segments that would be needed to cover the +// totalSize, by climbing up a logarithmically growing staircase of +// segment tiers. +func CalcBudget(totalSize int64, firstTierSize int64, o *MergePlanOptions) ( + budgetNumSegments int) { + tierSize := firstTierSize + if tierSize < 1 { + tierSize = 1 + } + + maxSegmentsPerTier := o.MaxSegmentsPerTier + if maxSegmentsPerTier < 1 { + maxSegmentsPerTier = 1 + } + + tierGrowth := o.TierGrowth + if tierGrowth < 1.0 { + tierGrowth = 1.0 + } + + for totalSize > 0 { + segmentsInTier := float64(totalSize) / float64(tierSize) + if segmentsInTier < float64(maxSegmentsPerTier) { + budgetNumSegments += int(math.Ceil(segmentsInTier)) + break + } + + budgetNumSegments += maxSegmentsPerTier + totalSize -= int64(maxSegmentsPerTier) * tierSize + tierSize = int64(float64(tierSize) * tierGrowth) + } + + return budgetNumSegments +} + +// Of note, removeSegments() keeps the ordering of the results stable. +func removeSegments(segments []Segment, toRemove []Segment) []Segment { + rv := make([]Segment, 0, len(segments)-len(toRemove)) +OUTER: + for _, segment := range segments { + for _, r := range toRemove { + if segment == r { + continue OUTER + } + } + rv = append(rv, segment) + } + return rv +} + +// Smaller result score is better. +func ScoreSegments(segments []Segment, o *MergePlanOptions) float64 { + var totBeforeSize int64 + var totAfterSize int64 + var totAfterSizeFloored int64 + + for _, segment := range segments { + totBeforeSize += segment.FullSize() + totAfterSize += segment.LiveSize() + totAfterSizeFloored += o.RaiseToFloorSegmentSize(segment.LiveSize()) + } + + if totBeforeSize <= 0 || totAfterSize <= 0 || totAfterSizeFloored <= 0 { + return 0 + } + + // Roughly guess the "balance" of the segments -- whether the + // segments are about the same size. + balance := + float64(o.RaiseToFloorSegmentSize(segments[0].LiveSize())) / + float64(totAfterSizeFloored) + + // Gently favor smaller merges over bigger ones. We don't want to + // make the exponent too large else we end up with poor merges of + // small segments in order to avoid the large merges. + score := balance * math.Pow(float64(totAfterSize), 0.05) + + // Strongly favor merges that reclaim deletes. + nonDelRatio := float64(totAfterSize) / float64(totBeforeSize) + + score *= math.Pow(nonDelRatio, o.ReclaimDeletesWeight) + + return score +} + +// ------------------------------------------ + +// ToBarChart returns an ASCII rendering of the segments and the plan. +// The barMax is the max width of the bars in the bar chart. +func ToBarChart(prefix string, barMax int, segments []Segment, plan *MergePlan) string { + rv := make([]string, 0, len(segments)) + + var maxFullSize int64 + for _, segment := range segments { + if maxFullSize < segment.FullSize() { + maxFullSize = segment.FullSize() + } + } + if maxFullSize < 0 { + maxFullSize = 1 + } + + for _, segment := range segments { + barFull := int(segment.FullSize()) + barLive := int(segment.LiveSize()) + + if maxFullSize > int64(barMax) { + barFull = int(float64(barMax) * float64(barFull) / float64(maxFullSize)) + barLive = int(float64(barMax) * float64(barLive) / float64(maxFullSize)) + } + + barKind := " " + barChar := "." + + if plan != nil { + TASK_LOOP: + for taski, task := range plan.Tasks { + for _, taskSegment := range task.Segments { + if taskSegment == segment { + barKind = "*" + barChar = fmt.Sprintf("%d", taski) + break TASK_LOOP + } + } + } + } + + bar := + strings.Repeat(barChar, barLive)[0:barLive] + + strings.Repeat("x", barFull-barLive)[0:barFull-barLive] + + rv = append(rv, fmt.Sprintf("%s %5d: %5d /%5d - %s %s", prefix, + segment.Id(), + segment.LiveSize(), + segment.FullSize(), + barKind, bar)) + } + + return strings.Join(rv, "\n") +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/sort.go b/vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/sort.go new file mode 100644 index 0000000000..d044b8d7c9 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/sort.go @@ -0,0 +1,28 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mergeplan + +type byLiveSizeDescending []Segment + +func (a byLiveSizeDescending) Len() int { return len(a) } + +func (a byLiveSizeDescending) Swap(i, j int) { a[i], a[j] = a[j], a[i] } + +func (a byLiveSizeDescending) Less(i, j int) bool { + if a[i].LiveSize() != a[j].LiveSize() { + return a[i].LiveSize() > a[j].LiveSize() + } + return a[i].Id() < a[j].Id() +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/persister.go b/vendor/github.com/blevesearch/bleve/index/scorch/persister.go new file mode 100644 index 0000000000..cdcee37c2e --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/persister.go @@ -0,0 +1,646 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "bytes" + "fmt" + "io/ioutil" + "log" + "os" + "path/filepath" + "strconv" + "strings" + "sync/atomic" + "time" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/index/scorch/segment/zap" + "github.com/boltdb/bolt" +) + +var DefaultChunkFactor uint32 = 1024 + +type notificationChan chan struct{} + +func (s *Scorch) persisterLoop() { + defer s.asyncTasks.Done() + + var notifyChs []notificationChan + var lastPersistedEpoch uint64 +OUTER: + for { + select { + case <-s.closeCh: + break OUTER + case notifyCh := <-s.persisterNotifier: + notifyChs = append(notifyChs, notifyCh) + default: + } + + var ourSnapshot *IndexSnapshot + var ourPersisted []chan error + + // check to see if there is a new snapshot to persist + s.rootLock.Lock() + if s.root != nil && s.root.epoch > lastPersistedEpoch { + ourSnapshot = s.root + ourSnapshot.AddRef() + ourPersisted = s.rootPersisted + s.rootPersisted = nil + } + s.rootLock.Unlock() + + if ourSnapshot != nil { + startTime := time.Now() + + err := s.persistSnapshot(ourSnapshot) + for _, ch := range ourPersisted { + if err != nil { + ch <- err + } + close(ch) + } + if err != nil { + s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err)) + _ = ourSnapshot.DecRef() + continue OUTER + } + + lastPersistedEpoch = ourSnapshot.epoch + for _, notifyCh := range notifyChs { + close(notifyCh) + } + notifyChs = nil + _ = ourSnapshot.DecRef() + + changed := false + s.rootLock.RLock() + if s.root != nil && s.root.epoch != lastPersistedEpoch { + changed = true + } + s.rootLock.RUnlock() + + s.fireEvent(EventKindPersisterProgress, time.Since(startTime)) + + if changed { + continue OUTER + } + } + + // tell the introducer we're waiting for changes + w := &epochWatcher{ + epoch: lastPersistedEpoch, + notifyCh: make(notificationChan, 1), + } + + select { + case <-s.closeCh: + break OUTER + case s.introducerNotifier <- w: + } + + s.removeOldData() // might as well cleanup while waiting + + select { + case <-s.closeCh: + break OUTER + case <-w.notifyCh: + // woken up, next loop should pick up work + } + } +} + +func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { + // start a write transaction + tx, err := s.rootBolt.Begin(true) + if err != nil { + return err + } + // defer fsync of the rootbolt + defer func() { + if err == nil { + err = s.rootBolt.Sync() + } + }() + // defer commit/rollback transaction + defer func() { + if err == nil { + err = tx.Commit() + } else { + _ = tx.Rollback() + } + }() + + snapshotsBucket, err := tx.CreateBucketIfNotExists(boltSnapshotsBucket) + if err != nil { + return err + } + newSnapshotKey := segment.EncodeUvarintAscending(nil, snapshot.epoch) + snapshotBucket, err := snapshotsBucket.CreateBucketIfNotExists(newSnapshotKey) + if err != nil { + return err + } + + // persist internal values + internalBucket, err := snapshotBucket.CreateBucketIfNotExists(boltInternalKey) + if err != nil { + return err + } + // TODO optimize writing these in order? + for k, v := range snapshot.internal { + err = internalBucket.Put([]byte(k), v) + if err != nil { + return err + } + } + + var filenames []string + newSegmentPaths := make(map[uint64]string) + + // first ensure that each segment in this snapshot has been persisted + for i, segmentSnapshot := range snapshot.segment { + snapshotSegmentKey := segment.EncodeUvarintAscending(nil, uint64(i)) + snapshotSegmentBucket, err2 := snapshotBucket.CreateBucketIfNotExists(snapshotSegmentKey) + if err2 != nil { + return err2 + } + switch seg := segmentSnapshot.segment.(type) { + case *zap.SegmentBase: + // need to persist this to disk + filename := zapFileName(segmentSnapshot.id) + path := s.path + string(os.PathSeparator) + filename + err2 := zap.PersistSegmentBase(seg, path) + if err2 != nil { + return fmt.Errorf("error persisting segment: %v", err2) + } + newSegmentPaths[segmentSnapshot.id] = path + err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename)) + if err != nil { + return err + } + filenames = append(filenames, filename) + case *zap.Segment: + path := seg.Path() + filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator)) + err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename)) + if err != nil { + return err + } + filenames = append(filenames, filename) + default: + return fmt.Errorf("unknown segment type: %T", seg) + } + // store current deleted bits + var roaringBuf bytes.Buffer + if segmentSnapshot.deleted != nil { + _, err = segmentSnapshot.deleted.WriteTo(&roaringBuf) + if err != nil { + return fmt.Errorf("error persisting roaring bytes: %v", err) + } + err = snapshotSegmentBucket.Put(boltDeletedKey, roaringBuf.Bytes()) + if err != nil { + return err + } + } + } + + // only alter the root if we actually persisted a segment + // (sometimes its just a new snapshot, possibly with new internal values) + if len(newSegmentPaths) > 0 { + // now try to open all the new snapshots + newSegments := make(map[uint64]segment.Segment) + for segmentID, path := range newSegmentPaths { + newSegments[segmentID], err = zap.Open(path) + if err != nil { + for _, s := range newSegments { + if s != nil { + _ = s.Close() // cleanup segments that were successfully opened + } + } + return fmt.Errorf("error opening new segment at %s, %v", path, err) + } + } + + s.rootLock.Lock() + newIndexSnapshot := &IndexSnapshot{ + parent: s, + epoch: s.nextSnapshotEpoch, + segment: make([]*SegmentSnapshot, len(s.root.segment)), + offsets: make([]uint64, len(s.root.offsets)), + internal: make(map[string][]byte, len(s.root.internal)), + refs: 1, + } + s.nextSnapshotEpoch++ + for i, segmentSnapshot := range s.root.segment { + // see if this segment has been replaced + if replacement, ok := newSegments[segmentSnapshot.id]; ok { + newSegmentSnapshot := &SegmentSnapshot{ + id: segmentSnapshot.id, + segment: replacement, + deleted: segmentSnapshot.deleted, + cachedDocs: segmentSnapshot.cachedDocs, + } + newIndexSnapshot.segment[i] = newSegmentSnapshot + // update items persisted incase of a new segment snapshot + atomic.AddUint64(&s.stats.numItemsPersisted, newSegmentSnapshot.Count()) + } else { + newIndexSnapshot.segment[i] = s.root.segment[i] + newIndexSnapshot.segment[i].segment.AddRef() + } + newIndexSnapshot.offsets[i] = s.root.offsets[i] + } + for k, v := range s.root.internal { + newIndexSnapshot.internal[k] = v + } + for _, filename := range filenames { + delete(s.ineligibleForRemoval, filename) + } + rootPrev := s.root + s.root = newIndexSnapshot + s.rootLock.Unlock() + if rootPrev != nil { + _ = rootPrev.DecRef() + } + } + + return nil +} + +func zapFileName(epoch uint64) string { + return fmt.Sprintf("%012x.zap", epoch) +} + +// bolt snapshot code + +var boltSnapshotsBucket = []byte{'s'} +var boltPathKey = []byte{'p'} +var boltDeletedKey = []byte{'d'} +var boltInternalKey = []byte{'i'} + +func (s *Scorch) loadFromBolt() error { + return s.rootBolt.View(func(tx *bolt.Tx) error { + snapshots := tx.Bucket(boltSnapshotsBucket) + if snapshots == nil { + return nil + } + foundRoot := false + c := snapshots.Cursor() + for k, _ := c.Last(); k != nil; k, _ = c.Prev() { + _, snapshotEpoch, err := segment.DecodeUvarintAscending(k) + if err != nil { + log.Printf("unable to parse segment epoch %x, continuing", k) + continue + } + if foundRoot { + s.eligibleForRemoval = append(s.eligibleForRemoval, snapshotEpoch) + continue + } + snapshot := snapshots.Bucket(k) + if snapshot == nil { + log.Printf("snapshot key, but bucket missing %x, continuing", k) + s.eligibleForRemoval = append(s.eligibleForRemoval, snapshotEpoch) + continue + } + indexSnapshot, err := s.loadSnapshot(snapshot) + if err != nil { + log.Printf("unable to load snapshot, %v, continuing", err) + s.eligibleForRemoval = append(s.eligibleForRemoval, snapshotEpoch) + continue + } + indexSnapshot.epoch = snapshotEpoch + // set the nextSegmentID + s.nextSegmentID, err = s.maxSegmentIDOnDisk() + if err != nil { + return err + } + s.nextSegmentID++ + s.nextSnapshotEpoch = snapshotEpoch + 1 + s.rootLock.Lock() + if s.root != nil { + _ = s.root.DecRef() + } + s.root = indexSnapshot + s.rootLock.Unlock() + foundRoot = true + } + return nil + }) +} + +// LoadSnapshot loads the segment with the specified epoch +// NOTE: this is currently ONLY intended to be used by the command-line tool +func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) { + err = s.rootBolt.View(func(tx *bolt.Tx) error { + snapshots := tx.Bucket(boltSnapshotsBucket) + if snapshots == nil { + return nil + } + snapshotKey := segment.EncodeUvarintAscending(nil, epoch) + snapshot := snapshots.Bucket(snapshotKey) + if snapshot == nil { + return nil + } + rv, err = s.loadSnapshot(snapshot) + return err + }) + if err != nil { + return nil, err + } + return rv, nil +} + +func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { + + rv := &IndexSnapshot{ + parent: s, + internal: make(map[string][]byte), + refs: 1, + } + var running uint64 + c := snapshot.Cursor() + for k, _ := c.First(); k != nil; k, _ = c.Next() { + if k[0] == boltInternalKey[0] { + internalBucket := snapshot.Bucket(k) + err := internalBucket.ForEach(func(key []byte, val []byte) error { + copiedVal := append([]byte(nil), val...) + rv.internal[string(key)] = copiedVal + return nil + }) + if err != nil { + _ = rv.DecRef() + return nil, err + } + } else { + segmentBucket := snapshot.Bucket(k) + if segmentBucket == nil { + _ = rv.DecRef() + return nil, fmt.Errorf("segment key, but bucket missing % x", k) + } + segmentSnapshot, err := s.loadSegment(segmentBucket) + if err != nil { + _ = rv.DecRef() + return nil, fmt.Errorf("failed to load segment: %v", err) + } + _, segmentSnapshot.id, err = segment.DecodeUvarintAscending(k) + if err != nil { + _ = rv.DecRef() + return nil, fmt.Errorf("failed to decode segment id: %v", err) + } + rv.segment = append(rv.segment, segmentSnapshot) + rv.offsets = append(rv.offsets, running) + running += segmentSnapshot.segment.Count() + } + } + return rv, nil +} + +func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, error) { + pathBytes := segmentBucket.Get(boltPathKey) + if pathBytes == nil { + return nil, fmt.Errorf("segment path missing") + } + segmentPath := s.path + string(os.PathSeparator) + string(pathBytes) + segment, err := zap.Open(segmentPath) + if err != nil { + return nil, fmt.Errorf("error opening bolt segment: %v", err) + } + + rv := &SegmentSnapshot{ + segment: segment, + cachedDocs: &cachedDocs{cache: nil}, + } + deletedBytes := segmentBucket.Get(boltDeletedKey) + if deletedBytes != nil { + deletedBitmap := roaring.NewBitmap() + r := bytes.NewReader(deletedBytes) + _, err := deletedBitmap.ReadFrom(r) + if err != nil { + _ = segment.Close() + return nil, fmt.Errorf("error reading deleted bytes: %v", err) + } + rv.deleted = deletedBitmap + } + + return rv, nil +} + +type uint64Descending []uint64 + +func (p uint64Descending) Len() int { return len(p) } +func (p uint64Descending) Less(i, j int) bool { return p[i] > p[j] } +func (p uint64Descending) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +func (s *Scorch) removeOldData() { + removed, err := s.removeOldBoltSnapshots() + if err != nil { + s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err)) + } + + if removed > 0 { + err = s.removeOldZapFiles() + if err != nil { + s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err)) + } + } +} + +// NumSnapshotsToKeep represents how many recent, old snapshots to +// keep around per Scorch instance. Useful for apps that require +// rollback'ability. +var NumSnapshotsToKeep = 1 + +// Removes enough snapshots from the rootBolt so that the +// s.eligibleForRemoval stays under the NumSnapshotsToKeep policy. +func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) { + persistedEpochs, err := s.RootBoltSnapshotEpochs() + if err != nil { + return 0, err + } + + if len(persistedEpochs) <= NumSnapshotsToKeep { + // we need to keep everything + return 0, nil + } + + // make a map of epochs to protect from deletion + protectedEpochs := make(map[uint64]struct{}, NumSnapshotsToKeep) + for _, epoch := range persistedEpochs[0:NumSnapshotsToKeep] { + protectedEpochs[epoch] = struct{}{} + } + + var epochsToRemove []uint64 + var newEligible []uint64 + s.rootLock.Lock() + for _, epoch := range s.eligibleForRemoval { + if _, ok := protectedEpochs[epoch]; ok { + // protected + newEligible = append(newEligible, epoch) + } else { + epochsToRemove = append(epochsToRemove, epoch) + } + } + s.eligibleForRemoval = newEligible + s.rootLock.Unlock() + + if len(epochsToRemove) <= 0 { + return 0, nil + } + + tx, err := s.rootBolt.Begin(true) + if err != nil { + return 0, err + } + defer func() { + if err == nil { + err = tx.Commit() + } else { + _ = tx.Rollback() + } + if err == nil { + err = s.rootBolt.Sync() + } + }() + + snapshots := tx.Bucket(boltSnapshotsBucket) + if snapshots == nil { + return 0, nil + } + + for _, epochToRemove := range epochsToRemove { + k := segment.EncodeUvarintAscending(nil, epochToRemove) + err = snapshots.DeleteBucket(k) + if err == bolt.ErrBucketNotFound { + err = nil + } + if err == nil { + numRemoved++ + } + } + + return numRemoved, err +} + +func (s *Scorch) maxSegmentIDOnDisk() (uint64, error) { + currFileInfos, err := ioutil.ReadDir(s.path) + if err != nil { + return 0, err + } + + var rv uint64 + for _, finfo := range currFileInfos { + fname := finfo.Name() + if filepath.Ext(fname) == ".zap" { + prefix := strings.TrimSuffix(fname, ".zap") + id, err2 := strconv.ParseUint(prefix, 16, 64) + if err2 != nil { + return 0, err2 + } + if id > rv { + rv = id + } + } + } + return rv, err +} + +// Removes any *.zap files which aren't listed in the rootBolt. +func (s *Scorch) removeOldZapFiles() error { + liveFileNames, err := s.loadZapFileNames() + if err != nil { + return err + } + + currFileInfos, err := ioutil.ReadDir(s.path) + if err != nil { + return err + } + + s.rootLock.RLock() + + for _, finfo := range currFileInfos { + fname := finfo.Name() + if filepath.Ext(fname) == ".zap" { + if _, exists := liveFileNames[fname]; !exists && !s.ineligibleForRemoval[fname] { + err := os.Remove(s.path + string(os.PathSeparator) + fname) + if err != nil { + log.Printf("got err removing file: %s, err: %v", fname, err) + } + } + } + } + + s.rootLock.RUnlock() + + return nil +} + +func (s *Scorch) RootBoltSnapshotEpochs() ([]uint64, error) { + var rv []uint64 + err := s.rootBolt.View(func(tx *bolt.Tx) error { + snapshots := tx.Bucket(boltSnapshotsBucket) + if snapshots == nil { + return nil + } + sc := snapshots.Cursor() + for sk, _ := sc.Last(); sk != nil; sk, _ = sc.Prev() { + _, snapshotEpoch, err := segment.DecodeUvarintAscending(sk) + if err != nil { + continue + } + rv = append(rv, snapshotEpoch) + } + return nil + }) + return rv, err +} + +// Returns the *.zap file names that are listed in the rootBolt. +func (s *Scorch) loadZapFileNames() (map[string]struct{}, error) { + rv := map[string]struct{}{} + err := s.rootBolt.View(func(tx *bolt.Tx) error { + snapshots := tx.Bucket(boltSnapshotsBucket) + if snapshots == nil { + return nil + } + sc := snapshots.Cursor() + for sk, _ := sc.First(); sk != nil; sk, _ = sc.Next() { + snapshot := snapshots.Bucket(sk) + if snapshot == nil { + continue + } + segc := snapshot.Cursor() + for segk, _ := segc.First(); segk != nil; segk, _ = segc.Next() { + if segk[0] == boltInternalKey[0] { + continue + } + segmentBucket := snapshot.Bucket(segk) + if segmentBucket == nil { + continue + } + pathBytes := segmentBucket.Get(boltPathKey) + if pathBytes == nil { + continue + } + pathString := string(pathBytes) + rv[string(pathString)] = struct{}{} + } + } + return nil + }) + + return rv, err +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/reader.go b/vendor/github.com/blevesearch/bleve/index/scorch/reader.go new file mode 100644 index 0000000000..365ecb6706 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/reader.go @@ -0,0 +1,110 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" +) + +type Reader struct { + root *IndexSnapshot // Owns 1 ref-count on the index snapshot. +} + +func (r *Reader) TermFieldReader(term []byte, field string, includeFreq, + includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { + return r.root.TermFieldReader(term, field, includeFreq, includeNorm, includeTermVectors) +} + +// DocIDReader returns an iterator over all doc ids +// The caller must close returned instance to release associated resources. +func (r *Reader) DocIDReaderAll() (index.DocIDReader, error) { + return r.root.DocIDReaderAll() +} + +func (r *Reader) DocIDReaderOnly(ids []string) (index.DocIDReader, error) { + return r.root.DocIDReaderOnly(ids) +} + +func (r *Reader) FieldDict(field string) (index.FieldDict, error) { + return r.root.FieldDict(field) +} + +// FieldDictRange is currently defined to include the start and end terms +func (r *Reader) FieldDictRange(field string, startTerm []byte, + endTerm []byte) (index.FieldDict, error) { + return r.root.FieldDictRange(field, startTerm, endTerm) +} + +func (r *Reader) FieldDictPrefix(field string, + termPrefix []byte) (index.FieldDict, error) { + return r.root.FieldDictPrefix(field, termPrefix) +} + +func (r *Reader) Document(id string) (*document.Document, error) { + return r.root.Document(id) +} +func (r *Reader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, + visitor index.DocumentFieldTermVisitor) error { + return r.root.DocumentVisitFieldTerms(id, fields, visitor) +} + +func (r *Reader) Fields() ([]string, error) { + return r.root.Fields() +} + +func (r *Reader) GetInternal(key []byte) ([]byte, error) { + return r.root.GetInternal(key) +} + +func (r *Reader) DocCount() (uint64, error) { + return r.root.DocCount() +} + +func (r *Reader) ExternalID(id index.IndexInternalID) (string, error) { + return r.root.ExternalID(id) +} + +func (r *Reader) InternalID(id string) (index.IndexInternalID, error) { + return r.root.InternalID(id) +} + +func (r *Reader) DumpAll() chan interface{} { + rv := make(chan interface{}) + go func() { + close(rv) + }() + return rv +} + +func (r *Reader) DumpDoc(id string) chan interface{} { + rv := make(chan interface{}) + go func() { + close(rv) + }() + return rv +} + +func (r *Reader) DumpFields() chan interface{} { + rv := make(chan interface{}) + go func() { + close(rv) + }() + return rv +} + +func (r *Reader) Close() error { + return r.root.DecRef() +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go b/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go new file mode 100644 index 0000000000..311077653a --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go @@ -0,0 +1,438 @@ +// Copyright (c) 2018 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "encoding/json" + "fmt" + "os" + "sync" + "sync/atomic" + "time" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/blevesearch/bleve/index/scorch/segment/mem" + "github.com/blevesearch/bleve/index/scorch/segment/zap" + "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/registry" + "github.com/boltdb/bolt" +) + +const Name = "scorch" + +const Version uint8 = 1 + +type Scorch struct { + readOnly bool + version uint8 + config map[string]interface{} + analysisQueue *index.AnalysisQueue + stats *Stats + nextSegmentID uint64 + path string + + unsafeBatch bool + + rootLock sync.RWMutex + root *IndexSnapshot // holds 1 ref-count on the root + rootPersisted []chan error // closed when root is persisted + nextSnapshotEpoch uint64 + eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC. + ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet. + + closeCh chan struct{} + introductions chan *segmentIntroduction + merges chan *segmentMerge + introducerNotifier chan *epochWatcher + revertToSnapshots chan *snapshotReversion + persisterNotifier chan notificationChan + rootBolt *bolt.DB + asyncTasks sync.WaitGroup + + onEvent func(event Event) + onAsyncError func(err error) +} + +func NewScorch(storeName string, + config map[string]interface{}, + analysisQueue *index.AnalysisQueue) (index.Index, error) { + rv := &Scorch{ + version: Version, + config: config, + analysisQueue: analysisQueue, + nextSnapshotEpoch: 1, + closeCh: make(chan struct{}), + ineligibleForRemoval: map[string]bool{}, + } + rv.stats = &Stats{i: rv} + rv.root = &IndexSnapshot{parent: rv, refs: 1} + ro, ok := config["read_only"].(bool) + if ok { + rv.readOnly = ro + } + ub, ok := config["unsafe_batch"].(bool) + if ok { + rv.unsafeBatch = ub + } + ecbName, ok := config["eventCallbackName"].(string) + if ok { + rv.onEvent = RegistryEventCallbacks[ecbName] + } + aecbName, ok := config["asyncErrorCallbackName"].(string) + if ok { + rv.onAsyncError = RegistryAsyncErrorCallbacks[aecbName] + } + return rv, nil +} + +func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) { + if s.onEvent != nil { + s.onEvent(Event{Kind: kind, Scorch: s, Duration: dur}) + } +} + +func (s *Scorch) fireAsyncError(err error) { + if s.onAsyncError != nil { + s.onAsyncError(err) + } +} + +func (s *Scorch) Open() error { + var ok bool + s.path, ok = s.config["path"].(string) + if !ok { + return fmt.Errorf("must specify path") + } + if s.path == "" { + s.unsafeBatch = true + } + + var rootBoltOpt *bolt.Options + if s.readOnly { + rootBoltOpt = &bolt.Options{ + ReadOnly: true, + } + } else { + if s.path != "" { + err := os.MkdirAll(s.path, 0700) + if err != nil { + return err + } + } + } + rootBoltPath := s.path + string(os.PathSeparator) + "root.bolt" + var err error + if s.path != "" { + s.rootBolt, err = bolt.Open(rootBoltPath, 0600, rootBoltOpt) + if err != nil { + return err + } + + // now see if there is any existing state to load + err = s.loadFromBolt() + if err != nil { + _ = s.Close() + return err + } + } + + s.introductions = make(chan *segmentIntroduction) + s.merges = make(chan *segmentMerge) + s.introducerNotifier = make(chan *epochWatcher, 1) + s.revertToSnapshots = make(chan *snapshotReversion) + s.persisterNotifier = make(chan notificationChan) + + if !s.readOnly && s.path != "" { + err := s.removeOldZapFiles() // Before persister or merger create any new files. + if err != nil { + _ = s.Close() + return err + } + } + + s.asyncTasks.Add(1) + go s.mainLoop() + + if !s.readOnly && s.path != "" { + s.asyncTasks.Add(1) + go s.persisterLoop() + s.asyncTasks.Add(1) + go s.mergerLoop() + } + + return nil +} + +func (s *Scorch) Close() (err error) { + startTime := time.Now() + defer func() { + s.fireEvent(EventKindClose, time.Since(startTime)) + }() + + s.fireEvent(EventKindCloseStart, 0) + + // signal to async tasks we want to close + close(s.closeCh) + // wait for them to close + s.asyncTasks.Wait() + // now close the root bolt + if s.rootBolt != nil { + err = s.rootBolt.Close() + s.rootLock.Lock() + if s.root != nil { + _ = s.root.DecRef() + } + s.root = nil + s.rootLock.Unlock() + } + + return +} + +func (s *Scorch) Update(doc *document.Document) error { + b := index.NewBatch() + b.Update(doc) + return s.Batch(b) +} + +func (s *Scorch) Delete(id string) error { + b := index.NewBatch() + b.Delete(id) + return s.Batch(b) +} + +// Batch applices a batch of changes to the index atomically +func (s *Scorch) Batch(batch *index.Batch) (err error) { + start := time.Now() + + defer func() { + s.fireEvent(EventKindBatchIntroduction, time.Since(start)) + }() + + resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps)) + + var numUpdates uint64 + var numDeletes uint64 + var numPlainTextBytes uint64 + var ids []string + for docID, doc := range batch.IndexOps { + if doc != nil { + // insert _id field + doc.AddField(document.NewTextFieldCustom("_id", nil, []byte(doc.ID), document.IndexField|document.StoreField, nil)) + numUpdates++ + numPlainTextBytes += doc.NumPlainTextBytes() + } else { + numDeletes++ + } + ids = append(ids, docID) + } + + // FIXME could sort ids list concurrent with analysis? + + go func() { + for _, doc := range batch.IndexOps { + if doc != nil { + aw := index.NewAnalysisWork(s, doc, resultChan) + // put the work on the queue + s.analysisQueue.Queue(aw) + } + } + }() + + // wait for analysis result + analysisResults := make([]*index.AnalysisResult, int(numUpdates)) + var itemsDeQueued uint64 + for itemsDeQueued < numUpdates { + result := <-resultChan + analysisResults[itemsDeQueued] = result + itemsDeQueued++ + } + close(resultChan) + + atomic.AddUint64(&s.stats.analysisTime, uint64(time.Since(start))) + + // notify handlers that we're about to introduce a segment + s.fireEvent(EventKindBatchIntroductionStart, 0) + + var newSegment segment.Segment + if len(analysisResults) > 0 { + newSegment, err = zap.NewSegmentBase(mem.NewFromAnalyzedDocs(analysisResults), DefaultChunkFactor) + if err != nil { + return err + } + } + + err = s.prepareSegment(newSegment, ids, batch.InternalOps) + if err != nil { + if newSegment != nil { + _ = newSegment.Close() + } + atomic.AddUint64(&s.stats.errors, 1) + } else { + atomic.AddUint64(&s.stats.updates, numUpdates) + atomic.AddUint64(&s.stats.deletes, numDeletes) + atomic.AddUint64(&s.stats.batches, 1) + atomic.AddUint64(&s.stats.numPlainTextBytesIndexed, numPlainTextBytes) + } + return err +} + +func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, + internalOps map[string][]byte) error { + + // new introduction + introduction := &segmentIntroduction{ + id: atomic.AddUint64(&s.nextSegmentID, 1), + data: newSegment, + ids: ids, + obsoletes: make(map[uint64]*roaring.Bitmap), + internal: internalOps, + applied: make(chan error), + } + + if !s.unsafeBatch { + introduction.persisted = make(chan error, 1) + } + + // get read lock, to optimistically prepare obsoleted info + s.rootLock.RLock() + for _, seg := range s.root.segment { + delta, err := seg.segment.DocNumbers(ids) + if err != nil { + s.rootLock.RUnlock() + return err + } + introduction.obsoletes[seg.id] = delta + } + s.rootLock.RUnlock() + + s.introductions <- introduction + + // block until this segment is applied + err := <-introduction.applied + if err != nil { + return err + } + + if introduction.persisted != nil { + err = <-introduction.persisted + } + + return err +} + +func (s *Scorch) SetInternal(key, val []byte) error { + b := index.NewBatch() + b.SetInternal(key, val) + return s.Batch(b) +} + +func (s *Scorch) DeleteInternal(key []byte) error { + b := index.NewBatch() + b.DeleteInternal(key) + return s.Batch(b) +} + +// Reader returns a low-level accessor on the index data. Close it to +// release associated resources. +func (s *Scorch) Reader() (index.IndexReader, error) { + s.rootLock.RLock() + rv := &Reader{root: s.root} + rv.root.AddRef() + s.rootLock.RUnlock() + return rv, nil +} + +func (s *Scorch) Stats() json.Marshaler { + return s.stats +} +func (s *Scorch) StatsMap() map[string]interface{} { + m, _ := s.stats.statsMap() + return m +} + +func (s *Scorch) Analyze(d *document.Document) *index.AnalysisResult { + rv := &index.AnalysisResult{ + Document: d, + Analyzed: make([]analysis.TokenFrequencies, len(d.Fields)+len(d.CompositeFields)), + Length: make([]int, len(d.Fields)+len(d.CompositeFields)), + } + + for i, field := range d.Fields { + if field.Options().IsIndexed() { + fieldLength, tokenFreqs := field.Analyze() + rv.Analyzed[i] = tokenFreqs + rv.Length[i] = fieldLength + + if len(d.CompositeFields) > 0 { + // see if any of the composite fields need this + for _, compositeField := range d.CompositeFields { + compositeField.Compose(field.Name(), fieldLength, tokenFreqs) + } + } + } + } + + return rv +} + +func (s *Scorch) Advanced() (store.KVStore, error) { + return nil, nil +} + +func (s *Scorch) AddEligibleForRemoval(epoch uint64) { + s.rootLock.Lock() + if s.root == nil || s.root.epoch != epoch { + s.eligibleForRemoval = append(s.eligibleForRemoval, epoch) + } + s.rootLock.Unlock() +} + +func (s *Scorch) MemoryUsed() uint64 { + var memUsed uint64 + s.rootLock.RLock() + if s.root != nil { + for _, segmentSnapshot := range s.root.segment { + memUsed += 8 /* size of id -> uint64 */ + + segmentSnapshot.segment.SizeInBytes() + if segmentSnapshot.deleted != nil { + memUsed += segmentSnapshot.deleted.GetSizeInBytes() + } + memUsed += segmentSnapshot.cachedDocs.sizeInBytes() + } + } + s.rootLock.RUnlock() + return memUsed +} + +func (s *Scorch) markIneligibleForRemoval(filename string) { + s.rootLock.Lock() + s.ineligibleForRemoval[filename] = true + s.rootLock.Unlock() +} + +func (s *Scorch) unmarkIneligibleForRemoval(filename string) { + s.rootLock.Lock() + delete(s.ineligibleForRemoval, filename) + s.rootLock.Unlock() +} + +func init() { + registry.RegisterIndexType(Name, NewScorch) +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go new file mode 100644 index 0000000000..83454644da --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go @@ -0,0 +1,95 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package segment + +import ( + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index" +) + +type EmptySegment struct{} + +func (e *EmptySegment) Dictionary(field string) (TermDictionary, error) { + return &EmptyDictionary{}, nil +} + +func (e *EmptySegment) VisitDocument(num uint64, visitor DocumentFieldValueVisitor) error { + return nil +} + +func (e *EmptySegment) Count() uint64 { + return 0 +} + +func (e *EmptySegment) DocNumbers([]string) (*roaring.Bitmap, error) { + r := roaring.NewBitmap() + return r, nil +} + +func (e *EmptySegment) Fields() []string { + return []string{} +} + +func (e *EmptySegment) Close() error { + return nil +} + +func (e *EmptySegment) AddRef() { +} + +func (e *EmptySegment) DecRef() error { + return nil +} + +type EmptyDictionary struct{} + +func (e *EmptyDictionary) PostingsList(term string, + except *roaring.Bitmap) (PostingsList, error) { + return &EmptyPostingsList{}, nil +} + +func (e *EmptyDictionary) Iterator() DictionaryIterator { + return &EmptyDictionaryIterator{} +} + +func (e *EmptyDictionary) PrefixIterator(prefix string) DictionaryIterator { + return &EmptyDictionaryIterator{} +} + +func (e *EmptyDictionary) RangeIterator(start, end string) DictionaryIterator { + return &EmptyDictionaryIterator{} +} + +type EmptyDictionaryIterator struct{} + +func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { + return nil, nil +} + +type EmptyPostingsList struct{} + +func (e *EmptyPostingsList) Iterator() PostingsIterator { + return &EmptyPostingsIterator{} +} + +func (e *EmptyPostingsList) Count() uint64 { + return 0 +} + +type EmptyPostingsIterator struct{} + +func (e *EmptyPostingsIterator) Next() (Posting, error) { + return nil, nil +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go new file mode 100644 index 0000000000..a4836ebf8a --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go @@ -0,0 +1,94 @@ +// Copyright 2014 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. + +// This code originated from: +// https://github.com/cockroachdb/cockroach/blob/2dd65dde5d90c157f4b93f92502ca1063b904e1d/pkg/util/encoding/encoding.go + +// Modified to not use pkg/errors + +package segment + +import "fmt" + +const ( + MaxVarintSize = 9 + + // IntMin is chosen such that the range of int tags does not overlap the + // ascii character set that is frequently used in testing. + IntMin = 0x80 // 128 + intMaxWidth = 8 + intZero = IntMin + intMaxWidth // 136 + intSmall = IntMax - intZero - intMaxWidth // 109 + // IntMax is the maximum int tag value. + IntMax = 0xfd // 253 +) + +// EncodeUvarintAscending encodes the uint64 value using a variable length +// (length-prefixed) representation. The length is encoded as a single +// byte indicating the number of encoded bytes (-8) to follow. See +// EncodeVarintAscending for rationale. The encoded bytes are appended to the +// supplied buffer and the final buffer is returned. +func EncodeUvarintAscending(b []byte, v uint64) []byte { + switch { + case v <= intSmall: + return append(b, intZero+byte(v)) + case v <= 0xff: + return append(b, IntMax-7, byte(v)) + case v <= 0xffff: + return append(b, IntMax-6, byte(v>>8), byte(v)) + case v <= 0xffffff: + return append(b, IntMax-5, byte(v>>16), byte(v>>8), byte(v)) + case v <= 0xffffffff: + return append(b, IntMax-4, byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) + case v <= 0xffffffffff: + return append(b, IntMax-3, byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8), + byte(v)) + case v <= 0xffffffffffff: + return append(b, IntMax-2, byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16), + byte(v>>8), byte(v)) + case v <= 0xffffffffffffff: + return append(b, IntMax-1, byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24), + byte(v>>16), byte(v>>8), byte(v)) + default: + return append(b, IntMax, byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32), + byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) + } +} + +// DecodeUvarintAscending decodes a varint encoded uint64 from the input +// buffer. The remainder of the input buffer and the decoded uint64 +// are returned. +func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) { + if len(b) == 0 { + return nil, 0, fmt.Errorf("insufficient bytes to decode uvarint value") + } + length := int(b[0]) - intZero + b = b[1:] // skip length byte + if length <= intSmall { + return b, uint64(length), nil + } + length -= intSmall + if length < 0 || length > 8 { + return nil, 0, fmt.Errorf("invalid uvarint length of %d", length) + } else if len(b) < length { + return nil, 0, fmt.Errorf("insufficient bytes to decode uvarint value: %q", b) + } + var v uint64 + // It is faster to range over the elements in a slice than to index + // into the slice on each loop iteration. + for _, t := range b[:length] { + v = (v << 8) | uint64(t) + } + return b[length:], v, nil +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/build.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/build.go new file mode 100644 index 0000000000..d3344ce301 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/build.go @@ -0,0 +1,306 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mem + +import ( + "math" + "sort" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/analysis" + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" +) + +// NewFromAnalyzedDocs places the analyzed document mutations into a new segment +func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment { + s := New() + + // ensure that _id field get fieldID 0 + s.getOrDefineField("_id") + + // fill Dicts/DictKeys and preallocate memory + s.initializeDict(results) + + // walk each doc + for _, result := range results { + s.processDocument(result) + } + + // go back and sort the dictKeys + for _, dict := range s.DictKeys { + sort.Strings(dict) + } + + // compute memory usage of segment + s.updateSizeInBytes() + + // professional debugging + // + // log.Printf("fields: %v\n", s.FieldsMap) + // log.Printf("fieldsInv: %v\n", s.FieldsInv) + // log.Printf("fieldsLoc: %v\n", s.FieldsLoc) + // log.Printf("dicts: %v\n", s.Dicts) + // log.Printf("dict keys: %v\n", s.DictKeys) + // for i, posting := range s.Postings { + // log.Printf("posting %d: %v\n", i, posting) + // } + // for i, freq := range s.Freqs { + // log.Printf("freq %d: %v\n", i, freq) + // } + // for i, norm := range s.Norms { + // log.Printf("norm %d: %v\n", i, norm) + // } + // for i, field := range s.Locfields { + // log.Printf("field %d: %v\n", i, field) + // } + // for i, start := range s.Locstarts { + // log.Printf("start %d: %v\n", i, start) + // } + // for i, end := range s.Locends { + // log.Printf("end %d: %v\n", i, end) + // } + // for i, pos := range s.Locpos { + // log.Printf("pos %d: %v\n", i, pos) + // } + // for i, apos := range s.Locarraypos { + // log.Printf("apos %d: %v\n", i, apos) + // } + // log.Printf("stored: %v\n", s.Stored) + // log.Printf("stored types: %v\n", s.StoredTypes) + // log.Printf("stored pos: %v\n", s.StoredPos) + + return s +} + +// fill Dicts/DictKeys and preallocate memory for postings +func (s *Segment) initializeDict(results []*index.AnalysisResult) { + var numPostingsLists int + + numTermsPerPostingsList := make([]int, 0, 64) // Keyed by postings list id. + numLocsPerPostingsList := make([]int, 0, 64) // Keyed by postings list id. + + var numTokenFrequencies int + var totLocs int + + processField := func(fieldID uint16, tfs analysis.TokenFrequencies) { + for term, tf := range tfs { + pidPlus1, exists := s.Dicts[fieldID][term] + if !exists { + numPostingsLists++ + pidPlus1 = uint64(numPostingsLists) + s.Dicts[fieldID][term] = pidPlus1 + s.DictKeys[fieldID] = append(s.DictKeys[fieldID], term) + numTermsPerPostingsList = append(numTermsPerPostingsList, 0) + numLocsPerPostingsList = append(numLocsPerPostingsList, 0) + } + pid := pidPlus1 - 1 + numTermsPerPostingsList[pid] += 1 + numLocsPerPostingsList[pid] += len(tf.Locations) + totLocs += len(tf.Locations) + } + numTokenFrequencies += len(tfs) + } + + for _, result := range results { + // walk each composite field + for _, field := range result.Document.CompositeFields { + fieldID := uint16(s.getOrDefineField(field.Name())) + _, tf := field.Analyze() + processField(fieldID, tf) + } + + // walk each field + for i, field := range result.Document.Fields { + fieldID := uint16(s.getOrDefineField(field.Name())) + tf := result.Analyzed[i] + processField(fieldID, tf) + } + } + + s.Postings = make([]*roaring.Bitmap, numPostingsLists) + for i := 0; i < numPostingsLists; i++ { + s.Postings[i] = roaring.New() + } + s.PostingsLocs = make([]*roaring.Bitmap, numPostingsLists) + for i := 0; i < numPostingsLists; i++ { + s.PostingsLocs[i] = roaring.New() + } + + // Preallocate big, contiguous backing arrays. + auint64Backing := make([][]uint64, numPostingsLists*4+totLocs) // For Freqs, Locstarts, Locends, Locpos, sub-Locarraypos. + uint64Backing := make([]uint64, numTokenFrequencies+totLocs*3) // For sub-Freqs, sub-Locstarts, sub-Locends, sub-Locpos. + float32Backing := make([]float32, numTokenFrequencies) // For sub-Norms. + uint16Backing := make([]uint16, totLocs) // For sub-Locfields. + + // Point top-level slices to the backing arrays. + s.Freqs = auint64Backing[0:numPostingsLists] + auint64Backing = auint64Backing[numPostingsLists:] + + s.Norms = make([][]float32, numPostingsLists) + + s.Locfields = make([][]uint16, numPostingsLists) + + s.Locstarts = auint64Backing[0:numPostingsLists] + auint64Backing = auint64Backing[numPostingsLists:] + + s.Locends = auint64Backing[0:numPostingsLists] + auint64Backing = auint64Backing[numPostingsLists:] + + s.Locpos = auint64Backing[0:numPostingsLists] + auint64Backing = auint64Backing[numPostingsLists:] + + s.Locarraypos = make([][][]uint64, numPostingsLists) + + // Point sub-slices to the backing arrays. + for pid, numTerms := range numTermsPerPostingsList { + s.Freqs[pid] = uint64Backing[0:0] + uint64Backing = uint64Backing[numTerms:] + + s.Norms[pid] = float32Backing[0:0] + float32Backing = float32Backing[numTerms:] + } + + for pid, numLocs := range numLocsPerPostingsList { + s.Locfields[pid] = uint16Backing[0:0] + uint16Backing = uint16Backing[numLocs:] + + s.Locstarts[pid] = uint64Backing[0:0] + uint64Backing = uint64Backing[numLocs:] + + s.Locends[pid] = uint64Backing[0:0] + uint64Backing = uint64Backing[numLocs:] + + s.Locpos[pid] = uint64Backing[0:0] + uint64Backing = uint64Backing[numLocs:] + + s.Locarraypos[pid] = auint64Backing[0:0] + auint64Backing = auint64Backing[numLocs:] + } +} + +func (s *Segment) processDocument(result *index.AnalysisResult) { + // used to collate information across fields + docMap := make(map[uint16]analysis.TokenFrequencies, len(s.FieldsMap)) + fieldLens := make(map[uint16]int, len(s.FieldsMap)) + + docNum := uint64(s.addDocument()) + + processField := func(field uint16, name string, l int, tf analysis.TokenFrequencies) { + fieldLens[field] += l + if existingFreqs, ok := docMap[field]; ok { + existingFreqs.MergeAll(name, tf) + } else { + docMap[field] = tf + } + } + + storeField := func(docNum uint64, field uint16, typ byte, val []byte, pos []uint64) { + s.Stored[docNum][field] = append(s.Stored[docNum][field], val) + s.StoredTypes[docNum][field] = append(s.StoredTypes[docNum][field], typ) + s.StoredPos[docNum][field] = append(s.StoredPos[docNum][field], pos) + } + + // walk each composite field + for _, field := range result.Document.CompositeFields { + fieldID := uint16(s.getOrDefineField(field.Name())) + l, tf := field.Analyze() + processField(fieldID, field.Name(), l, tf) + } + + // walk each field + for i, field := range result.Document.Fields { + fieldID := uint16(s.getOrDefineField(field.Name())) + l := result.Length[i] + tf := result.Analyzed[i] + processField(fieldID, field.Name(), l, tf) + if field.Options().IsStored() { + storeField(docNum, fieldID, encodeFieldType(field), field.Value(), field.ArrayPositions()) + } + + if field.Options().IncludeDocValues() { + s.DocValueFields[fieldID] = true + } + } + + // now that its been rolled up into docMap, walk that + for fieldID, tokenFrequencies := range docMap { + for term, tokenFreq := range tokenFrequencies { + pid := s.Dicts[fieldID][term] - 1 + bs := s.Postings[pid] + bs.AddInt(int(docNum)) + s.Freqs[pid] = append(s.Freqs[pid], uint64(tokenFreq.Frequency())) + s.Norms[pid] = append(s.Norms[pid], float32(1.0/math.Sqrt(float64(fieldLens[fieldID])))) + locationBS := s.PostingsLocs[pid] + if len(tokenFreq.Locations) > 0 { + locationBS.AddInt(int(docNum)) + for _, loc := range tokenFreq.Locations { + var locf = fieldID + if loc.Field != "" { + locf = uint16(s.getOrDefineField(loc.Field)) + } + s.Locfields[pid] = append(s.Locfields[pid], locf) + s.Locstarts[pid] = append(s.Locstarts[pid], uint64(loc.Start)) + s.Locends[pid] = append(s.Locends[pid], uint64(loc.End)) + s.Locpos[pid] = append(s.Locpos[pid], uint64(loc.Position)) + if len(loc.ArrayPositions) > 0 { + s.Locarraypos[pid] = append(s.Locarraypos[pid], loc.ArrayPositions) + } else { + s.Locarraypos[pid] = append(s.Locarraypos[pid], nil) + } + } + } + } + } +} + +func (s *Segment) getOrDefineField(name string) int { + fieldIDPlus1, ok := s.FieldsMap[name] + if !ok { + fieldIDPlus1 = uint16(len(s.FieldsInv) + 1) + s.FieldsMap[name] = fieldIDPlus1 + s.FieldsInv = append(s.FieldsInv, name) + s.Dicts = append(s.Dicts, make(map[string]uint64)) + s.DictKeys = append(s.DictKeys, make([]string, 0)) + } + return int(fieldIDPlus1 - 1) +} + +func (s *Segment) addDocument() int { + docNum := len(s.Stored) + s.Stored = append(s.Stored, map[uint16][][]byte{}) + s.StoredTypes = append(s.StoredTypes, map[uint16][]byte{}) + s.StoredPos = append(s.StoredPos, map[uint16][][]uint64{}) + return docNum +} + +func encodeFieldType(f document.Field) byte { + fieldType := byte('x') + switch f.(type) { + case *document.TextField: + fieldType = 't' + case *document.NumericField: + fieldType = 'n' + case *document.DateTimeField: + fieldType = 'd' + case *document.BooleanField: + fieldType = 'b' + case *document.GeoPointField: + fieldType = 'g' + case *document.CompositeField: + fieldType = 'c' + } + return fieldType +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/dict.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/dict.go new file mode 100644 index 0000000000..939c287e98 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/dict.go @@ -0,0 +1,102 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mem + +import ( + "sort" + "strings" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" +) + +// Dictionary is the in-memory representation of the term dictionary +type Dictionary struct { + segment *Segment + field string + fieldID uint16 +} + +// PostingsList returns the postings list for the specified term +func (d *Dictionary) PostingsList(term string, + except *roaring.Bitmap) (segment.PostingsList, error) { + return &PostingsList{ + dictionary: d, + term: term, + postingsID: d.segment.Dicts[d.fieldID][term], + except: except, + }, nil +} + +// Iterator returns an iterator for this dictionary +func (d *Dictionary) Iterator() segment.DictionaryIterator { + return &DictionaryIterator{ + d: d, + } +} + +// PrefixIterator returns an iterator which only visits terms having the +// the specified prefix +func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { + offset := sort.SearchStrings(d.segment.DictKeys[d.fieldID], prefix) + return &DictionaryIterator{ + d: d, + prefix: prefix, + offset: offset, + } +} + +// RangeIterator returns an iterator which only visits terms between the +// start and end terms. NOTE: bleve.index API specifies the end is inclusive. +func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator { + offset := sort.SearchStrings(d.segment.DictKeys[d.fieldID], start) + return &DictionaryIterator{ + d: d, + offset: offset, + end: end, + } +} + +// DictionaryIterator is an iterator for term dictionary +type DictionaryIterator struct { + d *Dictionary + prefix string + end string + offset int +} + +// Next returns the next entry in the dictionary +func (d *DictionaryIterator) Next() (*index.DictEntry, error) { + if d.offset > len(d.d.segment.DictKeys[d.d.fieldID])-1 { + return nil, nil + } + next := d.d.segment.DictKeys[d.d.fieldID][d.offset] + // check prefix + if d.prefix != "" && !strings.HasPrefix(next, d.prefix) { + return nil, nil + } + // check end (bleve.index API demands inclusive end) + if d.end != "" && next > d.end { + return nil, nil + } + + d.offset++ + postingID := d.d.segment.Dicts[d.d.fieldID][next] + return &index.DictEntry{ + Term: next, + Count: d.d.segment.Postings[postingID-1].GetCardinality(), + }, nil +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/posting.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/posting.go new file mode 100644 index 0000000000..d91a005615 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/posting.go @@ -0,0 +1,178 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mem + +import ( + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index/scorch/segment" +) + +// PostingsList is an in-memory represenation of a postings list +type PostingsList struct { + dictionary *Dictionary + term string + postingsID uint64 + except *roaring.Bitmap +} + +// Count returns the number of items on this postings list +func (p *PostingsList) Count() uint64 { + var rv uint64 + if p.postingsID > 0 { + rv = p.dictionary.segment.Postings[p.postingsID-1].GetCardinality() + if p.except != nil { + except := p.except.GetCardinality() + if except > rv { + // avoid underflow + except = rv + } + rv -= except + } + } + return rv +} + +// Iterator returns an iterator for this postings list +func (p *PostingsList) Iterator() segment.PostingsIterator { + rv := &PostingsIterator{ + postings: p, + } + if p.postingsID > 0 { + allbits := p.dictionary.segment.Postings[p.postingsID-1] + rv.locations = p.dictionary.segment.PostingsLocs[p.postingsID-1] + rv.all = allbits.Iterator() + if p.except != nil { + allExcept := allbits.Clone() + allExcept.AndNot(p.except) + rv.actual = allExcept.Iterator() + } else { + rv.actual = allbits.Iterator() + } + } + + return rv +} + +// PostingsIterator provides a way to iterate through the postings list +type PostingsIterator struct { + postings *PostingsList + all roaring.IntIterable + locations *roaring.Bitmap + offset int + locoffset int + actual roaring.IntIterable +} + +// Next returns the next posting on the postings list, or nil at the end +func (i *PostingsIterator) Next() (segment.Posting, error) { + if i.actual == nil || !i.actual.HasNext() { + return nil, nil + } + n := i.actual.Next() + allN := i.all.Next() + + // n is the next actual hit (excluding some postings) + // allN is the next hit in the full postings + // if they don't match, adjust offsets to factor in item we're skipping over + // incr the all iterator, and check again + for allN != n { + i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset]) + i.offset++ + allN = i.all.Next() + } + rv := &Posting{ + iterator: i, + docNum: uint64(n), + offset: i.offset, + locoffset: i.locoffset, + hasLoc: i.locations.Contains(n), + } + + i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset]) + i.offset++ + return rv, nil +} + +// Posting is a single entry in a postings list +type Posting struct { + iterator *PostingsIterator + docNum uint64 + offset int + locoffset int + hasLoc bool +} + +// Number returns the document number of this posting in this segment +func (p *Posting) Number() uint64 { + return p.docNum +} + +// Frequency returns the frequence of occurance of this term in this doc/field +func (p *Posting) Frequency() uint64 { + return p.iterator.postings.dictionary.segment.Freqs[p.iterator.postings.postingsID-1][p.offset] +} + +// Norm returns the normalization factor for this posting +func (p *Posting) Norm() float64 { + return float64(p.iterator.postings.dictionary.segment.Norms[p.iterator.postings.postingsID-1][p.offset]) +} + +// Locations returns the location information for each occurance +func (p *Posting) Locations() []segment.Location { + if !p.hasLoc { + return nil + } + freq := int(p.Frequency()) + rv := make([]segment.Location, freq) + for i := 0; i < freq; i++ { + rv[i] = &Location{ + p: p, + offset: p.locoffset + i, + } + } + return rv +} + +// Location represents the location of a single occurance +type Location struct { + p *Posting + offset int +} + +// Field returns the name of the field (useful in composite fields to know +// which original field the value came from) +func (l *Location) Field() string { + return l.p.iterator.postings.dictionary.segment.FieldsInv[l.p.iterator.postings.dictionary.segment.Locfields[l.p.iterator.postings.postingsID-1][l.offset]] +} + +// Start returns the start byte offset of this occurance +func (l *Location) Start() uint64 { + return l.p.iterator.postings.dictionary.segment.Locstarts[l.p.iterator.postings.postingsID-1][l.offset] +} + +// End returns the end byte offset of this occurance +func (l *Location) End() uint64 { + return l.p.iterator.postings.dictionary.segment.Locends[l.p.iterator.postings.postingsID-1][l.offset] +} + +// Pos returns the 1-based phrase position of this occurance +func (l *Location) Pos() uint64 { + return l.p.iterator.postings.dictionary.segment.Locpos[l.p.iterator.postings.postingsID-1][l.offset] +} + +// ArrayPositions returns the array position vector associated with this occurance +func (l *Location) ArrayPositions() []uint64 { + return l.p.iterator.postings.dictionary.segment.Locarraypos[l.p.iterator.postings.postingsID-1][l.offset] +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/segment.go new file mode 100644 index 0000000000..04bdb368ac --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/segment.go @@ -0,0 +1,289 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mem + +import ( + "fmt" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index/scorch/segment" +) + +// _id field is always guaranteed to have fieldID of 0 +const idFieldID uint16 = 0 + +// KNOWN ISSUES +// - LIMITATION - we decided whether or not to store term vectors for a field +// at the segment level, based on the first definition of a +// field we see. in normal bleve usage this is fine, all +// instances of a field definition will be the same. however, +// advanced users may violate this and provide unique field +// definitions with each document. this segment does not +// support this usage. + +// TODO +// - need better testing of multiple docs, iterating freqs, locations and +// and verifying the correct results are returned + +// Segment is an in memory implementation of scorch.Segment +type Segment struct { + + // FieldsMap adds 1 to field id to avoid zero value issues + // name -> field id + 1 + FieldsMap map[string]uint16 + + // FieldsInv is the inverse of FieldsMap + // field id -> name + FieldsInv []string + + // Term dictionaries for each field + // field id -> term -> postings list id + 1 + Dicts []map[string]uint64 + + // Terms for each field, where terms are sorted ascending + // field id -> []term + DictKeys [][]string + + // Postings list + // postings list id -> bitmap by docNum + Postings []*roaring.Bitmap + + // Postings list has locations + PostingsLocs []*roaring.Bitmap + + // Term frequencies + // postings list id -> Freqs (one for each hit in bitmap) + Freqs [][]uint64 + + // Field norms + // postings list id -> Norms (one for each hit in bitmap) + Norms [][]float32 + + // Field/start/end/pos/locarraypos + // postings list id -> start/end/pos/locarraypos (one for each freq) + Locfields [][]uint16 + Locstarts [][]uint64 + Locends [][]uint64 + Locpos [][]uint64 + Locarraypos [][][]uint64 + + // Stored field values + // docNum -> field id -> slice of values (each value []byte) + Stored []map[uint16][][]byte + + // Stored field types + // docNum -> field id -> slice of types (each type byte) + StoredTypes []map[uint16][]byte + + // Stored field array positions + // docNum -> field id -> slice of array positions (each is []uint64) + StoredPos []map[uint16][][]uint64 + + // For storing the docValue persisted fields + DocValueFields map[uint16]bool + + // Footprint of the segment, updated when analyzed document mutations + // are added into the segment + sizeInBytes uint64 +} + +// New builds a new empty Segment +func New() *Segment { + return &Segment{ + FieldsMap: map[string]uint16{}, + DocValueFields: map[uint16]bool{}, + } +} + +func (s *Segment) updateSizeInBytes() { + var sizeInBytes uint64 + + // FieldsMap, FieldsInv + for k, _ := range s.FieldsMap { + sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 + + 2 /* size of uint16 */) + } + // overhead from the data structures + sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice) + + // Dicts, DictKeys + for _, entry := range s.Dicts { + for k, _ := range entry { + sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 + + 8 /* size of uint64 */) + } + // overhead from the data structures + sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice) + } + sizeInBytes += (segment.SizeOfSlice * 2) + + // Postings, PostingsLocs + for i := 0; i < len(s.Postings); i++ { + sizeInBytes += (s.Postings[i].GetSizeInBytes() + segment.SizeOfPointer) + + (s.PostingsLocs[i].GetSizeInBytes() + segment.SizeOfPointer) + } + sizeInBytes += (segment.SizeOfSlice * 2) + + // Freqs, Norms + for i := 0; i < len(s.Freqs); i++ { + sizeInBytes += uint64(len(s.Freqs[i])*8 /* size of uint64 */ + + len(s.Norms[i])*4 /* size of float32 */) + + (segment.SizeOfSlice * 2) + } + sizeInBytes += (segment.SizeOfSlice * 2) + + // Location data + for i := 0; i < len(s.Locfields); i++ { + sizeInBytes += uint64(len(s.Locfields[i])*2 /* size of uint16 */ + + len(s.Locstarts[i])*8 /* size of uint64 */ + + len(s.Locends[i])*8 /* size of uint64 */ + + len(s.Locpos[i])*8 /* size of uint64 */) + + for j := 0; j < len(s.Locarraypos[i]); j++ { + sizeInBytes += uint64(len(s.Locarraypos[i][j])*8 /* size of uint64 */) + + segment.SizeOfSlice + } + + sizeInBytes += (segment.SizeOfSlice * 5) + } + sizeInBytes += (segment.SizeOfSlice * 5) + + // Stored data + for i := 0; i < len(s.Stored); i++ { + for _, v := range s.Stored[i] { + sizeInBytes += uint64(2 /* size of uint16 */) + for _, arr := range v { + sizeInBytes += uint64(len(arr)) + segment.SizeOfSlice + } + sizeInBytes += segment.SizeOfSlice + } + + for _, v := range s.StoredTypes[i] { + sizeInBytes += uint64(2 /* size of uint16 */ +len(v)) + segment.SizeOfSlice + } + + for _, v := range s.StoredPos[i] { + sizeInBytes += uint64(2 /* size of uint16 */) + for _, arr := range v { + sizeInBytes += uint64(len(arr)*8 /* size of uint64 */) + + segment.SizeOfSlice + } + sizeInBytes += segment.SizeOfSlice + } + + // overhead from map(s) within Stored, StoredTypes, StoredPos + sizeInBytes += (segment.SizeOfMap * 3) + } + // overhead from data structures: Stored, StoredTypes, StoredPos + sizeInBytes += (segment.SizeOfSlice * 3) + + // DocValueFields + sizeInBytes += uint64(len(s.DocValueFields)*3 /* size of uint16 + bool */) + + segment.SizeOfMap + + // SizeInBytes + sizeInBytes += uint64(8) + + s.sizeInBytes = sizeInBytes +} + +func (s *Segment) SizeInBytes() uint64 { + return s.sizeInBytes +} + +func (s *Segment) AddRef() { +} + +func (s *Segment) DecRef() error { + return nil +} + +// Fields returns the field names used in this segment +func (s *Segment) Fields() []string { + return s.FieldsInv +} + +// VisitDocument invokes the DocFieldValueVistor for each stored field +// for the specified doc number +func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { + // ensure document number exists + if int(num) > len(s.Stored)-1 { + return nil + } + docFields := s.Stored[int(num)] + st := s.StoredTypes[int(num)] + sp := s.StoredPos[int(num)] + for field, values := range docFields { + for i, value := range values { + keepGoing := visitor(s.FieldsInv[field], st[field][i], value, sp[field][i]) + if !keepGoing { + return nil + } + } + } + return nil +} + +func (s *Segment) getField(name string) (int, error) { + fieldID, ok := s.FieldsMap[name] + if !ok { + return 0, fmt.Errorf("no field named %s", name) + } + return int(fieldID - 1), nil +} + +// Dictionary returns the term dictionary for the specified field +func (s *Segment) Dictionary(field string) (segment.TermDictionary, error) { + fieldID, err := s.getField(field) + if err != nil { + // no such field, return empty dictionary + return &segment.EmptyDictionary{}, nil + } + return &Dictionary{ + segment: s, + field: field, + fieldID: uint16(fieldID), + }, nil +} + +// Count returns the number of documents in this segment +// (this has no notion of deleted docs) +func (s *Segment) Count() uint64 { + return uint64(len(s.Stored)) +} + +// DocNumbers returns a bitset corresponding to the doc numbers of all the +// provided _id strings +func (s *Segment) DocNumbers(ids []string) (*roaring.Bitmap, error) { + rv := roaring.New() + + // guard against empty segment + if len(s.FieldsMap) > 0 { + idDictionary := s.Dicts[idFieldID] + + for _, id := range ids { + postingID := idDictionary[id] + if postingID > 0 { + rv.Or(s.Postings[postingID-1]) + } + } + } + return rv, nil +} + +// Close releases all resources associated with this segment +func (s *Segment) Close() error { + return nil +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go new file mode 100644 index 0000000000..d5435ab96b --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go @@ -0,0 +1,110 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package segment + +import ( + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index" +) + +// Overhead from go data structures when deployed on a 64-bit system. +const SizeOfMap uint64 = 8 +const SizeOfPointer uint64 = 8 +const SizeOfSlice uint64 = 24 +const SizeOfString uint64 = 16 + +// DocumentFieldValueVisitor defines a callback to be visited for each +// stored field value. The return value determines if the visitor +// should keep going. Returning true continues visiting, false stops. +type DocumentFieldValueVisitor func(field string, typ byte, value []byte, pos []uint64) bool + +type Segment interface { + Dictionary(field string) (TermDictionary, error) + + VisitDocument(num uint64, visitor DocumentFieldValueVisitor) error + Count() uint64 + + DocNumbers([]string) (*roaring.Bitmap, error) + + Fields() []string + + Close() error + + SizeInBytes() uint64 + + AddRef() + DecRef() error +} + +type TermDictionary interface { + PostingsList(term string, except *roaring.Bitmap) (PostingsList, error) + + Iterator() DictionaryIterator + PrefixIterator(prefix string) DictionaryIterator + RangeIterator(start, end string) DictionaryIterator +} + +type DictionaryIterator interface { + Next() (*index.DictEntry, error) +} + +type PostingsList interface { + Iterator() PostingsIterator + + Count() uint64 + + // NOTE deferred for future work + + // And(other PostingsList) PostingsList + // Or(other PostingsList) PostingsList +} + +type PostingsIterator interface { + // The caller is responsible for copying whatever it needs from + // the returned Posting instance before calling Next(), as some + // implementations may return a shared instance to reduce memory + // allocations. + Next() (Posting, error) +} + +type Posting interface { + Number() uint64 + + Frequency() uint64 + Norm() float64 + + Locations() []Location +} + +type Location interface { + Field() string + Start() uint64 + End() uint64 + Pos() uint64 + ArrayPositions() []uint64 +} + +// DocumentFieldTermVisitable is implemented by various scorch segment +// implementations with persistence for the un inverting of the +// postings or other indexed values. +type DocumentFieldTermVisitable interface { + VisitDocumentFieldTerms(localDocNum uint64, fields []string, + visitor index.DocumentFieldTermVisitor) error + + // VisitableDocValueFields implementation should return + // the list of fields which are document value persisted and + // therefore visitable by the above VisitDocumentFieldTerms method. + VisitableDocValueFields() ([]string, error) +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/README.md b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/README.md new file mode 100644 index 0000000000..179adceafd --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/README.md @@ -0,0 +1,167 @@ +# zap file format + +The file is written in the reverse order that we typically access data. This helps us write in one pass since later sections of the file require file offsets of things we've already written. + +Current usage: + +- mmap the entire file +- crc-32 bytes and version are in fixed position at end of the file +- reading remainder of footer could be version specific +- remainder of footer gives us: + - 3 important offsets (docValue , fields index and stored data index) + - 2 important values (number of docs and chunk factor) +- field data is processed once and memoized onto the heap so that we never have to go back to disk for it +- access to stored data by doc number means first navigating to the stored data index, then accessing a fixed position offset into that slice, which gives us the actual address of the data. the first bytes of that section tell us the size of data so that we know where it ends. +- access to all other indexed data follows the following pattern: + - first know the field name -> convert to id + - next navigate to term dictionary for that field + - some operations stop here and do dictionary ops + - next use dictionary to navigate to posting list for a specific term + - walk posting list + - if necessary, walk posting details as we go + - if location info is desired, consult location bitmap to see if it is there + +## stored fields section + +- for each document + - preparation phase: + - produce a slice of metadata bytes and data bytes + - produce these slices in field id order + - field value is appended to the data slice + - metadata slice is govarint encoded with the following values for each field value + - field id (uint16) + - field type (byte) + - field value start offset in uncompressed data slice (uint64) + - field value length (uint64) + - field number of array positions (uint64) + - one additional value for each array position (uint64) + - compress the data slice using snappy + - file writing phase: + - remember the start offset for this document + - write out meta data length (varint uint64) + - write out compressed data length (varint uint64) + - write out the metadata bytes + - write out the compressed data bytes + +## stored fields idx + +- for each document + - write start offset (remembered from previous section) of stored data (big endian uint64) + +With this index and a known document number, we have direct access to all the stored field data. + +## posting details (freq/norm) section + +- for each posting list + - produce a slice containing multiple consecutive chunks (each chunk is govarint stream) + - produce a slice remembering offsets of where each chunk starts + - preparation phase: + - for each hit in the posting list + - if this hit is in next chunk close out encoding of last chunk and record offset start of next + - encode term frequency (uint64) + - encode norm factor (float32) + - file writing phase: + - remember start position for this posting list details + - write out number of chunks that follow (varint uint64) + - write out length of each chunk (each a varint uint64) + - write out the byte slice containing all the chunk data + +If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it. + +## posting details (location) section + +- for each posting list + - produce a slice containing multiple consecutive chunks (each chunk is govarint stream) + - produce a slice remembering offsets of where each chunk starts + - preparation phase: + - for each hit in the posting list + - if this hit is in next chunk close out encoding of last chunk and record offset start of next + - encode field (uint16) + - encode field pos (uint64) + - encode field start (uint64) + - encode field end (uint64) + - encode number of array positions to follow (uint64) + - encode each array position (each uint64) + - file writing phase: + - remember start position for this posting list details + - write out number of chunks that follow (varint uint64) + - write out length of each chunk (each a varint uint64) + - write out the byte slice containing all the chunk data + +If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it. + +## bitmaps of hits with location info + +- for each posting list + - preparation phase: + - encode roaring bitmap (inidicating which hits have location details indexed) posting list to bytes (so we know the length) + - file writing phase: + - remember the start position for this bitmap + - write length of encoded roaring bitmap + - write the serialized roaring bitmap data + +## postings list section + +- for each posting list + - preparation phase: + - encode roaring bitmap posting list to bytes (so we know the length) + - file writing phase: + - remember the start position for this posting list + - write freq/norm details offset (remembered from previous, as varint uint64) + - write location details offset (remembered from previous, as varint uint64) + - write location bitmap offset (remembered from pervious, as varint uint64) + - write length of encoded roaring bitmap + - write the serialized roaring bitmap data + +## dictionary + +- for each field + - preparation phase: + - encode vellum FST with dictionary data pointing to file offset of posting list (remembered from previous) + - file writing phase: + - remember the start position of this persistDictionary + - write length of vellum data (varint uint64) + - write out vellum data + +## fields section + +- for each field + - file writing phase: + - remember start offset for each field + - write dictionary address (remembered from previous) (varint uint64) + - write length of field name (varint uint64) + - write field name bytes + +## fields idx + +- for each field + - file writing phase: + - write big endian uint64 of start offset for each field + +NOTE: currently we don't know or record the length of this fields index. Instead we rely on the fact that we know it immediately precedes a footer of known size. + +## fields DocValue + +- for each field + - preparation phase: + - produce a slice containing multiple consecutive chunks, where each chunk is composed of a meta section followed by compressed columnar field data + - produce a slice remembering the length of each chunk + - file writing phase: + - remember the start position of this first field DocValue offset in the footer + - write out number of chunks that follow (varint uint64) + - write out length of each chunk (each a varint uint64) + - write out the byte slice containing all the chunk data + +NOTE: currently the meta header inside each chunk gives clue to the location offsets and size of the data pertaining to a given docID and any +read operation leverage that meta information to extract the document specific data from the file. + +## footer + +- file writing phase + - write number of docs (big endian uint64) + - write stored field index location (big endian uint64) + - write field index location (big endian uint64) + - write field docValue location (big endian uint64) + - write out chunk factor (big endian uint32) + - write out version (big endian uint32) + - write out file CRC of everything preceding this (big endian uint32) diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go new file mode 100644 index 0000000000..58f9faeaf6 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go @@ -0,0 +1,648 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "bufio" + "bytes" + "encoding/binary" + "math" + "os" + "sort" + + "github.com/Smerity/govarint" + "github.com/blevesearch/bleve/index/scorch/segment/mem" + "github.com/couchbase/vellum" + "github.com/golang/snappy" +) + +const version uint32 = 2 + +const fieldNotUninverted = math.MaxUint64 + +// PersistSegmentBase persists SegmentBase in the zap file format. +func PersistSegmentBase(sb *SegmentBase, path string) error { + flag := os.O_RDWR | os.O_CREATE + + f, err := os.OpenFile(path, flag, 0600) + if err != nil { + return err + } + + cleanup := func() { + _ = f.Close() + _ = os.Remove(path) + } + + br := bufio.NewWriter(f) + + _, err = br.Write(sb.mem) + if err != nil { + cleanup() + return err + } + + err = persistFooter(sb.numDocs, sb.storedIndexOffset, sb.fieldsIndexOffset, sb.docValueOffset, + sb.chunkFactor, sb.memCRC, br) + if err != nil { + cleanup() + return err + } + + err = br.Flush() + if err != nil { + cleanup() + return err + } + + err = f.Sync() + if err != nil { + cleanup() + return err + } + + err = f.Close() + if err != nil { + cleanup() + return err + } + + return nil +} + +// PersistSegment takes the in-memory segment and persists it to +// the specified path in the zap file format. +func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) error { + flag := os.O_RDWR | os.O_CREATE + + f, err := os.OpenFile(path, flag, 0600) + if err != nil { + return err + } + + cleanup := func() { + _ = f.Close() + _ = os.Remove(path) + } + + // buffer the output + br := bufio.NewWriter(f) + + // wrap it for counting (tracking offsets) + cr := NewCountHashWriter(br) + + numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, _, err := + persistBase(memSegment, cr, chunkFactor) + if err != nil { + cleanup() + return err + } + + err = persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, + chunkFactor, cr.Sum32(), cr) + if err != nil { + cleanup() + return err + } + + err = br.Flush() + if err != nil { + cleanup() + return err + } + + err = f.Sync() + if err != nil { + cleanup() + return err + } + + err = f.Close() + if err != nil { + cleanup() + return err + } + + return nil +} + +func persistBase(memSegment *mem.Segment, cr *CountHashWriter, chunkFactor uint32) ( + numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64, + dictLocs []uint64, err error) { + docValueOffset = uint64(fieldNotUninverted) + + if len(memSegment.Stored) > 0 { + storedIndexOffset, err = persistStored(memSegment, cr) + if err != nil { + return 0, 0, 0, 0, nil, err + } + + freqOffsets, locOffsets, err := persistPostingDetails(memSegment, cr, chunkFactor) + if err != nil { + return 0, 0, 0, 0, nil, err + } + + postingsListLocs, err := persistPostingsLocs(memSegment, cr) + if err != nil { + return 0, 0, 0, 0, nil, err + } + + postingsLocs, err := persistPostingsLists(memSegment, cr, postingsListLocs, freqOffsets, locOffsets) + if err != nil { + return 0, 0, 0, 0, nil, err + } + + dictLocs, err = persistDictionary(memSegment, cr, postingsLocs) + if err != nil { + return 0, 0, 0, 0, nil, err + } + + docValueOffset, err = persistFieldDocValues(memSegment, cr, chunkFactor) + if err != nil { + return 0, 0, 0, 0, nil, err + } + } else { + dictLocs = make([]uint64, len(memSegment.FieldsInv)) + } + + fieldsIndexOffset, err = persistFields(memSegment.FieldsInv, cr, dictLocs) + if err != nil { + return 0, 0, 0, 0, nil, err + } + + return uint64(len(memSegment.Stored)), storedIndexOffset, fieldsIndexOffset, docValueOffset, + dictLocs, nil +} + +func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error) { + + var curr int + var metaBuf bytes.Buffer + var data, compressed []byte + + docNumOffsets := make(map[int]uint64, len(memSegment.Stored)) + + for docNum, storedValues := range memSegment.Stored { + if docNum != 0 { + // reset buffer if necessary + metaBuf.Reset() + data = data[:0] + compressed = compressed[:0] + curr = 0 + } + + metaEncoder := govarint.NewU64Base128Encoder(&metaBuf) + + st := memSegment.StoredTypes[docNum] + sp := memSegment.StoredPos[docNum] + + // encode fields in order + for fieldID := range memSegment.FieldsInv { + if storedFieldValues, ok := storedValues[uint16(fieldID)]; ok { + // has stored values for this field + num := len(storedFieldValues) + + stf := st[uint16(fieldID)] + spf := sp[uint16(fieldID)] + + // process each value + for i := 0; i < num; i++ { + // encode field + _, err2 := metaEncoder.PutU64(uint64(fieldID)) + if err2 != nil { + return 0, err2 + } + // encode type + _, err2 = metaEncoder.PutU64(uint64(stf[i])) + if err2 != nil { + return 0, err2 + } + // encode start offset + _, err2 = metaEncoder.PutU64(uint64(curr)) + if err2 != nil { + return 0, err2 + } + // end len + _, err2 = metaEncoder.PutU64(uint64(len(storedFieldValues[i]))) + if err2 != nil { + return 0, err2 + } + // encode number of array pos + _, err2 = metaEncoder.PutU64(uint64(len(spf[i]))) + if err2 != nil { + return 0, err2 + } + // encode all array positions + for _, pos := range spf[i] { + _, err2 = metaEncoder.PutU64(pos) + if err2 != nil { + return 0, err2 + } + } + // append data + data = append(data, storedFieldValues[i]...) + // update curr + curr += len(storedFieldValues[i]) + } + } + } + metaEncoder.Close() + + metaBytes := metaBuf.Bytes() + + // compress the data + compressed = snappy.Encode(compressed, data) + + // record where we're about to start writing + docNumOffsets[docNum] = uint64(w.Count()) + + // write out the meta len and compressed data len + _, err := writeUvarints(w, uint64(len(metaBytes)), uint64(len(compressed))) + if err != nil { + return 0, err + } + + // now write the meta + _, err = w.Write(metaBytes) + if err != nil { + return 0, err + } + // now write the compressed data + _, err = w.Write(compressed) + if err != nil { + return 0, err + } + } + + // return value is the start of the stored index + rv := uint64(w.Count()) + // now write out the stored doc index + for docNum := range memSegment.Stored { + err := binary.Write(w, binary.BigEndian, docNumOffsets[docNum]) + if err != nil { + return 0, err + } + } + + return rv, nil +} + +func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFactor uint32) ([]uint64, []uint64, error) { + var freqOffsets, locOfffsets []uint64 + tfEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) + for postingID := range memSegment.Postings { + if postingID != 0 { + tfEncoder.Reset() + } + freqs := memSegment.Freqs[postingID] + norms := memSegment.Norms[postingID] + postingsListItr := memSegment.Postings[postingID].Iterator() + var offset int + for postingsListItr.HasNext() { + + docNum := uint64(postingsListItr.Next()) + + // put freq + err := tfEncoder.Add(docNum, freqs[offset]) + if err != nil { + return nil, nil, err + } + + // put norm + norm := norms[offset] + normBits := math.Float32bits(norm) + err = tfEncoder.Add(docNum, uint64(normBits)) + if err != nil { + return nil, nil, err + } + + offset++ + } + + // record where this postings freq info starts + freqOffsets = append(freqOffsets, uint64(w.Count())) + + tfEncoder.Close() + _, err := tfEncoder.Write(w) + if err != nil { + return nil, nil, err + } + + } + + // now do it again for the locations + locEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) + for postingID := range memSegment.Postings { + if postingID != 0 { + locEncoder.Reset() + } + freqs := memSegment.Freqs[postingID] + locfields := memSegment.Locfields[postingID] + locpos := memSegment.Locpos[postingID] + locstarts := memSegment.Locstarts[postingID] + locends := memSegment.Locends[postingID] + locarraypos := memSegment.Locarraypos[postingID] + postingsListItr := memSegment.Postings[postingID].Iterator() + var offset int + var locOffset int + for postingsListItr.HasNext() { + docNum := uint64(postingsListItr.Next()) + for i := 0; i < int(freqs[offset]); i++ { + if len(locfields) > 0 { + // put field + err := locEncoder.Add(docNum, uint64(locfields[locOffset])) + if err != nil { + return nil, nil, err + } + + // put pos + err = locEncoder.Add(docNum, locpos[locOffset]) + if err != nil { + return nil, nil, err + } + + // put start + err = locEncoder.Add(docNum, locstarts[locOffset]) + if err != nil { + return nil, nil, err + } + + // put end + err = locEncoder.Add(docNum, locends[locOffset]) + if err != nil { + return nil, nil, err + } + + // put the number of array positions to follow + num := len(locarraypos[locOffset]) + err = locEncoder.Add(docNum, uint64(num)) + if err != nil { + return nil, nil, err + } + + // put each array position + for _, pos := range locarraypos[locOffset] { + err = locEncoder.Add(docNum, pos) + if err != nil { + return nil, nil, err + } + } + } + locOffset++ + } + offset++ + } + + // record where this postings loc info starts + locOfffsets = append(locOfffsets, uint64(w.Count())) + locEncoder.Close() + _, err := locEncoder.Write(w) + if err != nil { + return nil, nil, err + } + } + return freqOffsets, locOfffsets, nil +} + +func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) { + rv = make([]uint64, 0, len(memSegment.PostingsLocs)) + var reuseBuf bytes.Buffer + reuseBufVarint := make([]byte, binary.MaxVarintLen64) + for postingID := range memSegment.PostingsLocs { + // record where we start this posting loc + rv = append(rv, uint64(w.Count())) + // write out the length and bitmap + _, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w, &reuseBuf, reuseBufVarint) + if err != nil { + return nil, err + } + } + return rv, nil +} + +func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, + postingsListLocs, freqOffsets, locOffsets []uint64) (rv []uint64, err error) { + rv = make([]uint64, 0, len(memSegment.Postings)) + var reuseBuf bytes.Buffer + reuseBufVarint := make([]byte, binary.MaxVarintLen64) + for postingID := range memSegment.Postings { + // record where we start this posting list + rv = append(rv, uint64(w.Count())) + + // write out the term info, loc info, and loc posting list offset + _, err = writeUvarints(w, freqOffsets[postingID], + locOffsets[postingID], postingsListLocs[postingID]) + if err != nil { + return nil, err + } + + // write out the length and bitmap + _, err = writeRoaringWithLen(memSegment.Postings[postingID], w, &reuseBuf, reuseBufVarint) + if err != nil { + return nil, err + } + } + return rv, nil +} + +func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs []uint64) ([]uint64, error) { + rv := make([]uint64, 0, len(memSegment.DictKeys)) + + varintBuf := make([]byte, binary.MaxVarintLen64) + + var buffer bytes.Buffer + for fieldID, fieldTerms := range memSegment.DictKeys { + if fieldID != 0 { + buffer.Reset() + } + + // start a new vellum for this field + builder, err := vellum.New(&buffer, nil) + if err != nil { + return nil, err + } + + dict := memSegment.Dicts[fieldID] + // now walk the dictionary in order of fieldTerms (already sorted) + for _, fieldTerm := range fieldTerms { + postingID := dict[fieldTerm] - 1 + postingsAddr := postingsLocs[postingID] + err = builder.Insert([]byte(fieldTerm), postingsAddr) + if err != nil { + return nil, err + } + } + err = builder.Close() + if err != nil { + return nil, err + } + + // record where this dictionary starts + rv = append(rv, uint64(w.Count())) + + vellumData := buffer.Bytes() + + // write out the length of the vellum data + n := binary.PutUvarint(varintBuf, uint64(len(vellumData))) + _, err = w.Write(varintBuf[:n]) + if err != nil { + return nil, err + } + + // write this vellum to disk + _, err = w.Write(vellumData) + if err != nil { + return nil, err + } + } + + return rv, nil +} + +type docIDRange []uint64 + +func (a docIDRange) Len() int { return len(a) } +func (a docIDRange) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a docIDRange) Less(i, j int) bool { return a[i] < a[j] } + +func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, + chunkFactor uint32) (map[uint16]uint64, error) { + fieldChunkOffsets := make(map[uint16]uint64, len(memSegment.FieldsInv)) + fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) + + for fieldID := range memSegment.DocValueFields { + field := memSegment.FieldsInv[fieldID] + docTermMap := make(map[uint64][]byte, 0) + dict, err := memSegment.Dictionary(field) + if err != nil { + return nil, err + } + + dictItr := dict.Iterator() + next, err := dictItr.Next() + for err == nil && next != nil { + postings, err1 := dict.PostingsList(next.Term, nil) + if err1 != nil { + return nil, err + } + + postingsItr := postings.Iterator() + nextPosting, err2 := postingsItr.Next() + for err2 == nil && nextPosting != nil { + docNum := nextPosting.Number() + docTermMap[docNum] = append(docTermMap[docNum], []byte(next.Term)...) + docTermMap[docNum] = append(docTermMap[docNum], termSeparator) + nextPosting, err2 = postingsItr.Next() + } + if err2 != nil { + return nil, err2 + } + + next, err = dictItr.Next() + } + + if err != nil { + return nil, err + } + // sort wrt to docIDs + var docNumbers docIDRange + for k := range docTermMap { + docNumbers = append(docNumbers, k) + } + sort.Sort(docNumbers) + + for _, docNum := range docNumbers { + err = fdvEncoder.Add(docNum, docTermMap[docNum]) + if err != nil { + return nil, err + } + } + + fieldChunkOffsets[fieldID] = uint64(w.Count()) + err = fdvEncoder.Close() + if err != nil { + return nil, err + } + // persist the doc value details for this field + _, err = fdvEncoder.Write(w) + if err != nil { + return nil, err + } + // resetting encoder for the next field + fdvEncoder.Reset() + } + + return fieldChunkOffsets, nil +} + +func persistFieldDocValues(memSegment *mem.Segment, w *CountHashWriter, + chunkFactor uint32) (uint64, error) { + fieldDvOffsets, err := persistDocValues(memSegment, w, chunkFactor) + if err != nil { + return 0, err + } + + fieldDocValuesOffset := uint64(w.Count()) + buf := make([]byte, binary.MaxVarintLen64) + offset := uint64(0) + ok := true + for fieldID := range memSegment.FieldsInv { + // if the field isn't configured for docValue, then mark + // the offset accordingly + if offset, ok = fieldDvOffsets[uint16(fieldID)]; !ok { + offset = fieldNotUninverted + } + n := binary.PutUvarint(buf, uint64(offset)) + _, err := w.Write(buf[:n]) + if err != nil { + return 0, err + } + } + + return fieldDocValuesOffset, nil +} + +func NewSegmentBase(memSegment *mem.Segment, chunkFactor uint32) (*SegmentBase, error) { + var br bytes.Buffer + + cr := NewCountHashWriter(&br) + + numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs, err := + persistBase(memSegment, cr, chunkFactor) + if err != nil { + return nil, err + } + + sb := &SegmentBase{ + mem: br.Bytes(), + memCRC: cr.Sum32(), + chunkFactor: chunkFactor, + fieldsMap: memSegment.FieldsMap, + fieldsInv: memSegment.FieldsInv, + numDocs: numDocs, + storedIndexOffset: storedIndexOffset, + fieldsIndexOffset: fieldsIndexOffset, + docValueOffset: docValueOffset, + dictLocs: dictLocs, + fieldDvIterMap: make(map[uint16]*docValueIterator), + } + + err = sb.loadDvIterators() + if err != nil { + return nil, err + } + + return sb, nil +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/contentcoder.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/contentcoder.go new file mode 100644 index 0000000000..b03940497f --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/contentcoder.go @@ -0,0 +1,167 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "bytes" + "encoding/binary" + "io" + + "github.com/golang/snappy" +) + +var termSeparator byte = 0xff +var termSeparatorSplitSlice = []byte{termSeparator} + +type chunkedContentCoder struct { + final []byte + chunkSize uint64 + currChunk uint64 + chunkLens []uint64 + chunkMetaBuf bytes.Buffer + chunkBuf bytes.Buffer + + chunkMeta []MetaData +} + +// MetaData represents the data information inside a +// chunk. +type MetaData struct { + DocID uint64 // docid of the data inside the chunk + DocDvLoc uint64 // starting offset for a given docid + DocDvLen uint64 // length of data inside the chunk for the given docid +} + +// newChunkedContentCoder returns a new chunk content coder which +// packs data into chunks based on the provided chunkSize +func newChunkedContentCoder(chunkSize uint64, + maxDocNum uint64) *chunkedContentCoder { + total := maxDocNum/chunkSize + 1 + rv := &chunkedContentCoder{ + chunkSize: chunkSize, + chunkLens: make([]uint64, total), + chunkMeta: []MetaData{}, + } + + return rv +} + +// Reset lets you reuse this chunked content coder. Buffers are reset +// and re used. You cannot change the chunk size. +func (c *chunkedContentCoder) Reset() { + c.currChunk = 0 + c.final = c.final[:0] + c.chunkBuf.Reset() + c.chunkMetaBuf.Reset() + for i := range c.chunkLens { + c.chunkLens[i] = 0 + } + c.chunkMeta = []MetaData{} +} + +// Close indicates you are done calling Add() this allows +// the final chunk to be encoded. +func (c *chunkedContentCoder) Close() error { + return c.flushContents() +} + +func (c *chunkedContentCoder) flushContents() error { + // flush the contents, with meta information at first + buf := make([]byte, binary.MaxVarintLen64) + n := binary.PutUvarint(buf, uint64(len(c.chunkMeta))) + _, err := c.chunkMetaBuf.Write(buf[:n]) + if err != nil { + return err + } + + // write out the metaData slice + for _, meta := range c.chunkMeta { + _, err := writeUvarints(&c.chunkMetaBuf, meta.DocID, meta.DocDvLoc, meta.DocDvLen) + if err != nil { + return err + } + } + + // write the metadata to final data + metaData := c.chunkMetaBuf.Bytes() + c.final = append(c.final, c.chunkMetaBuf.Bytes()...) + // write the compressed data to the final data + compressedData := snappy.Encode(nil, c.chunkBuf.Bytes()) + c.final = append(c.final, compressedData...) + + c.chunkLens[c.currChunk] = uint64(len(compressedData) + len(metaData)) + return nil +} + +// Add encodes the provided byte slice into the correct chunk for the provided +// doc num. You MUST call Add() with increasing docNums. +func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error { + chunk := docNum / c.chunkSize + if chunk != c.currChunk { + // flush out the previous chunk details + err := c.flushContents() + if err != nil { + return err + } + // clearing the chunk specific meta for next chunk + c.chunkBuf.Reset() + c.chunkMetaBuf.Reset() + c.chunkMeta = []MetaData{} + c.currChunk = chunk + } + + // mark the starting offset for this doc + dvOffset := c.chunkBuf.Len() + dvSize, err := c.chunkBuf.Write(vals) + if err != nil { + return err + } + + c.chunkMeta = append(c.chunkMeta, MetaData{ + DocID: docNum, + DocDvLoc: uint64(dvOffset), + DocDvLen: uint64(dvSize), + }) + return nil +} + +// Write commits all the encoded chunked contents to the provided writer. +func (c *chunkedContentCoder) Write(w io.Writer) (int, error) { + var tw int + buf := make([]byte, binary.MaxVarintLen64) + // write out the number of chunks + n := binary.PutUvarint(buf, uint64(len(c.chunkLens))) + nw, err := w.Write(buf[:n]) + tw += nw + if err != nil { + return tw, err + } + // write out the chunk lens + for _, chunkLen := range c.chunkLens { + n := binary.PutUvarint(buf, uint64(chunkLen)) + nw, err = w.Write(buf[:n]) + tw += nw + if err != nil { + return tw, err + } + } + // write out the data + nw, err = w.Write(c.final) + tw += nw + if err != nil { + return tw, err + } + return tw, nil +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/count.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/count.go new file mode 100644 index 0000000000..d75e83c032 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/count.go @@ -0,0 +1,51 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "hash/crc32" + "io" +) + +// CountHashWriter is a wrapper around a Writer which counts the number of +// bytes which have been written and computes a crc32 hash +type CountHashWriter struct { + w io.Writer + crc uint32 + n int +} + +// NewCountHashWriter returns a CountHashWriter which wraps the provided Writer +func NewCountHashWriter(w io.Writer) *CountHashWriter { + return &CountHashWriter{w: w} +} + +// Write writes the provided bytes to the wrapped writer and counts the bytes +func (c *CountHashWriter) Write(b []byte) (int, error) { + n, err := c.w.Write(b) + c.crc = crc32.Update(c.crc, crc32.IEEETable, b[:n]) + c.n += n + return n, err +} + +// Count returns the number of bytes written +func (c *CountHashWriter) Count() int { + return c.n +} + +// Sum32 returns the CRC-32 hash of the content written to this writer +func (c *CountHashWriter) Sum32() uint32 { + return c.crc +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go new file mode 100644 index 0000000000..0f5145fba8 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go @@ -0,0 +1,150 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "fmt" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/couchbase/vellum" + "github.com/couchbase/vellum/regexp" +) + +// Dictionary is the zap representation of the term dictionary +type Dictionary struct { + sb *SegmentBase + field string + fieldID uint16 + fst *vellum.FST +} + +// PostingsList returns the postings list for the specified term +func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) { + return d.postingsList([]byte(term), except) +} + +func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap) (*PostingsList, error) { + rv := &PostingsList{ + sb: d.sb, + term: term, + except: except, + } + + if d.fst != nil { + postingsOffset, exists, err := d.fst.Get(term) + if err != nil { + return nil, fmt.Errorf("vellum err: %v", err) + } + if exists { + err = rv.read(postingsOffset, d) + if err != nil { + return nil, err + } + } + } + + return rv, nil +} + +// Iterator returns an iterator for this dictionary +func (d *Dictionary) Iterator() segment.DictionaryIterator { + rv := &DictionaryIterator{ + d: d, + } + + if d.fst != nil { + itr, err := d.fst.Iterator(nil, nil) + if err == nil { + rv.itr = itr + } + } + + return rv +} + +// PrefixIterator returns an iterator which only visits terms having the +// the specified prefix +func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { + rv := &DictionaryIterator{ + d: d, + } + + if d.fst != nil { + r, err := regexp.New(prefix + ".*") + if err == nil { + itr, err := d.fst.Search(r, nil, nil) + if err == nil { + rv.itr = itr + } + } + } + + return rv +} + +// RangeIterator returns an iterator which only visits terms between the +// start and end terms. NOTE: bleve.index API specifies the end is inclusive. +func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator { + rv := &DictionaryIterator{ + d: d, + } + + // need to increment the end position to be inclusive + endBytes := []byte(end) + if endBytes[len(endBytes)-1] < 0xff { + endBytes[len(endBytes)-1]++ + } else { + endBytes = append(endBytes, 0xff) + } + + if d.fst != nil { + itr, err := d.fst.Iterator([]byte(start), endBytes) + if err == nil { + rv.itr = itr + } + } + + return rv +} + +// DictionaryIterator is an iterator for term dictionary +type DictionaryIterator struct { + d *Dictionary + itr vellum.Iterator + err error + tmp PostingsList +} + +// Next returns the next entry in the dictionary +func (i *DictionaryIterator) Next() (*index.DictEntry, error) { + if i.itr == nil || i.err == vellum.ErrIteratorDone { + return nil, nil + } else if i.err != nil { + return nil, i.err + } + term, postingsOffset := i.itr.Current() + i.err = i.tmp.read(postingsOffset, i.d) + if i.err != nil { + return nil, i.err + } + rv := &index.DictEntry{ + Term: string(term), + Count: i.tmp.Count(), + } + i.err = i.itr.Next() + return rv, nil +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go new file mode 100644 index 0000000000..fb5b348a5b --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go @@ -0,0 +1,213 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "bytes" + "encoding/binary" + "fmt" + "math" + "sort" + + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/golang/snappy" +) + +type docValueIterator struct { + field string + curChunkNum uint64 + numChunks uint64 + chunkLens []uint64 + dvDataLoc uint64 + curChunkHeader []MetaData + curChunkData []byte // compressed data cache +} + +func (di *docValueIterator) sizeInBytes() uint64 { + // curChunkNum, numChunks, dvDataLoc --> uint64 + sizeInBytes := 24 + + // field + sizeInBytes += (len(di.field) + int(segment.SizeOfString)) + + // chunkLens, curChunkHeader + sizeInBytes += len(di.chunkLens)*8 + + len(di.curChunkHeader)*24 + + int(segment.SizeOfSlice*2) /* overhead from slices */ + + // curChunkData is mmap'ed, not included + + return uint64(sizeInBytes) +} + +func (di *docValueIterator) fieldName() string { + return di.field +} + +func (di *docValueIterator) curChunkNumber() uint64 { + return di.curChunkNum +} + +func (s *SegmentBase) loadFieldDocValueIterator(field string, + fieldDvLoc uint64) (*docValueIterator, error) { + // get the docValue offset for the given fields + if fieldDvLoc == fieldNotUninverted { + return nil, fmt.Errorf("loadFieldDocValueIterator: "+ + "no docValues found for field: %s", field) + } + + // read the number of chunks, chunk lengths + var offset, clen uint64 + numChunks, read := binary.Uvarint(s.mem[fieldDvLoc : fieldDvLoc+binary.MaxVarintLen64]) + if read <= 0 { + return nil, fmt.Errorf("failed to read the field "+ + "doc values for field %s", field) + } + offset += uint64(read) + + fdvIter := &docValueIterator{ + curChunkNum: math.MaxUint64, + field: field, + chunkLens: make([]uint64, int(numChunks)), + } + for i := 0; i < int(numChunks); i++ { + clen, read = binary.Uvarint(s.mem[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64]) + if read <= 0 { + return nil, fmt.Errorf("corrupted chunk length during segment load") + } + fdvIter.chunkLens[i] = clen + offset += uint64(read) + } + + fdvIter.dvDataLoc = fieldDvLoc + offset + return fdvIter, nil +} + +func (di *docValueIterator) loadDvChunk(chunkNumber, + localDocNum uint64, s *SegmentBase) error { + // advance to the chunk where the docValues + // reside for the given docID + destChunkDataLoc := di.dvDataLoc + for i := 0; i < int(chunkNumber); i++ { + destChunkDataLoc += di.chunkLens[i] + } + + curChunkSize := di.chunkLens[chunkNumber] + // read the number of docs reside in the chunk + numDocs, read := binary.Uvarint(s.mem[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64]) + if read <= 0 { + return fmt.Errorf("failed to read the chunk") + } + chunkMetaLoc := destChunkDataLoc + uint64(read) + + offset := uint64(0) + di.curChunkHeader = make([]MetaData, int(numDocs)) + for i := 0; i < int(numDocs); i++ { + di.curChunkHeader[i].DocID, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + offset += uint64(read) + di.curChunkHeader[i].DocDvLoc, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + offset += uint64(read) + di.curChunkHeader[i].DocDvLen, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) + offset += uint64(read) + } + + compressedDataLoc := chunkMetaLoc + offset + dataLength := destChunkDataLoc + curChunkSize - compressedDataLoc + di.curChunkData = s.mem[compressedDataLoc : compressedDataLoc+dataLength] + di.curChunkNum = chunkNumber + return nil +} + +func (di *docValueIterator) visitDocValues(docID uint64, + visitor index.DocumentFieldTermVisitor) error { + // binary search the term locations for the docID + start, length := di.getDocValueLocs(docID) + if start == math.MaxUint64 || length == math.MaxUint64 { + return nil + } + // uncompress the already loaded data + uncompressed, err := snappy.Decode(nil, di.curChunkData) + if err != nil { + return err + } + + // pick the terms for the given docID + uncompressed = uncompressed[start : start+length] + for { + i := bytes.Index(uncompressed, termSeparatorSplitSlice) + if i < 0 { + break + } + + visitor(di.field, uncompressed[0:i]) + uncompressed = uncompressed[i+1:] + } + + return nil +} + +func (di *docValueIterator) getDocValueLocs(docID uint64) (uint64, uint64) { + i := sort.Search(len(di.curChunkHeader), func(i int) bool { + return di.curChunkHeader[i].DocID >= docID + }) + if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocID == docID { + return di.curChunkHeader[i].DocDvLoc, di.curChunkHeader[i].DocDvLen + } + return math.MaxUint64, math.MaxUint64 +} + +// VisitDocumentFieldTerms is an implementation of the +// DocumentFieldTermVisitable interface +func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string, + visitor index.DocumentFieldTermVisitor) error { + fieldIDPlus1 := uint16(0) + ok := true + for _, field := range fields { + if fieldIDPlus1, ok = s.fieldsMap[field]; !ok { + continue + } + // find the chunkNumber where the docValues are stored + docInChunk := localDocNum / uint64(s.chunkFactor) + + if dvIter, exists := s.fieldDvIterMap[fieldIDPlus1-1]; exists && + dvIter != nil { + // check if the chunk is already loaded + if docInChunk != dvIter.curChunkNumber() { + err := dvIter.loadDvChunk(docInChunk, localDocNum, s) + if err != nil { + continue + } + } + + _ = dvIter.visitDocValues(localDocNum, visitor) + } + } + return nil +} + +// VisitableDocValueFields returns the list of fields with +// persisted doc value terms ready to be visitable using the +// VisitDocumentFieldTerms method. +func (s *Segment) VisitableDocValueFields() ([]string, error) { + var rv []string + for fieldID, field := range s.fieldsInv { + if dvIter, ok := s.fieldDvIterMap[uint16(fieldID)]; ok && + dvIter != nil { + rv = append(rv, field) + } + } + return rv, nil +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/intcoder.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/intcoder.go new file mode 100644 index 0000000000..e9f295023b --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/intcoder.go @@ -0,0 +1,126 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "bytes" + "encoding/binary" + "io" + + "github.com/Smerity/govarint" +) + +type chunkedIntCoder struct { + final []byte + maxDocNum uint64 + chunkSize uint64 + chunkBuf bytes.Buffer + encoder *govarint.Base128Encoder + chunkLens []uint64 + currChunk uint64 +} + +// newChunkedIntCoder returns a new chunk int coder which packs data into +// chunks based on the provided chunkSize and supports up to the specified +// maxDocNum +func newChunkedIntCoder(chunkSize uint64, maxDocNum uint64) *chunkedIntCoder { + total := maxDocNum/chunkSize + 1 + rv := &chunkedIntCoder{ + chunkSize: chunkSize, + maxDocNum: maxDocNum, + chunkLens: make([]uint64, total), + final: make([]byte, 0, 64), + } + rv.encoder = govarint.NewU64Base128Encoder(&rv.chunkBuf) + + return rv +} + +// Reset lets you reuse this chunked int coder. buffers are reset and reused +// from previous use. you cannot change the chunk size or max doc num. +func (c *chunkedIntCoder) Reset() { + c.final = c.final[:0] + c.chunkBuf.Reset() + c.currChunk = 0 + for i := range c.chunkLens { + c.chunkLens[i] = 0 + } +} + +// Add encodes the provided integers into the correct chunk for the provided +// doc num. You MUST call Add() with increasing docNums. +func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error { + chunk := docNum / c.chunkSize + if chunk != c.currChunk { + // starting a new chunk + if c.encoder != nil { + // close out last + c.encoder.Close() + encodingBytes := c.chunkBuf.Bytes() + c.chunkLens[c.currChunk] = uint64(len(encodingBytes)) + c.final = append(c.final, encodingBytes...) + c.chunkBuf.Reset() + c.encoder = govarint.NewU64Base128Encoder(&c.chunkBuf) + } + c.currChunk = chunk + } + + for _, val := range vals { + _, err := c.encoder.PutU64(val) + if err != nil { + return err + } + } + + return nil +} + +// Close indicates you are done calling Add() this allows the final chunk +// to be encoded. +func (c *chunkedIntCoder) Close() { + c.encoder.Close() + encodingBytes := c.chunkBuf.Bytes() + c.chunkLens[c.currChunk] = uint64(len(encodingBytes)) + c.final = append(c.final, encodingBytes...) +} + +// Write commits all the encoded chunked integers to the provided writer. +func (c *chunkedIntCoder) Write(w io.Writer) (int, error) { + var tw int + buf := make([]byte, binary.MaxVarintLen64) + // write out the number of chunks + n := binary.PutUvarint(buf, uint64(len(c.chunkLens))) + nw, err := w.Write(buf[:n]) + tw += nw + if err != nil { + return tw, err + } + // write out the chunk lens + for _, chunkLen := range c.chunkLens { + n := binary.PutUvarint(buf, uint64(chunkLen)) + nw, err = w.Write(buf[:n]) + tw += nw + if err != nil { + return tw, err + } + } + // write out the data + nw, err = w.Write(c.final) + tw += nw + if err != nil { + return tw, err + } + return tw, nil +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go new file mode 100644 index 0000000000..cc348d7207 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go @@ -0,0 +1,563 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "bufio" + "bytes" + "encoding/binary" + "fmt" + "math" + "os" + + "github.com/RoaringBitmap/roaring" + "github.com/Smerity/govarint" + "github.com/couchbase/vellum" + "github.com/golang/snappy" +) + +// Merge takes a slice of zap segments and bit masks describing which +// documents may be dropped, and creates a new segment containing the +// remaining data. This new segment is built at the specified path, +// with the provided chunkFactor. +func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, + chunkFactor uint32) ([][]uint64, error) { + flag := os.O_RDWR | os.O_CREATE + + f, err := os.OpenFile(path, flag, 0600) + if err != nil { + return nil, err + } + + cleanup := func() { + _ = f.Close() + _ = os.Remove(path) + } + + // buffer the output + br := bufio.NewWriter(f) + + // wrap it for counting (tracking offsets) + cr := NewCountHashWriter(br) + + fieldsInv := mergeFields(segments) + fieldsMap := mapFields(fieldsInv) + + var newDocNums [][]uint64 + var storedIndexOffset uint64 + fieldDvLocsOffset := uint64(fieldNotUninverted) + var dictLocs []uint64 + + newSegDocCount := computeNewDocCount(segments, drops) + if newSegDocCount > 0 { + storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops, + fieldsMap, fieldsInv, newSegDocCount, cr) + if err != nil { + cleanup() + return nil, err + } + + dictLocs, fieldDvLocsOffset, err = persistMergedRest(segments, drops, fieldsInv, fieldsMap, + newDocNums, newSegDocCount, chunkFactor, cr) + if err != nil { + cleanup() + return nil, err + } + } else { + dictLocs = make([]uint64, len(fieldsInv)) + } + + fieldsIndexOffset, err := persistFields(fieldsInv, cr, dictLocs) + if err != nil { + cleanup() + return nil, err + } + + err = persistFooter(newSegDocCount, storedIndexOffset, + fieldsIndexOffset, fieldDvLocsOffset, chunkFactor, cr.Sum32(), cr) + if err != nil { + cleanup() + return nil, err + } + + err = br.Flush() + if err != nil { + cleanup() + return nil, err + } + + err = f.Sync() + if err != nil { + cleanup() + return nil, err + } + + err = f.Close() + if err != nil { + cleanup() + return nil, err + } + + return newDocNums, nil +} + +// mapFields takes the fieldsInv list and builds the map +func mapFields(fields []string) map[string]uint16 { + rv := make(map[string]uint16, len(fields)) + for i, fieldName := range fields { + rv[fieldName] = uint16(i) + } + return rv +} + +// computeNewDocCount determines how many documents will be in the newly +// merged segment when obsoleted docs are dropped +func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 { + var newDocCount uint64 + for segI, segment := range segments { + newDocCount += segment.NumDocs() + if drops[segI] != nil { + newDocCount -= drops[segI].GetCardinality() + } + } + return newDocCount +} + +func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap, + fieldsInv []string, fieldsMap map[string]uint16, newDocNums [][]uint64, + newSegDocCount uint64, chunkFactor uint32, + w *CountHashWriter) ([]uint64, uint64, error) { + + var bufReuse bytes.Buffer + var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64) + var bufLoc []uint64 + + rv := make([]uint64, len(fieldsInv)) + fieldDvLocs := make([]uint64, len(fieldsInv)) + fieldDvLocsOffset := uint64(fieldNotUninverted) + + // docTermMap is keyed by docNum, where the array impl provides + // better memory usage behavior than a sparse-friendlier hashmap + // for when docs have much structural similarity (i.e., every doc + // has a given field) + var docTermMap [][]byte + + var vellumBuf bytes.Buffer + + // for each field + for fieldID, fieldName := range fieldsInv { + if fieldID != 0 { + vellumBuf.Reset() + } + newVellum, err := vellum.New(&vellumBuf, nil) + if err != nil { + return nil, 0, err + } + + // collect FST iterators from all segments for this field + var dicts []*Dictionary + var itrs []vellum.Iterator + for _, segment := range segments { + dict, err2 := segment.dictionary(fieldName) + if err2 != nil { + return nil, 0, err2 + } + dicts = append(dicts, dict) + + if dict != nil && dict.fst != nil { + itr, err2 := dict.fst.Iterator(nil, nil) + if err2 != nil && err2 != vellum.ErrIteratorDone { + return nil, 0, err2 + } + if itr != nil { + itrs = append(itrs, itr) + } + } + } + + // create merging iterator + mergeItr, err := vellum.NewMergeIterator(itrs, func(postingOffsets []uint64) uint64 { + // we don't actually use the merged value + return 0 + }) + + tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) + locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) + + if uint64(cap(docTermMap)) < newSegDocCount { + docTermMap = make([][]byte, newSegDocCount) + } else { + docTermMap = docTermMap[0:newSegDocCount] + for docNum := range docTermMap { // reset the docTermMap + docTermMap[docNum] = docTermMap[docNum][:0] + } + } + + for err == nil { + term, _ := mergeItr.Current() + + newRoaring := roaring.NewBitmap() + newRoaringLocs := roaring.NewBitmap() + + tfEncoder.Reset() + locEncoder.Reset() + + // now go back and get posting list for this term + // but pass in the deleted docs for that segment + for dictI, dict := range dicts { + if dict == nil { + continue + } + postings, err2 := dict.postingsList(term, drops[dictI]) + if err2 != nil { + return nil, 0, err2 + } + + postItr := postings.Iterator() + next, err2 := postItr.Next() + for next != nil && err2 == nil { + hitNewDocNum := newDocNums[dictI][next.Number()] + if hitNewDocNum == docDropped { + return nil, 0, fmt.Errorf("see hit with dropped doc num") + } + newRoaring.Add(uint32(hitNewDocNum)) + // encode norm bits + norm := next.Norm() + normBits := math.Float32bits(float32(norm)) + err = tfEncoder.Add(hitNewDocNum, next.Frequency(), uint64(normBits)) + if err != nil { + return nil, 0, err + } + locs := next.Locations() + if len(locs) > 0 { + newRoaringLocs.Add(uint32(hitNewDocNum)) + for _, loc := range locs { + if cap(bufLoc) < 5+len(loc.ArrayPositions()) { + bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions())) + } + args := bufLoc[0:5] + args[0] = uint64(fieldsMap[loc.Field()]) + args[1] = loc.Pos() + args[2] = loc.Start() + args[3] = loc.End() + args[4] = uint64(len(loc.ArrayPositions())) + args = append(args, loc.ArrayPositions()...) + err = locEncoder.Add(hitNewDocNum, args...) + if err != nil { + return nil, 0, err + } + } + } + + docTermMap[hitNewDocNum] = + append(append(docTermMap[hitNewDocNum], term...), termSeparator) + + next, err2 = postItr.Next() + } + if err2 != nil { + return nil, 0, err2 + } + } + + tfEncoder.Close() + locEncoder.Close() + + if newRoaring.GetCardinality() > 0 { + // this field/term actually has hits in the new segment, lets write it down + freqOffset := uint64(w.Count()) + _, err = tfEncoder.Write(w) + if err != nil { + return nil, 0, err + } + locOffset := uint64(w.Count()) + _, err = locEncoder.Write(w) + if err != nil { + return nil, 0, err + } + postingLocOffset := uint64(w.Count()) + _, err = writeRoaringWithLen(newRoaringLocs, w, &bufReuse, bufMaxVarintLen64) + if err != nil { + return nil, 0, err + } + postingOffset := uint64(w.Count()) + // write out the start of the term info + buf := bufMaxVarintLen64 + n := binary.PutUvarint(buf, freqOffset) + _, err = w.Write(buf[:n]) + if err != nil { + return nil, 0, err + } + + // write out the start of the loc info + n = binary.PutUvarint(buf, locOffset) + _, err = w.Write(buf[:n]) + if err != nil { + return nil, 0, err + } + + // write out the start of the loc posting list + n = binary.PutUvarint(buf, postingLocOffset) + _, err = w.Write(buf[:n]) + if err != nil { + return nil, 0, err + } + _, err = writeRoaringWithLen(newRoaring, w, &bufReuse, bufMaxVarintLen64) + if err != nil { + return nil, 0, err + } + + err = newVellum.Insert(term, postingOffset) + if err != nil { + return nil, 0, err + } + } + + err = mergeItr.Next() + } + if err != nil && err != vellum.ErrIteratorDone { + return nil, 0, err + } + + dictOffset := uint64(w.Count()) + + err = newVellum.Close() + if err != nil { + return nil, 0, err + } + vellumData := vellumBuf.Bytes() + + // write out the length of the vellum data + n := binary.PutUvarint(bufMaxVarintLen64, uint64(len(vellumData))) + _, err = w.Write(bufMaxVarintLen64[:n]) + if err != nil { + return nil, 0, err + } + + // write this vellum to disk + _, err = w.Write(vellumData) + if err != nil { + return nil, 0, err + } + + rv[fieldID] = dictOffset + + // update the field doc values + fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1) + for docNum, docTerms := range docTermMap { + if len(docTerms) > 0 { + err = fdvEncoder.Add(uint64(docNum), docTerms) + if err != nil { + return nil, 0, err + } + } + } + err = fdvEncoder.Close() + if err != nil { + return nil, 0, err + } + + // get the field doc value offset + fieldDvLocs[fieldID] = uint64(w.Count()) + + // persist the doc value details for this field + _, err = fdvEncoder.Write(w) + if err != nil { + return nil, 0, err + } + } + + fieldDvLocsOffset = uint64(w.Count()) + + buf := bufMaxVarintLen64 + for _, offset := range fieldDvLocs { + n := binary.PutUvarint(buf, uint64(offset)) + _, err := w.Write(buf[:n]) + if err != nil { + return nil, 0, err + } + } + + return rv, fieldDvLocsOffset, nil +} + +const docDropped = math.MaxUint64 + +func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap, + fieldsMap map[string]uint16, fieldsInv []string, newSegDocCount uint64, + w *CountHashWriter) (uint64, [][]uint64, error) { + var rv [][]uint64 // The remapped or newDocNums for each segment. + + var newDocNum uint64 + + var curr int + var metaBuf bytes.Buffer + var data, compressed []byte + + metaEncoder := govarint.NewU64Base128Encoder(&metaBuf) + + vals := make([][][]byte, len(fieldsInv)) + typs := make([][]byte, len(fieldsInv)) + poss := make([][][]uint64, len(fieldsInv)) + + docNumOffsets := make([]uint64, newSegDocCount) + + // for each segment + for segI, segment := range segments { + segNewDocNums := make([]uint64, segment.numDocs) + + // for each doc num + for docNum := uint64(0); docNum < segment.numDocs; docNum++ { + // TODO: roaring's API limits docNums to 32-bits? + if drops[segI] != nil && drops[segI].Contains(uint32(docNum)) { + segNewDocNums[docNum] = docDropped + continue + } + + segNewDocNums[docNum] = newDocNum + + curr = 0 + metaBuf.Reset() + data = data[:0] + compressed = compressed[:0] + + // collect all the data + for i := 0; i < len(fieldsInv); i++ { + vals[i] = vals[i][:0] + typs[i] = typs[i][:0] + poss[i] = poss[i][:0] + } + err := segment.VisitDocument(docNum, func(field string, typ byte, value []byte, pos []uint64) bool { + fieldID := int(fieldsMap[field]) + vals[fieldID] = append(vals[fieldID], value) + typs[fieldID] = append(typs[fieldID], typ) + poss[fieldID] = append(poss[fieldID], pos) + return true + }) + if err != nil { + return 0, nil, err + } + + // now walk the fields in order + for fieldID := range fieldsInv { + storedFieldValues := vals[int(fieldID)] + + // has stored values for this field + num := len(storedFieldValues) + + // process each value + for i := 0; i < num; i++ { + // encode field + _, err2 := metaEncoder.PutU64(uint64(fieldID)) + if err2 != nil { + return 0, nil, err2 + } + // encode type + _, err2 = metaEncoder.PutU64(uint64(typs[int(fieldID)][i])) + if err2 != nil { + return 0, nil, err2 + } + // encode start offset + _, err2 = metaEncoder.PutU64(uint64(curr)) + if err2 != nil { + return 0, nil, err2 + } + // end len + _, err2 = metaEncoder.PutU64(uint64(len(storedFieldValues[i]))) + if err2 != nil { + return 0, nil, err2 + } + // encode number of array pos + _, err2 = metaEncoder.PutU64(uint64(len(poss[int(fieldID)][i]))) + if err2 != nil { + return 0, nil, err2 + } + // encode all array positions + for j := 0; j < len(poss[int(fieldID)][i]); j++ { + _, err2 = metaEncoder.PutU64(poss[int(fieldID)][i][j]) + if err2 != nil { + return 0, nil, err2 + } + } + // append data + data = append(data, storedFieldValues[i]...) + // update curr + curr += len(storedFieldValues[i]) + } + } + + metaEncoder.Close() + metaBytes := metaBuf.Bytes() + + compressed = snappy.Encode(compressed, data) + + // record where we're about to start writing + docNumOffsets[newDocNum] = uint64(w.Count()) + + // write out the meta len and compressed data len + _, err = writeUvarints(w, uint64(len(metaBytes)), uint64(len(compressed))) + if err != nil { + return 0, nil, err + } + // now write the meta + _, err = w.Write(metaBytes) + if err != nil { + return 0, nil, err + } + // now write the compressed data + _, err = w.Write(compressed) + if err != nil { + return 0, nil, err + } + + newDocNum++ + } + + rv = append(rv, segNewDocNums) + } + + // return value is the start of the stored index + offset := uint64(w.Count()) + + // now write out the stored doc index + for docNum := range docNumOffsets { + err := binary.Write(w, binary.BigEndian, docNumOffsets[docNum]) + if err != nil { + return 0, nil, err + } + } + + return offset, rv, nil +} + +// mergeFields builds a unified list of fields used across all the input segments +func mergeFields(segments []*Segment) []string { + fieldsMap := map[string]struct{}{} + for _, segment := range segments { + fields := segment.Fields() + for _, field := range fields { + fieldsMap[field] = struct{}{} + } + } + + rv := make([]string, 0, len(fieldsMap)) + // ensure _id stays first + rv = append(rv, "_id") + for k := range fieldsMap { + if k != "_id" { + rv = append(rv, k) + } + } + return rv +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go new file mode 100644 index 0000000000..67e08d1ae3 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go @@ -0,0 +1,409 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "bytes" + "encoding/binary" + "fmt" + "math" + + "github.com/RoaringBitmap/roaring" + "github.com/Smerity/govarint" + "github.com/blevesearch/bleve/index/scorch/segment" +) + +// PostingsList is an in-memory represenation of a postings list +type PostingsList struct { + sb *SegmentBase + term []byte + postingsOffset uint64 + freqOffset uint64 + locOffset uint64 + locBitmap *roaring.Bitmap + postings *roaring.Bitmap + except *roaring.Bitmap + postingKey []byte +} + +// Iterator returns an iterator for this postings list +func (p *PostingsList) Iterator() segment.PostingsIterator { + rv := &PostingsIterator{ + postings: p, + } + if p.postings != nil { + // prepare the freq chunk details + var n uint64 + var read int + var numFreqChunks uint64 + numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + rv.freqChunkLens = make([]uint64, int(numFreqChunks)) + for i := 0; i < int(numFreqChunks); i++ { + rv.freqChunkLens[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + } + rv.freqChunkStart = p.freqOffset + n + + // prepare the loc chunk details + n = 0 + var numLocChunks uint64 + numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + rv.locChunkLens = make([]uint64, int(numLocChunks)) + for i := 0; i < int(numLocChunks); i++ { + rv.locChunkLens[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + } + rv.locChunkStart = p.locOffset + n + rv.locBitmap = p.locBitmap + + rv.all = p.postings.Iterator() + if p.except != nil { + allExcept := roaring.AndNot(p.postings, p.except) + rv.actual = allExcept.Iterator() + } else { + rv.actual = p.postings.Iterator() + } + } + + return rv +} + +// Count returns the number of items on this postings list +func (p *PostingsList) Count() uint64 { + if p.postings != nil { + n := p.postings.GetCardinality() + if p.except != nil { + e := p.except.GetCardinality() + if e > n { + e = n + } + return n - e + } + return n + } + return 0 +} + +func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error { + rv.postingsOffset = postingsOffset + + // read the location of the freq/norm details + var n uint64 + var read int + + rv.freqOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+binary.MaxVarintLen64]) + n += uint64(read) + + rv.locOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + + var locBitmapOffset uint64 + locBitmapOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + + var locBitmapLen uint64 + locBitmapLen, read = binary.Uvarint(d.sb.mem[locBitmapOffset : locBitmapOffset+binary.MaxVarintLen64]) + + locRoaringBytes := d.sb.mem[locBitmapOffset+uint64(read) : locBitmapOffset+uint64(read)+locBitmapLen] + + rv.locBitmap = roaring.NewBitmap() + _, err := rv.locBitmap.FromBuffer(locRoaringBytes) + if err != nil { + return fmt.Errorf("error loading roaring bitmap of locations with hits: %v", err) + } + + var postingsLen uint64 + postingsLen, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64]) + n += uint64(read) + + roaringBytes := d.sb.mem[postingsOffset+n : postingsOffset+n+postingsLen] + + rv.postings = roaring.NewBitmap() + _, err = rv.postings.FromBuffer(roaringBytes) + if err != nil { + return fmt.Errorf("error loading roaring bitmap: %v", err) + } + + return nil +} + +// PostingsIterator provides a way to iterate through the postings list +type PostingsIterator struct { + postings *PostingsList + all roaring.IntIterable + offset int + locoffset int + actual roaring.IntIterable + + currChunk uint32 + currChunkFreqNorm []byte + currChunkLoc []byte + freqNormDecoder *govarint.Base128Decoder + locDecoder *govarint.Base128Decoder + + freqChunkLens []uint64 + freqChunkStart uint64 + + locChunkLens []uint64 + locChunkStart uint64 + + locBitmap *roaring.Bitmap + + next Posting +} + +func (i *PostingsIterator) loadChunk(chunk int) error { + if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) { + return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens)) + } + // load correct chunk bytes + start := i.freqChunkStart + for j := 0; j < chunk; j++ { + start += i.freqChunkLens[j] + } + end := start + i.freqChunkLens[chunk] + i.currChunkFreqNorm = i.postings.sb.mem[start:end] + i.freqNormDecoder = govarint.NewU64Base128Decoder(bytes.NewReader(i.currChunkFreqNorm)) + + start = i.locChunkStart + for j := 0; j < chunk; j++ { + start += i.locChunkLens[j] + } + end = start + i.locChunkLens[chunk] + i.currChunkLoc = i.postings.sb.mem[start:end] + i.locDecoder = govarint.NewU64Base128Decoder(bytes.NewReader(i.currChunkLoc)) + i.currChunk = uint32(chunk) + return nil +} + +func (i *PostingsIterator) readFreqNorm() (uint64, uint64, error) { + freq, err := i.freqNormDecoder.GetU64() + if err != nil { + return 0, 0, fmt.Errorf("error reading frequency: %v", err) + } + normBits, err := i.freqNormDecoder.GetU64() + if err != nil { + return 0, 0, fmt.Errorf("error reading norm: %v", err) + } + return freq, normBits, err +} + +// readLocation processes all the integers on the stream representing a single +// location. if you care about it, pass in a non-nil location struct, and we +// will fill it. if you don't care about it, pass in nil and we safely consume +// the contents. +func (i *PostingsIterator) readLocation(l *Location) error { + // read off field + fieldID, err := i.locDecoder.GetU64() + if err != nil { + return fmt.Errorf("error reading location field: %v", err) + } + // read off pos + pos, err := i.locDecoder.GetU64() + if err != nil { + return fmt.Errorf("error reading location pos: %v", err) + } + // read off start + start, err := i.locDecoder.GetU64() + if err != nil { + return fmt.Errorf("error reading location start: %v", err) + } + // read off end + end, err := i.locDecoder.GetU64() + if err != nil { + return fmt.Errorf("error reading location end: %v", err) + } + // read off num array pos + numArrayPos, err := i.locDecoder.GetU64() + if err != nil { + return fmt.Errorf("error reading location num array pos: %v", err) + } + + // group these together for less branching + if l != nil { + l.field = i.postings.sb.fieldsInv[fieldID] + l.pos = pos + l.start = start + l.end = end + if numArrayPos > 0 { + l.ap = make([]uint64, int(numArrayPos)) + } + } + + // read off array positions + for k := 0; k < int(numArrayPos); k++ { + ap, err := i.locDecoder.GetU64() + if err != nil { + return fmt.Errorf("error reading array position: %v", err) + } + if l != nil { + l.ap[k] = ap + } + } + + return nil +} + +// Next returns the next posting on the postings list, or nil at the end +func (i *PostingsIterator) Next() (segment.Posting, error) { + if i.actual == nil || !i.actual.HasNext() { + return nil, nil + } + n := i.actual.Next() + nChunk := n / i.postings.sb.chunkFactor + allN := i.all.Next() + allNChunk := allN / i.postings.sb.chunkFactor + + // n is the next actual hit (excluding some postings) + // allN is the next hit in the full postings + // if they don't match, adjust offsets to factor in item we're skipping over + // incr the all iterator, and check again + for allN != n { + + // in different chunks, reset offsets + if allNChunk != nChunk { + i.locoffset = 0 + i.offset = 0 + } else { + + if i.currChunk != nChunk || i.currChunkFreqNorm == nil { + err := i.loadChunk(int(nChunk)) + if err != nil { + return nil, fmt.Errorf("error loading chunk: %v", err) + } + } + + // read off freq/offsets even though we don't care about them + freq, _, err := i.readFreqNorm() + if err != nil { + return nil, err + } + if i.locBitmap.Contains(allN) { + for j := 0; j < int(freq); j++ { + err := i.readLocation(nil) + if err != nil { + return nil, err + } + } + } + + // in same chunk, need to account for offsets + i.offset++ + } + + allN = i.all.Next() + } + + if i.currChunk != nChunk || i.currChunkFreqNorm == nil { + err := i.loadChunk(int(nChunk)) + if err != nil { + return nil, fmt.Errorf("error loading chunk: %v", err) + } + } + + i.next = Posting{} // clear the struct. + rv := &i.next + rv.iterator = i + rv.docNum = uint64(n) + + var err error + var normBits uint64 + rv.freq, normBits, err = i.readFreqNorm() + if err != nil { + return nil, err + } + rv.norm = math.Float32frombits(uint32(normBits)) + if i.locBitmap.Contains(n) { + // read off 'freq' locations + rv.locs = make([]segment.Location, rv.freq) + locs := make([]Location, rv.freq) + for j := 0; j < int(rv.freq); j++ { + err := i.readLocation(&locs[j]) + if err != nil { + return nil, err + } + rv.locs[j] = &locs[j] + } + } + + return rv, nil +} + +// Posting is a single entry in a postings list +type Posting struct { + iterator *PostingsIterator + docNum uint64 + + freq uint64 + norm float32 + locs []segment.Location +} + +// Number returns the document number of this posting in this segment +func (p *Posting) Number() uint64 { + return p.docNum +} + +// Frequency returns the frequence of occurance of this term in this doc/field +func (p *Posting) Frequency() uint64 { + return p.freq +} + +// Norm returns the normalization factor for this posting +func (p *Posting) Norm() float64 { + return float64(p.norm) +} + +// Locations returns the location information for each occurance +func (p *Posting) Locations() []segment.Location { + return p.locs +} + +// Location represents the location of a single occurance +type Location struct { + field string + pos uint64 + start uint64 + end uint64 + ap []uint64 +} + +// Field returns the name of the field (useful in composite fields to know +// which original field the value came from) +func (l *Location) Field() string { + return l.field +} + +// Start returns the start byte offset of this occurance +func (l *Location) Start() uint64 { + return l.start +} + +// End returns the end byte offset of this occurance +func (l *Location) End() uint64 { + return l.end +} + +// Pos returns the 1-based phrase position of this occurance +func (l *Location) Pos() uint64 { + return l.pos +} + +// ArrayPositions returns the array position vector associated with this occurance +func (l *Location) ArrayPositions() []uint64 { + return l.ap +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/read.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/read.go new file mode 100644 index 0000000000..0c5b9e17fa --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/read.go @@ -0,0 +1,31 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import "encoding/binary" + +func (s *SegmentBase) getDocStoredMetaAndCompressed(docNum uint64) ([]byte, []byte) { + docStoredStartAddr := s.storedIndexOffset + (8 * docNum) + docStoredStart := binary.BigEndian.Uint64(s.mem[docStoredStartAddr : docStoredStartAddr+8]) + var n uint64 + metaLen, read := binary.Uvarint(s.mem[docStoredStart : docStoredStart+binary.MaxVarintLen64]) + n += uint64(read) + var dataLen uint64 + dataLen, read = binary.Uvarint(s.mem[docStoredStart+n : docStoredStart+n+binary.MaxVarintLen64]) + n += uint64(read) + meta := s.mem[docStoredStart+n : docStoredStart+n+metaLen] + data := s.mem[docStoredStart+n+metaLen : docStoredStart+n+metaLen+dataLen] + return meta, data +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go new file mode 100644 index 0000000000..94268caceb --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go @@ -0,0 +1,458 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "os" + "sync" + + "github.com/RoaringBitmap/roaring" + "github.com/Smerity/govarint" + "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/couchbase/vellum" + mmap "github.com/edsrzf/mmap-go" + "github.com/golang/snappy" +) + +// Open returns a zap impl of a segment +func Open(path string) (segment.Segment, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + mm, err := mmap.Map(f, mmap.RDONLY, 0) + if err != nil { + // mmap failed, try to close the file + _ = f.Close() + return nil, err + } + + rv := &Segment{ + SegmentBase: SegmentBase{ + mem: mm[0 : len(mm)-FooterSize], + fieldsMap: make(map[string]uint16), + fieldDvIterMap: make(map[uint16]*docValueIterator), + }, + f: f, + mm: mm, + path: path, + refs: 1, + } + + err = rv.loadConfig() + if err != nil { + _ = rv.Close() + return nil, err + } + + err = rv.loadFields() + if err != nil { + _ = rv.Close() + return nil, err + } + + err = rv.loadDvIterators() + if err != nil { + _ = rv.Close() + return nil, err + } + + return rv, nil +} + +// SegmentBase is a memory only, read-only implementation of the +// segment.Segment interface, using zap's data representation. +type SegmentBase struct { + mem []byte + memCRC uint32 + chunkFactor uint32 + fieldsMap map[string]uint16 // fieldName -> fieldID+1 + fieldsInv []string // fieldID -> fieldName + numDocs uint64 + storedIndexOffset uint64 + fieldsIndexOffset uint64 + docValueOffset uint64 + dictLocs []uint64 + fieldDvIterMap map[uint16]*docValueIterator // naive chunk cache per field +} + +func (sb *SegmentBase) AddRef() {} +func (sb *SegmentBase) DecRef() (err error) { return nil } +func (sb *SegmentBase) Close() (err error) { return nil } + +// Segment implements a persisted segment.Segment interface, by +// embedding an mmap()'ed SegmentBase. +type Segment struct { + SegmentBase + + f *os.File + mm mmap.MMap + path string + version uint32 + crc uint32 + + m sync.Mutex // Protects the fields that follow. + refs int64 +} + +func (s *Segment) SizeInBytes() uint64 { + // 8 /* size of file pointer */ + // 4 /* size of version -> uint32 */ + // 4 /* size of crc -> uint32 */ + sizeOfUints := 16 + + sizeInBytes := (len(s.path) + int(segment.SizeOfString)) + sizeOfUints + + // mutex, refs -> int64 + sizeInBytes += 16 + + // do not include the mmap'ed part + return uint64(sizeInBytes) + s.SegmentBase.SizeInBytes() - uint64(len(s.mem)) +} + +func (s *SegmentBase) SizeInBytes() uint64 { + // 4 /* size of memCRC -> uint32 */ + // 4 /* size of chunkFactor -> uint32 */ + // 8 /* size of numDocs -> uint64 */ + // 8 /* size of storedIndexOffset -> uint64 */ + // 8 /* size of fieldsIndexOffset -> uint64 */ + // 8 /* size of docValueOffset -> uint64 */ + sizeInBytes := 40 + + sizeInBytes += len(s.mem) + int(segment.SizeOfSlice) + + // fieldsMap + for k, _ := range s.fieldsMap { + sizeInBytes += (len(k) + int(segment.SizeOfString)) + 2 /* size of uint16 */ + } + sizeInBytes += int(segment.SizeOfMap) /* overhead from map */ + + // fieldsInv, dictLocs + for _, entry := range s.fieldsInv { + sizeInBytes += (len(entry) + int(segment.SizeOfString)) + } + sizeInBytes += len(s.dictLocs) * 8 /* size of uint64 */ + sizeInBytes += int(segment.SizeOfSlice) * 3 /* overhead from slices */ + + // fieldDvIterMap + sizeInBytes += len(s.fieldDvIterMap) * + int(segment.SizeOfPointer+2 /* size of uint16 */) + for _, entry := range s.fieldDvIterMap { + if entry != nil { + sizeInBytes += int(entry.sizeInBytes()) + } + } + sizeInBytes += int(segment.SizeOfMap) + + return uint64(sizeInBytes) +} + +func (s *Segment) AddRef() { + s.m.Lock() + s.refs++ + s.m.Unlock() +} + +func (s *Segment) DecRef() (err error) { + s.m.Lock() + s.refs-- + if s.refs == 0 { + err = s.closeActual() + } + s.m.Unlock() + return err +} + +func (s *Segment) loadConfig() error { + crcOffset := len(s.mm) - 4 + s.crc = binary.BigEndian.Uint32(s.mm[crcOffset : crcOffset+4]) + + verOffset := crcOffset - 4 + s.version = binary.BigEndian.Uint32(s.mm[verOffset : verOffset+4]) + if s.version != version { + return fmt.Errorf("unsupported version %d", s.version) + } + + chunkOffset := verOffset - 4 + s.chunkFactor = binary.BigEndian.Uint32(s.mm[chunkOffset : chunkOffset+4]) + + docValueOffset := chunkOffset - 8 + s.docValueOffset = binary.BigEndian.Uint64(s.mm[docValueOffset : docValueOffset+8]) + + fieldsIndexOffset := docValueOffset - 8 + s.fieldsIndexOffset = binary.BigEndian.Uint64(s.mm[fieldsIndexOffset : fieldsIndexOffset+8]) + + storedIndexOffset := fieldsIndexOffset - 8 + s.storedIndexOffset = binary.BigEndian.Uint64(s.mm[storedIndexOffset : storedIndexOffset+8]) + + numDocsOffset := storedIndexOffset - 8 + s.numDocs = binary.BigEndian.Uint64(s.mm[numDocsOffset : numDocsOffset+8]) + return nil +} + +func (s *SegmentBase) loadFields() error { + // NOTE for now we assume the fields index immediately preceeds + // the footer, and if this changes, need to adjust accordingly (or + // store explicit length), where s.mem was sliced from s.mm in Open(). + fieldsIndexEnd := uint64(len(s.mem)) + + // iterate through fields index + var fieldID uint64 + for s.fieldsIndexOffset+(8*fieldID) < fieldsIndexEnd { + addr := binary.BigEndian.Uint64(s.mem[s.fieldsIndexOffset+(8*fieldID) : s.fieldsIndexOffset+(8*fieldID)+8]) + + dictLoc, read := binary.Uvarint(s.mem[addr:fieldsIndexEnd]) + n := uint64(read) + s.dictLocs = append(s.dictLocs, dictLoc) + + var nameLen uint64 + nameLen, read = binary.Uvarint(s.mem[addr+n : fieldsIndexEnd]) + n += uint64(read) + + name := string(s.mem[addr+n : addr+n+nameLen]) + s.fieldsInv = append(s.fieldsInv, name) + s.fieldsMap[name] = uint16(fieldID + 1) + + fieldID++ + } + return nil +} + +// Dictionary returns the term dictionary for the specified field +func (s *SegmentBase) Dictionary(field string) (segment.TermDictionary, error) { + dict, err := s.dictionary(field) + if err == nil && dict == nil { + return &segment.EmptyDictionary{}, nil + } + return dict, err +} + +func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { + fieldIDPlus1 := sb.fieldsMap[field] + if fieldIDPlus1 > 0 { + rv = &Dictionary{ + sb: sb, + field: field, + fieldID: fieldIDPlus1 - 1, + } + + dictStart := sb.dictLocs[rv.fieldID] + if dictStart > 0 { + // read the length of the vellum data + vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64]) + fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] + if fstBytes != nil { + rv.fst, err = vellum.Load(fstBytes) + if err != nil { + return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) + } + } + } + } + + return rv, nil +} + +// VisitDocument invokes the DocFieldValueVistor for each stored field +// for the specified doc number +func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { + // first make sure this is a valid number in this segment + if num < s.numDocs { + meta, compressed := s.getDocStoredMetaAndCompressed(num) + uncompressed, err := snappy.Decode(nil, compressed) + if err != nil { + return err + } + // now decode meta and process + reader := bytes.NewReader(meta) + decoder := govarint.NewU64Base128Decoder(reader) + + keepGoing := true + for keepGoing { + field, err := decoder.GetU64() + if err == io.EOF { + break + } + if err != nil { + return err + } + typ, err := decoder.GetU64() + if err != nil { + return err + } + offset, err := decoder.GetU64() + if err != nil { + return err + } + l, err := decoder.GetU64() + if err != nil { + return err + } + numap, err := decoder.GetU64() + if err != nil { + return err + } + var arrayPos []uint64 + if numap > 0 { + arrayPos = make([]uint64, numap) + for i := 0; i < int(numap); i++ { + ap, err := decoder.GetU64() + if err != nil { + return err + } + arrayPos[i] = ap + } + } + + value := uncompressed[offset : offset+l] + keepGoing = visitor(s.fieldsInv[field], byte(typ), value, arrayPos) + } + } + return nil +} + +// Count returns the number of documents in this segment. +func (s *SegmentBase) Count() uint64 { + return s.numDocs +} + +// DocNumbers returns a bitset corresponding to the doc numbers of all the +// provided _id strings +func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { + rv := roaring.New() + + if len(s.fieldsMap) > 0 { + idDict, err := s.dictionary("_id") + if err != nil { + return nil, err + } + + for _, id := range ids { + postings, err := idDict.postingsList([]byte(id), nil) + if err != nil { + return nil, err + } + if postings.postings != nil { + rv.Or(postings.postings) + } + } + } + + return rv, nil +} + +// Fields returns the field names used in this segment +func (s *SegmentBase) Fields() []string { + return s.fieldsInv +} + +// Path returns the path of this segment on disk +func (s *Segment) Path() string { + return s.path +} + +// Close releases all resources associated with this segment +func (s *Segment) Close() (err error) { + return s.DecRef() +} + +func (s *Segment) closeActual() (err error) { + if s.mm != nil { + err = s.mm.Unmap() + } + // try to close file even if unmap failed + if s.f != nil { + err2 := s.f.Close() + if err == nil { + // try to return first error + err = err2 + } + } + return +} + +// some helpers i started adding for the command-line utility + +// Data returns the underlying mmaped data slice +func (s *Segment) Data() []byte { + return s.mm +} + +// CRC returns the CRC value stored in the file footer +func (s *Segment) CRC() uint32 { + return s.crc +} + +// Version returns the file version in the file footer +func (s *Segment) Version() uint32 { + return s.version +} + +// ChunkFactor returns the chunk factor in the file footer +func (s *Segment) ChunkFactor() uint32 { + return s.chunkFactor +} + +// FieldsIndexOffset returns the fields index offset in the file footer +func (s *Segment) FieldsIndexOffset() uint64 { + return s.fieldsIndexOffset +} + +// StoredIndexOffset returns the stored value index offset in the file footer +func (s *Segment) StoredIndexOffset() uint64 { + return s.storedIndexOffset +} + +// DocValueOffset returns the docValue offset in the file footer +func (s *Segment) DocValueOffset() uint64 { + return s.docValueOffset +} + +// NumDocs returns the number of documents in the file footer +func (s *Segment) NumDocs() uint64 { + return s.numDocs +} + +// DictAddr is a helper function to compute the file offset where the +// dictionary is stored for the specified field. +func (s *Segment) DictAddr(field string) (uint64, error) { + fieldIDPlus1, ok := s.fieldsMap[field] + if !ok { + return 0, fmt.Errorf("no such field '%s'", field) + } + + return s.dictLocs[fieldIDPlus1-1], nil +} + +func (s *SegmentBase) loadDvIterators() error { + if s.docValueOffset == fieldNotUninverted { + return nil + } + + var read uint64 + for fieldID, field := range s.fieldsInv { + fieldLoc, n := binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64]) + if n <= 0 { + return fmt.Errorf("loadDvIterators: failed to read the docvalue offsets for field %d", fieldID) + } + s.fieldDvIterMap[uint16(fieldID)], _ = s.loadFieldDocValueIterator(field, fieldLoc) + read += uint64(n) + } + return nil +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/write.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/write.go new file mode 100644 index 0000000000..c5316a99f0 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/write.go @@ -0,0 +1,145 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "bytes" + "encoding/binary" + "io" + + "github.com/RoaringBitmap/roaring" +) + +// writes out the length of the roaring bitmap in bytes as varint +// then writes out the roaring bitmap itself +func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer, + reuseBuf *bytes.Buffer, reuseBufVarint []byte) (int, error) { + reuseBuf.Reset() + + // write out postings list to memory so we know the len + postingsListLen, err := r.WriteTo(reuseBuf) + if err != nil { + return 0, err + } + var tw int + // write out the length of this postings list + n := binary.PutUvarint(reuseBufVarint, uint64(postingsListLen)) + nw, err := w.Write(reuseBufVarint[:n]) + tw += nw + if err != nil { + return tw, err + } + // write out the postings list itself + nw, err = w.Write(reuseBuf.Bytes()) + tw += nw + if err != nil { + return tw, err + } + return tw, nil +} + +func persistFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (uint64, error) { + var rv uint64 + var fieldsOffsets []uint64 + + for fieldID, fieldName := range fieldsInv { + // record start of this field + fieldsOffsets = append(fieldsOffsets, uint64(w.Count())) + + // write out the dict location and field name length + _, err := writeUvarints(w, dictLocs[fieldID], uint64(len(fieldName))) + if err != nil { + return 0, err + } + + // write out the field name + _, err = w.Write([]byte(fieldName)) + if err != nil { + return 0, err + } + } + + // now write out the fields index + rv = uint64(w.Count()) + for fieldID := range fieldsInv { + err := binary.Write(w, binary.BigEndian, fieldsOffsets[fieldID]) + if err != nil { + return 0, err + } + } + + return rv, nil +} + +// FooterSize is the size of the footer record in bytes +// crc + ver + chunk + field offset + stored offset + num docs + docValueOffset +const FooterSize = 4 + 4 + 4 + 8 + 8 + 8 + 8 + +func persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64, + chunkFactor uint32, crcBeforeFooter uint32, writerIn io.Writer) error { + w := NewCountHashWriter(writerIn) + w.crc = crcBeforeFooter + + // write out the number of docs + err := binary.Write(w, binary.BigEndian, numDocs) + if err != nil { + return err + } + // write out the stored field index location: + err = binary.Write(w, binary.BigEndian, storedIndexOffset) + if err != nil { + return err + } + // write out the field index location + err = binary.Write(w, binary.BigEndian, fieldsIndexOffset) + if err != nil { + return err + } + // write out the fieldDocValue location + err = binary.Write(w, binary.BigEndian, docValueOffset) + if err != nil { + return err + } + // write out 32-bit chunk factor + err = binary.Write(w, binary.BigEndian, chunkFactor) + if err != nil { + return err + } + // write out 32-bit version + err = binary.Write(w, binary.BigEndian, version) + if err != nil { + return err + } + // write out CRC-32 of everything upto but not including this CRC + err = binary.Write(w, binary.BigEndian, w.crc) + if err != nil { + return err + } + return nil +} + +func writeUvarints(w io.Writer, vals ...uint64) (tw int, err error) { + buf := make([]byte, binary.MaxVarintLen64) + for _, val := range vals { + n := binary.PutUvarint(buf, val) + var nw int + nw, err = w.Write(buf[:n]) + tw += nw + if err != nil { + return tw, err + } + } + return tw, err +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go new file mode 100644 index 0000000000..bb99757687 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go @@ -0,0 +1,503 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "bytes" + "container/heap" + "encoding/binary" + "fmt" + "sort" + "sync" + "sync/atomic" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" +) + +type asynchSegmentResult struct { + dictItr segment.DictionaryIterator + + index int + docs *roaring.Bitmap + + postings segment.PostingsList + + err error +} + +type IndexSnapshot struct { + parent *Scorch + segment []*SegmentSnapshot + offsets []uint64 + internal map[string][]byte + epoch uint64 + + m sync.Mutex // Protects the fields that follow. + refs int64 +} + +func (i *IndexSnapshot) Segments() []*SegmentSnapshot { + return i.segment +} + +func (i *IndexSnapshot) Internal() map[string][]byte { + return i.internal +} + +func (i *IndexSnapshot) AddRef() { + i.m.Lock() + i.refs++ + i.m.Unlock() +} + +func (i *IndexSnapshot) DecRef() (err error) { + i.m.Lock() + i.refs-- + if i.refs == 0 { + for _, s := range i.segment { + if s != nil { + err2 := s.segment.DecRef() + if err == nil { + err = err2 + } + } + } + if i.parent != nil { + go i.parent.AddEligibleForRemoval(i.epoch) + } + } + i.m.Unlock() + return err +} + +func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { + + results := make(chan *asynchSegmentResult) + for index, segment := range i.segment { + go func(index int, segment *SegmentSnapshot) { + dict, err := segment.Dictionary(field) + if err != nil { + results <- &asynchSegmentResult{err: err} + } else { + results <- &asynchSegmentResult{dictItr: makeItr(dict)} + } + }(index, segment) + } + + var err error + rv := &IndexSnapshotFieldDict{ + snapshot: i, + cursors: make([]*segmentDictCursor, 0, len(i.segment)), + } + for count := 0; count < len(i.segment); count++ { + asr := <-results + if asr.err != nil && err == nil { + err = asr.err + } else { + next, err2 := asr.dictItr.Next() + if err2 != nil && err == nil { + err = err2 + } + if next != nil { + rv.cursors = append(rv.cursors, &segmentDictCursor{ + itr: asr.dictItr, + curr: next, + }) + } + } + } + // after ensuring we've read all items on channel + if err != nil { + return nil, err + } + // prepare heap + heap.Init(rv) + + return rv, nil +} + +func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) { + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { + return i.Iterator() + }) +} + +func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte, + endTerm []byte) (index.FieldDict, error) { + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { + return i.RangeIterator(string(startTerm), string(endTerm)) + }) +} + +func (i *IndexSnapshot) FieldDictPrefix(field string, + termPrefix []byte) (index.FieldDict, error) { + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { + return i.PrefixIterator(string(termPrefix)) + }) +} + +func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { + results := make(chan *asynchSegmentResult) + for index, segment := range i.segment { + go func(index int, segment *SegmentSnapshot) { + results <- &asynchSegmentResult{ + index: index, + docs: segment.DocNumbersLive(), + } + }(index, segment) + } + + return i.newDocIDReader(results) +} + +func (i *IndexSnapshot) DocIDReaderOnly(ids []string) (index.DocIDReader, error) { + results := make(chan *asynchSegmentResult) + for index, segment := range i.segment { + go func(index int, segment *SegmentSnapshot) { + docs, err := segment.DocNumbers(ids) + if err != nil { + results <- &asynchSegmentResult{err: err} + } else { + results <- &asynchSegmentResult{ + index: index, + docs: docs, + } + } + }(index, segment) + } + + return i.newDocIDReader(results) +} + +func (i *IndexSnapshot) newDocIDReader(results chan *asynchSegmentResult) (index.DocIDReader, error) { + rv := &IndexSnapshotDocIDReader{ + snapshot: i, + iterators: make([]roaring.IntIterable, len(i.segment)), + } + var err error + for count := 0; count < len(i.segment); count++ { + asr := <-results + if asr.err != nil && err != nil { + err = asr.err + } else { + rv.iterators[asr.index] = asr.docs.Iterator() + } + } + + if err != nil { + return nil, err + } + + return rv, nil +} + +func (i *IndexSnapshot) Fields() ([]string, error) { + // FIXME not making this concurrent for now as it's not used in hot path + // of any searches at the moment (just a debug aid) + fieldsMap := map[string]struct{}{} + for _, segment := range i.segment { + fields := segment.Fields() + for _, field := range fields { + fieldsMap[field] = struct{}{} + } + } + rv := make([]string, 0, len(fieldsMap)) + for k := range fieldsMap { + rv = append(rv, k) + } + return rv, nil +} + +func (i *IndexSnapshot) GetInternal(key []byte) ([]byte, error) { + return i.internal[string(key)], nil +} + +func (i *IndexSnapshot) DocCount() (uint64, error) { + var rv uint64 + for _, segment := range i.segment { + rv += segment.Count() + } + return rv, nil +} + +func (i *IndexSnapshot) Document(id string) (rv *document.Document, err error) { + // FIXME could be done more efficiently directly, but reusing for simplicity + tfr, err := i.TermFieldReader([]byte(id), "_id", false, false, false) + if err != nil { + return nil, err + } + defer func() { + if cerr := tfr.Close(); err == nil && cerr != nil { + err = cerr + } + }() + + next, err := tfr.Next(nil) + if err != nil { + return nil, err + } + + if next == nil { + // no such doc exists + return nil, nil + } + + docNum, err := docInternalToNumber(next.ID) + if err != nil { + return nil, err + } + segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) + + rv = document.NewDocument(id) + err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, value []byte, pos []uint64) bool { + if name == "_id" { + return true + } + switch typ { + case 't': + rv.AddField(document.NewTextField(name, pos, value)) + case 'n': + rv.AddField(document.NewNumericFieldFromBytes(name, pos, value)) + case 'd': + rv.AddField(document.NewDateTimeFieldFromBytes(name, pos, value)) + case 'b': + rv.AddField(document.NewBooleanFieldFromBytes(name, pos, value)) + case 'g': + rv.AddField(document.NewGeoPointFieldFromBytes(name, pos, value)) + } + + return true + }) + if err != nil { + return nil, err + } + + return rv, nil +} + +func (i *IndexSnapshot) segmentIndexAndLocalDocNumFromGlobal(docNum uint64) (int, uint64) { + segmentIndex := sort.Search(len(i.offsets), + func(x int) bool { + return i.offsets[x] > docNum + }) - 1 + + localDocNum := docNum - i.offsets[segmentIndex] + return int(segmentIndex), localDocNum +} + +func (i *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) { + docNum, err := docInternalToNumber(id) + if err != nil { + return "", err + } + segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) + + var found bool + var rv string + err = i.segment[segmentIndex].VisitDocument(localDocNum, func(field string, typ byte, value []byte, pos []uint64) bool { + if field == "_id" { + found = true + rv = string(value) + return false + } + return true + }) + if err != nil { + return "", err + } + + if found { + return rv, nil + } + return "", fmt.Errorf("document number %d not found", docNum) +} + +func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err error) { + // FIXME could be done more efficiently directly, but reusing for simplicity + tfr, err := i.TermFieldReader([]byte(id), "_id", false, false, false) + if err != nil { + return nil, err + } + defer func() { + if cerr := tfr.Close(); err == nil && cerr != nil { + err = cerr + } + }() + + next, err := tfr.Next(nil) + if err != nil || next == nil { + return nil, err + } + + return next.ID, nil +} + +func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, + includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { + + rv := &IndexSnapshotTermFieldReader{ + term: term, + field: field, + snapshot: i, + postings: make([]segment.PostingsList, len(i.segment)), + iterators: make([]segment.PostingsIterator, len(i.segment)), + includeFreq: includeFreq, + includeNorm: includeNorm, + includeTermVectors: includeTermVectors, + } + for i, segment := range i.segment { + dict, err := segment.Dictionary(field) + if err != nil { + return nil, err + } + pl, err := dict.PostingsList(string(term), nil) + if err != nil { + return nil, err + } + rv.postings[i] = pl + rv.iterators[i] = pl.Iterator() + } + atomic.AddUint64(&i.parent.stats.termSearchersStarted, uint64(1)) + return rv, nil +} + +func docNumberToBytes(buf []byte, in uint64) []byte { + if len(buf) != 8 { + if cap(buf) >= 8 { + buf = buf[0:8] + } else { + buf = make([]byte, 8) + } + } + binary.BigEndian.PutUint64(buf, in) + return buf +} + +func docInternalToNumber(in index.IndexInternalID) (uint64, error) { + var res uint64 + err := binary.Read(bytes.NewReader(in), binary.BigEndian, &res) + if err != nil { + return 0, err + } + return res, nil +} + +func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, + fields []string, visitor index.DocumentFieldTermVisitor) error { + + docNum, err := docInternalToNumber(id) + if err != nil { + return err + } + segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) + if segmentIndex >= len(i.segment) { + return nil + } + + ss := i.segment[segmentIndex] + + if zaps, ok := ss.segment.(segment.DocumentFieldTermVisitable); ok { + // get the list of doc value persisted fields + pFields, err := zaps.VisitableDocValueFields() + if err != nil { + return err + } + // assort the fields for which terms look up have to + // be performed runtime + dvPendingFields := extractDvPendingFields(fields, pFields) + if len(dvPendingFields) == 0 { + // all fields are doc value persisted + return zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor) + } + + // concurrently trigger the runtime doc value preparations for + // pending fields as well as the visit of the persisted doc values + errCh := make(chan error, 1) + + go func() { + defer close(errCh) + err := ss.cachedDocs.prepareFields(fields, ss) + if err != nil { + errCh <- err + } + }() + + // visit the persisted dv while the cache preparation is in progress + err = zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor) + if err != nil { + return err + } + + // err out if fieldCache preparation failed + err = <-errCh + if err != nil { + return err + } + + visitDocumentFieldCacheTerms(localDocNum, dvPendingFields, ss, visitor) + return nil + } + + return prepareCacheVisitDocumentFieldTerms(localDocNum, fields, ss, visitor) +} + +func prepareCacheVisitDocumentFieldTerms(localDocNum uint64, fields []string, + ss *SegmentSnapshot, visitor index.DocumentFieldTermVisitor) error { + err := ss.cachedDocs.prepareFields(fields, ss) + if err != nil { + return err + } + + visitDocumentFieldCacheTerms(localDocNum, fields, ss, visitor) + return nil +} + +func visitDocumentFieldCacheTerms(localDocNum uint64, fields []string, + ss *SegmentSnapshot, visitor index.DocumentFieldTermVisitor) { + + for _, field := range fields { + if cachedFieldDocs, exists := ss.cachedDocs.cache[field]; exists { + if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { + for { + i := bytes.Index(tlist, TermSeparatorSplitSlice) + if i < 0 { + break + } + visitor(field, tlist[0:i]) + tlist = tlist[i+1:] + } + } + } + } + +} + +func extractDvPendingFields(requestedFields, persistedFields []string) []string { + removeMap := map[string]struct{}{} + for _, str := range persistedFields { + removeMap[str] = struct{}{} + } + + rv := make([]string, 0, len(requestedFields)) + for _, s := range requestedFields { + if _, ok := removeMap[s]; !ok { + rv = append(rv, s) + } + } + return rv +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go new file mode 100644 index 0000000000..3c902cad6b --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go @@ -0,0 +1,92 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "container/heap" + + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" +) + +type segmentDictCursor struct { + itr segment.DictionaryIterator + curr *index.DictEntry +} + +type IndexSnapshotFieldDict struct { + snapshot *IndexSnapshot + cursors []*segmentDictCursor +} + +func (i *IndexSnapshotFieldDict) Len() int { return len(i.cursors) } +func (i *IndexSnapshotFieldDict) Less(a, b int) bool { + return i.cursors[a].curr.Term < i.cursors[b].curr.Term +} +func (i *IndexSnapshotFieldDict) Swap(a, b int) { + i.cursors[a], i.cursors[b] = i.cursors[b], i.cursors[a] +} + +func (i *IndexSnapshotFieldDict) Push(x interface{}) { + i.cursors = append(i.cursors, x.(*segmentDictCursor)) +} + +func (i *IndexSnapshotFieldDict) Pop() interface{} { + n := len(i.cursors) + x := i.cursors[n-1] + i.cursors = i.cursors[0 : n-1] + return x +} + +func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) { + if len(i.cursors) <= 0 { + return nil, nil + } + rv := i.cursors[0].curr + next, err := i.cursors[0].itr.Next() + if err != nil { + return nil, err + } + if next == nil { + // at end of this cursor, remove it + heap.Pop(i) + } else { + // modified heap, fix it + i.cursors[0].curr = next + heap.Fix(i, 0) + } + // look for any other entries with the exact same term + for len(i.cursors) > 0 && i.cursors[0].curr.Term == rv.Term { + rv.Count += i.cursors[0].curr.Count + next, err := i.cursors[0].itr.Next() + if err != nil { + return nil, err + } + if next == nil { + // at end of this cursor, remove it + heap.Pop(i) + } else { + // modified heap, fix it + i.cursors[0].curr = next + heap.Fix(i, 0) + } + } + + return rv, nil +} + +func (i *IndexSnapshotFieldDict) Close() error { + return nil +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_doc.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_doc.go new file mode 100644 index 0000000000..d1205ff8e8 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_doc.go @@ -0,0 +1,67 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "bytes" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index" +) + +type IndexSnapshotDocIDReader struct { + snapshot *IndexSnapshot + iterators []roaring.IntIterable + segmentOffset int +} + +func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) { + for i.segmentOffset < len(i.iterators) { + if !i.iterators[i.segmentOffset].HasNext() { + i.segmentOffset++ + continue + } + next := i.iterators[i.segmentOffset].Next() + // make segment number into global number by adding offset + globalOffset := i.snapshot.offsets[i.segmentOffset] + return docNumberToBytes(nil, uint64(next)+globalOffset), nil + } + return nil, nil +} + +func (i *IndexSnapshotDocIDReader) Advance(ID index.IndexInternalID) (index.IndexInternalID, error) { + // FIXME do something better + next, err := i.Next() + if err != nil { + return nil, err + } + if next == nil { + return nil, nil + } + for bytes.Compare(next, ID) < 0 { + next, err = i.Next() + if err != nil { + return nil, err + } + if next == nil { + break + } + } + return next, nil +} + +func (i *IndexSnapshotDocIDReader) Close() error { + return nil +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_tfr.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_tfr.go new file mode 100644 index 0000000000..87fd0d14f3 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_tfr.go @@ -0,0 +1,132 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "bytes" + "sync/atomic" + + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" +) + +type IndexSnapshotTermFieldReader struct { + term []byte + field string + snapshot *IndexSnapshot + postings []segment.PostingsList + iterators []segment.PostingsIterator + segmentOffset int + includeFreq bool + includeNorm bool + includeTermVectors bool + currPosting segment.Posting + currID index.IndexInternalID +} + +func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { + rv := preAlloced + if rv == nil { + rv = &index.TermFieldDoc{} + } + // find the next hit + for i.segmentOffset < len(i.postings) { + next, err := i.iterators[i.segmentOffset].Next() + if err != nil { + return nil, err + } + if next != nil { + // make segment number into global number by adding offset + globalOffset := i.snapshot.offsets[i.segmentOffset] + nnum := next.Number() + rv.ID = docNumberToBytes(rv.ID, nnum+globalOffset) + i.postingToTermFieldDoc(next, rv) + + i.currID = rv.ID + i.currPosting = next + return rv, nil + } + i.segmentOffset++ + } + return nil, nil +} + +func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Posting, rv *index.TermFieldDoc) { + if i.includeFreq { + rv.Freq = next.Frequency() + } + if i.includeNorm { + rv.Norm = next.Norm() + } + if i.includeTermVectors { + locs := next.Locations() + rv.Vectors = make([]*index.TermFieldVector, len(locs)) + for i, loc := range locs { + rv.Vectors[i] = &index.TermFieldVector{ + Start: loc.Start(), + End: loc.End(), + Pos: loc.Pos(), + ArrayPositions: loc.ArrayPositions(), + Field: loc.Field(), + } + } + } +} + +func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { + // FIXME do something better + // for now, if we need to seek backwards, then restart from the beginning + if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 { + i2, err := i.snapshot.TermFieldReader(i.term, i.field, + i.includeFreq, i.includeNorm, i.includeTermVectors) + if err != nil { + return nil, err + } + *i = *(i2.(*IndexSnapshotTermFieldReader)) + } + // FIXME do something better + next, err := i.Next(preAlloced) + if err != nil { + return nil, err + } + if next == nil { + return nil, nil + } + for bytes.Compare(next.ID, ID) < 0 { + next, err = i.Next(preAlloced) + if err != nil { + return nil, err + } + if next == nil { + break + } + } + return next, nil +} + +func (i *IndexSnapshotTermFieldReader) Count() uint64 { + var rv uint64 + for _, posting := range i.postings { + rv += posting.Count() + } + return rv +} + +func (i *IndexSnapshotTermFieldReader) Close() error { + if i.snapshot != nil { + atomic.AddUint64(&i.snapshot.parent.stats.termSearchersFinished, uint64(1)) + } + return nil +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go new file mode 100644 index 0000000000..43c3ba9f1e --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go @@ -0,0 +1,181 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "fmt" + "log" + + "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/boltdb/bolt" +) + +type RollbackPoint struct { + epoch uint64 + meta map[string][]byte +} + +func (r *RollbackPoint) GetInternal(key []byte) []byte { + return r.meta[string(key)] +} + +// RollbackPoints returns an array of rollback points available +// for the application to make a decision on where to rollback +// to. A nil return value indicates that there are no available +// rollback points. +func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) { + if s.rootBolt == nil { + return nil, fmt.Errorf("RollbackPoints: root is nil") + } + + // start a read-only bolt transaction + tx, err := s.rootBolt.Begin(false) + if err != nil { + return nil, fmt.Errorf("RollbackPoints: failed to start" + + " read-only transaction") + } + + // read-only bolt transactions to be rolled back + defer func() { + _ = tx.Rollback() + }() + + snapshots := tx.Bucket(boltSnapshotsBucket) + if snapshots == nil { + return nil, fmt.Errorf("RollbackPoints: no snapshots available") + } + + rollbackPoints := []*RollbackPoint{} + + c1 := snapshots.Cursor() + for k, _ := c1.Last(); k != nil; k, _ = c1.Prev() { + _, snapshotEpoch, err := segment.DecodeUvarintAscending(k) + if err != nil { + log.Printf("RollbackPoints:"+ + " unable to parse segment epoch %x, continuing", k) + continue + } + + snapshot := snapshots.Bucket(k) + if snapshot == nil { + log.Printf("RollbackPoints:"+ + " snapshot key, but bucket missing %x, continuing", k) + continue + } + + meta := map[string][]byte{} + c2 := snapshot.Cursor() + for j, _ := c2.First(); j != nil; j, _ = c2.Next() { + if j[0] == boltInternalKey[0] { + internalBucket := snapshot.Bucket(j) + err = internalBucket.ForEach(func(key []byte, val []byte) error { + copiedVal := append([]byte(nil), val...) + meta[string(key)] = copiedVal + return nil + }) + if err != nil { + break + } + } + } + + if err != nil { + log.Printf("RollbackPoints:"+ + " failed in fetching internal data: %v", err) + continue + } + + rollbackPoints = append(rollbackPoints, &RollbackPoint{ + epoch: snapshotEpoch, + meta: meta, + }) + } + + return rollbackPoints, nil +} + +// Rollback atomically and durably (if unsafeBatch is unset) brings +// the store back to the point in time as represented by the +// RollbackPoint. Rollback() should only be passed a RollbackPoint +// that came from the same store using the RollbackPoints() API. +func (s *Scorch) Rollback(to *RollbackPoint) error { + if to == nil { + return fmt.Errorf("Rollback: RollbackPoint is nil") + } + + if s.rootBolt == nil { + return fmt.Errorf("Rollback: root is nil") + } + + revert := &snapshotReversion{} + + s.rootLock.Lock() + + err := s.rootBolt.View(func(tx *bolt.Tx) error { + snapshots := tx.Bucket(boltSnapshotsBucket) + if snapshots == nil { + return fmt.Errorf("Rollback: no snapshots available") + } + + pos := segment.EncodeUvarintAscending(nil, to.epoch) + + snapshot := snapshots.Bucket(pos) + if snapshot == nil { + return fmt.Errorf("Rollback: snapshot not found") + } + + indexSnapshot, err := s.loadSnapshot(snapshot) + if err != nil { + return fmt.Errorf("Rollback: unable to load snapshot: %v", err) + } + + // add segments referenced by loaded index snapshot to the + // ineligibleForRemoval map + for _, segSnap := range indexSnapshot.segment { + filename := zapFileName(segSnap.id) + s.ineligibleForRemoval[filename] = true + } + + revert.snapshot = indexSnapshot + revert.applied = make(chan error) + + if !s.unsafeBatch { + revert.persisted = make(chan error) + } + + return nil + }) + + s.rootLock.Unlock() + + if err != nil { + return err + } + + // introduce the reversion + s.revertToSnapshots <- revert + + // block until this snapshot is applied + err = <-revert.applied + if err != nil { + return fmt.Errorf("Rollback: failed with err: %v", err) + } + + if revert.persisted != nil { + err = <-revert.persisted + } + + return err +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go new file mode 100644 index 0000000000..5e64cb1f2f --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go @@ -0,0 +1,229 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "sync" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index/scorch/segment" +) + +var TermSeparator byte = 0xff + +var TermSeparatorSplitSlice = []byte{TermSeparator} + +type SegmentDictionarySnapshot struct { + s *SegmentSnapshot + d segment.TermDictionary +} + +func (s *SegmentDictionarySnapshot) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) { + // TODO: if except is non-nil, perhaps need to OR it with s.s.deleted? + return s.d.PostingsList(term, s.s.deleted) +} + +func (s *SegmentDictionarySnapshot) Iterator() segment.DictionaryIterator { + return s.d.Iterator() +} + +func (s *SegmentDictionarySnapshot) PrefixIterator(prefix string) segment.DictionaryIterator { + return s.d.PrefixIterator(prefix) +} + +func (s *SegmentDictionarySnapshot) RangeIterator(start, end string) segment.DictionaryIterator { + return s.d.RangeIterator(start, end) +} + +type SegmentSnapshot struct { + id uint64 + segment segment.Segment + deleted *roaring.Bitmap + + cachedDocs *cachedDocs +} + +func (s *SegmentSnapshot) Segment() segment.Segment { + return s.segment +} + +func (s *SegmentSnapshot) Deleted() *roaring.Bitmap { + return s.deleted +} + +func (s *SegmentSnapshot) Id() uint64 { + return s.id +} + +func (s *SegmentSnapshot) FullSize() int64 { + return int64(s.segment.Count()) +} + +func (s SegmentSnapshot) LiveSize() int64 { + return int64(s.Count()) +} + +func (s *SegmentSnapshot) Close() error { + return s.segment.Close() +} + +func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { + return s.segment.VisitDocument(num, visitor) +} + +func (s *SegmentSnapshot) Count() uint64 { + + rv := s.segment.Count() + if s.deleted != nil { + rv -= s.deleted.GetCardinality() + } + return rv +} + +func (s *SegmentSnapshot) Dictionary(field string) (segment.TermDictionary, error) { + d, err := s.segment.Dictionary(field) + if err != nil { + return nil, err + } + return &SegmentDictionarySnapshot{ + s: s, + d: d, + }, nil +} + +func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) { + rv, err := s.segment.DocNumbers(docIDs) + if err != nil { + return nil, err + } + if s.deleted != nil { + rv.AndNot(s.deleted) + } + return rv, nil +} + +// DocNumbersLive returns bitsit containing doc numbers for all live docs +func (s *SegmentSnapshot) DocNumbersLive() *roaring.Bitmap { + rv := roaring.NewBitmap() + rv.AddRange(0, s.segment.Count()) + if s.deleted != nil { + rv.AndNot(s.deleted) + } + return rv +} + +func (s *SegmentSnapshot) Fields() []string { + return s.segment.Fields() +} + +type cachedFieldDocs struct { + readyCh chan struct{} // closed when the cachedFieldDocs.docs is ready to be used. + err error // Non-nil if there was an error when preparing this cachedFieldDocs. + docs map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF. +} + +func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) { + defer close(cfd.readyCh) + + dict, err := ss.segment.Dictionary(field) + if err != nil { + cfd.err = err + return + } + + dictItr := dict.Iterator() + next, err := dictItr.Next() + for err == nil && next != nil { + postings, err1 := dict.PostingsList(next.Term, nil) + if err1 != nil { + cfd.err = err1 + return + } + + postingsItr := postings.Iterator() + nextPosting, err2 := postingsItr.Next() + for err2 == nil && nextPosting != nil { + docNum := nextPosting.Number() + cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...) + cfd.docs[docNum] = append(cfd.docs[docNum], TermSeparator) + nextPosting, err2 = postingsItr.Next() + } + + if err2 != nil { + cfd.err = err2 + return + } + + next, err = dictItr.Next() + } + + if err != nil { + cfd.err = err + return + } +} + +type cachedDocs struct { + m sync.Mutex // As the cache is asynchronously prepared, need a lock + cache map[string]*cachedFieldDocs // Keyed by field +} + +func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error { + c.m.Lock() + if c.cache == nil { + c.cache = make(map[string]*cachedFieldDocs, len(ss.Fields())) + } + + for _, field := range wantedFields { + _, exists := c.cache[field] + if !exists { + c.cache[field] = &cachedFieldDocs{ + readyCh: make(chan struct{}), + docs: make(map[uint64][]byte), + } + + go c.cache[field].prepareFields(field, ss) + } + } + + for _, field := range wantedFields { + cachedFieldDocs := c.cache[field] + c.m.Unlock() + <-cachedFieldDocs.readyCh + + if cachedFieldDocs.err != nil { + return cachedFieldDocs.err + } + c.m.Lock() + } + + c.m.Unlock() + return nil +} + +func (c *cachedDocs) sizeInBytes() uint64 { + sizeInBytes := 0 + c.m.Lock() + for k, v := range c.cache { // cachedFieldDocs + sizeInBytes += len(k) + if v != nil { + for _, entry := range v.docs { // docs + sizeInBytes += 8 /* size of uint64 */ + len(entry) + } + } + } + c.m.Unlock() + return uint64(sizeInBytes) +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/stats.go b/vendor/github.com/blevesearch/bleve/index/scorch/stats.go new file mode 100644 index 0000000000..c44a977bfd --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/index/scorch/stats.go @@ -0,0 +1,78 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scorch + +import ( + "encoding/json" + "io/ioutil" + "sync/atomic" +) + +// Stats tracks statistics about the index +type Stats struct { + updates, deletes, batches, errors uint64 + analysisTime, indexTime uint64 + termSearchersStarted uint64 + termSearchersFinished uint64 + numPlainTextBytesIndexed uint64 + numItemsIntroduced uint64 + numItemsPersisted uint64 + i *Scorch +} + +func (s *Stats) statsMap() (map[string]interface{}, error) { + m := map[string]interface{}{} + m["updates"] = atomic.LoadUint64(&s.updates) + m["deletes"] = atomic.LoadUint64(&s.deletes) + m["batches"] = atomic.LoadUint64(&s.batches) + m["errors"] = atomic.LoadUint64(&s.errors) + m["analysis_time"] = atomic.LoadUint64(&s.analysisTime) + m["index_time"] = atomic.LoadUint64(&s.indexTime) + m["term_searchers_started"] = atomic.LoadUint64(&s.termSearchersStarted) + m["term_searchers_finished"] = atomic.LoadUint64(&s.termSearchersFinished) + m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&s.numPlainTextBytesIndexed) + m["num_items_introduced"] = atomic.LoadUint64(&s.numItemsIntroduced) + m["num_items_persisted"] = atomic.LoadUint64(&s.numItemsPersisted) + + if s.i.path != "" { + finfos, err := ioutil.ReadDir(s.i.path) + if err != nil { + return nil, err + } + + var numFilesOnDisk, numBytesUsedDisk uint64 + + for _, finfo := range finfos { + if !finfo.IsDir() { + numBytesUsedDisk += uint64(finfo.Size()) + numFilesOnDisk++ + } + } + + m["num_bytes_used_disk"] = numBytesUsedDisk + m["num_files_on_disk"] = numFilesOnDisk + } + + return m, nil +} + +// MarshalJSON implements json.Marshaler +func (s *Stats) MarshalJSON() ([]byte, error) { + m, err := s.statsMap() + if err != nil { + return nil, err + } + return json.Marshal(m) +} diff --git a/vendor/github.com/blevesearch/bleve/index/upsidedown/reader.go b/vendor/github.com/blevesearch/bleve/index/upsidedown/reader.go index 76032bf58c..1f40c02ded 100644 --- a/vendor/github.com/blevesearch/bleve/index/upsidedown/reader.go +++ b/vendor/github.com/blevesearch/bleve/index/upsidedown/reader.go @@ -190,15 +190,18 @@ func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDo } func newUpsideDownCouchDocIDReaderOnly(indexReader *IndexReader, ids []string) (*UpsideDownCouchDocIDReader, error) { + // we don't actually own the list of ids, so if before we sort we must copy + idsCopy := make([]string, len(ids)) + copy(idsCopy, ids) // ensure ids are sorted - sort.Strings(ids) + sort.Strings(idsCopy) startBytes := []byte{0x0} - if len(ids) > 0 { - startBytes = []byte(ids[0]) + if len(idsCopy) > 0 { + startBytes = []byte(idsCopy[0]) } endBytes := []byte{0xff} - if len(ids) > 0 { - endBytes = incrementBytes([]byte(ids[len(ids)-1])) + if len(idsCopy) > 0 { + endBytes = incrementBytes([]byte(idsCopy[len(idsCopy)-1])) } bisr := NewBackIndexRow(startBytes, nil, nil) bier := NewBackIndexRow(endBytes, nil, nil) @@ -207,7 +210,7 @@ func newUpsideDownCouchDocIDReaderOnly(indexReader *IndexReader, ids []string) ( return &UpsideDownCouchDocIDReader{ indexReader: indexReader, iterator: it, - only: ids, + only: idsCopy, onlyMode: true, }, nil } diff --git a/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go b/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go index a8ef538eed..1243375b76 100644 --- a/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go +++ b/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go @@ -293,7 +293,7 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi } func (udc *UpsideDownCouch) Open() (err error) { - //acquire the write mutex for the duratin of Open() + // acquire the write mutex for the duration of Open() udc.writeMutex.Lock() defer udc.writeMutex.Unlock() diff --git a/vendor/github.com/blevesearch/bleve/mapping/document.go b/vendor/github.com/blevesearch/bleve/mapping/document.go index d62675e52a..6ec0c66bb2 100644 --- a/vendor/github.com/blevesearch/bleve/mapping/document.go +++ b/vendor/github.com/blevesearch/bleve/mapping/document.go @@ -179,6 +179,7 @@ OUTER: continue OUTER } } + break } return current } @@ -503,7 +504,7 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, } dm.walkDocument(property, path, indexes, context) } - case reflect.Map: + case reflect.Map, reflect.Slice: if subDocMapping != nil { for _, fieldMapping := range subDocMapping.Fields { if fieldMapping.Type == "geopoint" { diff --git a/vendor/github.com/blevesearch/bleve/mapping/field.go b/vendor/github.com/blevesearch/bleve/mapping/field.go index 9f1928ca5b..278faa1a92 100644 --- a/vendor/github.com/blevesearch/bleve/mapping/field.go +++ b/vendor/github.com/blevesearch/bleve/mapping/field.go @@ -26,8 +26,9 @@ import ( // control the default behavior for dynamic fields (those not explicitly mapped) var ( - IndexDynamic = true - StoreDynamic = true + IndexDynamic = true + StoreDynamic = true + DocValuesDynamic = true // TODO revisit default? ) // A FieldMapping describes how a specific item @@ -54,6 +55,10 @@ type FieldMapping struct { IncludeTermVectors bool `json:"include_term_vectors,omitempty"` IncludeInAll bool `json:"include_in_all,omitempty"` DateFormat string `json:"date_format,omitempty"` + + // DocValues, if true makes the index uninverting possible for this field + // It is useful for faceting and sorting queries. + DocValues bool `json:"docvalues,omitempty"` } // NewTextFieldMapping returns a default field mapping for text @@ -64,6 +69,7 @@ func NewTextFieldMapping() *FieldMapping { Index: true, IncludeTermVectors: true, IncludeInAll: true, + DocValues: true, } } @@ -71,6 +77,7 @@ func newTextFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { rv := NewTextFieldMapping() rv.Store = im.StoreDynamic rv.Index = im.IndexDynamic + rv.DocValues = im.DocValuesDynamic return rv } @@ -81,6 +88,7 @@ func NewNumericFieldMapping() *FieldMapping { Store: true, Index: true, IncludeInAll: true, + DocValues: true, } } @@ -88,6 +96,7 @@ func newNumericFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { rv := NewNumericFieldMapping() rv.Store = im.StoreDynamic rv.Index = im.IndexDynamic + rv.DocValues = im.DocValuesDynamic return rv } @@ -98,6 +107,7 @@ func NewDateTimeFieldMapping() *FieldMapping { Store: true, Index: true, IncludeInAll: true, + DocValues: true, } } @@ -105,6 +115,7 @@ func newDateTimeFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { rv := NewDateTimeFieldMapping() rv.Store = im.StoreDynamic rv.Index = im.IndexDynamic + rv.DocValues = im.DocValuesDynamic return rv } @@ -115,6 +126,7 @@ func NewBooleanFieldMapping() *FieldMapping { Store: true, Index: true, IncludeInAll: true, + DocValues: true, } } @@ -122,6 +134,7 @@ func newBooleanFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { rv := NewBooleanFieldMapping() rv.Store = im.StoreDynamic rv.Index = im.IndexDynamic + rv.DocValues = im.DocValuesDynamic return rv } @@ -132,6 +145,7 @@ func NewGeoPointFieldMapping() *FieldMapping { Store: true, Index: true, IncludeInAll: true, + DocValues: true, } } @@ -147,6 +161,9 @@ func (fm *FieldMapping) Options() document.IndexingOptions { if fm.IncludeTermVectors { rv |= document.IncludeTermVectors } + if fm.DocValues { + rv |= document.DocValues + } return rv } @@ -308,6 +325,11 @@ func (fm *FieldMapping) UnmarshalJSON(data []byte) error { if err != nil { return err } + case "docvalues": + err := json.Unmarshal(v, &fm.DocValues) + if err != nil { + return err + } default: invalidKeys = append(invalidKeys, k) } diff --git a/vendor/github.com/blevesearch/bleve/mapping/index.go b/vendor/github.com/blevesearch/bleve/mapping/index.go index 86100cfa86..fc5d12a73a 100644 --- a/vendor/github.com/blevesearch/bleve/mapping/index.go +++ b/vendor/github.com/blevesearch/bleve/mapping/index.go @@ -50,6 +50,7 @@ type IndexMappingImpl struct { DefaultField string `json:"default_field"` StoreDynamic bool `json:"store_dynamic"` IndexDynamic bool `json:"index_dynamic"` + DocValuesDynamic bool `json:"docvalues_dynamic,omitempty"` CustomAnalysis *customAnalysis `json:"analysis,omitempty"` cache *registry.Cache } @@ -154,6 +155,7 @@ func NewIndexMapping() *IndexMappingImpl { DefaultField: defaultField, IndexDynamic: IndexDynamic, StoreDynamic: StoreDynamic, + DocValuesDynamic: DocValuesDynamic, CustomAnalysis: newCustomAnalysis(), cache: registry.NewCache(), } @@ -217,6 +219,7 @@ func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error { im.TypeMapping = make(map[string]*DocumentMapping) im.StoreDynamic = StoreDynamic im.IndexDynamic = IndexDynamic + im.DocValuesDynamic = DocValuesDynamic var invalidKeys []string for k, v := range tmp { @@ -271,6 +274,11 @@ func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error { if err != nil { return err } + case "docvalues_dynamic": + err := json.Unmarshal(v, &im.DocValuesDynamic) + if err != nil { + return err + } default: invalidKeys = append(invalidKeys, k) } @@ -318,7 +326,7 @@ func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{} // see if the _all field was disabled allMapping := docMapping.documentMappingForPath("_all") - if allMapping == nil || (allMapping.Enabled != false) { + if allMapping == nil || allMapping.Enabled { field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, document.IndexField|document.IncludeTermVectors) doc.AddField(field) } @@ -339,7 +347,7 @@ func (im *IndexMappingImpl) newWalkContext(doc *document.Document, dm *DocumentM doc: doc, im: im, dm: dm, - excludedFromAll: []string{}, + excludedFromAll: []string{"_id"}, } } diff --git a/vendor/github.com/blevesearch/bleve/query.go b/vendor/github.com/blevesearch/bleve/query.go index 1fecfa25c8..523db5ec05 100644 --- a/vendor/github.com/blevesearch/bleve/query.go +++ b/vendor/github.com/blevesearch/bleve/query.go @@ -209,8 +209,8 @@ func NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightL return query.NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat) } -// NewGeoDistanceQuery creates a new Query for performing geo bounding -// box searches. The arguments describe a position and a distance. Documents +// NewGeoDistanceQuery creates a new Query for performing geo distance +// searches. The arguments describe a position and a distance. Documents // which have an indexed geo point which is less than or equal to the provided // distance from the given position will be returned. func NewGeoDistanceQuery(lon, lat float64, distance string) *query.GeoDistanceQuery { diff --git a/vendor/github.com/blevesearch/bleve/search.go b/vendor/github.com/blevesearch/bleve/search.go index c2ebafbb50..46d849c1b1 100644 --- a/vendor/github.com/blevesearch/bleve/search.go +++ b/vendor/github.com/blevesearch/bleve/search.go @@ -481,5 +481,10 @@ func (sr *SearchResult) Merge(other *SearchResult) { if other.MaxScore > sr.MaxScore { sr.MaxScore = other.MaxScore } + if sr.Facets == nil && len(other.Facets) != 0 { + sr.Facets = other.Facets + return + } + sr.Facets.Merge(other.Facets) } diff --git a/vendor/github.com/blevesearch/bleve/search/search.go b/vendor/github.com/blevesearch/bleve/search/search.go index cbbcfbfd66..f9a92783b7 100644 --- a/vendor/github.com/blevesearch/bleve/search/search.go +++ b/vendor/github.com/blevesearch/bleve/search/search.go @@ -37,12 +37,12 @@ func (ap ArrayPositions) Equals(other ArrayPositions) bool { type Location struct { // Pos is the position of the term within the field, starting at 1 - Pos uint64 `json:"pos"` - + Pos uint64 `json:"pos"` + // Start and End are the byte offsets of the term in the field - Start uint64 `json:"start"` - End uint64 `json:"end"` - + Start uint64 `json:"start"` + End uint64 `json:"end"` + // ArrayPositions contains the positions of the term within any elements. ArrayPositions ArrayPositions `json:"array_positions"` } diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_conjunction.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_conjunction.go index 9ab0e7fa4c..73fba19cd0 100644 --- a/vendor/github.com/blevesearch/bleve/search/searcher/search_conjunction.go +++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_conjunction.go @@ -57,25 +57,25 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S func (s *ConjunctionSearcher) computeQueryNorm() { // first calculate sum of squared weights sumOfSquaredWeights := 0.0 - for _, termSearcher := range s.searchers { - sumOfSquaredWeights += termSearcher.Weight() + for _, searcher := range s.searchers { + sumOfSquaredWeights += searcher.Weight() } // now compute query norm from this s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights) // finally tell all the downstream searchers the norm - for _, termSearcher := range s.searchers { - termSearcher.SetQueryNorm(s.queryNorm) + for _, searcher := range s.searchers { + searcher.SetQueryNorm(s.queryNorm) } } func (s *ConjunctionSearcher) initSearchers(ctx *search.SearchContext) error { var err error // get all searchers pointing at their first match - for i, termSearcher := range s.searchers { + for i, searcher := range s.searchers { if s.currs[i] != nil { ctx.DocumentMatchPool.Put(s.currs[i]) } - s.currs[i], err = termSearcher.Next(ctx) + s.currs[i], err = searcher.Next(ctx) if err != nil { return err } @@ -160,11 +160,11 @@ OUTER: // we know all the searchers are pointing at the same thing // so they all need to be bumped - for i, termSearcher := range s.searchers { + for i, searcher := range s.searchers { if s.currs[i] != rv { ctx.DocumentMatchPool.Put(s.currs[i]) } - s.currs[i], err = termSearcher.Next(ctx) + s.currs[i], err = searcher.Next(ctx) if err != nil { return nil, err } @@ -184,6 +184,9 @@ func (s *ConjunctionSearcher) Advance(ctx *search.SearchContext, ID index.IndexI } } for i := range s.searchers { + if s.currs[i] != nil && s.currs[i].IndexInternalID.Compare(ID) >= 0 { + continue + } err := s.advanceChild(ctx, i, ID) if err != nil { return nil, err diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction.go index 96bd544747..b6910ddb67 100644 --- a/vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction.go +++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction.go @@ -93,25 +93,25 @@ func newDisjunctionSearcher(indexReader index.IndexReader, func (s *DisjunctionSearcher) computeQueryNorm() { // first calculate sum of squared weights sumOfSquaredWeights := 0.0 - for _, termSearcher := range s.searchers { - sumOfSquaredWeights += termSearcher.Weight() + for _, searcher := range s.searchers { + sumOfSquaredWeights += searcher.Weight() } // now compute query norm from this s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights) // finally tell all the downstream searchers the norm - for _, termSearcher := range s.searchers { - termSearcher.SetQueryNorm(s.queryNorm) + for _, searcher := range s.searchers { + searcher.SetQueryNorm(s.queryNorm) } } func (s *DisjunctionSearcher) initSearchers(ctx *search.SearchContext) error { var err error // get all searchers pointing at their first match - for i, termSearcher := range s.searchers { + for i, searcher := range s.searchers { if s.currs[i] != nil { ctx.DocumentMatchPool.Put(s.currs[i]) } - s.currs[i], err = termSearcher.Next(ctx) + s.currs[i], err = searcher.Next(ctx) if err != nil { return err } @@ -221,11 +221,14 @@ func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext, } // get all searchers pointing at their first match var err error - for i, termSearcher := range s.searchers { + for i, searcher := range s.searchers { if s.currs[i] != nil { + if s.currs[i].IndexInternalID.Compare(ID) >= 0 { + continue + } ctx.DocumentMatchPool.Put(s.currs[i]) } - s.currs[i], err = termSearcher.Advance(ctx, ID) + s.currs[i], err = searcher.Advance(ctx, ID) if err != nil { return nil, err } diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_phrase.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_phrase.go index e3fa0895d7..6237cecfd3 100644 --- a/vendor/github.com/blevesearch/bleve/search/searcher/search_phrase.go +++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_phrase.go @@ -226,6 +226,10 @@ type phrasePart struct { loc *search.Location } +func (p *phrasePart) String() string { + return fmt.Sprintf("[%s %v]", p.term, p.loc) +} + type phrasePath []*phrasePart func (p phrasePath) MergeInto(in search.TermLocationMap) { @@ -309,6 +313,15 @@ func (s *PhraseSearcher) Advance(ctx *search.SearchContext, ID index.IndexIntern return nil, err } } + if s.currMust != nil { + if s.currMust.IndexInternalID.Compare(ID) >= 0 { + return s.Next(ctx) + } + ctx.DocumentMatchPool.Put(s.currMust) + } + if s.currMust == nil { + return nil, nil + } var err error s.currMust, err = s.mustSearcher.Advance(ctx, ID) if err != nil { diff --git a/vendor/github.com/couchbase/vellum/CONTRIBUTING.md b/vendor/github.com/couchbase/vellum/CONTRIBUTING.md new file mode 100644 index 0000000000..b85ec82b6b --- /dev/null +++ b/vendor/github.com/couchbase/vellum/CONTRIBUTING.md @@ -0,0 +1,16 @@ +# Contributing to Vellum + +We look forward to your contributions, but ask that you first review these guidelines. + +### Sign the CLA + +As Vellum is a Couchbase project we require contributors accept the [Couchbase Contributor License Agreement](http://review.couchbase.org/static/individual_agreement.html). To sign this agreement log into the Couchbase [code review tool](http://review.couchbase.org/). The Vellum project does not use this code review tool but it is still used to track acceptance of the contributor license agreements. + +### Submitting a Pull Request + +All types of contributions are welcome, but please keep the following in mind: + +- If you're planning a large change, you should really discuss it in a github issue first. This helps avoid duplicate effort and spending time on something that may not be merged. +- Existing tests should continue to pass, new tests for the contribution are nice to have. +- All code should have gone through `go fmt` +- All code should pass `go vet` diff --git a/vendor/github.com/couchbase/vellum/LICENSE b/vendor/github.com/couchbase/vellum/LICENSE new file mode 100644 index 0000000000..7a4a3ea242 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
\ No newline at end of file diff --git a/vendor/github.com/couchbase/vellum/README.md b/vendor/github.com/couchbase/vellum/README.md new file mode 100644 index 0000000000..0c0759a9b5 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/README.md @@ -0,0 +1,168 @@ +# ![vellum](docs/logo.png) vellum + +[![Build Status](https://travis-ci.org/couchbase/vellum.svg?branch=master)](https://travis-ci.org/couchbase/vellum) +[![Coverage Status](https://coveralls.io/repos/github/couchbase/vellum/badge.svg?branch=master)](https://coveralls.io/github/couchbase/vellum?branch=master) +[![GoDoc](https://godoc.org/github.com/couchbase/vellum?status.svg)](https://godoc.org/github.com/couchbase/vellum) +[![Go Report Card](https://goreportcard.com/badge/github.com/couchbase/vellum)](https://goreportcard.com/report/github.com/couchbase/vellum) +[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) + +A Go library implementing an FST (finite state transducer) capable of: + - mapping between keys ([]byte) and a value (uint64) + - enumerating keys in lexicographic order + +Some additional goals of this implementation: + - bounded memory use while building the FST + - streaming out FST data while building + - mmap FST runtime to support very large FTSs (optional) + +## Usage + +### Building an FST + +To build an FST, create a new builder using the `New()` method. This method takes an `io.Writer` as an argument. As the FST is being built, data will be streamed to the writer as soon as possible. With this builder you **MUST** insert keys in lexicographic order. Inserting keys out of order will result in an error. After inserting the last key into the builder, you **MUST** call `Close()` on the builder. This will flush all remaining data to the underlying writer. + +In memory: +```go + var buf bytes.Buffer + builder, err := vellum.New(&buf, nil) + if err != nil { + log.Fatal(err) + } +``` + +To disk: +```go + f, err := os.Create("/tmp/vellum.fst") + if err != nil { + log.Fatal(err) + } + builder, err := vellum.New(f, nil) + if err != nil { + log.Fatal(err) + } +``` + +**MUST** insert keys in lexicographic order: +```go +err = builder.Insert([]byte("cat"), 1) +if err != nil { + log.Fatal(err) +} + +err = builder.Insert([]byte("dog"), 2) +if err != nil { + log.Fatal(err) +} + +err = builder.Insert([]byte("fish"), 3) +if err != nil { + log.Fatal(err) +} + +err = builder.Close() +if err != nil { + log.Fatal(err) +} +``` + +### Using an FST + +After closing the builder, the data can be used to instantiate an FST. If the data was written to disk, you can use the `Open()` method to mmap the file. If the data is already in memory, or you wish to load/mmap the data yourself, you can instantiate the FST with the `Load()` method. + +Load in memory: +```go + fst, err := vellum.Load(buf.Bytes()) + if err != nil { + log.Fatal(err) + } +``` + +Open from disk: +```go + fst, err := vellum.Open("/tmp/vellum.fst") + if err != nil { + log.Fatal(err) + } +``` + +Get key/value: +```go + val, exists, err = fst.Get([]byte("dog")) + if err != nil { + log.Fatal(err) + } + if exists { + fmt.Printf("contains dog with val: %d\n", val) + } else { + fmt.Printf("does not contain dog") + } +``` + +Iterate key/values: +```go + itr, err := fst.Iterator(startKeyInclusive, endKeyExclusive) + for err == nil { + key, val := itr.Current() + fmt.Printf("contains key: %s val: %d", key, val) + err = itr.Next() + } + if err != nil { + log.Fatal(err) + } +``` + +### How does the FST get built? + +A full example of the implementation is beyond the scope of this README, but let's consider a small example where we want to insert 3 key/value pairs. + +First we insert "are" with the value 4. + +![step1](docs/demo1.png) + +Next, we insert "ate" with the value 2. + +![step2](docs/demo2.png) + +Notice how the values associated with the transitions were adjusted so that by summing them while traversing we still get the expected value. + +At this point, we see that state 5 looks like state 3, and state 4 looks like state 2. But, we cannot yet combine them because future inserts could change this. + +Now, we insert "see" with value 3. Once it has been added, we now know that states 5 and 4 can longer change. Since they are identical to 3 and 2, we replace them. + +![step3](docs/demo3.png) + +Again, we see that states 7 and 8 appear to be identical to 2 and 3. + +Having inserted our last key, we call `Close()` on the builder. + +![step4](docs/demo4.png) + +Now, states 7 and 8 can safely be replaced with 2 and 3. + +For additional information, see the references at the bottom of this document. + +### What does the serialized format look like? + +We've broken out a separate document on the [vellum disk format v1](docs/format.md). + +### What if I want to use this on a system that doesn't have mmap? + +The mmap library itself is guarded with system/architecture build tags, but we've also added an additional build tag in vellum. If you'd like to Open() a file based representation of an FST, but not use mmap, you can build the library with the `nommap` build tag. NOTE: if you do this, the entire FST will be read into memory. + +### Can I use this with Unicode strings? + +Yes, however this implementation is only aware of the byte representation you choose. In order to find matches, you must work with some canonical byte representation of the string. In the future, some encoding-aware traversals may be possible on top of the lower-level byte transitions. + +### How did this library come to be? + +In my work on the [Bleve](https://github.com/blevesearch/bleve) project I became aware of the power of the FST for many search-related tasks. The obvious starting point for such a thing in Go was the [mafsa](https://github.com/smartystreets/mafsa) project. While working with mafsa I encountered some issues. First, it did not stream data to disk while building. Second, it chose to use a rune as the fundamental unit of transition in the FST, but I felt using a byte would be more powerful in the end. My hope is that higher-level encoding-aware traversals will be possible when necessary. Finally, as I reported bugs and submitted PRs I learned that the mafsa project was mainly a research project and no longer being maintained. I wanted to build something that could be used in production. As the project advanced more and more techniques from the [BurntSushi/fst](https://github.com/BurntSushi/fst) were adapted to our implementation. + +## Related Work + +Much credit goes to two existing projects: + - [mafsa](https://github.com/smartystreets/mafsa) + - [BurntSushi/fst](https://github.com/BurntSushi/fst) + +Most of the original implementation here started with my digging into the internals of mafsa. As the implementation progressed, I continued to borrow ideas/approaches from the BurntSushi/fst library as well. + +For a great introduction to this topic, please read the blog post [Index 1,600,000,000 Keys with Automata and Rust](http://blog.burntsushi.net/transducers/) diff --git a/vendor/github.com/couchbase/vellum/automaton.go b/vendor/github.com/couchbase/vellum/automaton.go new file mode 100644 index 0000000000..47526595bc --- /dev/null +++ b/vendor/github.com/couchbase/vellum/automaton.go @@ -0,0 +1,85 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vellum + +// Automaton represents the general contract of a byte-based finite automaton +type Automaton interface { + + // Start returns the start state + Start() int + + // IsMatch returns true if and only if the state is a match + IsMatch(int) bool + + // CanMatch returns true if and only if it is possible to reach a match + // in zero or more steps + CanMatch(int) bool + + // WillAlwaysMatch returns true if and only if the current state matches + // and will always match no matter what steps are taken + WillAlwaysMatch(int) bool + + // Accept returns the next state given the input to the specified state + Accept(int, byte) int +} + +// AutomatonContains implements an generic Contains() method which works +// on any implementation of Automaton +func AutomatonContains(a Automaton, k []byte) bool { + i := 0 + curr := a.Start() + for a.CanMatch(curr) && i < len(k) { + curr = a.Accept(curr, k[i]) + if curr == noneAddr { + break + } + i++ + } + if i != len(k) { + return false + } + return a.IsMatch(curr) +} + +// AlwaysMatch is an Automaton implementation which always matches +type AlwaysMatch struct{} + +// Start returns the AlwaysMatch start state +func (m *AlwaysMatch) Start() int { + return 0 +} + +// IsMatch always returns true +func (m *AlwaysMatch) IsMatch(int) bool { + return true +} + +// CanMatch always returns true +func (m *AlwaysMatch) CanMatch(int) bool { + return true +} + +// WillAlwaysMatch always returns true +func (m *AlwaysMatch) WillAlwaysMatch(int) bool { + return true +} + +// Accept returns the next AlwaysMatch state +func (m *AlwaysMatch) Accept(int, byte) int { + return 0 +} + +// creating an alwaysMatchAutomaton to avoid unnecesary repeated allocations. +var alwaysMatchAutomaton = &AlwaysMatch{} diff --git a/vendor/github.com/couchbase/vellum/builder.go b/vendor/github.com/couchbase/vellum/builder.go new file mode 100644 index 0000000000..b21db98072 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/builder.go @@ -0,0 +1,453 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vellum + +import ( + "bytes" + "io" +) + +var defaultBuilderOpts = &BuilderOpts{ + Encoder: 1, + RegistryTableSize: 10000, + RegistryMRUSize: 2, +} + +// A Builder is used to build a new FST. When possible data is +// streamed out to the underlying Writer as soon as possible. +type Builder struct { + unfinished *unfinishedNodes + registry *registry + last []byte + len int + + lastAddr int + + encoder encoder + opts *BuilderOpts + + builderNodePool builderNodePool + transitionPool transitionPool +} + +const noneAddr = 1 +const emptyAddr = 0 + +// NewBuilder returns a new Builder which will stream out the +// underlying representation to the provided Writer as the set is built. +func newBuilder(w io.Writer, opts *BuilderOpts) (*Builder, error) { + if opts == nil { + opts = defaultBuilderOpts + } + rv := &Builder{ + registry: newRegistry(opts.RegistryTableSize, opts.RegistryMRUSize), + opts: opts, + lastAddr: noneAddr, + } + rv.unfinished = newUnfinishedNodes(&rv.builderNodePool) + + var err error + rv.encoder, err = loadEncoder(opts.Encoder, w) + if err != nil { + return nil, err + } + err = rv.encoder.start() + if err != nil { + return nil, err + } + return rv, nil +} + +func (b *Builder) Reset(w io.Writer) error { + b.transitionPool.reset() + b.builderNodePool.reset() + b.unfinished.Reset(&b.builderNodePool) + b.registry.Reset() + b.lastAddr = noneAddr + b.encoder.reset(w) + b.last = nil + b.len = 0 + + err := b.encoder.start() + if err != nil { + return err + } + return nil +} + +// Insert the provided value to the set being built. +// NOTE: values must be inserted in lexicographical order. +func (b *Builder) Insert(key []byte, val uint64) error { + // ensure items are added in lexicographic order + if bytes.Compare(key, b.last) < 0 { + return ErrOutOfOrder + } + if len(key) == 0 { + b.len = 1 + b.unfinished.setRootOutput(val) + return nil + } + + prefixLen, out := b.unfinished.findCommonPrefixAndSetOutput(key, val) + b.len++ + err := b.compileFrom(prefixLen) + if err != nil { + return err + } + b.copyLastKey(key) + b.unfinished.addSuffix(key[prefixLen:], out, &b.builderNodePool) + + return nil +} + +func (b *Builder) copyLastKey(key []byte) { + if b.last == nil { + b.last = make([]byte, 0, 64) + } else { + b.last = b.last[:0] + } + b.last = append(b.last, key...) +} + +// Close MUST be called after inserting all values. +func (b *Builder) Close() error { + err := b.compileFrom(0) + if err != nil { + return err + } + root := b.unfinished.popRoot() + rootAddr, err := b.compile(root) + if err != nil { + return err + } + return b.encoder.finish(b.len, rootAddr) +} + +func (b *Builder) compileFrom(iState int) error { + addr := noneAddr + for iState+1 < len(b.unfinished.stack) { + var node *builderNode + if addr == noneAddr { + node = b.unfinished.popEmpty() + } else { + node = b.unfinished.popFreeze(addr, &b.transitionPool) + } + var err error + addr, err = b.compile(node) + if err != nil { + return nil + } + } + b.unfinished.topLastFreeze(addr, &b.transitionPool) + return nil +} + +func (b *Builder) compile(node *builderNode) (int, error) { + if node.final && len(node.trans) == 0 && + node.finalOutput == 0 { + return 0, nil + } + found, addr, entry := b.registry.entry(node) + if found { + return addr, nil + } + addr, err := b.encoder.encodeState(node, b.lastAddr) + if err != nil { + return 0, err + } + + b.lastAddr = addr + entry.addr = addr + return addr, nil +} + +type unfinishedNodes struct { + stack []*builderNodeUnfinished + + // cache allocates a reasonable number of builderNodeUnfinished + // objects up front and tries to keep reusing them + // because the main data structure is a stack, we assume the + // same access pattern, and don't track items separately + // this means calls get() and pushXYZ() must be paired, + // as well as calls put() and popXYZ() + cache []builderNodeUnfinished +} + +func (u *unfinishedNodes) Reset(p *builderNodePool) { + u.stack = u.stack[:0] + for i := 0; i < len(u.cache); i++ { + u.cache[i] = builderNodeUnfinished{} + } + u.pushEmpty(false, p) +} + +func newUnfinishedNodes(p *builderNodePool) *unfinishedNodes { + rv := &unfinishedNodes{ + stack: make([]*builderNodeUnfinished, 0, 64), + cache: make([]builderNodeUnfinished, 64), + } + rv.pushEmpty(false, p) + return rv +} + +// get new builderNodeUnfinished, reusing cache if possible +func (u *unfinishedNodes) get() *builderNodeUnfinished { + if len(u.stack) < len(u.cache) { + return &u.cache[len(u.stack)] + } + // full now allocate a new one + return &builderNodeUnfinished{} +} + +// return builderNodeUnfinished, clearing it for reuse +func (u *unfinishedNodes) put() { + if len(u.stack) >= len(u.cache) { + return + // do nothing, not part of cache + } + u.cache[len(u.stack)] = builderNodeUnfinished{} +} + +func (u *unfinishedNodes) findCommonPrefixAndSetOutput(key []byte, + out uint64) (int, uint64) { + var i int + for i < len(key) { + if i >= len(u.stack) { + break + } + var addPrefix uint64 + if !u.stack[i].hasLastT { + break + } + if u.stack[i].lastIn == key[i] { + commonPre := outputPrefix(u.stack[i].lastOut, out) + addPrefix = outputSub(u.stack[i].lastOut, commonPre) + out = outputSub(out, commonPre) + u.stack[i].lastOut = commonPre + i++ + } else { + break + } + + if addPrefix != 0 { + u.stack[i].addOutputPrefix(addPrefix) + } + } + + return i, out +} + +func (u *unfinishedNodes) pushEmpty(final bool, p *builderNodePool) { + next := u.get() + next.node = p.alloc() + next.node.final = final + u.stack = append(u.stack, next) +} + +func (u *unfinishedNodes) popRoot() *builderNode { + l := len(u.stack) + var unfinished *builderNodeUnfinished + u.stack, unfinished = u.stack[:l-1], u.stack[l-1] + rv := unfinished.node + u.put() + return rv +} + +func (u *unfinishedNodes) popFreeze(addr int, tp *transitionPool) *builderNode { + l := len(u.stack) + var unfinished *builderNodeUnfinished + u.stack, unfinished = u.stack[:l-1], u.stack[l-1] + unfinished.lastCompiled(addr, tp) + rv := unfinished.node + u.put() + return rv +} + +func (u *unfinishedNodes) popEmpty() *builderNode { + l := len(u.stack) + var unfinished *builderNodeUnfinished + u.stack, unfinished = u.stack[:l-1], u.stack[l-1] + rv := unfinished.node + u.put() + return rv +} + +func (u *unfinishedNodes) setRootOutput(out uint64) { + u.stack[0].node.final = true + u.stack[0].node.finalOutput = out +} + +func (u *unfinishedNodes) topLastFreeze(addr int, tp *transitionPool) { + last := len(u.stack) - 1 + u.stack[last].lastCompiled(addr, tp) +} + +func (u *unfinishedNodes) addSuffix(bs []byte, out uint64, p *builderNodePool) { + if len(bs) == 0 { + return + } + last := len(u.stack) - 1 + u.stack[last].hasLastT = true + u.stack[last].lastIn = bs[0] + u.stack[last].lastOut = out + for _, b := range bs[1:] { + next := u.get() + next.node = p.alloc() + next.hasLastT = true + next.lastIn = b + next.lastOut = 0 + u.stack = append(u.stack, next) + } + u.pushEmpty(true, p) +} + +type builderNodeUnfinished struct { + node *builderNode + lastOut uint64 + lastIn byte + hasLastT bool +} + +func (b *builderNodeUnfinished) lastCompiled(addr int, tp *transitionPool) { + if b.hasLastT { + transIn := b.lastIn + transOut := b.lastOut + b.hasLastT = false + b.lastOut = 0 + trans := tp.alloc() + trans.in = transIn + trans.out = transOut + trans.addr = addr + b.node.trans = append(b.node.trans, trans) + } +} + +func (b *builderNodeUnfinished) addOutputPrefix(prefix uint64) { + if b.node.final { + b.node.finalOutput = outputCat(prefix, b.node.finalOutput) + } + for _, t := range b.node.trans { + t.out = outputCat(prefix, t.out) + } + if b.hasLastT { + b.lastOut = outputCat(prefix, b.lastOut) + } +} + +type builderNode struct { + finalOutput uint64 + trans []*transition + final bool +} + +func (n *builderNode) equiv(o *builderNode) bool { + if n.final != o.final { + return false + } + if n.finalOutput != o.finalOutput { + return false + } + if len(n.trans) != len(o.trans) { + return false + } + for i, ntrans := range n.trans { + otrans := o.trans[i] + if ntrans.in != otrans.in { + return false + } + if ntrans.addr != otrans.addr { + return false + } + if ntrans.out != otrans.out { + return false + } + } + return true +} + +type transition struct { + out uint64 + addr int + in byte +} + +func outputPrefix(l, r uint64) uint64 { + if l < r { + return l + } + return r +} + +func outputSub(l, r uint64) uint64 { + return l - r +} + +func outputCat(l, r uint64) uint64 { + return l + r +} + +// the next builderNode to alloc() will be all[nextOuter][nextInner] +type builderNodePool struct { + all [][]builderNode + nextOuter int + nextInner int +} + +func (p *builderNodePool) reset() { + p.nextOuter = 0 + p.nextInner = 0 +} + +func (p *builderNodePool) alloc() *builderNode { + if p.nextOuter >= len(p.all) { + p.all = append(p.all, make([]builderNode, 256)) + } + rv := &p.all[p.nextOuter][p.nextInner] + p.nextInner += 1 + if p.nextInner >= len(p.all[p.nextOuter]) { + p.nextOuter += 1 + p.nextInner = 0 + } + rv.finalOutput = 0 + rv.trans = rv.trans[:0] + rv.final = false + return rv +} + +// the next transition to alloc() will be all[nextOuter][nextInner] +type transitionPool struct { + all [][]transition + nextOuter int + nextInner int +} + +func (p *transitionPool) reset() { + p.nextOuter = 0 + p.nextInner = 0 +} + +func (p *transitionPool) alloc() *transition { + if p.nextOuter >= len(p.all) { + p.all = append(p.all, make([]transition, 256)) + } + rv := &p.all[p.nextOuter][p.nextInner] + p.nextInner += 1 + if p.nextInner >= len(p.all[p.nextOuter]) { + p.nextOuter += 1 + p.nextInner = 0 + } + *rv = transition{} + return rv +} diff --git a/vendor/github.com/couchbase/vellum/common.go b/vendor/github.com/couchbase/vellum/common.go new file mode 100644 index 0000000000..cd3e6a0d0b --- /dev/null +++ b/vendor/github.com/couchbase/vellum/common.go @@ -0,0 +1,547 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vellum + +const maxCommon = 1<<6 - 1 + +func encodeCommon(in byte) byte { + val := byte((int(commonInputs[in]) + 1) % 256) + if val > maxCommon { + return 0 + } + return val +} + +func decodeCommon(in byte) byte { + return commonInputsInv[in-1] +} + +var commonInputs = []byte{ + 84, // '\x00' + 85, // '\x01' + 86, // '\x02' + 87, // '\x03' + 88, // '\x04' + 89, // '\x05' + 90, // '\x06' + 91, // '\x07' + 92, // '\x08' + 93, // '\t' + 94, // '\n' + 95, // '\x0b' + 96, // '\x0c' + 97, // '\r' + 98, // '\x0e' + 99, // '\x0f' + 100, // '\x10' + 101, // '\x11' + 102, // '\x12' + 103, // '\x13' + 104, // '\x14' + 105, // '\x15' + 106, // '\x16' + 107, // '\x17' + 108, // '\x18' + 109, // '\x19' + 110, // '\x1a' + 111, // '\x1b' + 112, // '\x1c' + 113, // '\x1d' + 114, // '\x1e' + 115, // '\x1f' + 116, // ' ' + 80, // '!' + 117, // '"' + 118, // '#' + 79, // '$' + 39, // '%' + 30, // '&' + 81, // "'" + 75, // '(' + 74, // ')' + 82, // '*' + 57, // '+' + 66, // ',' + 16, // '-' + 12, // '.' + 2, // '/' + 19, // '0' + 20, // '1' + 21, // '2' + 27, // '3' + 32, // '4' + 29, // '5' + 35, // '6' + 36, // '7' + 37, // '8' + 34, // '9' + 24, // ':' + 73, // ';' + 119, // '<' + 23, // '=' + 120, // '>' + 40, // '?' + 83, // '@' + 44, // 'A' + 48, // 'B' + 42, // 'C' + 43, // 'D' + 49, // 'E' + 46, // 'F' + 62, // 'G' + 61, // 'H' + 47, // 'I' + 69, // 'J' + 68, // 'K' + 58, // 'L' + 56, // 'M' + 55, // 'N' + 59, // 'O' + 51, // 'P' + 72, // 'Q' + 54, // 'R' + 45, // 'S' + 52, // 'T' + 64, // 'U' + 65, // 'V' + 63, // 'W' + 71, // 'X' + 67, // 'Y' + 70, // 'Z' + 77, // '[' + 121, // '\\' + 78, // ']' + 122, // '^' + 31, // '_' + 123, // '`' + 4, // 'a' + 25, // 'b' + 9, // 'c' + 17, // 'd' + 1, // 'e' + 26, // 'f' + 22, // 'g' + 13, // 'h' + 7, // 'i' + 50, // 'j' + 38, // 'k' + 14, // 'l' + 15, // 'm' + 10, // 'n' + 3, // 'o' + 8, // 'p' + 60, // 'q' + 6, // 'r' + 5, // 's' + 0, // 't' + 18, // 'u' + 33, // 'v' + 11, // 'w' + 41, // 'x' + 28, // 'y' + 53, // 'z' + 124, // '{' + 125, // '|' + 126, // '}' + 76, // '~' + 127, // '\x7f' + 128, // '\x80' + 129, // '\x81' + 130, // '\x82' + 131, // '\x83' + 132, // '\x84' + 133, // '\x85' + 134, // '\x86' + 135, // '\x87' + 136, // '\x88' + 137, // '\x89' + 138, // '\x8a' + 139, // '\x8b' + 140, // '\x8c' + 141, // '\x8d' + 142, // '\x8e' + 143, // '\x8f' + 144, // '\x90' + 145, // '\x91' + 146, // '\x92' + 147, // '\x93' + 148, // '\x94' + 149, // '\x95' + 150, // '\x96' + 151, // '\x97' + 152, // '\x98' + 153, // '\x99' + 154, // '\x9a' + 155, // '\x9b' + 156, // '\x9c' + 157, // '\x9d' + 158, // '\x9e' + 159, // '\x9f' + 160, // '\xa0' + 161, // '¡' + 162, // '¢' + 163, // '£' + 164, // '¤' + 165, // '¥' + 166, // '¦' + 167, // '§' + 168, // '¨' + 169, // '©' + 170, // 'ª' + 171, // '«' + 172, // '¬' + 173, // '\xad' + 174, // '®' + 175, // '¯' + 176, // '°' + 177, // '±' + 178, // '²' + 179, // '³' + 180, // '´' + 181, // 'µ' + 182, // '¶' + 183, // '·' + 184, // '¸' + 185, // '¹' + 186, // 'º' + 187, // '»' + 188, // '¼' + 189, // '½' + 190, // '¾' + 191, // '¿' + 192, // 'À' + 193, // 'Á' + 194, // 'Â' + 195, // 'Ã' + 196, // 'Ä' + 197, // 'Å' + 198, // 'Æ' + 199, // 'Ç' + 200, // 'È' + 201, // 'É' + 202, // 'Ê' + 203, // 'Ë' + 204, // 'Ì' + 205, // 'Í' + 206, // 'Î' + 207, // 'Ï' + 208, // 'Ð' + 209, // 'Ñ' + 210, // 'Ò' + 211, // 'Ó' + 212, // 'Ô' + 213, // 'Õ' + 214, // 'Ö' + 215, // '×' + 216, // 'Ø' + 217, // 'Ù' + 218, // 'Ú' + 219, // 'Û' + 220, // 'Ü' + 221, // 'Ý' + 222, // 'Þ' + 223, // 'ß' + 224, // 'à' + 225, // 'á' + 226, // 'â' + 227, // 'ã' + 228, // 'ä' + 229, // 'å' + 230, // 'æ' + 231, // 'ç' + 232, // 'è' + 233, // 'é' + 234, // 'ê' + 235, // 'ë' + 236, // 'ì' + 237, // 'í' + 238, // 'î' + 239, // 'ï' + 240, // 'ð' + 241, // 'ñ' + 242, // 'ò' + 243, // 'ó' + 244, // 'ô' + 245, // 'õ' + 246, // 'ö' + 247, // '÷' + 248, // 'ø' + 249, // 'ù' + 250, // 'ú' + 251, // 'û' + 252, // 'ü' + 253, // 'ý' + 254, // 'þ' + 255, // 'ÿ' +} + +var commonInputsInv = []byte{ + 't', + 'e', + '/', + 'o', + 'a', + 's', + 'r', + 'i', + 'p', + 'c', + 'n', + 'w', + '.', + 'h', + 'l', + 'm', + '-', + 'd', + 'u', + '0', + '1', + '2', + 'g', + '=', + ':', + 'b', + 'f', + '3', + 'y', + '5', + '&', + '_', + '4', + 'v', + '9', + '6', + '7', + '8', + 'k', + '%', + '?', + 'x', + 'C', + 'D', + 'A', + 'S', + 'F', + 'I', + 'B', + 'E', + 'j', + 'P', + 'T', + 'z', + 'R', + 'N', + 'M', + '+', + 'L', + 'O', + 'q', + 'H', + 'G', + 'W', + 'U', + 'V', + ',', + 'Y', + 'K', + 'J', + 'Z', + 'X', + 'Q', + ';', + ')', + '(', + '~', + '[', + ']', + '$', + '!', + '\'', + '*', + '@', + '\x00', + '\x01', + '\x02', + '\x03', + '\x04', + '\x05', + '\x06', + '\x07', + '\x08', + '\t', + '\n', + '\x0b', + '\x0c', + '\r', + '\x0e', + '\x0f', + '\x10', + '\x11', + '\x12', + '\x13', + '\x14', + '\x15', + '\x16', + '\x17', + '\x18', + '\x19', + '\x1a', + '\x1b', + '\x1c', + '\x1d', + '\x1e', + '\x1f', + ' ', + '"', + '#', + '<', + '>', + '\\', + '^', + '`', + '{', + '|', + '}', + '\x7f', + '\x80', + '\x81', + '\x82', + '\x83', + '\x84', + '\x85', + '\x86', + '\x87', + '\x88', + '\x89', + '\x8a', + '\x8b', + '\x8c', + '\x8d', + '\x8e', + '\x8f', + '\x90', + '\x91', + '\x92', + '\x93', + '\x94', + '\x95', + '\x96', + '\x97', + '\x98', + '\x99', + '\x9a', + '\x9b', + '\x9c', + '\x9d', + '\x9e', + '\x9f', + '\xa0', + '\xa1', + '\xa2', + '\xa3', + '\xa4', + '\xa5', + '\xa6', + '\xa7', + '\xa8', + '\xa9', + '\xaa', + '\xab', + '\xac', + '\xad', + '\xae', + '\xaf', + '\xb0', + '\xb1', + '\xb2', + '\xb3', + '\xb4', + '\xb5', + '\xb6', + '\xb7', + '\xb8', + '\xb9', + '\xba', + '\xbb', + '\xbc', + '\xbd', + '\xbe', + '\xbf', + '\xc0', + '\xc1', + '\xc2', + '\xc3', + '\xc4', + '\xc5', + '\xc6', + '\xc7', + '\xc8', + '\xc9', + '\xca', + '\xcb', + '\xcc', + '\xcd', + '\xce', + '\xcf', + '\xd0', + '\xd1', + '\xd2', + '\xd3', + '\xd4', + '\xd5', + '\xd6', + '\xd7', + '\xd8', + '\xd9', + '\xda', + '\xdb', + '\xdc', + '\xdd', + '\xde', + '\xdf', + '\xe0', + '\xe1', + '\xe2', + '\xe3', + '\xe4', + '\xe5', + '\xe6', + '\xe7', + '\xe8', + '\xe9', + '\xea', + '\xeb', + '\xec', + '\xed', + '\xee', + '\xef', + '\xf0', + '\xf1', + '\xf2', + '\xf3', + '\xf4', + '\xf5', + '\xf6', + '\xf7', + '\xf8', + '\xf9', + '\xfa', + '\xfb', + '\xfc', + '\xfd', + '\xfe', + '\xff', +} diff --git a/vendor/github.com/couchbase/vellum/decoder_v1.go b/vendor/github.com/couchbase/vellum/decoder_v1.go new file mode 100644 index 0000000000..5a0ea68871 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/decoder_v1.go @@ -0,0 +1,316 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vellum + +import ( + "bytes" + "encoding/binary" + "fmt" + "strconv" +) + +func init() { + registerDecoder(versionV1, func(data []byte) decoder { + return newDecoderV1(data) + }) +} + +type decoderV1 struct { + data []byte + root uint64 + len uint64 +} + +func newDecoderV1(data []byte) *decoderV1 { + return &decoderV1{ + data: data, + } +} + +func (d *decoderV1) getRoot() int { + if len(d.data) < footerSizeV1 { + return noneAddr + } + footer := d.data[len(d.data)-footerSizeV1:] + root := binary.LittleEndian.Uint64(footer[8:]) + return int(root) +} + +func (d *decoderV1) getLen() int { + if len(d.data) < footerSizeV1 { + return 0 + } + footer := d.data[len(d.data)-footerSizeV1:] + dlen := binary.LittleEndian.Uint64(footer) + return int(dlen) +} + +func (d *decoderV1) stateAt(addr int, prealloc fstState) (fstState, error) { + state, ok := prealloc.(*fstStateV1) + if ok && state != nil { + *state = fstStateV1{} // clear the struct + } else { + state = &fstStateV1{} + } + err := state.at(d.data, addr) + if err != nil { + return nil, err + } + return state, nil +} + +type fstStateV1 struct { + data []byte + top int + bottom int + numTrans int + + // single trans only + singleTransChar byte + singleTransNext bool + singleTransAddr uint64 + singleTransOut uint64 + + // shared + transSize int + outSize int + + // multiple trans only + final bool + transTop int + transBottom int + destTop int + destBottom int + outTop int + outBottom int + outFinal int +} + +func (f *fstStateV1) isEncodedSingle() bool { + if f.data[f.top]>>7 > 0 { + return true + } + return false +} + +func (f *fstStateV1) at(data []byte, addr int) error { + f.data = data + if addr == emptyAddr { + return f.atZero() + } else if addr == noneAddr { + return f.atNone() + } + if addr > len(data) || addr < 16 { + return fmt.Errorf("invalid address %d/%d", addr, len(data)) + } + f.top = addr + f.bottom = addr + if f.isEncodedSingle() { + return f.atSingle(data, addr) + } + return f.atMulti(data, addr) +} + +func (f *fstStateV1) atZero() error { + f.top = 0 + f.bottom = 1 + f.numTrans = 0 + f.final = true + f.outFinal = 0 + return nil +} + +func (f *fstStateV1) atNone() error { + f.top = 0 + f.bottom = 1 + f.numTrans = 0 + f.final = false + f.outFinal = 0 + return nil +} + +func (f *fstStateV1) atSingle(data []byte, addr int) error { + // handle single transition case + f.numTrans = 1 + f.singleTransNext = data[f.top]&transitionNext > 0 + f.singleTransChar = data[f.top] & maxCommon + if f.singleTransChar == 0 { + f.bottom-- // extra byte for uncommon + f.singleTransChar = data[f.bottom] + } else { + f.singleTransChar = decodeCommon(f.singleTransChar) + } + if f.singleTransNext { + // now we know the bottom, can compute next addr + f.singleTransAddr = uint64(f.bottom - 1) + f.singleTransOut = 0 + } else { + f.bottom-- // extra byte with pack sizes + f.transSize, f.outSize = decodePackSize(data[f.bottom]) + f.bottom -= f.transSize // exactly one trans + f.singleTransAddr = readPackedUint(data[f.bottom : f.bottom+f.transSize]) + if f.outSize > 0 { + f.bottom -= f.outSize // exactly one out (could be length 0 though) + f.singleTransOut = readPackedUint(data[f.bottom : f.bottom+f.outSize]) + } else { + f.singleTransOut = 0 + } + // need to wait till we know bottom + if f.singleTransAddr != 0 { + f.singleTransAddr = uint64(f.bottom) - f.singleTransAddr + } + } + return nil +} + +func (f *fstStateV1) atMulti(data []byte, addr int) error { + // handle multiple transitions case + f.final = data[f.top]&stateFinal > 0 + f.numTrans = int(data[f.top] & maxNumTrans) + if f.numTrans == 0 { + f.bottom-- // extra byte for number of trans + f.numTrans = int(data[f.bottom]) + if f.numTrans == 1 { + // can't really be 1 here, this is special case that means 256 + f.numTrans = 256 + } + } + f.bottom-- // extra byte with pack sizes + f.transSize, f.outSize = decodePackSize(data[f.bottom]) + + f.transTop = f.bottom + f.bottom -= f.numTrans // one byte for each transition + f.transBottom = f.bottom + + f.destTop = f.bottom + f.bottom -= f.numTrans * f.transSize + f.destBottom = f.bottom + + if f.outSize > 0 { + f.outTop = f.bottom + f.bottom -= f.numTrans * f.outSize + f.outBottom = f.bottom + if f.final { + f.bottom -= f.outSize + f.outFinal = f.bottom + } + } + return nil +} + +func (f *fstStateV1) Address() int { + return f.top +} + +func (f *fstStateV1) Final() bool { + return f.final +} + +func (f *fstStateV1) FinalOutput() uint64 { + if f.numTrans > 0 && f.final && f.outSize > 0 { + return readPackedUint(f.data[f.outFinal : f.outFinal+f.outSize]) + } + return 0 +} + +func (f *fstStateV1) NumTransitions() int { + return f.numTrans +} + +func (f *fstStateV1) TransitionAt(i int) byte { + if f.isEncodedSingle() { + return f.singleTransChar + } + transitionKeys := f.data[f.transBottom:f.transTop] + return transitionKeys[f.numTrans-i-1] +} + +func (f *fstStateV1) TransitionFor(b byte) (int, int, uint64) { + if f.isEncodedSingle() { + if f.singleTransChar == b { + return 0, int(f.singleTransAddr), f.singleTransOut + } + return -1, noneAddr, 0 + } + transitionKeys := f.data[f.transBottom:f.transTop] + pos := bytes.IndexByte(transitionKeys, b) + if pos < 0 { + return -1, noneAddr, 0 + } + transDests := f.data[f.destBottom:f.destTop] + dest := int(readPackedUint(transDests[pos*f.transSize : pos*f.transSize+f.transSize])) + if dest > 0 { + // convert delta + dest = f.bottom - dest + } + transVals := f.data[f.outBottom:f.outTop] + var out uint64 + if f.outSize > 0 { + out = readPackedUint(transVals[pos*f.outSize : pos*f.outSize+f.outSize]) + } + return f.numTrans - pos - 1, dest, out +} + +func (f *fstStateV1) String() string { + rv := "" + rv += fmt.Sprintf("State: %d (%#x)", f.top, f.top) + if f.final { + rv += " final" + fout := f.FinalOutput() + if fout != 0 { + rv += fmt.Sprintf(" (%d)", fout) + } + } + rv += "\n" + rv += fmt.Sprintf("Data: % x\n", f.data[f.bottom:f.top+1]) + + for i := 0; i < f.numTrans; i++ { + transChar := f.TransitionAt(i) + _, transDest, transOut := f.TransitionFor(transChar) + rv += fmt.Sprintf(" - %d (%#x) '%s' ---> %d (%#x) with output: %d", transChar, transChar, string(transChar), transDest, transDest, transOut) + rv += "\n" + } + if f.numTrans == 0 { + rv += "\n" + } + return rv +} + +func (f *fstStateV1) DotString(num int) string { + rv := "" + label := fmt.Sprintf("%d", num) + final := "" + if f.final { + final = ",peripheries=2" + } + rv += fmt.Sprintf(" %d [label=\"%s\"%s];\n", f.top, label, final) + + for i := 0; i < f.numTrans; i++ { + transChar := f.TransitionAt(i) + _, transDest, transOut := f.TransitionFor(transChar) + out := "" + if transOut != 0 { + out = fmt.Sprintf("/%d", transOut) + } + rv += fmt.Sprintf(" %d -> %d [label=\"%s%s\"];\n", f.top, transDest, escapeInput(transChar), out) + } + + return rv +} + +func escapeInput(b byte) string { + x := strconv.AppendQuoteRune(nil, rune(b)) + return string(x[1:(len(x) - 1)]) +} diff --git a/vendor/github.com/couchbase/vellum/encoder_v1.go b/vendor/github.com/couchbase/vellum/encoder_v1.go new file mode 100644 index 0000000000..0651fc8614 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/encoder_v1.go @@ -0,0 +1,227 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vellum + +import ( + "encoding/binary" + "fmt" + "io" +) + +const versionV1 = 1 +const oneTransition = 1 << 7 +const transitionNext = 1 << 6 +const stateFinal = 1 << 6 +const footerSizeV1 = 16 + +func init() { + registerEncoder(versionV1, func(w io.Writer) encoder { + return newEncoderV1(w) + }) +} + +type encoderV1 struct { + bw *writer +} + +func newEncoderV1(w io.Writer) *encoderV1 { + return &encoderV1{ + bw: newWriter(w), + } +} + +func (e *encoderV1) reset(w io.Writer) { + e.bw.Reset(w) +} + +func (e *encoderV1) start() error { + header := make([]byte, headerSize) + binary.LittleEndian.PutUint64(header, versionV1) + binary.LittleEndian.PutUint64(header[8:], uint64(0)) // type + n, err := e.bw.Write(header) + if err != nil { + return err + } + if n != headerSize { + return fmt.Errorf("short write of header %d/%d", n, headerSize) + } + return nil +} + +func (e *encoderV1) encodeState(s *builderNode, lastAddr int) (int, error) { + if len(s.trans) == 0 && s.final && s.finalOutput == 0 { + return 0, nil + } else if len(s.trans) != 1 || s.final { + return e.encodeStateMany(s) + } else if !s.final && s.trans[0].out == 0 && s.trans[0].addr == lastAddr { + return e.encodeStateOneFinish(s, transitionNext) + } + return e.encodeStateOne(s) +} + +func (e *encoderV1) encodeStateOne(s *builderNode) (int, error) { + start := uint64(e.bw.counter) + outPackSize := 0 + if s.trans[0].out != 0 { + outPackSize = packedSize(s.trans[0].out) + err := e.bw.WritePackedUintIn(s.trans[0].out, outPackSize) + if err != nil { + return 0, err + } + } + delta := deltaAddr(start, uint64(s.trans[0].addr)) + transPackSize := packedSize(delta) + err := e.bw.WritePackedUintIn(delta, transPackSize) + if err != nil { + return 0, err + } + + packSize := encodePackSize(transPackSize, outPackSize) + err = e.bw.WriteByte(packSize) + if err != nil { + return 0, err + } + + return e.encodeStateOneFinish(s, 0) +} + +func (e *encoderV1) encodeStateOneFinish(s *builderNode, next byte) (int, error) { + enc := encodeCommon(s.trans[0].in) + + // not a common input + if enc == 0 { + err := e.bw.WriteByte(s.trans[0].in) + if err != nil { + return 0, err + } + } + err := e.bw.WriteByte(oneTransition | next | enc) + if err != nil { + return 0, err + } + + return e.bw.counter - 1, nil +} + +func (e *encoderV1) encodeStateMany(s *builderNode) (int, error) { + start := uint64(e.bw.counter) + transPackSize := 0 + outPackSize := packedSize(s.finalOutput) + anyOutputs := s.finalOutput != 0 + for i := range s.trans { + delta := deltaAddr(start, uint64(s.trans[i].addr)) + tsize := packedSize(delta) + if tsize > transPackSize { + transPackSize = tsize + } + osize := packedSize(s.trans[i].out) + if osize > outPackSize { + outPackSize = osize + } + anyOutputs = anyOutputs || s.trans[i].out != 0 + } + if !anyOutputs { + outPackSize = 0 + } + + if anyOutputs { + // output final value + if s.final { + err := e.bw.WritePackedUintIn(s.finalOutput, outPackSize) + if err != nil { + return 0, err + } + } + // output transition values (in reverse) + for j := len(s.trans) - 1; j >= 0; j-- { + err := e.bw.WritePackedUintIn(s.trans[j].out, outPackSize) + if err != nil { + return 0, err + } + } + } + + // output transition dests (in reverse) + for j := len(s.trans) - 1; j >= 0; j-- { + delta := deltaAddr(start, uint64(s.trans[j].addr)) + err := e.bw.WritePackedUintIn(delta, transPackSize) + if err != nil { + return 0, err + } + } + + // output transition keys (in reverse) + for j := len(s.trans) - 1; j >= 0; j-- { + err := e.bw.WriteByte(s.trans[j].in) + if err != nil { + return 0, err + } + } + + packSize := encodePackSize(transPackSize, outPackSize) + err := e.bw.WriteByte(packSize) + if err != nil { + return 0, err + } + + numTrans := encodeNumTrans(len(s.trans)) + + // if number of transitions wont fit in edge header byte + // write out separately + if numTrans == 0 { + if len(s.trans) == 256 { + // this wouldn't fit in single byte, but reuse value 1 + // which would have always fit in the edge header instead + err = e.bw.WriteByte(1) + if err != nil { + return 0, err + } + } else { + err = e.bw.WriteByte(byte(len(s.trans))) + if err != nil { + return 0, err + } + } + } + + // finally write edge header + if s.final { + numTrans |= stateFinal + } + err = e.bw.WriteByte(numTrans) + if err != nil { + return 0, err + } + + return e.bw.counter - 1, nil +} + +func (e *encoderV1) finish(count, rootAddr int) error { + footer := make([]byte, footerSizeV1) + binary.LittleEndian.PutUint64(footer, uint64(count)) // root addr + binary.LittleEndian.PutUint64(footer[8:], uint64(rootAddr)) // root addr + n, err := e.bw.Write(footer) + if err != nil { + return err + } + if n != footerSizeV1 { + return fmt.Errorf("short write of footer %d/%d", n, footerSizeV1) + } + err = e.bw.Flush() + if err != nil { + return err + } + return nil +} diff --git a/vendor/github.com/couchbase/vellum/encoding.go b/vendor/github.com/couchbase/vellum/encoding.go new file mode 100644 index 0000000000..988d486499 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/encoding.go @@ -0,0 +1,87 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vellum + +import ( + "encoding/binary" + "fmt" + "io" +) + +const headerSize = 16 + +type encoderConstructor func(w io.Writer) encoder +type decoderConstructor func([]byte) decoder + +var encoders = map[int]encoderConstructor{} +var decoders = map[int]decoderConstructor{} + +type encoder interface { + start() error + encodeState(s *builderNode, addr int) (int, error) + finish(count, rootAddr int) error + reset(w io.Writer) +} + +func loadEncoder(ver int, w io.Writer) (encoder, error) { + if cons, ok := encoders[ver]; ok { + return cons(w), nil + } + return nil, fmt.Errorf("no encoder for version %d registered", ver) +} + +func registerEncoder(ver int, cons encoderConstructor) { + encoders[ver] = cons +} + +type decoder interface { + getRoot() int + getLen() int + stateAt(addr int, prealloc fstState) (fstState, error) +} + +func loadDecoder(ver int, data []byte) (decoder, error) { + if cons, ok := decoders[ver]; ok { + return cons(data), nil + } + return nil, fmt.Errorf("no decoder for version %d registered", ver) +} + +func registerDecoder(ver int, cons decoderConstructor) { + decoders[ver] = cons +} + +func decodeHeader(header []byte) (ver int, typ int, err error) { + if len(header) < headerSize { + err = fmt.Errorf("invalid header < 16 bytes") + return + } + ver = int(binary.LittleEndian.Uint64(header[0:8])) + typ = int(binary.LittleEndian.Uint64(header[8:16])) + return +} + +// fstState represents a state inside the FTS runtime +// It is the main contract between the FST impl and the decoder +// The FST impl should work only with this interface, while only the decoder +// impl knows the physical representation. +type fstState interface { + Address() int + Final() bool + FinalOutput() uint64 + NumTransitions() int + TransitionFor(b byte) (int, int, uint64) + TransitionAt(i int) byte +} diff --git a/vendor/github.com/couchbase/vellum/fst.go b/vendor/github.com/couchbase/vellum/fst.go new file mode 100644 index 0000000000..ecc528395c --- /dev/null +++ b/vendor/github.com/couchbase/vellum/fst.go @@ -0,0 +1,254 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vellum + +import ( + "io" + + "github.com/willf/bitset" +) + +// FST is an in-memory representation of a finite state transducer, +// capable of returning the uint64 value associated with +// each []byte key stored, as well as enumerating all of the keys +// in order. +type FST struct { + f io.Closer + ver int + len int + typ int + data []byte + decoder decoder +} + +func new(data []byte, f io.Closer) (rv *FST, err error) { + rv = &FST{ + data: data, + f: f, + } + + rv.ver, rv.typ, err = decodeHeader(data) + if err != nil { + return nil, err + } + + rv.decoder, err = loadDecoder(rv.ver, rv.data) + if err != nil { + return nil, err + } + + rv.len = rv.decoder.getLen() + + return rv, nil +} + +// Contains returns true if this FST contains the specified key. +func (f *FST) Contains(val []byte) (bool, error) { + _, exists, err := f.Get(val) + return exists, err +} + +// Get returns the value associated with the key. NOTE: a value of zero +// does not imply the key does not exist, you must consult the second +// return value as well. +func (f *FST) Get(input []byte) (uint64, bool, error) { + return f.get(input, nil) +} + +func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) { + var total uint64 + curr := f.decoder.getRoot() + state, err := f.decoder.stateAt(curr, prealloc) + if err != nil { + return 0, false, err + } + for i := range input { + _, curr, output := state.TransitionFor(input[i]) + if curr == noneAddr { + return 0, false, nil + } + + state, err = f.decoder.stateAt(curr, state) + if err != nil { + return 0, false, err + } + + total += output + } + + if state.Final() { + total += state.FinalOutput() + return total, true, nil + } + return 0, false, nil +} + +// Version returns the encoding version used by this FST instance. +func (f *FST) Version() int { + return f.ver +} + +// Len returns the number of entries in this FST instance. +func (f *FST) Len() int { + return f.len +} + +// Type returns the type of this FST instance. +func (f *FST) Type() int { + return f.typ +} + +// Close will unmap any mmap'd data (if managed by vellum) and it will close +// the backing file (if managed by vellum). You MUST call Close() for any +// FST instance that is created. +func (f *FST) Close() error { + if f.f != nil { + err := f.f.Close() + if err != nil { + return err + } + } + f.data = nil + f.decoder = nil + return nil +} + +// Start returns the start state of this Automaton +func (f *FST) Start() int { + return f.decoder.getRoot() +} + +// IsMatch returns if this state is a matching state in this Automaton +func (f *FST) IsMatch(addr int) bool { + match, _ := f.IsMatchWithVal(addr) + return match +} + +// CanMatch returns if this state can ever transition to a matching state +// in this Automaton +func (f *FST) CanMatch(addr int) bool { + if addr == noneAddr { + return false + } + return true +} + +// WillAlwaysMatch returns if from this state the Automaton will always +// be in a matching state +func (f *FST) WillAlwaysMatch(int) bool { + return false +} + +// Accept returns the next state for this Automaton on input of byte b +func (f *FST) Accept(addr int, b byte) int { + next, _ := f.AcceptWithVal(addr, b) + return next +} + +// IsMatchWithVal returns if this state is a matching state in this Automaton +// and also returns the final output value for this state +func (f *FST) IsMatchWithVal(addr int) (bool, uint64) { + s, err := f.decoder.stateAt(addr, nil) + if err != nil { + return false, 0 + } + return s.Final(), s.FinalOutput() +} + +// AcceptWithVal returns the next state for this Automaton on input of byte b +// and also returns the output value for the transition +func (f *FST) AcceptWithVal(addr int, b byte) (int, uint64) { + s, err := f.decoder.stateAt(addr, nil) + if err != nil { + return noneAddr, 0 + } + _, next, output := s.TransitionFor(b) + return next, output +} + +// Iterator returns a new Iterator capable of enumerating the key/value pairs +// between the provided startKeyInclusive and endKeyExclusive. +func (f *FST) Iterator(startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) { + return newIterator(f, startKeyInclusive, endKeyExclusive, nil) +} + +// Search returns a new Iterator capable of enumerating the key/value pairs +// between the provided startKeyInclusive and endKeyExclusive that also +// satisfy the provided automaton. +func (f *FST) Search(aut Automaton, startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) { + return newIterator(f, startKeyInclusive, endKeyExclusive, aut) +} + +// Debug is only intended for debug purposes, it simply asks the underlying +// decoder visit each state, and pass it to the provided callback. +func (f *FST) Debug(callback func(int, interface{}) error) error { + + addr := f.decoder.getRoot() + set := bitset.New(uint(addr)) + stack := addrStack{addr} + + stateNumber := 0 + stack, addr = stack[:len(stack)-1], stack[len(stack)-1] + for addr != noneAddr { + if set.Test(uint(addr)) { + stack, addr = stack.Pop() + continue + } + set.Set(uint(addr)) + state, err := f.decoder.stateAt(addr, nil) + if err != nil { + return err + } + err = callback(stateNumber, state) + if err != nil { + return err + } + for i := 0; i < state.NumTransitions(); i++ { + tchar := state.TransitionAt(i) + _, dest, _ := state.TransitionFor(tchar) + stack = append(stack, dest) + } + stateNumber++ + stack, addr = stack.Pop() + } + + return nil +} + +type addrStack []int + +func (a addrStack) Pop() (addrStack, int) { + l := len(a) + if l < 1 { + return a, noneAddr + } + return a[:l-1], a[l-1] +} + +// Reader() returns a Reader instance that a single thread may use to +// retrieve data from the FST +func (f *FST) Reader() (*Reader, error) { + return &Reader{f: f}, nil +} + +// A Reader is meant for a single threaded use +type Reader struct { + f *FST + prealloc fstStateV1 +} + +func (r *Reader) Get(input []byte) (uint64, bool, error) { + return r.f.get(input, &r.prealloc) +} diff --git a/vendor/github.com/couchbase/vellum/fst_iterator.go b/vendor/github.com/couchbase/vellum/fst_iterator.go new file mode 100644 index 0000000000..389ac64aab --- /dev/null +++ b/vendor/github.com/couchbase/vellum/fst_iterator.go @@ -0,0 +1,276 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vellum + +import ( + "bytes" +) + +// Iterator represents a means of visity key/value pairs in order. +type Iterator interface { + + // Current() returns the key/value pair currently pointed to. + // The []byte of the key is ONLY guaranteed to be valid until + // another call to Next/Seek/Close. If you need it beyond that + // point you MUST make a copy. + Current() ([]byte, uint64) + + // Next() advances the iterator to the next key/value pair. + // If no more key/value pairs exist, ErrIteratorDone is returned. + Next() error + + // Seek() advances the iterator the specified key, or the next key + // if it does not exist. + // If no keys exist after that point, ErrIteratorDone is returned. + Seek(key []byte) error + + // Reset resets the Iterator' internal state to allow for iterator + // reuse (e.g. pooling). + Reset(f *FST, startKeyInclusive, endKeyExclusive []byte, aut Automaton) error + + // Close() frees any resources held by this iterator. + Close() error +} + +// FSTIterator is a structure for iterating key/value pairs in this FST in +// lexicographic order. Iterators should be constructed with the FSTIterator +// method on the parent FST structure. +type FSTIterator struct { + f *FST + aut Automaton + + startKeyInclusive []byte + endKeyExclusive []byte + + statesStack []fstState + keysStack []byte + keysPosStack []int + valsStack []uint64 + autStatesStack []int + + nextStart []byte +} + +func newIterator(f *FST, startKeyInclusive, endKeyExclusive []byte, + aut Automaton) (*FSTIterator, error) { + + rv := &FSTIterator{} + err := rv.Reset(f, startKeyInclusive, endKeyExclusive, aut) + if err != nil { + return nil, err + } + return rv, nil +} + +// Reset resets the Iterator' internal state to allow for iterator +// reuse (e.g. pooling). +func (i *FSTIterator) Reset(f *FST, startKeyInclusive, endKeyExclusive []byte, aut Automaton) error { + if aut == nil { + aut = alwaysMatchAutomaton + } + + i.f = f + i.startKeyInclusive = startKeyInclusive + i.endKeyExclusive = endKeyExclusive + i.aut = aut + + return i.pointTo(startKeyInclusive) +} + +// pointTo attempts to point us to the specified location +func (i *FSTIterator) pointTo(key []byte) error { + + // tried to seek before start + if bytes.Compare(key, i.startKeyInclusive) < 0 { + key = i.startKeyInclusive + } + + // trid to see past end + if i.endKeyExclusive != nil && bytes.Compare(key, i.endKeyExclusive) > 0 { + key = i.endKeyExclusive + } + + // reset any state, pointTo always starts over + i.statesStack = i.statesStack[:0] + i.keysStack = i.keysStack[:0] + i.keysPosStack = i.keysPosStack[:0] + i.valsStack = i.valsStack[:0] + i.autStatesStack = i.autStatesStack[:0] + + root, err := i.f.decoder.stateAt(i.f.decoder.getRoot(), nil) + if err != nil { + return err + } + + autStart := i.aut.Start() + + maxQ := -1 + // root is always part of the path + i.statesStack = append(i.statesStack, root) + i.autStatesStack = append(i.autStatesStack, autStart) + for j := 0; j < len(key); j++ { + curr := i.statesStack[len(i.statesStack)-1] + autCurr := i.autStatesStack[len(i.autStatesStack)-1] + + pos, nextAddr, nextVal := curr.TransitionFor(key[j]) + if nextAddr == noneAddr { + // needed transition doesn't exist + // find last trans before the one we needed + for q := 0; q < curr.NumTransitions(); q++ { + if curr.TransitionAt(q) < key[j] { + maxQ = q + } + } + break + } + autNext := i.aut.Accept(autCurr, key[j]) + + next, err := i.f.decoder.stateAt(nextAddr, nil) + if err != nil { + return err + } + + i.statesStack = append(i.statesStack, next) + i.keysStack = append(i.keysStack, key[j]) + i.keysPosStack = append(i.keysPosStack, pos) + i.valsStack = append(i.valsStack, nextVal) + i.autStatesStack = append(i.autStatesStack, autNext) + continue + } + + if !i.statesStack[len(i.statesStack)-1].Final() || !i.aut.IsMatch(i.autStatesStack[len(i.autStatesStack)-1]) || bytes.Compare(i.keysStack, key) < 0 { + return i.next(maxQ) + } + + return nil +} + +// Current returns the key and value currently pointed to by the iterator. +// If the iterator is not pointing at a valid value (because Iterator/Next/Seek) +// returned an error previously, it may return nil,0. +func (i *FSTIterator) Current() ([]byte, uint64) { + curr := i.statesStack[len(i.statesStack)-1] + if curr.Final() { + var total uint64 + for _, v := range i.valsStack { + total += v + } + total += curr.FinalOutput() + return i.keysStack, total + } + return nil, 0 +} + +// Next advances this iterator to the next key/value pair. If there is none +// or the advancement goes beyond the configured endKeyExclusive, then +// ErrIteratorDone is returned. +func (i *FSTIterator) Next() error { + return i.next(-1) +} + +func (i *FSTIterator) next(lastOffset int) error { + + // remember where we started + if cap(i.nextStart) < len(i.keysStack) { + i.nextStart = make([]byte, len(i.keysStack)) + } else { + i.nextStart = i.nextStart[0:len(i.keysStack)] + } + copy(i.nextStart, i.keysStack) + + for true { + curr := i.statesStack[len(i.statesStack)-1] + autCurr := i.autStatesStack[len(i.autStatesStack)-1] + + if curr.Final() && i.aut.IsMatch(autCurr) && + bytes.Compare(i.keysStack, i.nextStart) > 0 { + // in final state greater than start key + return nil + } + + nextOffset := lastOffset + 1 + if nextOffset < curr.NumTransitions() { + t := curr.TransitionAt(nextOffset) + autNext := i.aut.Accept(autCurr, t) + if i.aut.CanMatch(autNext) { + pos, nextAddr, v := curr.TransitionFor(t) + + // the next slot in the statesStack might have an + // fstState instance that we can reuse + var nextPrealloc fstState + if len(i.statesStack) < cap(i.statesStack) { + nextPrealloc = i.statesStack[0:cap(i.statesStack)][len(i.statesStack)] + } + + // push onto stack + next, err := i.f.decoder.stateAt(nextAddr, nextPrealloc) + if err != nil { + return err + } + i.statesStack = append(i.statesStack, next) + i.keysStack = append(i.keysStack, t) + i.keysPosStack = append(i.keysPosStack, pos) + i.valsStack = append(i.valsStack, v) + i.autStatesStack = append(i.autStatesStack, autNext) + lastOffset = -1 + + // check to see if new keystack might have gone too far + if i.endKeyExclusive != nil && bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 { + return ErrIteratorDone + } + } else { + lastOffset = nextOffset + } + + continue + } + + if len(i.statesStack) > 1 { + // no transitions, and still room to pop + i.statesStack = i.statesStack[:len(i.statesStack)-1] + i.keysStack = i.keysStack[:len(i.keysStack)-1] + lastOffset = i.keysPosStack[len(i.keysPosStack)-1] + + i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-1] + i.valsStack = i.valsStack[:len(i.valsStack)-1] + i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1] + continue + } else { + // stack len is 1 (root), can't go back further, we're done + break + } + + } + + return ErrIteratorDone +} + +// Seek advances this iterator to the specified key/value pair. If this key +// is not in the FST, Current() will return the next largest key. If this +// seek operation would go past the last key, or outside the configured +// startKeyInclusive/endKeyExclusive then ErrIteratorDone is returned. +func (i *FSTIterator) Seek(key []byte) error { + err := i.pointTo(key) + if err != nil { + return err + } + return nil +} + +// Close will free any resources held by this iterator. +func (i *FSTIterator) Close() error { + // at the moment we don't do anything, but wanted this for API completeness + return nil +} diff --git a/vendor/github.com/couchbase/vellum/merge_iterator.go b/vendor/github.com/couchbase/vellum/merge_iterator.go new file mode 100644 index 0000000000..f00f7783e1 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/merge_iterator.go @@ -0,0 +1,188 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vellum + +import ( + "bytes" +) + +// MergeFunc is used to choose the new value for a key when merging a slice +// of iterators, and the same key is observed with multiple values. +// Values presented to the MergeFunc will be in the same order as the +// original slice creating the MergeIterator. This allows some MergeFunc +// implementations to prioritize one iterator over another. +type MergeFunc func([]uint64) uint64 + +// MergeIterator implements the Iterator interface by traversing a slice +// of iterators and merging the contents of them. If the same key exists +// in mulitipe underlying iterators, a user-provided MergeFunc will be +// invoked to choose the new value. +type MergeIterator struct { + itrs []Iterator + f MergeFunc + currKs [][]byte + currVs []uint64 + + lowK []byte + lowV uint64 + lowIdxs []int + + mergeV []uint64 +} + +// NewMergeIterator creates a new MergeIterator over the provided slice of +// Iterators and with the specified MergeFunc to resolve duplicate keys. +func NewMergeIterator(itrs []Iterator, f MergeFunc) (*MergeIterator, error) { + rv := &MergeIterator{ + itrs: itrs, + f: f, + currKs: make([][]byte, len(itrs)), + currVs: make([]uint64, len(itrs)), + lowIdxs: make([]int, 0, len(itrs)), + mergeV: make([]uint64, 0, len(itrs)), + } + rv.init() + if rv.lowK == nil { + return rv, ErrIteratorDone + } + return rv, nil +} + +func (m *MergeIterator) init() { + for i, itr := range m.itrs { + m.currKs[i], m.currVs[i] = itr.Current() + } + m.updateMatches() +} + +func (m *MergeIterator) updateMatches() { + if len(m.itrs) < 1 { + return + } + m.lowK = m.currKs[0] + m.lowIdxs = m.lowIdxs[:0] + m.lowIdxs = append(m.lowIdxs, 0) + for i := 1; i < len(m.itrs); i++ { + if m.currKs[i] == nil { + continue + } + cmp := bytes.Compare(m.currKs[i], m.lowK) + if m.lowK == nil || cmp < 0 { + // reached a new low + m.lowK = m.currKs[i] + m.lowIdxs = m.lowIdxs[:0] + m.lowIdxs = append(m.lowIdxs, i) + } else if cmp == 0 { + m.lowIdxs = append(m.lowIdxs, i) + } + } + if len(m.lowIdxs) > 1 { + // merge multiple values + m.mergeV = m.mergeV[:0] + for _, vi := range m.lowIdxs { + m.mergeV = append(m.mergeV, m.currVs[vi]) + } + m.lowV = m.f(m.mergeV) + } else if len(m.lowIdxs) == 1 { + m.lowV = m.currVs[m.lowIdxs[0]] + } +} + +// Current returns the key and value currently pointed to by this iterator. +// If the iterator is not pointing at a valid value (because Iterator/Next/Seek) +// returned an error previously, it may return nil,0. +func (m *MergeIterator) Current() ([]byte, uint64) { + return m.lowK, m.lowV +} + +// Next advances this iterator to the next key/value pair. If there is none, +// then ErrIteratorDone is returned. +func (m *MergeIterator) Next() error { + // move all the current low iterators to next + for _, vi := range m.lowIdxs { + err := m.itrs[vi].Next() + if err != nil && err != ErrIteratorDone { + return err + } + m.currKs[vi], m.currVs[vi] = m.itrs[vi].Current() + } + m.updateMatches() + if m.lowK == nil { + return ErrIteratorDone + } + return nil +} + +// Seek advances this iterator to the specified key/value pair. If this key +// is not in the FST, Current() will return the next largest key. If this +// seek operation would go past the last key, then ErrIteratorDone is returned. +func (m *MergeIterator) Seek(key []byte) error { + for i := range m.itrs { + err := m.itrs[i].Seek(key) + if err != nil && err != ErrIteratorDone { + return err + } + } + m.updateMatches() + if m.lowK == nil { + return ErrIteratorDone + } + return nil +} + +// Close will attempt to close all the underlying Iterators. If any errors +// are encountered, the first will be returned. +func (m *MergeIterator) Close() error { + var rv error + for i := range m.itrs { + // close all iterators, return first error if any + err := m.itrs[i].Close() + if rv == nil { + rv = err + } + } + return rv +} + +// MergeMin chooses the minimum value +func MergeMin(vals []uint64) uint64 { + rv := vals[0] + for _, v := range vals[1:] { + if v < rv { + rv = v + } + } + return rv +} + +// MergeMax chooses the maximum value +func MergeMax(vals []uint64) uint64 { + rv := vals[0] + for _, v := range vals[1:] { + if v > rv { + rv = v + } + } + return rv +} + +// MergeSum sums the values +func MergeSum(vals []uint64) uint64 { + rv := vals[0] + for _, v := range vals[1:] { + rv += v + } + return rv +} diff --git a/vendor/github.com/couchbase/vellum/pack.go b/vendor/github.com/couchbase/vellum/pack.go new file mode 100644 index 0000000000..78f3dcd588 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/pack.go @@ -0,0 +1,55 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vellum + +func deltaAddr(base, trans uint64) uint64 { + // transition dest of 0 is special case + if trans == 0 { + return 0 + } + return base - trans +} + +const packOutMask = 1<<4 - 1 + +func encodePackSize(transSize, outSize int) byte { + var rv byte + rv = byte(transSize << 4) + rv |= byte(outSize) + return rv +} + +func decodePackSize(pack byte) (transSize int, packSize int) { + transSize = int(pack >> 4) + packSize = int(pack & packOutMask) + return +} + +const maxNumTrans = 1<<6 - 1 + +func encodeNumTrans(n int) byte { + if n <= maxNumTrans { + return byte(n) + } + return 0 +} + +func readPackedUint(data []byte) (rv uint64) { + for i := range data { + shifted := uint64(data[i]) << uint(i*8) + rv |= shifted + } + return +} diff --git a/vendor/github.com/couchbase/vellum/regexp/compile.go b/vendor/github.com/couchbase/vellum/regexp/compile.go new file mode 100644 index 0000000000..6922b749db --- /dev/null +++ b/vendor/github.com/couchbase/vellum/regexp/compile.go @@ -0,0 +1,316 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package regexp + +import ( + "regexp/syntax" + "unicode" + + "github.com/couchbase/vellum/utf8" +) + +type compiler struct { + sizeLimit uint + insts prog +} + +func newCompiler(sizeLimit uint) *compiler { + return &compiler{ + sizeLimit: sizeLimit, + } +} + +func (c *compiler) compile(ast *syntax.Regexp) (prog, error) { + err := c.c(ast) + if err != nil { + return nil, err + } + c.insts = append(c.insts, &inst{ + op: OpMatch, + }) + return c.insts, nil +} + +func (c *compiler) c(ast *syntax.Regexp) error { + if ast.Flags&syntax.NonGreedy > 1 { + return ErrNoLazy + } + + switch ast.Op { + case syntax.OpEndLine, syntax.OpBeginLine, + syntax.OpBeginText, syntax.OpEndText: + return ErrNoEmpty + case syntax.OpWordBoundary, syntax.OpNoWordBoundary: + return ErrNoWordBoundary + case syntax.OpEmptyMatch: + return nil + case syntax.OpLiteral: + for _, r := range ast.Rune { + if ast.Flags&syntax.FoldCase > 0 { + next := syntax.Regexp{ + Op: syntax.OpCharClass, + Flags: ast.Flags & syntax.FoldCase, + Rune0: [2]rune{r, r}, + } + next.Rune = next.Rune0[0:2] + return c.c(&next) + } + seqs, err := utf8.NewSequences(r, r) + if err != nil { + return err + } + for _, seq := range seqs { + c.compileUtf8Ranges(seq) + } + } + case syntax.OpAnyChar: + next := syntax.Regexp{ + Op: syntax.OpCharClass, + Flags: ast.Flags & syntax.FoldCase, + Rune0: [2]rune{0, unicode.MaxRune}, + } + next.Rune = next.Rune0[:2] + return c.c(&next) + case syntax.OpAnyCharNotNL: + next := syntax.Regexp{ + Op: syntax.OpCharClass, + Flags: ast.Flags & syntax.FoldCase, + Rune: []rune{0, 0x09, 0x0B, unicode.MaxRune}, + } + return c.c(&next) + case syntax.OpCharClass: + return c.compileClass(ast) + case syntax.OpCapture: + return c.c(ast.Sub[0]) + case syntax.OpConcat: + for _, sub := range ast.Sub { + err := c.c(sub) + if err != nil { + return err + } + } + return nil + case syntax.OpAlternate: + if len(ast.Sub) == 0 { + return nil + } + jmpsToEnd := []uint{} + + // does not handle last entry + for i := 0; i < len(ast.Sub)-1; i++ { + sub := ast.Sub[i] + split := c.emptySplit() + j1 := c.top() + err := c.c(sub) + if err != nil { + return err + } + jmpsToEnd = append(jmpsToEnd, c.emptyJump()) + j2 := c.top() + c.setSplit(split, j1, j2) + } + // handle last entry + err := c.c(ast.Sub[len(ast.Sub)-1]) + if err != nil { + return err + } + end := uint(len(c.insts)) + for _, jmpToEnd := range jmpsToEnd { + c.setJump(jmpToEnd, end) + } + case syntax.OpQuest: + split := c.emptySplit() + j1 := c.top() + err := c.c(ast.Sub[0]) + if err != nil { + return err + } + j2 := c.top() + c.setSplit(split, j1, j2) + + case syntax.OpStar: + j1 := c.top() + split := c.emptySplit() + j2 := c.top() + err := c.c(ast.Sub[0]) + if err != nil { + return err + } + jmp := c.emptyJump() + j3 := uint(len(c.insts)) + + c.setJump(jmp, j1) + c.setSplit(split, j2, j3) + + case syntax.OpPlus: + j1 := c.top() + err := c.c(ast.Sub[0]) + if err != nil { + return err + } + split := c.emptySplit() + j2 := c.top() + c.setSplit(split, j1, j2) + + case syntax.OpRepeat: + if ast.Max == -1 { + for i := 0; i < ast.Min; i++ { + err := c.c(ast.Sub[0]) + if err != nil { + return err + } + } + next := syntax.Regexp{ + Op: syntax.OpStar, + Flags: ast.Flags, + Sub: ast.Sub, + Sub0: ast.Sub0, + Rune: ast.Rune, + Rune0: ast.Rune0, + } + return c.c(&next) + } + for i := 0; i < ast.Min; i++ { + err := c.c(ast.Sub[0]) + if err != nil { + return err + } + } + var splits, starts []uint + for i := ast.Min; i < ast.Max; i++ { + splits = append(splits, c.emptySplit()) + starts = append(starts, uint(len(c.insts))) + err := c.c(ast.Sub[0]) + if err != nil { + return err + } + } + end := uint(len(c.insts)) + for i := 0; i < len(splits); i++ { + c.setSplit(splits[i], starts[i], end) + } + + } + + return c.checkSize() +} + +func (c *compiler) checkSize() error { + if uint(len(c.insts)*instSize) > c.sizeLimit { + return ErrCompiledTooBig + } + return nil +} + +func (c *compiler) compileClass(ast *syntax.Regexp) error { + if len(ast.Rune) == 0 { + return nil + } + var jmps []uint + + // does not do last pair + for i := 0; i < len(ast.Rune)-2; i += 2 { + rstart := ast.Rune[i] + rend := ast.Rune[i+1] + + split := c.emptySplit() + j1 := c.top() + err := c.compileClassRange(rstart, rend) + if err != nil { + return err + } + jmps = append(jmps, c.emptyJump()) + j2 := c.top() + c.setSplit(split, j1, j2) + } + // handle last pair + rstart := ast.Rune[len(ast.Rune)-2] + rend := ast.Rune[len(ast.Rune)-1] + err := c.compileClassRange(rstart, rend) + if err != nil { + return err + } + end := c.top() + for _, jmp := range jmps { + c.setJump(jmp, end) + } + return nil +} + +func (c *compiler) compileClassRange(startR, endR rune) error { + seqs, err := utf8.NewSequences(startR, endR) + if err != nil { + return err + } + var jmps []uint + + // does not do last entry + for i := 0; i < len(seqs)-1; i++ { + seq := seqs[i] + split := c.emptySplit() + j1 := c.top() + c.compileUtf8Ranges(seq) + jmps = append(jmps, c.emptyJump()) + j2 := c.top() + c.setSplit(split, j1, j2) + } + // handle last entry + c.compileUtf8Ranges(seqs[len(seqs)-1]) + end := c.top() + for _, jmp := range jmps { + c.setJump(jmp, end) + } + + return nil +} + +func (c *compiler) compileUtf8Ranges(seq utf8.Sequence) { + for _, r := range seq { + c.insts = append(c.insts, &inst{ + op: OpRange, + rangeStart: r.Start, + rangeEnd: r.End, + }) + } +} + +func (c *compiler) emptySplit() uint { + c.insts = append(c.insts, &inst{ + op: OpSplit, + }) + return c.top() - 1 +} + +func (c *compiler) emptyJump() uint { + c.insts = append(c.insts, &inst{ + op: OpJmp, + }) + return c.top() - 1 +} + +func (c *compiler) setSplit(i, pc1, pc2 uint) { + split := c.insts[i] + split.splitA = pc1 + split.splitB = pc2 +} + +func (c *compiler) setJump(i, pc uint) { + jmp := c.insts[i] + jmp.to = pc +} + +func (c *compiler) top() uint { + return uint(len(c.insts)) +} diff --git a/vendor/github.com/couchbase/vellum/regexp/dfa.go b/vendor/github.com/couchbase/vellum/regexp/dfa.go new file mode 100644 index 0000000000..9864606b6a --- /dev/null +++ b/vendor/github.com/couchbase/vellum/regexp/dfa.go @@ -0,0 +1,188 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package regexp + +import ( + "encoding/binary" + "fmt" +) + +// StateLimit is the maximum number of states allowed +const StateLimit = 10000 + +// ErrTooManyStates is returned if you attempt to build a Levenshtein +// automaton which requries too many states. +var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states", + StateLimit) + +type dfaBuilder struct { + dfa *dfa + cache map[string]int + keyBuf []byte +} + +func newDfaBuilder(insts prog) *dfaBuilder { + d := &dfaBuilder{ + dfa: &dfa{ + insts: insts, + states: make([]*state, 0, 16), + }, + cache: make(map[string]int, 1024), + } + // add 0 state that is invalid + d.dfa.states = append(d.dfa.states, &state{ + next: make([]int, 256), + match: false, + }) + return d +} + +func (d *dfaBuilder) build() (*dfa, error) { + cur := newSparseSet(uint(len(d.dfa.insts))) + next := newSparseSet(uint(len(d.dfa.insts))) + + d.dfa.add(cur, 0) + states := intStack{d.cachedState(cur)} + seen := make(map[int]struct{}) + var s int + states, s = states.Pop() + for s != 0 { + for b := 0; b < 256; b++ { + ns := d.runState(cur, next, s, byte(b)) + if ns != 0 { + if _, ok := seen[ns]; !ok { + seen[ns] = struct{}{} + states = states.Push(ns) + } + } + if len(d.dfa.states) > StateLimit { + return nil, ErrTooManyStates + } + } + states, s = states.Pop() + } + return d.dfa, nil +} + +func (d *dfaBuilder) runState(cur, next *sparseSet, state int, b byte) int { + cur.Clear() + for _, ip := range d.dfa.states[state].insts { + cur.Add(ip) + } + d.dfa.run(cur, next, b) + nextState := d.cachedState(next) + d.dfa.states[state].next[b] = nextState + return nextState +} + +func instsKey(insts []uint, buf []byte) []byte { + if cap(buf) < 8*len(insts) { + buf = make([]byte, 8*len(insts)) + } else { + buf = buf[0 : 8*len(insts)] + } + for i, inst := range insts { + binary.LittleEndian.PutUint64(buf[i*8:], uint64(inst)) + } + return buf +} + +func (d *dfaBuilder) cachedState(set *sparseSet) int { + var insts []uint + var isMatch bool + for i := uint(0); i < uint(set.Len()); i++ { + ip := set.Get(i) + switch d.dfa.insts[ip].op { + case OpRange: + insts = append(insts, ip) + case OpMatch: + isMatch = true + insts = append(insts, ip) + } + } + if len(insts) == 0 { + return 0 + } + d.keyBuf = instsKey(insts, d.keyBuf) + v, ok := d.cache[string(d.keyBuf)] + if ok { + return v + } + d.dfa.states = append(d.dfa.states, &state{ + insts: insts, + next: make([]int, 256), + match: isMatch, + }) + newV := len(d.dfa.states) - 1 + d.cache[string(d.keyBuf)] = newV + return newV +} + +type dfa struct { + insts prog + states []*state +} + +func (d *dfa) add(set *sparseSet, ip uint) { + if set.Contains(ip) { + return + } + set.Add(ip) + switch d.insts[ip].op { + case OpJmp: + d.add(set, d.insts[ip].to) + case OpSplit: + d.add(set, d.insts[ip].splitA) + d.add(set, d.insts[ip].splitB) + } +} + +func (d *dfa) run(from, to *sparseSet, b byte) bool { + to.Clear() + var isMatch bool + for i := uint(0); i < uint(from.Len()); i++ { + ip := from.Get(i) + switch d.insts[ip].op { + case OpMatch: + isMatch = true + case OpRange: + if d.insts[ip].rangeStart <= b && + b <= d.insts[ip].rangeEnd { + d.add(to, ip+1) + } + } + } + return isMatch +} + +type state struct { + insts []uint + next []int + match bool +} + +type intStack []int + +func (s intStack) Push(v int) intStack { + return append(s, v) +} + +func (s intStack) Pop() (intStack, int) { + l := len(s) + if l < 1 { + return s, 0 + } + return s[:l-1], s[l-1] +} diff --git a/vendor/github.com/couchbase/vellum/regexp/inst.go b/vendor/github.com/couchbase/vellum/regexp/inst.go new file mode 100644 index 0000000000..61cbf2f333 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/regexp/inst.go @@ -0,0 +1,62 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package regexp + +import "fmt" + +// instOp represents a instruction operation +type instOp int + +// the enumeration of operations +const ( + OpMatch instOp = iota + OpJmp + OpSplit + OpRange +) + +// instSize is the approxmiate size of the an inst struct in bytes +const instSize = 40 + +type inst struct { + op instOp + to uint + splitA uint + splitB uint + rangeStart byte + rangeEnd byte +} + +func (i *inst) String() string { + switch i.op { + case OpJmp: + return fmt.Sprintf("JMP: %d", i.to) + case OpSplit: + return fmt.Sprintf("SPLIT: %d - %d", i.splitA, i.splitB) + case OpRange: + return fmt.Sprintf("RANGE: %x - %x", i.rangeStart, i.rangeEnd) + } + return "MATCH" +} + +type prog []*inst + +func (p prog) String() string { + rv := "\n" + for i, pi := range p { + rv += fmt.Sprintf("%d %v\n", i, pi) + } + return rv +} diff --git a/vendor/github.com/couchbase/vellum/regexp/regexp.go b/vendor/github.com/couchbase/vellum/regexp/regexp.go new file mode 100644 index 0000000000..ed0e7823e1 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/regexp/regexp.go @@ -0,0 +1,113 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package regexp + +import ( + "fmt" + "regexp/syntax" +) + +// ErrNoEmpty returned when "zero width assertions" are used +var ErrNoEmpty = fmt.Errorf("zero width assertions not allowed") + +// ErrNoWordBoundary returned when word boundaries are used +var ErrNoWordBoundary = fmt.Errorf("word boundaries are not allowed") + +// ErrNoBytes returned when byte literals are used +var ErrNoBytes = fmt.Errorf("byte literals are not allowed") + +// ErrNoLazy returned when lazy quantifiers are used +var ErrNoLazy = fmt.Errorf("lazy quantifiers are not allowed") + +// ErrCompiledTooBig returned when regular expression parses into +// too many instructions +var ErrCompiledTooBig = fmt.Errorf("too many instructions") + +// Regexp implements the vellum.Automaton interface for matcing a user +// specified regular expression. +type Regexp struct { + orig string + dfa *dfa +} + +// NewRegexp creates a new Regular Expression automaton with the specified +// expression. By default it is limited to approximately 10MB for the +// compiled finite state automaton. If this size is exceeded, +// ErrCompiledTooBig will be returned. +func New(expr string) (*Regexp, error) { + return NewWithLimit(expr, 10*(1<<20)) +} + +// NewRegexpWithLimit creates a new Regular Expression automaton with +// the specified expression. The size of the compiled finite state +// automaton exceeds the user specified size, ErrCompiledTooBig will be +// returned. +func NewWithLimit(expr string, size uint) (*Regexp, error) { + parsed, err := syntax.Parse(expr, syntax.Perl) + if err != nil { + return nil, err + } + compiler := newCompiler(size) + insts, err := compiler.compile(parsed) + if err != nil { + return nil, err + } + dfaBuilder := newDfaBuilder(insts) + dfa, err := dfaBuilder.build() + if err != nil { + return nil, err + } + return &Regexp{ + orig: expr, + dfa: dfa, + }, nil +} + +// Start returns the start state of this automaton. +func (r *Regexp) Start() int { + return 1 +} + +// IsMatch returns if the specified state is a matching state. +func (r *Regexp) IsMatch(s int) bool { + if s < len(r.dfa.states) { + return r.dfa.states[s].match + } + return false +} + +// CanMatch returns if the specified state can ever transition to a matching +// state. +func (r *Regexp) CanMatch(s int) bool { + if s < len(r.dfa.states) && s > 0 { + return true + } + return false +} + +// WillAlwaysMatch returns if the specified state will always end in a +// matching state. +func (r *Regexp) WillAlwaysMatch(int) bool { + return false +} + +// Accept returns the new state, resulting from the transite byte b +// when currently in the state s. +func (r *Regexp) Accept(s int, b byte) int { + if s < len(r.dfa.states) { + return r.dfa.states[s].next[b] + } + return 0 +} diff --git a/vendor/github.com/couchbase/vellum/regexp/sparse.go b/vendor/github.com/couchbase/vellum/regexp/sparse.go new file mode 100644 index 0000000000..7afbfceba6 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/regexp/sparse.go @@ -0,0 +1,54 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package regexp + +type sparseSet struct { + dense []uint + sparse []uint + size uint +} + +func newSparseSet(size uint) *sparseSet { + return &sparseSet{ + dense: make([]uint, size), + sparse: make([]uint, size), + size: 0, + } +} + +func (s *sparseSet) Len() int { + return int(s.size) +} + +func (s *sparseSet) Add(ip uint) uint { + i := s.size + s.dense[i] = ip + s.sparse[ip] = i + s.size++ + return i +} + +func (s *sparseSet) Get(i uint) uint { + return s.dense[i] +} + +func (s *sparseSet) Contains(ip uint) bool { + i := s.sparse[ip] + return i < s.size && s.dense[i] == ip +} + +func (s *sparseSet) Clear() { + s.size = 0 +} diff --git a/vendor/github.com/couchbase/vellum/registry.go b/vendor/github.com/couchbase/vellum/registry.go new file mode 100644 index 0000000000..3721a7c9c3 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/registry.go @@ -0,0 +1,116 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vellum + +import ( + "hash" + "hash/fnv" +) + +type registryCell struct { + addr int + node *builderNode +} + +type registry struct { + table []registryCell + tableSize uint + mruSize uint + hasher hash.Hash64 +} + +func newRegistry(tableSize, mruSize int) *registry { + nsize := tableSize * mruSize + rv := ®istry{ + table: make([]registryCell, nsize), + tableSize: uint(tableSize), + mruSize: uint(mruSize), + hasher: fnv.New64a(), + } + return rv +} + +func (r *registry) Reset() { + for i := 0; i < len(r.table); i++ { + r.table[i] = registryCell{} + } + r.hasher.Reset() +} + +func (r *registry) entry(node *builderNode) (bool, int, *registryCell) { + if len(r.table) == 0 { + return false, 0, nil + } + bucket := r.hash(node) + start := r.mruSize * uint(bucket) + end := start + r.mruSize + rc := registryCache(r.table[start:end]) + return rc.entry(node) +} + +const fnvPrime = 1099511628211 + +func (r *registry) hash(b *builderNode) int { + var final uint64 + if b.final { + final = 1 + } + + var h uint64 = 14695981039346656037 + h = (h ^ final) * fnvPrime + h = (h ^ b.finalOutput) * fnvPrime + for _, t := range b.trans { + h = (h ^ uint64(t.in)) * fnvPrime + h = (h ^ t.out) * fnvPrime + h = (h ^ uint64(t.addr)) * fnvPrime + } + return int(h % uint64(r.tableSize)) +} + +type registryCache []registryCell + +func (r registryCache) entry(node *builderNode) (bool, int, *registryCell) { + if len(r) == 1 { + if r[0].node != nil && r[0].node.equiv(node) { + return true, r[0].addr, nil + } + r[0].node = node + return false, 0, &r[0] + } + for i := range r { + if r[i].node != nil && r[i].node.equiv(node) { + addr := r[i].addr + r.promote(i) + return true, addr, nil + } + } + // no match + last := len(r) - 1 + r[last].node = node // discard LRU + r.promote(last) + return false, 0, &r[0] + +} + +func (r registryCache) promote(i int) { + for i > 0 { + r.swap(i-1, i) + i-- + } +} + +func (r registryCache) swap(i, j int) { + r[i], r[j] = r[j], r[i] +} diff --git a/vendor/github.com/couchbase/vellum/transducer.go b/vendor/github.com/couchbase/vellum/transducer.go new file mode 100644 index 0000000000..753c422d57 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/transducer.go @@ -0,0 +1,55 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vellum + +// Transducer represents the general contract of a byte-based finite transducer +type Transducer interface { + + // all transducers are also automatons + Automaton + + // IsMatchWithValue returns true if and only if the state is a match + // additionally it returns a states final value (if any) + IsMatchWithVal(int) (bool, uint64) + + // Accept returns the next state given the input to the specified state + // additionally it returns the value associated with the transition + AcceptWithVal(int, byte) (int, uint64) +} + +// TransducerGet implements an generic Get() method which works +// on any implementation of Transducer +// The caller MUST check the boolean return value for a match. +// Zero is a valid value regardless of match status, +// and if it is NOT a match, the value collected so far is returned. +func TransducerGet(t Transducer, k []byte) (bool, uint64) { + var total uint64 + i := 0 + curr := t.Start() + for t.CanMatch(curr) && i < len(k) { + var transVal uint64 + curr, transVal = t.AcceptWithVal(curr, k[i]) + if curr == noneAddr { + break + } + total += transVal + i++ + } + if i != len(k) { + return false, total + } + match, finalVal := t.IsMatchWithVal(curr) + return match, total + finalVal +} diff --git a/vendor/github.com/couchbase/vellum/utf8/utf8.go b/vendor/github.com/couchbase/vellum/utf8/utf8.go new file mode 100644 index 0000000000..47dbe9d1c5 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/utf8/utf8.go @@ -0,0 +1,246 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utf8 + +import ( + "fmt" + "unicode/utf8" +) + +// Sequences is a collection of Sequence +type Sequences []Sequence + +// NewSequences constructs a collection of Sequence which describe the +// byte ranges covered between the start and end runes. +func NewSequences(start, end rune) (Sequences, error) { + var rv Sequences + + var rangeStack rangeStack + rangeStack = rangeStack.Push(&scalarRange{start, end}) + + rangeStack, r := rangeStack.Pop() +TOP: + for r != nil { + INNER: + for { + r1, r2 := r.split() + if r1 != nil { + rangeStack = rangeStack.Push(&scalarRange{r2.start, r2.end}) + r.start = r1.start + r.end = r1.end + continue INNER + } + if !r.valid() { + rangeStack, r = rangeStack.Pop() + continue TOP + } + for i := 1; i < utf8.UTFMax; i++ { + max := maxScalarValue(i) + if r.start <= max && max < r.end { + rangeStack = rangeStack.Push(&scalarRange{max + 1, r.end}) + r.end = max + continue INNER + } + } + asciiRange := r.ascii() + if asciiRange != nil { + rv = append(rv, Sequence{ + asciiRange, + }) + rangeStack, r = rangeStack.Pop() + continue TOP + } + for i := uint(1); i < utf8.UTFMax; i++ { + m := rune((1 << (6 * i)) - 1) + if (r.start & ^m) != (r.end & ^m) { + if (r.start & m) != 0 { + rangeStack = rangeStack.Push(&scalarRange{(r.start | m) + 1, r.end}) + r.end = r.start | m + continue INNER + } + if (r.end & m) != m { + rangeStack = rangeStack.Push(&scalarRange{r.end & ^m, r.end}) + r.end = (r.end & ^m) - 1 + continue INNER + } + } + } + start := make([]byte, utf8.UTFMax) + end := make([]byte, utf8.UTFMax) + n, m := r.encode(start, end) + seq, err := SequenceFromEncodedRange(start[0:n], end[0:m]) + if err != nil { + return nil, err + } + rv = append(rv, seq) + rangeStack, r = rangeStack.Pop() + continue TOP + } + } + + return rv, nil +} + +// Sequence is a collection of *Range +type Sequence []*Range + +// SequenceFromEncodedRange creates sequence from the encoded bytes +func SequenceFromEncodedRange(start, end []byte) (Sequence, error) { + if len(start) != len(end) { + return nil, fmt.Errorf("byte slices must be the same length") + } + switch len(start) { + case 2: + return Sequence{ + &Range{start[0], end[0]}, + &Range{start[1], end[1]}, + }, nil + case 3: + return Sequence{ + &Range{start[0], end[0]}, + &Range{start[1], end[1]}, + &Range{start[2], end[2]}, + }, nil + case 4: + return Sequence{ + &Range{start[0], end[0]}, + &Range{start[1], end[1]}, + &Range{start[2], end[2]}, + &Range{start[3], end[3]}, + }, nil + } + + return nil, fmt.Errorf("invalid encoded byte length") +} + +// Matches checks to see if the provided byte slice matches the Sequence +func (u Sequence) Matches(bytes []byte) bool { + if len(bytes) < len(u) { + return false + } + for i := 0; i < len(u); i++ { + if !u[i].matches(bytes[i]) { + return false + } + } + return true +} + +func (u Sequence) String() string { + switch len(u) { + case 1: + return fmt.Sprintf("%v", u[0]) + case 2: + return fmt.Sprintf("%v%v", u[0], u[1]) + case 3: + return fmt.Sprintf("%v%v%v", u[0], u[1], u[2]) + case 4: + return fmt.Sprintf("%v%v%v%v", u[0], u[1], u[2], u[3]) + default: + return fmt.Sprintf("invalid utf8 sequence") + } +} + +// Range describes a single range of byte values +type Range struct { + Start byte + End byte +} + +func (u Range) matches(b byte) bool { + if u.Start <= b && b <= u.End { + return true + } + return false +} + +func (u Range) String() string { + if u.Start == u.End { + return fmt.Sprintf("[%X]", u.Start) + } + return fmt.Sprintf("[%X-%X]", u.Start, u.End) +} + +type scalarRange struct { + start rune + end rune +} + +func (s *scalarRange) String() string { + return fmt.Sprintf("ScalarRange(%d,%d)", s.start, s.end) +} + +// split this scalar range if it overlaps with a surrogate codepoint +func (s *scalarRange) split() (*scalarRange, *scalarRange) { + if s.start < 0xe000 && s.end > 0xd7ff { + return &scalarRange{ + start: s.start, + end: 0xd7ff, + }, + &scalarRange{ + start: 0xe000, + end: s.end, + } + } + return nil, nil +} + +func (s *scalarRange) valid() bool { + return s.start <= s.end +} + +func (s *scalarRange) ascii() *Range { + if s.valid() && s.end <= 0x7f { + return &Range{ + Start: byte(s.start), + End: byte(s.end), + } + } + return nil +} + +// start and end MUST have capacity for utf8.UTFMax bytes +func (s *scalarRange) encode(start, end []byte) (int, int) { + n := utf8.EncodeRune(start, s.start) + m := utf8.EncodeRune(end, s.end) + return n, m +} + +type rangeStack []*scalarRange + +func (s rangeStack) Push(v *scalarRange) rangeStack { + return append(s, v) +} + +func (s rangeStack) Pop() (rangeStack, *scalarRange) { + l := len(s) + if l < 1 { + return s, nil + } + return s[:l-1], s[l-1] +} + +func maxScalarValue(nbytes int) rune { + switch nbytes { + case 1: + return 0x007f + case 2: + return 0x07FF + case 3: + return 0xFFFF + default: + return 0x10FFFF + } +} diff --git a/vendor/github.com/couchbase/vellum/vellum.go b/vendor/github.com/couchbase/vellum/vellum.go new file mode 100644 index 0000000000..b2537b3f00 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/vellum.go @@ -0,0 +1,111 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* +Package vellum is a library for building, serializing and executing an FST (finite +state transducer). + +There are two distinct phases, building an FST and using it. + +When building an FST, you insert keys ([]byte) and their associated value +(uint64). Insert operations MUST be done in lexicographic order. While +building the FST, data is streamed to an underlying Writer. At the conclusion +of building, you MUST call Close() on the builder. + +After completion of the build phase, you can either Open() the FST if you +serialized it to disk. Alternatively, if you already have the bytes in +memory, you can use Load(). By default, Open() will use mmap to avoid loading +the entire file into memory. + +Once the FST is ready, you can use the Contains() method to see if a keys is +in the FST. You can use the Get() method to see if a key is in the FST and +retrieve it's associated value. And, you can use the Iterator method to +enumerate key/value pairs within a specified range. + +*/ +package vellum + +import ( + "errors" + "io" +) + +// ErrOutOfOrder is returned when values are not inserted in +// lexicographic order. +var ErrOutOfOrder = errors.New("values not inserted in lexicographic order") + +// ErrIteratorDone is returned by Iterator/Next/Seek methods when the +// Current() value pointed to by the iterator is greater than the last +// key in this FST, or outside the configured startKeyInclusive/endKeyExclusive +// range of the Iterator. +var ErrIteratorDone = errors.New("iterator-done") + +// BuilderOpts is a structure to let advanced users customize the behavior +// of the builder and some aspects of the generated FST. +type BuilderOpts struct { + Encoder int + RegistryTableSize int + RegistryMRUSize int +} + +// New returns a new Builder which will stream out the +// underlying representation to the provided Writer as the set is built. +func New(w io.Writer, opts *BuilderOpts) (*Builder, error) { + return newBuilder(w, opts) +} + +// Open loads the FST stored in the provided path +func Open(path string) (*FST, error) { + return open(path) +} + +// Load will return the FST represented by the provided byte slice. +func Load(data []byte) (*FST, error) { + return new(data, nil) +} + +// Merge will iterate through the provided Iterators, merge duplicate keys +// with the provided MergeFunc, and build a new FST to the provided Writer. +func Merge(w io.Writer, opts *BuilderOpts, itrs []Iterator, f MergeFunc) error { + builder, err := New(w, opts) + if err != nil { + return err + } + + itr, err := NewMergeIterator(itrs, f) + for err == nil { + k, v := itr.Current() + err = builder.Insert(k, v) + if err != nil { + return err + } + err = itr.Next() + } + + if err != nil && err != ErrIteratorDone { + return err + } + + err = itr.Close() + if err != nil { + return err + } + + err = builder.Close() + if err != nil { + return err + } + + return nil +} diff --git a/vendor/github.com/couchbase/vellum/vellum_mmap.go b/vendor/github.com/couchbase/vellum/vellum_mmap.go new file mode 100644 index 0000000000..5acd2f4707 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/vellum_mmap.go @@ -0,0 +1,60 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !nommap + +package vellum + +import ( + "os" + + mmap "github.com/edsrzf/mmap-go" +) + +type mmapWrapper struct { + f *os.File + mm mmap.MMap +} + +func (m *mmapWrapper) Close() (err error) { + if m.mm != nil { + err = m.mm.Unmap() + } + // try to close file even if unmap failed + if m.f != nil { + err2 := m.f.Close() + if err == nil { + // try to return first error + err = err2 + } + } + return +} + +func open(path string) (*FST, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + mm, err := mmap.Map(f, mmap.RDONLY, 0) + if err != nil { + // mmap failed, try to close the file + _ = f.Close() + return nil, err + } + return new(mm, &mmapWrapper{ + f: f, + mm: mm, + }) +} diff --git a/vendor/github.com/couchbase/vellum/vellum_nommap.go b/vendor/github.com/couchbase/vellum/vellum_nommap.go new file mode 100644 index 0000000000..e985272872 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/vellum_nommap.go @@ -0,0 +1,27 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build nommap + +package vellum + +import "io/ioutil" + +func open(path string) (*FST, error) { + data, err := ioutil.ReadFile(string) + if err != nil { + return nil, err + } + return new(data, nil) +} diff --git a/vendor/github.com/couchbase/vellum/writer.go b/vendor/github.com/couchbase/vellum/writer.go new file mode 100644 index 0000000000..d655d47f7f --- /dev/null +++ b/vendor/github.com/couchbase/vellum/writer.go @@ -0,0 +1,92 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vellum + +import ( + "bufio" + "io" +) + +// A writer is a buffered writer used by vellum. It counts how many bytes have +// been written and has some convenience methods used for encoding the data. +type writer struct { + w *bufio.Writer + counter int +} + +func newWriter(w io.Writer) *writer { + return &writer{ + w: bufio.NewWriter(w), + } +} + +func (w *writer) Reset(newWriter io.Writer) { + w.w.Reset(newWriter) + w.counter = 0 +} + +func (w *writer) WriteByte(c byte) error { + err := w.w.WriteByte(c) + if err != nil { + return err + } + w.counter++ + return nil +} + +func (w *writer) Write(p []byte) (int, error) { + n, err := w.w.Write(p) + w.counter += n + return n, err +} + +func (w *writer) Flush() error { + return w.w.Flush() +} + +func (w *writer) WritePackedUintIn(v uint64, n int) error { + for shift := uint(0); shift < uint(n*8); shift += 8 { + err := w.WriteByte(byte(v >> shift)) + if err != nil { + return err + } + } + + return nil +} + +func (w *writer) WritePackedUint(v uint64) error { + n := packedSize(v) + return w.WritePackedUintIn(v, n) +} + +func packedSize(n uint64) int { + if n < 1<<8 { + return 1 + } else if n < 1<<16 { + return 2 + } else if n < 1<<24 { + return 3 + } else if n < 1<<32 { + return 4 + } else if n < 1<<40 { + return 5 + } else if n < 1<<48 { + return 6 + } else if n < 1<<56 { + return 7 + } + return 8 +} diff --git a/vendor/github.com/edsrzf/mmap-go/LICENSE b/vendor/github.com/edsrzf/mmap-go/LICENSE new file mode 100644 index 0000000000..8f05f338ac --- /dev/null +++ b/vendor/github.com/edsrzf/mmap-go/LICENSE @@ -0,0 +1,25 @@ +Copyright (c) 2011, Evan Shaw <edsrzf@gmail.com> +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/vendor/github.com/edsrzf/mmap-go/README.md b/vendor/github.com/edsrzf/mmap-go/README.md new file mode 100644 index 0000000000..4cc2bfe1c8 --- /dev/null +++ b/vendor/github.com/edsrzf/mmap-go/README.md @@ -0,0 +1,12 @@ +mmap-go +======= + +mmap-go is a portable mmap package for the [Go programming language](http://golang.org). +It has been tested on Linux (386, amd64), OS X, and Windows (386). It should also +work on other Unix-like platforms, but hasn't been tested with them. I'm interested +to hear about the results. + +I haven't been able to add more features without adding significant complexity, +so mmap-go doesn't support mprotect, mincore, and maybe a few other things. +If you're running on a Unix-like platform and need some of these features, +I suggest Gustavo Niemeyer's [gommap](http://labix.org/gommap). diff --git a/vendor/github.com/edsrzf/mmap-go/mmap.go b/vendor/github.com/edsrzf/mmap-go/mmap.go new file mode 100644 index 0000000000..14fb22580a --- /dev/null +++ b/vendor/github.com/edsrzf/mmap-go/mmap.go @@ -0,0 +1,116 @@ +// Copyright 2011 Evan Shaw. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file defines the common package interface and contains a little bit of +// factored out logic. + +// Package mmap allows mapping files into memory. It tries to provide a simple, reasonably portable interface, +// but doesn't go out of its way to abstract away every little platform detail. +// This specifically means: +// * forked processes may or may not inherit mappings +// * a file's timestamp may or may not be updated by writes through mappings +// * specifying a size larger than the file's actual size can increase the file's size +// * If the mapped file is being modified by another process while your program's running, don't expect consistent results between platforms +package mmap + +import ( + "errors" + "os" + "reflect" + "unsafe" +) + +const ( + // RDONLY maps the memory read-only. + // Attempts to write to the MMap object will result in undefined behavior. + RDONLY = 0 + // RDWR maps the memory as read-write. Writes to the MMap object will update the + // underlying file. + RDWR = 1 << iota + // COPY maps the memory as copy-on-write. Writes to the MMap object will affect + // memory, but the underlying file will remain unchanged. + COPY + // If EXEC is set, the mapped memory is marked as executable. + EXEC +) + +const ( + // If the ANON flag is set, the mapped memory will not be backed by a file. + ANON = 1 << iota +) + +// MMap represents a file mapped into memory. +type MMap []byte + +// Map maps an entire file into memory. +// If ANON is set in flags, f is ignored. +func Map(f *os.File, prot, flags int) (MMap, error) { + return MapRegion(f, -1, prot, flags, 0) +} + +// MapRegion maps part of a file into memory. +// The offset parameter must be a multiple of the system's page size. +// If length < 0, the entire file will be mapped. +// If ANON is set in flags, f is ignored. +func MapRegion(f *os.File, length int, prot, flags int, offset int64) (MMap, error) { + if offset%int64(os.Getpagesize()) != 0 { + return nil, errors.New("offset parameter must be a multiple of the system's page size") + } + + var fd uintptr + if flags&ANON == 0 { + fd = uintptr(f.Fd()) + if length < 0 { + fi, err := f.Stat() + if err != nil { + return nil, err + } + length = int(fi.Size()) + } + } else { + if length <= 0 { + return nil, errors.New("anonymous mapping requires non-zero length") + } + fd = ^uintptr(0) + } + return mmap(length, uintptr(prot), uintptr(flags), fd, offset) +} + +func (m *MMap) header() *reflect.SliceHeader { + return (*reflect.SliceHeader)(unsafe.Pointer(m)) +} + +// Lock keeps the mapped region in physical memory, ensuring that it will not be +// swapped out. +func (m MMap) Lock() error { + dh := m.header() + return lock(dh.Data, uintptr(dh.Len)) +} + +// Unlock reverses the effect of Lock, allowing the mapped region to potentially +// be swapped out. +// If m is already unlocked, aan error will result. +func (m MMap) Unlock() error { + dh := m.header() + return unlock(dh.Data, uintptr(dh.Len)) +} + +// Flush synchronizes the mapping's contents to the file's contents on disk. +func (m MMap) Flush() error { + dh := m.header() + return flush(dh.Data, uintptr(dh.Len)) +} + +// Unmap deletes the memory mapped region, flushes any remaining changes, and sets +// m to nil. +// Trying to read or write any remaining references to m after Unmap is called will +// result in undefined behavior. +// Unmap should only be called on the slice value that was originally returned from +// a call to Map. Calling Unmap on a derived slice may cause errors. +func (m *MMap) Unmap() error { + dh := m.header() + err := unmap(dh.Data, uintptr(dh.Len)) + *m = nil + return err +} diff --git a/vendor/github.com/edsrzf/mmap-go/mmap_unix.go b/vendor/github.com/edsrzf/mmap-go/mmap_unix.go new file mode 100644 index 0000000000..4af98420d5 --- /dev/null +++ b/vendor/github.com/edsrzf/mmap-go/mmap_unix.go @@ -0,0 +1,67 @@ +// Copyright 2011 Evan Shaw. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build darwin dragonfly freebsd linux openbsd solaris netbsd + +package mmap + +import ( + "syscall" +) + +func mmap(len int, inprot, inflags, fd uintptr, off int64) ([]byte, error) { + flags := syscall.MAP_SHARED + prot := syscall.PROT_READ + switch { + case inprot© != 0: + prot |= syscall.PROT_WRITE + flags = syscall.MAP_PRIVATE + case inprot&RDWR != 0: + prot |= syscall.PROT_WRITE + } + if inprot&EXEC != 0 { + prot |= syscall.PROT_EXEC + } + if inflags&ANON != 0 { + flags |= syscall.MAP_ANON + } + + b, err := syscall.Mmap(int(fd), off, len, prot, flags) + if err != nil { + return nil, err + } + return b, nil +} + +func flush(addr, len uintptr) error { + _, _, errno := syscall.Syscall(_SYS_MSYNC, addr, len, _MS_SYNC) + if errno != 0 { + return syscall.Errno(errno) + } + return nil +} + +func lock(addr, len uintptr) error { + _, _, errno := syscall.Syscall(syscall.SYS_MLOCK, addr, len, 0) + if errno != 0 { + return syscall.Errno(errno) + } + return nil +} + +func unlock(addr, len uintptr) error { + _, _, errno := syscall.Syscall(syscall.SYS_MUNLOCK, addr, len, 0) + if errno != 0 { + return syscall.Errno(errno) + } + return nil +} + +func unmap(addr, len uintptr) error { + _, _, errno := syscall.Syscall(syscall.SYS_MUNMAP, addr, len, 0) + if errno != 0 { + return syscall.Errno(errno) + } + return nil +} diff --git a/vendor/github.com/edsrzf/mmap-go/mmap_windows.go b/vendor/github.com/edsrzf/mmap-go/mmap_windows.go new file mode 100644 index 0000000000..c3d2d02d3f --- /dev/null +++ b/vendor/github.com/edsrzf/mmap-go/mmap_windows.go @@ -0,0 +1,125 @@ +// Copyright 2011 Evan Shaw. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mmap + +import ( + "errors" + "os" + "sync" + "syscall" +) + +// mmap on Windows is a two-step process. +// First, we call CreateFileMapping to get a handle. +// Then, we call MapviewToFile to get an actual pointer into memory. +// Because we want to emulate a POSIX-style mmap, we don't want to expose +// the handle -- only the pointer. We also want to return only a byte slice, +// not a struct, so it's convenient to manipulate. + +// We keep this map so that we can get back the original handle from the memory address. +var handleLock sync.Mutex +var handleMap = map[uintptr]syscall.Handle{} + +func mmap(len int, prot, flags, hfile uintptr, off int64) ([]byte, error) { + flProtect := uint32(syscall.PAGE_READONLY) + dwDesiredAccess := uint32(syscall.FILE_MAP_READ) + switch { + case prot© != 0: + flProtect = syscall.PAGE_WRITECOPY + dwDesiredAccess = syscall.FILE_MAP_COPY + case prot&RDWR != 0: + flProtect = syscall.PAGE_READWRITE + dwDesiredAccess = syscall.FILE_MAP_WRITE + } + if prot&EXEC != 0 { + flProtect <<= 4 + dwDesiredAccess |= syscall.FILE_MAP_EXECUTE + } + + // The maximum size is the area of the file, starting from 0, + // that we wish to allow to be mappable. It is the sum of + // the length the user requested, plus the offset where that length + // is starting from. This does not map the data into memory. + maxSizeHigh := uint32((off + int64(len)) >> 32) + maxSizeLow := uint32((off + int64(len)) & 0xFFFFFFFF) + // TODO: Do we need to set some security attributes? It might help portability. + h, errno := syscall.CreateFileMapping(syscall.Handle(hfile), nil, flProtect, maxSizeHigh, maxSizeLow, nil) + if h == 0 { + return nil, os.NewSyscallError("CreateFileMapping", errno) + } + + // Actually map a view of the data into memory. The view's size + // is the length the user requested. + fileOffsetHigh := uint32(off >> 32) + fileOffsetLow := uint32(off & 0xFFFFFFFF) + addr, errno := syscall.MapViewOfFile(h, dwDesiredAccess, fileOffsetHigh, fileOffsetLow, uintptr(len)) + if addr == 0 { + return nil, os.NewSyscallError("MapViewOfFile", errno) + } + handleLock.Lock() + handleMap[addr] = h + handleLock.Unlock() + + m := MMap{} + dh := m.header() + dh.Data = addr + dh.Len = len + dh.Cap = dh.Len + + return m, nil +} + +func flush(addr, len uintptr) error { + errno := syscall.FlushViewOfFile(addr, len) + if errno != nil { + return os.NewSyscallError("FlushViewOfFile", errno) + } + + handleLock.Lock() + defer handleLock.Unlock() + handle, ok := handleMap[addr] + if !ok { + // should be impossible; we would've errored above + return errors.New("unknown base address") + } + + errno = syscall.FlushFileBuffers(handle) + return os.NewSyscallError("FlushFileBuffers", errno) +} + +func lock(addr, len uintptr) error { + errno := syscall.VirtualLock(addr, len) + return os.NewSyscallError("VirtualLock", errno) +} + +func unlock(addr, len uintptr) error { + errno := syscall.VirtualUnlock(addr, len) + return os.NewSyscallError("VirtualUnlock", errno) +} + +func unmap(addr, len uintptr) error { + flush(addr, len) + // Lock the UnmapViewOfFile along with the handleMap deletion. + // As soon as we unmap the view, the OS is free to give the + // same addr to another new map. We don't want another goroutine + // to insert and remove the same addr into handleMap while + // we're trying to remove our old addr/handle pair. + handleLock.Lock() + defer handleLock.Unlock() + err := syscall.UnmapViewOfFile(addr) + if err != nil { + return err + } + + handle, ok := handleMap[addr] + if !ok { + // should be impossible; we would've errored above + return errors.New("unknown base address") + } + delete(handleMap, addr) + + e := syscall.CloseHandle(syscall.Handle(handle)) + return os.NewSyscallError("CloseHandle", e) +} diff --git a/vendor/github.com/edsrzf/mmap-go/msync_netbsd.go b/vendor/github.com/edsrzf/mmap-go/msync_netbsd.go new file mode 100644 index 0000000000..a64b003e2d --- /dev/null +++ b/vendor/github.com/edsrzf/mmap-go/msync_netbsd.go @@ -0,0 +1,8 @@ +// Copyright 2011 Evan Shaw. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mmap + +const _SYS_MSYNC = 277 +const _MS_SYNC = 0x04 diff --git a/vendor/github.com/edsrzf/mmap-go/msync_unix.go b/vendor/github.com/edsrzf/mmap-go/msync_unix.go new file mode 100644 index 0000000000..91ee5f40f1 --- /dev/null +++ b/vendor/github.com/edsrzf/mmap-go/msync_unix.go @@ -0,0 +1,14 @@ +// Copyright 2011 Evan Shaw. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build darwin dragonfly freebsd linux openbsd solaris + +package mmap + +import ( + "syscall" +) + +const _SYS_MSYNC = syscall.SYS_MSYNC +const _MS_SYNC = syscall.MS_SYNC diff --git a/vendor/github.com/glycerine/go-unsnap-stream/LICENSE b/vendor/github.com/glycerine/go-unsnap-stream/LICENSE new file mode 100644 index 0000000000..31671ea603 --- /dev/null +++ b/vendor/github.com/glycerine/go-unsnap-stream/LICENSE @@ -0,0 +1,18 @@ +Copyright (c) 2014 the go-unsnap-stream authors. + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +Permission is explicitly granted to relicense this material under new terms of +your choice when integrating this library with another library or project. diff --git a/vendor/github.com/glycerine/go-unsnap-stream/README.md b/vendor/github.com/glycerine/go-unsnap-stream/README.md new file mode 100644 index 0000000000..b1b8c74751 --- /dev/null +++ b/vendor/github.com/glycerine/go-unsnap-stream/README.md @@ -0,0 +1,20 @@ +go-unsnap-stream +================ + +This is a small golang library for decoding and encoding the snappy *streaming* format, specified here: https://github.com/google/snappy/blob/master/framing_format.txt + +Note that the *streaming or framing format* for snappy is different from snappy itself. Think of it as a train of boxcars: the streaming format breaks your data in chunks, applies snappy to each chunk alone, then puts a thin wrapper around the chunk, and sends it along in turn. You can begin decoding before receiving everything. And memory requirements for decoding are sane. + +Strangely, though the streaming format was first proposed in Go[1][2], it was never upated, and I could not locate any other library for Go that would handle the streaming/framed snappy format. Hence this implementation of the spec. There is a command line tool[3] that has a C implementation, but this is the only Go implementation that I am aware of. The reference for the framing/streaming spec seems to be the python implementation[4]. + +For binary compatibility with the python implementation, one could use the C-snappy compressor/decompressor code directly; using github.com/dgryski/go-csnappy. In fact we did this for a while to verify byte-for-byte compatiblity, as the native Go implementation produces slightly different binary compression (still conformant with the standard of course), which made test-diffs harder, and some have complained about it being slower than the C. + +However, while the c-snappy was useful for checking compatibility, it introduced dependencies on external C libraries (both the c-snappy library and the C standard library). Our go binary executable that used the go-unsnap-stream library was no longer standalone, and deployment was painful if not impossible if the target had a different C standard library. So we've gone back to using the snappy-go implementation (entirely in Go) for ease of deployment. See the comments at the top of unsnap.go if you wish to use c-snappy instead. + +[1] https://groups.google.com/forum/#!msg/snappy-compression/qvLNe2cSH9s/R19oBC-p7g4J + +[2] https://codereview.appspot.com/5167058 + +[3] https://github.com/kubo/snzip + +[4] https://pypi.python.org/pypi/python-snappy
\ No newline at end of file diff --git a/vendor/github.com/glycerine/go-unsnap-stream/binary.dat b/vendor/github.com/glycerine/go-unsnap-stream/binary.dat Binary files differnew file mode 100644 index 0000000000..f31eee2e24 --- /dev/null +++ b/vendor/github.com/glycerine/go-unsnap-stream/binary.dat diff --git a/vendor/github.com/glycerine/go-unsnap-stream/binary.dat.snappy b/vendor/github.com/glycerine/go-unsnap-stream/binary.dat.snappy Binary files differnew file mode 100644 index 0000000000..ed37024293 --- /dev/null +++ b/vendor/github.com/glycerine/go-unsnap-stream/binary.dat.snappy diff --git a/vendor/github.com/glycerine/go-unsnap-stream/rbuf.go b/vendor/github.com/glycerine/go-unsnap-stream/rbuf.go new file mode 100644 index 0000000000..f771c392d0 --- /dev/null +++ b/vendor/github.com/glycerine/go-unsnap-stream/rbuf.go @@ -0,0 +1,375 @@ +package unsnap + +// copyright (c) 2014, Jason E. Aten +// license: MIT + +// Some text from the Golang standard library doc is adapted and +// reproduced in fragments below to document the expected behaviors +// of the interface functions Read()/Write()/ReadFrom()/WriteTo() that +// are implemented here. Those descriptions (see +// http://golang.org/pkg/io/#Reader for example) are +// copyright 2010 The Go Authors. + +import "io" + +// FixedSizeRingBuf: +// +// a fixed-size circular ring buffer. Yes, just what is says. +// +// We keep a pair of ping/pong buffers so that we can linearize +// the circular buffer into a contiguous slice if need be. +// +// For efficiency, a FixedSizeRingBuf may be vastly preferred to +// a bytes.Buffer. The ReadWithoutAdvance(), Advance(), and Adopt() +// methods are all non-standard methods written for speed. +// +// For an I/O heavy application, I have replaced bytes.Buffer with +// FixedSizeRingBuf and seen memory consumption go from 8GB to 25MB. +// Yes, that is a 300x reduction in memory footprint. Everything ran +// faster too. +// +// Note that Bytes(), while inescapable at times, is expensive: avoid +// it if possible. Instead it is better to use the FixedSizeRingBuf.Readable +// member to get the number of bytes available. Bytes() is expensive because +// it may copy the back and then the front of a wrapped buffer A[Use] +// into A[1-Use] in order to get a contiguous slice. If possible use ContigLen() +// first to get the size that can be read without copying, Read() that +// amount, and then Read() a second time -- to avoid the copy. + +type FixedSizeRingBuf struct { + A [2][]byte // a pair of ping/pong buffers. Only one is active. + Use int // which A buffer is in active use, 0 or 1 + N int // MaxViewInBytes, the size of A[0] and A[1] in bytes. + Beg int // start of data in A[Use] + Readable int // number of bytes available to read in A[Use] + + OneMade bool // lazily instantiate the [1] buffer. If we never call Bytes(), + // we may never need it. If OneMade is false, the Use must be = 0. +} + +func (b *FixedSizeRingBuf) Make2ndBuffer() { + if b.OneMade { + return + } + b.A[1] = make([]byte, b.N, b.N) + b.OneMade = true +} + +// get the length of the largest read that we can provide to a contiguous slice +// without an extra linearizing copy of all bytes internally. +func (b *FixedSizeRingBuf) ContigLen() int { + extent := b.Beg + b.Readable + firstContigLen := intMin(extent, b.N) - b.Beg + return firstContigLen +} + +func NewFixedSizeRingBuf(maxViewInBytes int) *FixedSizeRingBuf { + n := maxViewInBytes + r := &FixedSizeRingBuf{ + Use: 0, // 0 or 1, whichever is actually in use at the moment. + // If we are asked for Bytes() and we wrap, linearize into the other. + + N: n, + Beg: 0, + Readable: 0, + OneMade: false, + } + r.A[0] = make([]byte, n, n) + + // r.A[1] initialized lazily now. + + return r +} + +// from the standard library description of Bytes(): +// Bytes() returns a slice of the contents of the unread portion of the buffer. +// If the caller changes the contents of the +// returned slice, the contents of the buffer will change provided there +// are no intervening method calls on the Buffer. +// +func (b *FixedSizeRingBuf) Bytes() []byte { + + extent := b.Beg + b.Readable + if extent <= b.N { + // we fit contiguously in this buffer without wrapping to the other + return b.A[b.Use][b.Beg:(b.Beg + b.Readable)] + } + + // wrap into the other buffer + b.Make2ndBuffer() + + src := b.Use + dest := 1 - b.Use + + n := copy(b.A[dest], b.A[src][b.Beg:]) + n += copy(b.A[dest][n:], b.A[src][0:(extent%b.N)]) + + b.Use = dest + b.Beg = 0 + + return b.A[b.Use][:n] +} + +// Read(): +// +// from bytes.Buffer.Read(): Read reads the next len(p) bytes +// from the buffer or until the buffer is drained. The return +// value n is the number of bytes read. If the buffer has no data +// to return, err is io.EOF (unless len(p) is zero); otherwise it is nil. +// +// from the description of the Reader interface, +// http://golang.org/pkg/io/#Reader +// +/* +Reader is the interface that wraps the basic Read method. + +Read reads up to len(p) bytes into p. It returns the number +of bytes read (0 <= n <= len(p)) and any error encountered. +Even if Read returns n < len(p), it may use all of p as scratch +space during the call. If some data is available but not +len(p) bytes, Read conventionally returns what is available +instead of waiting for more. + +When Read encounters an error or end-of-file condition after +successfully reading n > 0 bytes, it returns the number of bytes +read. It may return the (non-nil) error from the same call or +return the error (and n == 0) from a subsequent call. An instance +of this general case is that a Reader returning a non-zero number +of bytes at the end of the input stream may return +either err == EOF or err == nil. The next Read should +return 0, EOF regardless. + +Callers should always process the n > 0 bytes returned before +considering the error err. Doing so correctly handles I/O errors +that happen after reading some bytes and also both of the +allowed EOF behaviors. + +Implementations of Read are discouraged from returning a zero +byte count with a nil error, and callers should treat that +situation as a no-op. +*/ +// + +func (b *FixedSizeRingBuf) Read(p []byte) (n int, err error) { + return b.ReadAndMaybeAdvance(p, true) +} + +// if you want to Read the data and leave it in the buffer, so as +// to peek ahead for example. +func (b *FixedSizeRingBuf) ReadWithoutAdvance(p []byte) (n int, err error) { + return b.ReadAndMaybeAdvance(p, false) +} + +func (b *FixedSizeRingBuf) ReadAndMaybeAdvance(p []byte, doAdvance bool) (n int, err error) { + if len(p) == 0 { + return 0, nil + } + if b.Readable == 0 { + return 0, io.EOF + } + extent := b.Beg + b.Readable + if extent <= b.N { + n += copy(p, b.A[b.Use][b.Beg:extent]) + } else { + n += copy(p, b.A[b.Use][b.Beg:b.N]) + if n < len(p) { + n += copy(p[n:], b.A[b.Use][0:(extent%b.N)]) + } + } + if doAdvance { + b.Advance(n) + } + return +} + +// +// Write writes len(p) bytes from p to the underlying data stream. +// It returns the number of bytes written from p (0 <= n <= len(p)) +// and any error encountered that caused the write to stop early. +// Write must return a non-nil error if it returns n < len(p). +// +func (b *FixedSizeRingBuf) Write(p []byte) (n int, err error) { + for { + if len(p) == 0 { + // nothing (left) to copy in; notice we shorten our + // local copy p (below) as we read from it. + return + } + + writeCapacity := b.N - b.Readable + if writeCapacity <= 0 { + // we are all full up already. + return n, io.ErrShortWrite + } + if len(p) > writeCapacity { + err = io.ErrShortWrite + // leave err set and + // keep going, write what we can. + } + + writeStart := (b.Beg + b.Readable) % b.N + + upperLim := intMin(writeStart+writeCapacity, b.N) + + k := copy(b.A[b.Use][writeStart:upperLim], p) + + n += k + b.Readable += k + p = p[k:] + + // we can fill from b.A[b.Use][0:something] from + // p's remainder, so loop + } +} + +// WriteTo and ReadFrom avoid intermediate allocation and copies. + +// WriteTo writes data to w until there's no more data to write +// or when an error occurs. The return value n is the number of +// bytes written. Any error encountered during the write is also returned. +func (b *FixedSizeRingBuf) WriteTo(w io.Writer) (n int64, err error) { + + if b.Readable == 0 { + return 0, io.EOF + } + + extent := b.Beg + b.Readable + firstWriteLen := intMin(extent, b.N) - b.Beg + secondWriteLen := b.Readable - firstWriteLen + if firstWriteLen > 0 { + m, e := w.Write(b.A[b.Use][b.Beg:(b.Beg + firstWriteLen)]) + n += int64(m) + b.Advance(m) + + if e != nil { + return n, e + } + // all bytes should have been written, by definition of + // Write method in io.Writer + if m != firstWriteLen { + return n, io.ErrShortWrite + } + } + if secondWriteLen > 0 { + m, e := w.Write(b.A[b.Use][0:secondWriteLen]) + n += int64(m) + b.Advance(m) + + if e != nil { + return n, e + } + // all bytes should have been written, by definition of + // Write method in io.Writer + if m != secondWriteLen { + return n, io.ErrShortWrite + } + } + + return n, nil +} + +// ReadFrom() reads data from r until EOF or error. The return value n +// is the number of bytes read. Any error except io.EOF encountered +// during the read is also returned. +func (b *FixedSizeRingBuf) ReadFrom(r io.Reader) (n int64, err error) { + for { + writeCapacity := b.N - b.Readable + if writeCapacity <= 0 { + // we are all full + return n, nil + } + writeStart := (b.Beg + b.Readable) % b.N + upperLim := intMin(writeStart+writeCapacity, b.N) + + m, e := r.Read(b.A[b.Use][writeStart:upperLim]) + n += int64(m) + b.Readable += m + if e == io.EOF { + return n, nil + } + if e != nil { + return n, e + } + } +} + +func (b *FixedSizeRingBuf) Reset() { + b.Beg = 0 + b.Readable = 0 + b.Use = 0 +} + +// Advance(): non-standard, but better than Next(), +// because we don't have to unwrap our buffer and pay the cpu time +// for the copy that unwrapping may need. +// Useful in conjuction/after ReadWithoutAdvance() above. +func (b *FixedSizeRingBuf) Advance(n int) { + if n <= 0 { + return + } + if n > b.Readable { + n = b.Readable + } + b.Readable -= n + b.Beg = (b.Beg + n) % b.N +} + +// Adopt(): non-standard. +// +// For efficiency's sake, (possibly) take ownership of +// already allocated slice offered in me. +// +// If me is large we will adopt it, and we will potentially then +// write to the me buffer. +// If we already have a bigger buffer, copy me into the existing +// buffer instead. +func (b *FixedSizeRingBuf) Adopt(me []byte) { + n := len(me) + if n > b.N { + b.A[0] = me + b.OneMade = false + b.N = n + b.Use = 0 + b.Beg = 0 + b.Readable = n + } else { + // we already have a larger buffer, reuse it. + copy(b.A[0], me) + b.Use = 0 + b.Beg = 0 + b.Readable = n + } +} + +func intMax(a, b int) int { + if a > b { + return a + } else { + return b + } +} + +func intMin(a, b int) int { + if a < b { + return a + } else { + return b + } +} + +// Get the (beg, end] indices of the tailing empty buffer of bytes slice that from that is free for writing. +// Note: not guaranteed to be zeroed. At all. +func (b *FixedSizeRingBuf) GetEndmostWritable() (beg int, end int) { + extent := b.Beg + b.Readable + if extent < b.N { + return extent, b.N + } + + return extent % b.N, b.Beg +} + +// Note: not guaranteed to be zeroed. +func (b *FixedSizeRingBuf) GetEndmostWritableSlice() []byte { + beg, e := b.GetEndmostWritable() + return b.A[b.Use][beg:e] +} diff --git a/vendor/github.com/glycerine/go-unsnap-stream/snap.go b/vendor/github.com/glycerine/go-unsnap-stream/snap.go new file mode 100644 index 0000000000..12a8d40b5b --- /dev/null +++ b/vendor/github.com/glycerine/go-unsnap-stream/snap.go @@ -0,0 +1,100 @@ +package unsnap + +import ( + "encoding/binary" + + // no c lib dependency + snappy "github.com/golang/snappy" + // or, use the C wrapper for speed + //snappy "github.com/dgryski/go-csnappy" +) + +// add Write() method for SnappyFile (see unsnap.go) + +// reference for snappy framing/streaming format: +// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt +// ?spec=svn68&r=71 + +// +// Write writes len(p) bytes from p to the underlying data stream. +// It returns the number of bytes written from p (0 <= n <= len(p)) and +// any error encountered that caused the write to stop early. Write +// must return a non-nil error if it returns n < len(p). +// +func (sf *SnappyFile) Write(p []byte) (n int, err error) { + + if sf.SnappyEncodeDecodeOff { + return sf.Writer.Write(p) + } + + if !sf.Writing { + panic("Writing on a read-only SnappyFile") + } + + // encoding in snappy can apparently go beyond the original size, beware. + // so our buffers must be sized 2*max snappy chunk => 2 * CHUNK_MAX(65536) + + sf.DecBuf.Reset() + sf.EncBuf.Reset() + + if !sf.HeaderChunkWritten { + sf.HeaderChunkWritten = true + _, err = sf.Writer.Write(SnappyStreamHeaderMagic) + if err != nil { + return + } + } + var chunk []byte + var chunk_type byte + var crc uint32 + + for len(p) > 0 { + + // chunk points to input p by default, unencoded input. + chunk = p[:IntMin(len(p), CHUNK_MAX)] + crc = masked_crc32c(chunk) + + writeme := chunk[:] + + // first write to EncBuf, as a temp, in case we want + // to discard and send uncompressed instead. + compressed_chunk := snappy.Encode(sf.EncBuf.GetEndmostWritableSlice(), chunk) + + if len(compressed_chunk) <= int((1-_COMPRESSION_THRESHOLD)*float64(len(chunk))) { + writeme = compressed_chunk + chunk_type = _COMPRESSED_CHUNK + } else { + // keep writeme pointing at original chunk (uncompressed) + chunk_type = _UNCOMPRESSED_CHUNK + } + + const crc32Sz = 4 + var tag32 uint32 = uint32(chunk_type) + (uint32(len(writeme)+crc32Sz) << 8) + + err = binary.Write(sf.Writer, binary.LittleEndian, tag32) + if err != nil { + return + } + + err = binary.Write(sf.Writer, binary.LittleEndian, crc) + if err != nil { + return + } + + _, err = sf.Writer.Write(writeme) + if err != nil { + return + } + + n += len(chunk) + p = p[len(chunk):] + } + return n, nil +} + +func IntMin(a int, b int) int { + if a < b { + return a + } + return b +} diff --git a/vendor/github.com/glycerine/go-unsnap-stream/unenc.txt b/vendor/github.com/glycerine/go-unsnap-stream/unenc.txt new file mode 100644 index 0000000000..5f50279394 --- /dev/null +++ b/vendor/github.com/glycerine/go-unsnap-stream/unenc.txt @@ -0,0 +1 @@ +hello_snappy diff --git a/vendor/github.com/glycerine/go-unsnap-stream/unenc.txt.snappy b/vendor/github.com/glycerine/go-unsnap-stream/unenc.txt.snappy Binary files differnew file mode 100644 index 0000000000..ba45ecd426 --- /dev/null +++ b/vendor/github.com/glycerine/go-unsnap-stream/unenc.txt.snappy diff --git a/vendor/github.com/glycerine/go-unsnap-stream/unsnap.go b/vendor/github.com/glycerine/go-unsnap-stream/unsnap.go new file mode 100644 index 0000000000..8789445c9a --- /dev/null +++ b/vendor/github.com/glycerine/go-unsnap-stream/unsnap.go @@ -0,0 +1,513 @@ +package unsnap + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "io/ioutil" + "os" + + "hash/crc32" + + snappy "github.com/golang/snappy" + // The C library can be used, but this makes the binary dependent + // lots of extraneous c-libraries; it is no longer stand-alone. Yuck. + // + // Therefore we comment out the "dgryski/go-csnappy" path and use the + // "github.com/golang/snappy/snappy" above instead. If you are + // performance limited and can deal with distributing more libraries, + // then this is easy to swap. + // + // If you swap, note that some of the tests won't pass + // because snappy-go produces slightly different (but still + // conformant) encodings on some data. Here are bindings + // to the C-snappy: + // snappy "github.com/dgryski/go-csnappy" +) + +// SnappyFile: create a drop-in-replacement/wrapper for an *os.File that handles doing the unsnappification online as more is read from it + +type SnappyFile struct { + Fname string + + Reader io.Reader + Writer io.Writer + + // allow clients to substitute us for an os.File and just switch + // off compression if they don't want it. + SnappyEncodeDecodeOff bool // if true, we bypass straight to Filep + + EncBuf FixedSizeRingBuf // holds any extra that isn't yet returned, encoded + DecBuf FixedSizeRingBuf // holds any extra that isn't yet returned, decoded + + // for writing to stream-framed snappy + HeaderChunkWritten bool + + // Sanity check: we can only read, or only write, to one SnappyFile. + // EncBuf and DecBuf are used differently in each mode. Verify + // that we are consistent with this flag. + Writing bool +} + +var total int + +// for debugging, show state of buffers +func (f *SnappyFile) Dump() { + fmt.Printf("EncBuf has length %d and contents:\n%s\n", len(f.EncBuf.Bytes()), string(f.EncBuf.Bytes())) + fmt.Printf("DecBuf has length %d and contents:\n%s\n", len(f.DecBuf.Bytes()), string(f.DecBuf.Bytes())) +} + +func (f *SnappyFile) Read(p []byte) (n int, err error) { + + if f.SnappyEncodeDecodeOff { + return f.Reader.Read(p) + } + + if f.Writing { + panic("Reading on a write-only SnappyFile") + } + + // before we unencrypt more, try to drain the DecBuf first + n, _ = f.DecBuf.Read(p) + if n > 0 { + total += n + return n, nil + } + + //nEncRead, nDecAdded, err := UnsnapOneFrame(f.Filep, &f.EncBuf, &f.DecBuf, f.Fname) + _, _, err = UnsnapOneFrame(f.Reader, &f.EncBuf, &f.DecBuf, f.Fname) + if err != nil && err != io.EOF { + panic(err) + } + + n, _ = f.DecBuf.Read(p) + + if n > 0 { + total += n + return n, nil + } + if f.DecBuf.Readable == 0 { + if f.DecBuf.Readable == 0 && f.EncBuf.Readable == 0 { + // only now (when EncBuf is empty) can we give io.EOF. + // Any earlier, and we leave stuff un-decoded! + return 0, io.EOF + } + } + return 0, nil +} + +func Open(name string) (file *SnappyFile, err error) { + fp, err := os.Open(name) + if err != nil { + return nil, err + } + // encoding in snappy can apparently go beyond the original size, so + // we make our buffers big enough, 2*max snappy chunk => 2 * CHUNK_MAX(65536) + + snap := NewReader(fp) + snap.Fname = name + return snap, nil +} + +func NewReader(r io.Reader) *SnappyFile { + return &SnappyFile{ + Reader: r, + EncBuf: *NewFixedSizeRingBuf(CHUNK_MAX * 2), // buffer of snappy encoded bytes + DecBuf: *NewFixedSizeRingBuf(CHUNK_MAX * 2), // buffer of snapppy decoded bytes + Writing: false, + } +} + +func NewWriter(w io.Writer) *SnappyFile { + return &SnappyFile{ + Writer: w, + EncBuf: *NewFixedSizeRingBuf(65536), // on writing: temp for testing compression + DecBuf: *NewFixedSizeRingBuf(65536 * 2), // on writing: final buffer of snappy framed and encoded bytes + Writing: true, + } +} + +func Create(name string) (file *SnappyFile, err error) { + fp, err := os.Create(name) + if err != nil { + return nil, err + } + snap := NewWriter(fp) + snap.Fname = name + return snap, nil +} + +func (f *SnappyFile) Close() error { + if f.Writing { + wc, ok := f.Writer.(io.WriteCloser) + if ok { + return wc.Close() + } + return nil + } + rc, ok := f.Reader.(io.ReadCloser) + if ok { + return rc.Close() + } + return nil +} + +func (f *SnappyFile) Sync() error { + file, ok := f.Writer.(*os.File) + if ok { + return file.Sync() + } + return nil +} + +// for an increment of a frame at a time: +// read from r into encBuf (encBuf is still encoded, thus the name), and write unsnappified frames into outDecodedBuf +// the returned n: number of bytes read from the encrypted encBuf +func UnsnapOneFrame(r io.Reader, encBuf *FixedSizeRingBuf, outDecodedBuf *FixedSizeRingBuf, fname string) (nEnc int64, nDec int64, err error) { + // b, err := ioutil.ReadAll(r) + // if err != nil { + // panic(err) + // } + + nEnc = 0 + nDec = 0 + + // read up to 65536 bytes from r into encBuf, at least a snappy frame + nread, err := io.CopyN(encBuf, r, 65536) // returns nwrotebytes, err + nEnc += nread + if err != nil { + if err == io.EOF { + if nread == 0 { + if encBuf.Readable == 0 { + return nEnc, nDec, io.EOF + } + // else we have bytes in encBuf, so decode them! + err = nil + } else { + // continue below, processing the nread bytes + err = nil + } + } else { + panic(err) + } + } + + // flag for printing chunk size alignment messages + verbose := false + + const snappyStreamHeaderSz = 10 + const headerSz = 4 + const crc32Sz = 4 + // the magic 18 bytes accounts for the snappy streaming header and the first chunks size and checksum + // http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt + + chunk := (*encBuf).Bytes() + + // however we exit, advance as + // defer func() { (*encBuf).Next(N) }() + + // 65536 is the max size of a snappy framed chunk. See + // http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt:91 + // buf := make([]byte, 65536) + + // fmt.Printf("read from file, b is len:%d with value: %#v\n", len(b), b) + // fmt.Printf("read from file, bcut is len:%d with value: %#v\n", len(bcut), bcut) + + //fmt.Printf("raw bytes of chunksz are: %v\n", b[11:14]) + + fourbytes := make([]byte, 4) + chunkCount := 0 + + for nDec < 65536 { + if len(chunk) == 0 { + break + } + chunkCount++ + fourbytes[3] = 0 + copy(fourbytes, chunk[1:4]) + chunksz := binary.LittleEndian.Uint32(fourbytes) + chunk_type := chunk[0] + + switch true { + case chunk_type == 0xff: + { // stream identifier + + streamHeader := chunk[:snappyStreamHeaderSz] + if 0 != bytes.Compare(streamHeader, []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}) { + panic("file had chunk starting with 0xff but then no magic snappy streaming protocol bytes, aborting.") + } else { + //fmt.Printf("got streaming snappy magic header just fine.\n") + } + chunk = chunk[snappyStreamHeaderSz:] + (*encBuf).Advance(snappyStreamHeaderSz) + nEnc += snappyStreamHeaderSz + continue + } + case chunk_type == 0x00: + { // compressed data + if verbose { + fmt.Fprintf(os.Stderr, "chunksz is %d while total bytes avail are: %d\n", int(chunksz), len(chunk)-4) + } + + crc := binary.LittleEndian.Uint32(chunk[headerSz:(headerSz + crc32Sz)]) + section := chunk[(headerSz + crc32Sz):(headerSz + chunksz)] + + dec, ok := snappy.Decode(nil, section) + if ok != nil { + // we've probably truncated a snappy frame at this point + // ok=snappy: corrupt input + // len(dec) == 0 + // + panic(fmt.Sprintf("could not decode snappy stream: '%s' and len dec=%d and ok=%v\n", fname, len(dec), ok)) + + // get back to caller with what we've got so far + return nEnc, nDec, nil + } + // fmt.Printf("ok, b is %#v , %#v\n", ok, dec) + + // spit out decoded text + // n, err := w.Write(dec) + //fmt.Printf("len(dec) = %d, outDecodedBuf.Readable=%d\n", len(dec), outDecodedBuf.Readable) + bnb := bytes.NewBuffer(dec) + n, err := io.Copy(outDecodedBuf, bnb) + if err != nil { + //fmt.Printf("got n=%d, err= %s ; when trying to io.Copy(outDecodedBuf: N=%d, Readable=%d)\n", n, err, outDecodedBuf.N, outDecodedBuf.Readable) + panic(err) + } + if n != int64(len(dec)) { + panic("could not write all bytes to outDecodedBuf") + } + nDec += n + + // verify the crc32 rotated checksum + m32 := masked_crc32c(dec) + if m32 != crc { + panic(fmt.Sprintf("crc32 masked failiure. expected: %v but got: %v", crc, m32)) + } else { + //fmt.Printf("\nchecksums match: %v == %v\n", crc, m32) + } + + // move to next header + inc := (headerSz + int(chunksz)) + chunk = chunk[inc:] + (*encBuf).Advance(inc) + nEnc += int64(inc) + continue + } + case chunk_type == 0x01: + { // uncompressed data + + //n, err := w.Write(chunk[(headerSz+crc32Sz):(headerSz + int(chunksz))]) + n, err := io.Copy(outDecodedBuf, bytes.NewBuffer(chunk[(headerSz+crc32Sz):(headerSz+int(chunksz))])) + if verbose { + //fmt.Printf("debug: n=%d err=%v chunksz=%d outDecodedBuf='%v'\n", n, err, chunksz, outDecodedBuf) + } + if err != nil { + panic(err) + } + if n != int64(chunksz-crc32Sz) { + panic("could not write all bytes to stdout") + } + nDec += n + + inc := (headerSz + int(chunksz)) + chunk = chunk[inc:] + (*encBuf).Advance(inc) + nEnc += int64(inc) + continue + } + case chunk_type == 0xfe: + fallthrough // padding, just skip it + case chunk_type >= 0x80 && chunk_type <= 0xfd: + { // Reserved skippable chunks + //fmt.Printf("\nin reserved skippable chunks, at nEnc=%v\n", nEnc) + inc := (headerSz + int(chunksz)) + chunk = chunk[inc:] + nEnc += int64(inc) + (*encBuf).Advance(inc) + continue + } + + default: + panic(fmt.Sprintf("unrecognized/unsupported chunk type %#v", chunk_type)) + } + + } // end for{} + + return nEnc, nDec, err + //return int64(N), nil +} + +// for whole file at once: +// +// receive on stdin a stream of bytes in the snappy-streaming framed +// format, defined here: http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt +// Grab each frame, run it through the snappy decoder, and spit out +// each frame all joined back-to-back on stdout. +// +func Unsnappy(r io.Reader, w io.Writer) (err error) { + b, err := ioutil.ReadAll(r) + if err != nil { + panic(err) + } + + // flag for printing chunk size alignment messages + verbose := false + + const snappyStreamHeaderSz = 10 + const headerSz = 4 + const crc32Sz = 4 + // the magic 18 bytes accounts for the snappy streaming header and the first chunks size and checksum + // http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt + + chunk := b[:] + + // 65536 is the max size of a snappy framed chunk. See + // http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt:91 + //buf := make([]byte, 65536) + + // fmt.Printf("read from file, b is len:%d with value: %#v\n", len(b), b) + // fmt.Printf("read from file, bcut is len:%d with value: %#v\n", len(bcut), bcut) + + //fmt.Printf("raw bytes of chunksz are: %v\n", b[11:14]) + + fourbytes := make([]byte, 4) + chunkCount := 0 + + for { + if len(chunk) == 0 { + break + } + chunkCount++ + fourbytes[3] = 0 + copy(fourbytes, chunk[1:4]) + chunksz := binary.LittleEndian.Uint32(fourbytes) + chunk_type := chunk[0] + + switch true { + case chunk_type == 0xff: + { // stream identifier + + streamHeader := chunk[:snappyStreamHeaderSz] + if 0 != bytes.Compare(streamHeader, []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}) { + panic("file had chunk starting with 0xff but then no magic snappy streaming protocol bytes, aborting.") + } else { + //fmt.Printf("got streaming snappy magic header just fine.\n") + } + chunk = chunk[snappyStreamHeaderSz:] + continue + } + case chunk_type == 0x00: + { // compressed data + if verbose { + fmt.Fprintf(os.Stderr, "chunksz is %d while total bytes avail are: %d\n", int(chunksz), len(chunk)-4) + } + + //crc := binary.LittleEndian.Uint32(chunk[headerSz:(headerSz + crc32Sz)]) + section := chunk[(headerSz + crc32Sz):(headerSz + chunksz)] + + dec, ok := snappy.Decode(nil, section) + if ok != nil { + panic("could not decode snappy stream") + } + // fmt.Printf("ok, b is %#v , %#v\n", ok, dec) + + // spit out decoded text + n, err := w.Write(dec) + if err != nil { + panic(err) + } + if n != len(dec) { + panic("could not write all bytes to stdout") + } + + // TODO: verify the crc32 rotated checksum? + + // move to next header + chunk = chunk[(headerSz + int(chunksz)):] + continue + } + case chunk_type == 0x01: + { // uncompressed data + + //crc := binary.LittleEndian.Uint32(chunk[headerSz:(headerSz + crc32Sz)]) + section := chunk[(headerSz + crc32Sz):(headerSz + chunksz)] + + n, err := w.Write(section) + if err != nil { + panic(err) + } + if n != int(chunksz-crc32Sz) { + panic("could not write all bytes to stdout") + } + + chunk = chunk[(headerSz + int(chunksz)):] + continue + } + case chunk_type == 0xfe: + fallthrough // padding, just skip it + case chunk_type >= 0x80 && chunk_type <= 0xfd: + { // Reserved skippable chunks + chunk = chunk[(headerSz + int(chunksz)):] + continue + } + + default: + panic(fmt.Sprintf("unrecognized/unsupported chunk type %#v", chunk_type)) + } + + } // end for{} + + return nil +} + +// 0xff 0x06 0x00 0x00 sNaPpY +var SnappyStreamHeaderMagic = []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59} + +const CHUNK_MAX = 65536 +const _STREAM_TO_STREAM_BLOCK_SIZE = CHUNK_MAX +const _STREAM_IDENTIFIER = `sNaPpY` +const _COMPRESSED_CHUNK = 0x00 +const _UNCOMPRESSED_CHUNK = 0x01 +const _IDENTIFIER_CHUNK = 0xff +const _RESERVED_UNSKIPPABLE0 = 0x02 // chunk ranges are [inclusive, exclusive) +const _RESERVED_UNSKIPPABLE1 = 0x80 +const _RESERVED_SKIPPABLE0 = 0x80 +const _RESERVED_SKIPPABLE1 = 0xff + +// the minimum percent of bytes compression must save to be enabled in automatic +// mode +const _COMPRESSION_THRESHOLD = .125 + +var crctab *crc32.Table + +func init() { + crctab = crc32.MakeTable(crc32.Castagnoli) // this is correct table, matches the crc32c.c code used by python +} + +func masked_crc32c(data []byte) uint32 { + + // see the framing format specification, http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt + var crc uint32 = crc32.Checksum(data, crctab) + return (uint32((crc>>15)|(crc<<17)) + 0xa282ead8) +} + +func ReadSnappyStreamCompressedFile(filename string) ([]byte, error) { + + snappyFile, err := Open(filename) + if err != nil { + return []byte{}, err + } + + var bb bytes.Buffer + _, err = bb.ReadFrom(snappyFile) + if err == io.EOF { + err = nil + } + if err != nil { + panic(err) + } + + return bb.Bytes(), err +} diff --git a/vendor/github.com/mschoch/smat/LICENSE b/vendor/github.com/mschoch/smat/LICENSE new file mode 100644 index 0000000000..7a4a3ea242 --- /dev/null +++ b/vendor/github.com/mschoch/smat/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
\ No newline at end of file diff --git a/vendor/github.com/mschoch/smat/README.md b/vendor/github.com/mschoch/smat/README.md new file mode 100644 index 0000000000..f5ca1c5440 --- /dev/null +++ b/vendor/github.com/mschoch/smat/README.md @@ -0,0 +1,166 @@ +# smat – State Machine Assisted Testing + +The concept is simple, describe valid uses of your library as states and actions. States describe which actions are possible, and with what probability they should occur. Actions mutate the context and transition to another state. + +By doing this, two things are possible: + +1. Use [go-fuzz](https://github.com/dvyukov/go-fuzz) to find/test interesting sequences of operations on your library. + +2. Automate longevity testing of your application by performing long sequences of valid operations. + +**NOTE**: both of these can also incorporate validation logic (not just failure detection by building validation into the state machine) + +## Status + +The API is still not stable. This is brand new and we'll probably change things we don't like... + +[![Build Status](https://travis-ci.org/mschoch/smat.svg?branch=master)](https://travis-ci.org/mschoch/smat) +[![Coverage Status](https://coveralls.io/repos/github/mschoch/smat/badge.svg?branch=master)](https://coveralls.io/github/mschoch/smat?branch=master) +[![GoDoc](https://godoc.org/github.com/mschoch/smat?status.svg)](https://godoc.org/github.com/mschoch/smat) +[![codebeat badge](https://codebeat.co/badges/c3ff6180-a241-4128-97f0-fa6bf6f48752)](https://codebeat.co/projects/github-com-mschoch-smat) +[![Go Report Card](https://goreportcard.com/badge/github.com/mschoch/smat)](https://goreportcard.com/report/github.com/mschoch/smat) + +## License + +Apache 2.0 + +## How do I use it? + +### smat.Context + +Choose a structure to keep track of any state. You pass in an instance of this when you start, and it will be passed to every action when it executes. The actions may mutate this context. + +For example, consider a database library, once you open a database handle, you need to use it inside of the other actions. So you might use a structure like: + +``` +type context struct { + db *DB +} +``` + +### smat.State + +A state represents a state that your application/library can be in, and the probabilities thats certain actions should be taken. + +For example, consider a database library, in a state where the database is open, there many things you can do. Let's consider just two right now, you can set a value, or you can delete a value. + +``` +func dbOpen(next byte) smat.ActionID { + return smat.PercentExecute(next, + smat.PercentAction{50, setValue}, + smat.PercentAction{50, deleteValue}, + ) +} +``` + +This says that in the open state, there are two valid actions, 50% of the time you should set a value and 50% of the time you should delete a value. **NOTE**: these percentages are just for characterizing the test workload. + +### smat.Action + +Actions are functions that do some work, optionally mutate the context, and indicate the next state to transition to. Below we see an example action to set value in a database. + +``` +func setValueFunc(ctx smat.Context) (next smat.State, err error) { + // type assert to our custom context type + context := ctx.(*context) + // perform the operation + err = context.db.Set("k", "v") + if err != nil { + return nil, err + } + // return the new state + return dbOpen, nil +} +``` + +### smat.ActionID and smat.ActionMap + +Actions are just functions, and since we can't compare functions in Go, we need to introduce an external identifier for them. This allows us to build a bi-directional mapping which we'll take advantage of later. + +``` +const ( + setup smat.ActionID = iota + teardown + setValue + deleteValue +) + +var actionMap = smat.ActionMap{ + setup: setupFunc, + teardown: teardownFunc, + setValue: setValueFunc, + deleteValue: deleteValueFunc, +} +``` + +### smat.ActionSeq + +A common way that many users think about a library is as a sequence of actions to be performed. Using the ActionID's that we've already seen we can build up sequences of operations. + +``` + actionSeq := smat.ActionSeq{ + open, + setValue, + setValue, + setValue, + } +``` + +Notice that we build these actions using the constants we defined above, and because of this we can have a bi-directional mapping between a stream of bytes (driving the state machine) and a sequence of actions to be performed. + +## Fuzzing + +We've built a lot of pieces, lets wire it up to go-fuzz. + +``` +func Fuzz(data []byte) int { + return smat.Fuzz(&context{}, setup, teardown, actionMap, data) +} +``` + +* The first argument is an instance of context structure. +* The second argument is the ActionID of our setup function. The setup function does not consume any of the input stream and is used to initialize the context and determine the start state. +* The third argument is the teardown function. This will be called unconditionally to clean up any resources associated with the test. +* The fourth argument is the actionMap which maps all ActionIDs to Actions. +* The fifth argument is the data passed in from the go-fuzz application. + +### Generating Initial go-fuzz Corpus + +Earlier we mentioned the bi-directional mapping between Actions and the byte stream driving the state machine. We can now leverage this to build the inital go-fuzz corpus. + +Using the `ActinSeq`s we learned about earlier we can build up a list of them as: + + var actionSeqs = []smat.ActionSeq{...} + +Then, we can write them out to disk using: + +``` +for i, actionSeq := range actionSeqs { + byteSequence, err := actionSeq.ByteEncoding(&context{}, setup, teardown, actionMap) + if err != nil { + // handle error + } + os.MkdirAll("workdir/corpus", 0700) + ioutil.WriteFile(fmt.Sprintf("workdir/corpus/%d", i), byteSequence, 0600) +} +``` + +You can then either put this into a test case or a main application depending on your needs. + +## Longevity Testing + +Fuzzing is great, but most of your corpus is likely to be shorter meaningful sequences. And go-fuzz works to find shortest sequences that cause problems, but sometimes you actually want to explore longer sequences that appear to go-fuzz as not triggering additional code coverage. + +For these cases we have another helper you can use: + +``` + Longevity(ctx, setup, teardown, actionMap, 0, closeChan) +``` + +The first four arguments are the same, the last two are: +* random seed used to ensure repeatable tests +* closeChan (chan struct{}) - close this channel if you want the function to stop and return ErrClosed, otherwise it will run forever + +## Examples + +See the examples directory for a working example that tests some BoltDB functionality. diff --git a/vendor/github.com/mschoch/smat/actionseq.go b/vendor/github.com/mschoch/smat/actionseq.go new file mode 100644 index 0000000000..6c8297f891 --- /dev/null +++ b/vendor/github.com/mschoch/smat/actionseq.go @@ -0,0 +1,61 @@ +// Copyright (c) 2016 Marty Schoch + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an "AS +// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language +// governing permissions and limitations under the License. + +package smat + +// ActionSeq represents a sequence of actions, used for populating a corpus +// of byte sequences for the corresponding fuzz tests +type ActionSeq []ActionID + +// ByteEncoding runs the FSM to produce a byte sequence to trigger the +// desired action +func (a ActionSeq) ByteEncoding(ctx Context, setup, teardown ActionID, actionMap ActionMap) ([]byte, error) { + setupFunc, teardownFunc, err := actionMap.findSetupTeardown(setup, teardown) + if err != nil { + return nil, err + } + state, err := setupFunc(ctx) + if err != nil { + return nil, err + } + defer func() { + _, _ = teardownFunc(ctx) + }() + + var rv []byte + for _, actionID := range a { + b, err := probeStateForAction(state, actionID) + if err != nil { + return nil, err + } + rv = append(rv, b) + action, ok := actionMap[actionID] + if !ok { + continue + } + state, err = action(ctx) + if err != nil { + return nil, err + } + } + return rv, nil +} + +func probeStateForAction(state State, actionID ActionID) (byte, error) { + for i := 0; i < 256; i++ { + nextActionID := state(byte(i)) + if nextActionID == actionID { + return byte(i), nil + } + } + return 0, ErrActionNotPossible +} diff --git a/vendor/github.com/mschoch/smat/smat.go b/vendor/github.com/mschoch/smat/smat.go new file mode 100644 index 0000000000..f6ea4975f2 --- /dev/null +++ b/vendor/github.com/mschoch/smat/smat.go @@ -0,0 +1,161 @@ +// Copyright (c) 2016 Marty Schoch + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an "AS +// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language +// governing permissions and limitations under the License. + +package smat + +import ( + "bufio" + "bytes" + "fmt" + "io" + "io/ioutil" + "log" + "math/rand" +) + +// Logger is a configurable logger used by this package +// by default output is discarded +var Logger = log.New(ioutil.Discard, "smat ", log.LstdFlags) + +// Context is a container for any user state +type Context interface{} + +// State is a function which describes which action to perform in the event +// that a particular byte is seen +type State func(next byte) ActionID + +// PercentAction describes the frequency with which an action should occur +// for example: Action{Percent:10, Action:DonateMoney} means that 10% of +// the time you should donate money. +type PercentAction struct { + Percent int + Action ActionID +} + +// Action is any function which returns the next state to transition to +// it can optionally mutate the provided context object +// if any error occurs, it may return an error which will abort execution +type Action func(Context) (State, error) + +// ActionID is a unique identifier for an action +type ActionID int + +// NopAction does nothing and simply continues to the next input +var NopAction ActionID = -1 + +// ActionMap is a mapping form ActionID to Action +type ActionMap map[ActionID]Action + +func (a ActionMap) findSetupTeardown(setup, teardown ActionID) (Action, Action, error) { + setupFunc, ok := a[setup] + if !ok { + return nil, nil, ErrSetupMissing + } + teardownFunc, ok := a[teardown] + if !ok { + return nil, nil, ErrTeardownMissing + } + return setupFunc, teardownFunc, nil +} + +// Fuzz runs the fuzzing state machine with the provided context +// first, the setup action is executed unconditionally +// the start state is determined by this action +// actionMap is a lookup table for all actions +// the data byte slice determines all future state transitions +// finally, the teardown action is executed unconditionally for cleanup +func Fuzz(ctx Context, setup, teardown ActionID, actionMap ActionMap, data []byte) int { + reader := bytes.NewReader(data) + err := runReader(ctx, setup, teardown, actionMap, reader, nil) + if err != nil { + panic(err) + } + return 1 +} + +// Longevity runs the state machine with the provided context +// first, the setup action is executed unconditionally +// the start state is determined by this action +// actionMap is a lookup table for all actions +// random bytes are generated to determine all future state transitions +// finally, the teardown action is executed unconditionally for cleanup +func Longevity(ctx Context, setup, teardown ActionID, actionMap ActionMap, seed int64, closeChan chan struct{}) error { + source := rand.NewSource(seed) + return runReader(ctx, setup, teardown, actionMap, rand.New(source), closeChan) +} + +var ( + // ErrSetupMissing is returned when the setup action cannot be found + ErrSetupMissing = fmt.Errorf("setup action missing") + // ErrTeardownMissing is returned when the teardown action cannot be found + ErrTeardownMissing = fmt.Errorf("teardown action missing") + // ErrClosed is returned when the closeChan was closed to cancel the op + ErrClosed = fmt.Errorf("closed") + // ErrActionNotPossible is returned when an action is encountered in a + // FuzzCase that is not possible in the current state + ErrActionNotPossible = fmt.Errorf("action not possible in state") +) + +func runReader(ctx Context, setup, teardown ActionID, actionMap ActionMap, r io.Reader, closeChan chan struct{}) error { + setupFunc, teardownFunc, err := actionMap.findSetupTeardown(setup, teardown) + if err != nil { + return err + } + Logger.Printf("invoking setup action") + state, err := setupFunc(ctx) + if err != nil { + return err + } + defer func() { + Logger.Printf("invoking teardown action") + _, _ = teardownFunc(ctx) + }() + + reader := bufio.NewReader(r) + for next, err := reader.ReadByte(); err == nil; next, err = reader.ReadByte() { + select { + case <-closeChan: + return ErrClosed + default: + actionID := state(next) + action, ok := actionMap[actionID] + if !ok { + Logger.Printf("no such action defined, continuing") + continue + } + Logger.Printf("invoking action - %d", actionID) + state, err = action(ctx) + if err != nil { + Logger.Printf("it was action %d that returned err %v", actionID, err) + return err + } + } + } + return err +} + +// PercentExecute interprets the next byte as a random value and normalizes it +// to values 0-99, it then looks to see which action should be execued based +// on the action distributions +func PercentExecute(next byte, pas ...PercentAction) ActionID { + percent := int(99 * int(next) / 255) + + sofar := 0 + for _, pa := range pas { + sofar = sofar + pa.Percent + if percent < sofar { + return pa.Action + } + + } + return NopAction +} diff --git a/vendor/github.com/philhofer/fwd/LICENSE.md b/vendor/github.com/philhofer/fwd/LICENSE.md new file mode 100644 index 0000000000..1ac6a81f6a --- /dev/null +++ b/vendor/github.com/philhofer/fwd/LICENSE.md @@ -0,0 +1,7 @@ +Copyright (c) 2014-2015, Philip Hofer + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file diff --git a/vendor/github.com/philhofer/fwd/README.md b/vendor/github.com/philhofer/fwd/README.md new file mode 100644 index 0000000000..38349af34d --- /dev/null +++ b/vendor/github.com/philhofer/fwd/README.md @@ -0,0 +1,315 @@ + +# fwd + import "github.com/philhofer/fwd" + +The `fwd` package provides a buffered reader +and writer. Each has methods that help improve +the encoding/decoding performance of some binary +protocols. + +The `fwd.Writer` and `fwd.Reader` type provide similar +functionality to their counterparts in `bufio`, plus +a few extra utility methods that simplify read-ahead +and write-ahead. I wrote this package to improve serialization +performance for <a href="http://github.com/tinylib/msgp">http://github.com/tinylib/msgp</a>, +where it provided about a 2x speedup over `bufio` for certain +workloads. However, care must be taken to understand the semantics of the +extra methods provided by this package, as they allow +the user to access and manipulate the buffer memory +directly. + +The extra methods for `fwd.Reader` are `Peek`, `Skip` +and `Next`. `(*fwd.Reader).Peek`, unlike `(*bufio.Reader).Peek`, +will re-allocate the read buffer in order to accommodate arbitrarily +large read-ahead. `(*fwd.Reader).Skip` skips the next `n` bytes +in the stream, and uses the `io.Seeker` interface if the underlying +stream implements it. `(*fwd.Reader).Next` returns a slice pointing +to the next `n` bytes in the read buffer (like `Peek`), but also +increments the read position. This allows users to process streams +in arbitrary block sizes without having to manage appropriately-sized +slices. Additionally, obviating the need to copy the data from the +buffer to another location in memory can improve performance dramatically +in CPU-bound applications. + +`fwd.Writer` only has one extra method, which is `(*fwd.Writer).Next`, which +returns a slice pointing to the next `n` bytes of the writer, and increments +the write position by the length of the returned slice. This allows users +to write directly to the end of the buffer. + + + + +## Constants +``` go +const ( + // DefaultReaderSize is the default size of the read buffer + DefaultReaderSize = 2048 +) +``` +``` go +const ( + // DefaultWriterSize is the + // default write buffer size. + DefaultWriterSize = 2048 +) +``` + + + +## type Reader +``` go +type Reader struct { + // contains filtered or unexported fields +} +``` +Reader is a buffered look-ahead reader + + + + + + + + + +### func NewReader +``` go +func NewReader(r io.Reader) *Reader +``` +NewReader returns a new *Reader that reads from 'r' + + +### func NewReaderSize +``` go +func NewReaderSize(r io.Reader, n int) *Reader +``` +NewReaderSize returns a new *Reader that +reads from 'r' and has a buffer size 'n' + + + + +### func (\*Reader) BufferSize +``` go +func (r *Reader) BufferSize() int +``` +BufferSize returns the total size of the buffer + + + +### func (\*Reader) Buffered +``` go +func (r *Reader) Buffered() int +``` +Buffered returns the number of bytes currently in the buffer + + + +### func (\*Reader) Next +``` go +func (r *Reader) Next(n int) ([]byte, error) +``` +Next returns the next 'n' bytes in the stream. +Unlike Peek, Next advances the reader position. +The returned bytes point to the same +data as the buffer, so the slice is +only valid until the next reader method call. +An EOF is considered an unexpected error. +If an the returned slice is less than the +length asked for, an error will be returned, +and the reader position will not be incremented. + + + +### func (\*Reader) Peek +``` go +func (r *Reader) Peek(n int) ([]byte, error) +``` +Peek returns the next 'n' buffered bytes, +reading from the underlying reader if necessary. +It will only return a slice shorter than 'n' bytes +if it also returns an error. Peek does not advance +the reader. EOF errors are *not* returned as +io.ErrUnexpectedEOF. + + + +### func (\*Reader) Read +``` go +func (r *Reader) Read(b []byte) (int, error) +``` +Read implements `io.Reader` + + + +### func (\*Reader) ReadByte +``` go +func (r *Reader) ReadByte() (byte, error) +``` +ReadByte implements `io.ByteReader` + + + +### func (\*Reader) ReadFull +``` go +func (r *Reader) ReadFull(b []byte) (int, error) +``` +ReadFull attempts to read len(b) bytes into +'b'. It returns the number of bytes read into +'b', and an error if it does not return len(b). +EOF is considered an unexpected error. + + + +### func (\*Reader) Reset +``` go +func (r *Reader) Reset(rd io.Reader) +``` +Reset resets the underlying reader +and the read buffer. + + + +### func (\*Reader) Skip +``` go +func (r *Reader) Skip(n int) (int, error) +``` +Skip moves the reader forward 'n' bytes. +Returns the number of bytes skipped and any +errors encountered. It is analogous to Seek(n, 1). +If the underlying reader implements io.Seeker, then +that method will be used to skip forward. + +If the reader encounters +an EOF before skipping 'n' bytes, it +returns io.ErrUnexpectedEOF. If the +underlying reader implements io.Seeker, then +those rules apply instead. (Many implementations +will not return `io.EOF` until the next call +to Read.) + + + +### func (\*Reader) WriteTo +``` go +func (r *Reader) WriteTo(w io.Writer) (int64, error) +``` +WriteTo implements `io.WriterTo` + + + +## type Writer +``` go +type Writer struct { + // contains filtered or unexported fields +} +``` +Writer is a buffered writer + + + + + + + + + +### func NewWriter +``` go +func NewWriter(w io.Writer) *Writer +``` +NewWriter returns a new writer +that writes to 'w' and has a buffer +that is `DefaultWriterSize` bytes. + + +### func NewWriterSize +``` go +func NewWriterSize(w io.Writer, size int) *Writer +``` +NewWriterSize returns a new writer +that writes to 'w' and has a buffer +that is 'size' bytes. + + + + +### func (\*Writer) BufferSize +``` go +func (w *Writer) BufferSize() int +``` +BufferSize returns the maximum size of the buffer. + + + +### func (\*Writer) Buffered +``` go +func (w *Writer) Buffered() int +``` +Buffered returns the number of buffered bytes +in the reader. + + + +### func (\*Writer) Flush +``` go +func (w *Writer) Flush() error +``` +Flush flushes any buffered bytes +to the underlying writer. + + + +### func (\*Writer) Next +``` go +func (w *Writer) Next(n int) ([]byte, error) +``` +Next returns the next 'n' free bytes +in the write buffer, flushing the writer +as necessary. Next will return `io.ErrShortBuffer` +if 'n' is greater than the size of the write buffer. +Calls to 'next' increment the write position by +the size of the returned buffer. + + + +### func (\*Writer) ReadFrom +``` go +func (w *Writer) ReadFrom(r io.Reader) (int64, error) +``` +ReadFrom implements `io.ReaderFrom` + + + +### func (\*Writer) Write +``` go +func (w *Writer) Write(p []byte) (int, error) +``` +Write implements `io.Writer` + + + +### func (\*Writer) WriteByte +``` go +func (w *Writer) WriteByte(b byte) error +``` +WriteByte implements `io.ByteWriter` + + + +### func (\*Writer) WriteString +``` go +func (w *Writer) WriteString(s string) (int, error) +``` +WriteString is analogous to Write, but it takes a string. + + + + + + + + + +- - - +Generated by [godoc2md](http://godoc.org/github.com/davecheney/godoc2md)
\ No newline at end of file diff --git a/vendor/github.com/philhofer/fwd/reader.go b/vendor/github.com/philhofer/fwd/reader.go new file mode 100644 index 0000000000..75be62ab09 --- /dev/null +++ b/vendor/github.com/philhofer/fwd/reader.go @@ -0,0 +1,383 @@ +// The `fwd` package provides a buffered reader +// and writer. Each has methods that help improve +// the encoding/decoding performance of some binary +// protocols. +// +// The `fwd.Writer` and `fwd.Reader` type provide similar +// functionality to their counterparts in `bufio`, plus +// a few extra utility methods that simplify read-ahead +// and write-ahead. I wrote this package to improve serialization +// performance for http://github.com/tinylib/msgp, +// where it provided about a 2x speedup over `bufio` for certain +// workloads. However, care must be taken to understand the semantics of the +// extra methods provided by this package, as they allow +// the user to access and manipulate the buffer memory +// directly. +// +// The extra methods for `fwd.Reader` are `Peek`, `Skip` +// and `Next`. `(*fwd.Reader).Peek`, unlike `(*bufio.Reader).Peek`, +// will re-allocate the read buffer in order to accommodate arbitrarily +// large read-ahead. `(*fwd.Reader).Skip` skips the next `n` bytes +// in the stream, and uses the `io.Seeker` interface if the underlying +// stream implements it. `(*fwd.Reader).Next` returns a slice pointing +// to the next `n` bytes in the read buffer (like `Peek`), but also +// increments the read position. This allows users to process streams +// in arbitrary block sizes without having to manage appropriately-sized +// slices. Additionally, obviating the need to copy the data from the +// buffer to another location in memory can improve performance dramatically +// in CPU-bound applications. +// +// `fwd.Writer` only has one extra method, which is `(*fwd.Writer).Next`, which +// returns a slice pointing to the next `n` bytes of the writer, and increments +// the write position by the length of the returned slice. This allows users +// to write directly to the end of the buffer. +// +package fwd + +import "io" + +const ( + // DefaultReaderSize is the default size of the read buffer + DefaultReaderSize = 2048 + + // minimum read buffer; straight from bufio + minReaderSize = 16 +) + +// NewReader returns a new *Reader that reads from 'r' +func NewReader(r io.Reader) *Reader { + return NewReaderSize(r, DefaultReaderSize) +} + +// NewReaderSize returns a new *Reader that +// reads from 'r' and has a buffer size 'n' +func NewReaderSize(r io.Reader, n int) *Reader { + rd := &Reader{ + r: r, + data: make([]byte, 0, max(minReaderSize, n)), + } + if s, ok := r.(io.Seeker); ok { + rd.rs = s + } + return rd +} + +// Reader is a buffered look-ahead reader +type Reader struct { + r io.Reader // underlying reader + + // data[n:len(data)] is buffered data; data[len(data):cap(data)] is free buffer space + data []byte // data + n int // read offset + state error // last read error + + // if the reader past to NewReader was + // also an io.Seeker, this is non-nil + rs io.Seeker +} + +// Reset resets the underlying reader +// and the read buffer. +func (r *Reader) Reset(rd io.Reader) { + r.r = rd + r.data = r.data[0:0] + r.n = 0 + r.state = nil + if s, ok := rd.(io.Seeker); ok { + r.rs = s + } else { + r.rs = nil + } +} + +// more() does one read on the underlying reader +func (r *Reader) more() { + // move data backwards so that + // the read offset is 0; this way + // we can supply the maximum number of + // bytes to the reader + if r.n != 0 { + if r.n < len(r.data) { + r.data = r.data[:copy(r.data[0:], r.data[r.n:])] + } else { + r.data = r.data[:0] + } + r.n = 0 + } + var a int + a, r.state = r.r.Read(r.data[len(r.data):cap(r.data)]) + if a == 0 && r.state == nil { + r.state = io.ErrNoProgress + return + } else if a > 0 && r.state == io.EOF { + // discard the io.EOF if we read more than 0 bytes. + // the next call to Read should return io.EOF again. + r.state = nil + } + r.data = r.data[:len(r.data)+a] +} + +// pop error +func (r *Reader) err() (e error) { + e, r.state = r.state, nil + return +} + +// pop error; EOF -> io.ErrUnexpectedEOF +func (r *Reader) noEOF() (e error) { + e, r.state = r.state, nil + if e == io.EOF { + e = io.ErrUnexpectedEOF + } + return +} + +// buffered bytes +func (r *Reader) buffered() int { return len(r.data) - r.n } + +// Buffered returns the number of bytes currently in the buffer +func (r *Reader) Buffered() int { return len(r.data) - r.n } + +// BufferSize returns the total size of the buffer +func (r *Reader) BufferSize() int { return cap(r.data) } + +// Peek returns the next 'n' buffered bytes, +// reading from the underlying reader if necessary. +// It will only return a slice shorter than 'n' bytes +// if it also returns an error. Peek does not advance +// the reader. EOF errors are *not* returned as +// io.ErrUnexpectedEOF. +func (r *Reader) Peek(n int) ([]byte, error) { + // in the degenerate case, + // we may need to realloc + // (the caller asked for more + // bytes than the size of the buffer) + if cap(r.data) < n { + old := r.data[r.n:] + r.data = make([]byte, n+r.buffered()) + r.data = r.data[:copy(r.data, old)] + r.n = 0 + } + + // keep filling until + // we hit an error or + // read enough bytes + for r.buffered() < n && r.state == nil { + r.more() + } + + // we must have hit an error + if r.buffered() < n { + return r.data[r.n:], r.err() + } + + return r.data[r.n : r.n+n], nil +} + +// Skip moves the reader forward 'n' bytes. +// Returns the number of bytes skipped and any +// errors encountered. It is analogous to Seek(n, 1). +// If the underlying reader implements io.Seeker, then +// that method will be used to skip forward. +// +// If the reader encounters +// an EOF before skipping 'n' bytes, it +// returns io.ErrUnexpectedEOF. If the +// underlying reader implements io.Seeker, then +// those rules apply instead. (Many implementations +// will not return `io.EOF` until the next call +// to Read.) +func (r *Reader) Skip(n int) (int, error) { + + // fast path + if r.buffered() >= n { + r.n += n + return n, nil + } + + // use seeker implementation + // if we can + if r.rs != nil { + return r.skipSeek(n) + } + + // loop on filling + // and then erasing + o := n + for r.buffered() < n && r.state == nil { + r.more() + // we can skip forward + // up to r.buffered() bytes + step := min(r.buffered(), n) + r.n += step + n -= step + } + // at this point, n should be + // 0 if everything went smoothly + return o - n, r.noEOF() +} + +// Next returns the next 'n' bytes in the stream. +// Unlike Peek, Next advances the reader position. +// The returned bytes point to the same +// data as the buffer, so the slice is +// only valid until the next reader method call. +// An EOF is considered an unexpected error. +// If an the returned slice is less than the +// length asked for, an error will be returned, +// and the reader position will not be incremented. +func (r *Reader) Next(n int) ([]byte, error) { + + // in case the buffer is too small + if cap(r.data) < n { + old := r.data[r.n:] + r.data = make([]byte, n+r.buffered()) + r.data = r.data[:copy(r.data, old)] + r.n = 0 + } + + // fill at least 'n' bytes + for r.buffered() < n && r.state == nil { + r.more() + } + + if r.buffered() < n { + return r.data[r.n:], r.noEOF() + } + out := r.data[r.n : r.n+n] + r.n += n + return out, nil +} + +// skipSeek uses the io.Seeker to seek forward. +// only call this function when n > r.buffered() +func (r *Reader) skipSeek(n int) (int, error) { + o := r.buffered() + // first, clear buffer + n -= o + r.n = 0 + r.data = r.data[:0] + + // then seek forward remaning bytes + i, err := r.rs.Seek(int64(n), 1) + return int(i) + o, err +} + +// Read implements `io.Reader` +func (r *Reader) Read(b []byte) (int, error) { + // if we have data in the buffer, just + // return that. + if r.buffered() != 0 { + x := copy(b, r.data[r.n:]) + r.n += x + return x, nil + } + var n int + // we have no buffered data; determine + // whether or not to buffer or call + // the underlying reader directly + if len(b) >= cap(r.data) { + n, r.state = r.r.Read(b) + } else { + r.more() + n = copy(b, r.data) + r.n = n + } + if n == 0 { + return 0, r.err() + } + return n, nil +} + +// ReadFull attempts to read len(b) bytes into +// 'b'. It returns the number of bytes read into +// 'b', and an error if it does not return len(b). +// EOF is considered an unexpected error. +func (r *Reader) ReadFull(b []byte) (int, error) { + var n int // read into b + var nn int // scratch + l := len(b) + // either read buffered data, + // or read directly for the underlying + // buffer, or fetch more buffered data. + for n < l && r.state == nil { + if r.buffered() != 0 { + nn = copy(b[n:], r.data[r.n:]) + n += nn + r.n += nn + } else if l-n > cap(r.data) { + nn, r.state = r.r.Read(b[n:]) + n += nn + } else { + r.more() + } + } + if n < l { + return n, r.noEOF() + } + return n, nil +} + +// ReadByte implements `io.ByteReader` +func (r *Reader) ReadByte() (byte, error) { + for r.buffered() < 1 && r.state == nil { + r.more() + } + if r.buffered() < 1 { + return 0, r.err() + } + b := r.data[r.n] + r.n++ + return b, nil +} + +// WriteTo implements `io.WriterTo` +func (r *Reader) WriteTo(w io.Writer) (int64, error) { + var ( + i int64 + ii int + err error + ) + // first, clear buffer + if r.buffered() > 0 { + ii, err = w.Write(r.data[r.n:]) + i += int64(ii) + if err != nil { + return i, err + } + r.data = r.data[0:0] + r.n = 0 + } + for r.state == nil { + // here we just do + // 1:1 reads and writes + r.more() + if r.buffered() > 0 { + ii, err = w.Write(r.data) + i += int64(ii) + if err != nil { + return i, err + } + r.data = r.data[0:0] + r.n = 0 + } + } + if r.state != io.EOF { + return i, r.err() + } + return i, nil +} + +func min(a int, b int) int { + if a < b { + return a + } + return b +} + +func max(a int, b int) int { + if a < b { + return b + } + return a +} diff --git a/vendor/github.com/philhofer/fwd/writer.go b/vendor/github.com/philhofer/fwd/writer.go new file mode 100644 index 0000000000..2dc392a91b --- /dev/null +++ b/vendor/github.com/philhofer/fwd/writer.go @@ -0,0 +1,224 @@ +package fwd + +import "io" + +const ( + // DefaultWriterSize is the + // default write buffer size. + DefaultWriterSize = 2048 + + minWriterSize = minReaderSize +) + +// Writer is a buffered writer +type Writer struct { + w io.Writer // writer + buf []byte // 0:len(buf) is bufered data +} + +// NewWriter returns a new writer +// that writes to 'w' and has a buffer +// that is `DefaultWriterSize` bytes. +func NewWriter(w io.Writer) *Writer { + if wr, ok := w.(*Writer); ok { + return wr + } + return &Writer{ + w: w, + buf: make([]byte, 0, DefaultWriterSize), + } +} + +// NewWriterSize returns a new writer +// that writes to 'w' and has a buffer +// that is 'size' bytes. +func NewWriterSize(w io.Writer, size int) *Writer { + if wr, ok := w.(*Writer); ok && cap(wr.buf) >= size { + return wr + } + return &Writer{ + w: w, + buf: make([]byte, 0, max(size, minWriterSize)), + } +} + +// Buffered returns the number of buffered bytes +// in the reader. +func (w *Writer) Buffered() int { return len(w.buf) } + +// BufferSize returns the maximum size of the buffer. +func (w *Writer) BufferSize() int { return cap(w.buf) } + +// Flush flushes any buffered bytes +// to the underlying writer. +func (w *Writer) Flush() error { + l := len(w.buf) + if l > 0 { + n, err := w.w.Write(w.buf) + + // if we didn't write the whole + // thing, copy the unwritten + // bytes to the beginnning of the + // buffer. + if n < l && n > 0 { + w.pushback(n) + if err == nil { + err = io.ErrShortWrite + } + } + if err != nil { + return err + } + w.buf = w.buf[:0] + return nil + } + return nil +} + +// Write implements `io.Writer` +func (w *Writer) Write(p []byte) (int, error) { + c, l, ln := cap(w.buf), len(w.buf), len(p) + avail := c - l + + // requires flush + if avail < ln { + if err := w.Flush(); err != nil { + return 0, err + } + l = len(w.buf) + } + // too big to fit in buffer; + // write directly to w.w + if c < ln { + return w.w.Write(p) + } + + // grow buf slice; copy; return + w.buf = w.buf[:l+ln] + return copy(w.buf[l:], p), nil +} + +// WriteString is analogous to Write, but it takes a string. +func (w *Writer) WriteString(s string) (int, error) { + c, l, ln := cap(w.buf), len(w.buf), len(s) + avail := c - l + + // requires flush + if avail < ln { + if err := w.Flush(); err != nil { + return 0, err + } + l = len(w.buf) + } + // too big to fit in buffer; + // write directly to w.w + // + // yes, this is unsafe. *but* + // io.Writer is not allowed + // to mutate its input or + // maintain a reference to it, + // per the spec in package io. + // + // plus, if the string is really + // too big to fit in the buffer, then + // creating a copy to write it is + // expensive (and, strictly speaking, + // unnecessary) + if c < ln { + return w.w.Write(unsafestr(s)) + } + + // grow buf slice; copy; return + w.buf = w.buf[:l+ln] + return copy(w.buf[l:], s), nil +} + +// WriteByte implements `io.ByteWriter` +func (w *Writer) WriteByte(b byte) error { + if len(w.buf) == cap(w.buf) { + if err := w.Flush(); err != nil { + return err + } + } + w.buf = append(w.buf, b) + return nil +} + +// Next returns the next 'n' free bytes +// in the write buffer, flushing the writer +// as necessary. Next will return `io.ErrShortBuffer` +// if 'n' is greater than the size of the write buffer. +// Calls to 'next' increment the write position by +// the size of the returned buffer. +func (w *Writer) Next(n int) ([]byte, error) { + c, l := cap(w.buf), len(w.buf) + if n > c { + return nil, io.ErrShortBuffer + } + avail := c - l + if avail < n { + if err := w.Flush(); err != nil { + return nil, err + } + l = len(w.buf) + } + w.buf = w.buf[:l+n] + return w.buf[l:], nil +} + +// take the bytes from w.buf[n:len(w.buf)] +// and put them at the beginning of w.buf, +// and resize to the length of the copied segment. +func (w *Writer) pushback(n int) { + w.buf = w.buf[:copy(w.buf, w.buf[n:])] +} + +// ReadFrom implements `io.ReaderFrom` +func (w *Writer) ReadFrom(r io.Reader) (int64, error) { + // anticipatory flush + if err := w.Flush(); err != nil { + return 0, err + } + + w.buf = w.buf[0:cap(w.buf)] // expand buffer + + var nn int64 // written + var err error // error + var x int // read + + // 1:1 reads and writes + for err == nil { + x, err = r.Read(w.buf) + if x > 0 { + n, werr := w.w.Write(w.buf[:x]) + nn += int64(n) + + if err != nil { + if n < x && n > 0 { + w.pushback(n - x) + } + return nn, werr + } + if n < x { + w.pushback(n - x) + return nn, io.ErrShortWrite + } + } else if err == nil { + err = io.ErrNoProgress + break + } + } + if err != io.EOF { + return nn, err + } + + // we only clear here + // because we are sure + // the writes have + // succeeded. otherwise, + // we retain the data in case + // future writes succeed. + w.buf = w.buf[0:0] + + return nn, nil +} diff --git a/vendor/github.com/philhofer/fwd/writer_unsafe.go b/vendor/github.com/philhofer/fwd/writer_unsafe.go new file mode 100644 index 0000000000..a0bf453b39 --- /dev/null +++ b/vendor/github.com/philhofer/fwd/writer_unsafe.go @@ -0,0 +1,18 @@ +// +build !appengine + +package fwd + +import ( + "reflect" + "unsafe" +) + +// unsafe cast string as []byte +func unsafestr(b string) []byte { + l := len(b) + return *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{ + Len: l, + Cap: l, + Data: (*reflect.StringHeader)(unsafe.Pointer(&b)).Data, + })) +} diff --git a/vendor/github.com/tinylib/msgp/LICENSE b/vendor/github.com/tinylib/msgp/LICENSE new file mode 100644 index 0000000000..14d60424e8 --- /dev/null +++ b/vendor/github.com/tinylib/msgp/LICENSE @@ -0,0 +1,8 @@ +Copyright (c) 2014 Philip Hofer +Portions Copyright (c) 2009 The Go Authors (license at http://golang.org) where indicated + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file diff --git a/vendor/github.com/tinylib/msgp/msgp/advise_linux.go b/vendor/github.com/tinylib/msgp/msgp/advise_linux.go new file mode 100644 index 0000000000..6c6bb37a5f --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/advise_linux.go @@ -0,0 +1,24 @@ +// +build linux,!appengine + +package msgp + +import ( + "os" + "syscall" +) + +func adviseRead(mem []byte) { + syscall.Madvise(mem, syscall.MADV_SEQUENTIAL|syscall.MADV_WILLNEED) +} + +func adviseWrite(mem []byte) { + syscall.Madvise(mem, syscall.MADV_SEQUENTIAL) +} + +func fallocate(f *os.File, sz int64) error { + err := syscall.Fallocate(int(f.Fd()), 0, 0, sz) + if err == syscall.ENOTSUP { + return f.Truncate(sz) + } + return err +} diff --git a/vendor/github.com/tinylib/msgp/msgp/advise_other.go b/vendor/github.com/tinylib/msgp/msgp/advise_other.go new file mode 100644 index 0000000000..da65ea5412 --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/advise_other.go @@ -0,0 +1,17 @@ +// +build !linux appengine + +package msgp + +import ( + "os" +) + +// TODO: darwin, BSD support + +func adviseRead(mem []byte) {} + +func adviseWrite(mem []byte) {} + +func fallocate(f *os.File, sz int64) error { + return f.Truncate(sz) +} diff --git a/vendor/github.com/tinylib/msgp/msgp/circular.go b/vendor/github.com/tinylib/msgp/msgp/circular.go new file mode 100644 index 0000000000..a0434c7ea1 --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/circular.go @@ -0,0 +1,39 @@ +package msgp + +type timer interface { + StartTimer() + StopTimer() +} + +// EndlessReader is an io.Reader +// that loops over the same data +// endlessly. It is used for benchmarking. +type EndlessReader struct { + tb timer + data []byte + offset int +} + +// NewEndlessReader returns a new endless reader +func NewEndlessReader(b []byte, tb timer) *EndlessReader { + return &EndlessReader{tb: tb, data: b, offset: 0} +} + +// Read implements io.Reader. In practice, it +// always returns (len(p), nil), although it +// fills the supplied slice while the benchmark +// timer is stopped. +func (c *EndlessReader) Read(p []byte) (int, error) { + c.tb.StopTimer() + var n int + l := len(p) + m := len(c.data) + for n < l { + nn := copy(p[n:], c.data[c.offset:]) + n += nn + c.offset += nn + c.offset %= m + } + c.tb.StartTimer() + return n, nil +} diff --git a/vendor/github.com/tinylib/msgp/msgp/defs.go b/vendor/github.com/tinylib/msgp/msgp/defs.go new file mode 100644 index 0000000000..c634eef1df --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/defs.go @@ -0,0 +1,142 @@ +// This package is the support library for the msgp code generator (http://github.com/tinylib/msgp). +// +// This package defines the utilites used by the msgp code generator for encoding and decoding MessagePack +// from []byte and io.Reader/io.Writer types. Much of this package is devoted to helping the msgp code +// generator implement the Marshaler/Unmarshaler and Encodable/Decodable interfaces. +// +// This package defines four "families" of functions: +// - AppendXxxx() appends an object to a []byte in MessagePack encoding. +// - ReadXxxxBytes() reads an object from a []byte and returns the remaining bytes. +// - (*Writer).WriteXxxx() writes an object to the buffered *Writer type. +// - (*Reader).ReadXxxx() reads an object from a buffered *Reader type. +// +// Once a type has satisfied the `Encodable` and `Decodable` interfaces, +// it can be written and read from arbitrary `io.Writer`s and `io.Reader`s using +// msgp.Encode(io.Writer, msgp.Encodable) +// and +// msgp.Decode(io.Reader, msgp.Decodable) +// +// There are also methods for converting MessagePack to JSON without +// an explicit de-serialization step. +// +// For additional tips, tricks, and gotchas, please visit +// the wiki at http://github.com/tinylib/msgp +package msgp + +const last4 = 0x0f +const first4 = 0xf0 +const last5 = 0x1f +const first3 = 0xe0 +const last7 = 0x7f + +func isfixint(b byte) bool { + return b>>7 == 0 +} + +func isnfixint(b byte) bool { + return b&first3 == mnfixint +} + +func isfixmap(b byte) bool { + return b&first4 == mfixmap +} + +func isfixarray(b byte) bool { + return b&first4 == mfixarray +} + +func isfixstr(b byte) bool { + return b&first3 == mfixstr +} + +func wfixint(u uint8) byte { + return u & last7 +} + +func rfixint(b byte) uint8 { + return b +} + +func wnfixint(i int8) byte { + return byte(i) | mnfixint +} + +func rnfixint(b byte) int8 { + return int8(b) +} + +func rfixmap(b byte) uint8 { + return b & last4 +} + +func wfixmap(u uint8) byte { + return mfixmap | (u & last4) +} + +func rfixstr(b byte) uint8 { + return b & last5 +} + +func wfixstr(u uint8) byte { + return (u & last5) | mfixstr +} + +func rfixarray(b byte) uint8 { + return (b & last4) +} + +func wfixarray(u uint8) byte { + return (u & last4) | mfixarray +} + +// These are all the byte +// prefixes defined by the +// msgpack standard +const ( + // 0XXXXXXX + mfixint uint8 = 0x00 + + // 111XXXXX + mnfixint uint8 = 0xe0 + + // 1000XXXX + mfixmap uint8 = 0x80 + + // 1001XXXX + mfixarray uint8 = 0x90 + + // 101XXXXX + mfixstr uint8 = 0xa0 + + mnil uint8 = 0xc0 + mfalse uint8 = 0xc2 + mtrue uint8 = 0xc3 + mbin8 uint8 = 0xc4 + mbin16 uint8 = 0xc5 + mbin32 uint8 = 0xc6 + mext8 uint8 = 0xc7 + mext16 uint8 = 0xc8 + mext32 uint8 = 0xc9 + mfloat32 uint8 = 0xca + mfloat64 uint8 = 0xcb + muint8 uint8 = 0xcc + muint16 uint8 = 0xcd + muint32 uint8 = 0xce + muint64 uint8 = 0xcf + mint8 uint8 = 0xd0 + mint16 uint8 = 0xd1 + mint32 uint8 = 0xd2 + mint64 uint8 = 0xd3 + mfixext1 uint8 = 0xd4 + mfixext2 uint8 = 0xd5 + mfixext4 uint8 = 0xd6 + mfixext8 uint8 = 0xd7 + mfixext16 uint8 = 0xd8 + mstr8 uint8 = 0xd9 + mstr16 uint8 = 0xda + mstr32 uint8 = 0xdb + marray16 uint8 = 0xdc + marray32 uint8 = 0xdd + mmap16 uint8 = 0xde + mmap32 uint8 = 0xdf +) diff --git a/vendor/github.com/tinylib/msgp/msgp/edit.go b/vendor/github.com/tinylib/msgp/msgp/edit.go new file mode 100644 index 0000000000..b473a6f668 --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/edit.go @@ -0,0 +1,242 @@ +package msgp + +import ( + "math" +) + +// Locate returns a []byte pointing to the field +// in a messagepack map with the provided key. (The returned []byte +// points to a sub-slice of 'raw'; Locate does no allocations.) If the +// key doesn't exist in the map, a zero-length []byte will be returned. +func Locate(key string, raw []byte) []byte { + s, n := locate(raw, key) + return raw[s:n] +} + +// Replace takes a key ("key") in a messagepack map ("raw") +// and replaces its value with the one provided and returns +// the new []byte. The returned []byte may point to the same +// memory as "raw". Replace makes no effort to evaluate the validity +// of the contents of 'val'. It may use up to the full capacity of 'raw.' +// Replace returns 'nil' if the field doesn't exist or if the object in 'raw' +// is not a map. +func Replace(key string, raw []byte, val []byte) []byte { + start, end := locate(raw, key) + if start == end { + return nil + } + return replace(raw, start, end, val, true) +} + +// CopyReplace works similarly to Replace except that the returned +// byte slice does not point to the same memory as 'raw'. CopyReplace +// returns 'nil' if the field doesn't exist or 'raw' isn't a map. +func CopyReplace(key string, raw []byte, val []byte) []byte { + start, end := locate(raw, key) + if start == end { + return nil + } + return replace(raw, start, end, val, false) +} + +// Remove removes a key-value pair from 'raw'. It returns +// 'raw' unchanged if the key didn't exist. +func Remove(key string, raw []byte) []byte { + start, end := locateKV(raw, key) + if start == end { + return raw + } + raw = raw[:start+copy(raw[start:], raw[end:])] + return resizeMap(raw, -1) +} + +// HasKey returns whether the map in 'raw' has +// a field with key 'key' +func HasKey(key string, raw []byte) bool { + sz, bts, err := ReadMapHeaderBytes(raw) + if err != nil { + return false + } + var field []byte + for i := uint32(0); i < sz; i++ { + field, bts, err = ReadStringZC(bts) + if err != nil { + return false + } + if UnsafeString(field) == key { + return true + } + } + return false +} + +func replace(raw []byte, start int, end int, val []byte, inplace bool) []byte { + ll := end - start // length of segment to replace + lv := len(val) + + if inplace { + extra := lv - ll + + // fastest case: we're doing + // a 1:1 replacement + if extra == 0 { + copy(raw[start:], val) + return raw + + } else if extra < 0 { + // 'val' smaller than replaced value + // copy in place and shift back + + x := copy(raw[start:], val) + y := copy(raw[start+x:], raw[end:]) + return raw[:start+x+y] + + } else if extra < cap(raw)-len(raw) { + // 'val' less than (cap-len) extra bytes + // copy in place and shift forward + raw = raw[0 : len(raw)+extra] + // shift end forward + copy(raw[end+extra:], raw[end:]) + copy(raw[start:], val) + return raw + } + } + + // we have to allocate new space + out := make([]byte, len(raw)+len(val)-ll) + x := copy(out, raw[:start]) + y := copy(out[x:], val) + copy(out[x+y:], raw[end:]) + return out +} + +// locate does a naive O(n) search for the map key; returns start, end +// (returns 0,0 on error) +func locate(raw []byte, key string) (start int, end int) { + var ( + sz uint32 + bts []byte + field []byte + err error + ) + sz, bts, err = ReadMapHeaderBytes(raw) + if err != nil { + return + } + + // loop and locate field + for i := uint32(0); i < sz; i++ { + field, bts, err = ReadStringZC(bts) + if err != nil { + return 0, 0 + } + if UnsafeString(field) == key { + // start location + l := len(raw) + start = l - len(bts) + bts, err = Skip(bts) + if err != nil { + return 0, 0 + } + end = l - len(bts) + return + } + bts, err = Skip(bts) + if err != nil { + return 0, 0 + } + } + return 0, 0 +} + +// locate key AND value +func locateKV(raw []byte, key string) (start int, end int) { + var ( + sz uint32 + bts []byte + field []byte + err error + ) + sz, bts, err = ReadMapHeaderBytes(raw) + if err != nil { + return 0, 0 + } + + for i := uint32(0); i < sz; i++ { + tmp := len(bts) + field, bts, err = ReadStringZC(bts) + if err != nil { + return 0, 0 + } + if UnsafeString(field) == key { + start = len(raw) - tmp + bts, err = Skip(bts) + if err != nil { + return 0, 0 + } + end = len(raw) - len(bts) + return + } + bts, err = Skip(bts) + if err != nil { + return 0, 0 + } + } + return 0, 0 +} + +// delta is delta on map size +func resizeMap(raw []byte, delta int64) []byte { + var sz int64 + switch raw[0] { + case mmap16: + sz = int64(big.Uint16(raw[1:])) + if sz+delta <= math.MaxUint16 { + big.PutUint16(raw[1:], uint16(sz+delta)) + return raw + } + if cap(raw)-len(raw) >= 2 { + raw = raw[0 : len(raw)+2] + copy(raw[5:], raw[3:]) + raw[0] = mmap32 + big.PutUint32(raw[1:], uint32(sz+delta)) + return raw + } + n := make([]byte, 0, len(raw)+5) + n = AppendMapHeader(n, uint32(sz+delta)) + return append(n, raw[3:]...) + + case mmap32: + sz = int64(big.Uint32(raw[1:])) + big.PutUint32(raw[1:], uint32(sz+delta)) + return raw + + default: + sz = int64(rfixmap(raw[0])) + if sz+delta < 16 { + raw[0] = wfixmap(uint8(sz + delta)) + return raw + } else if sz+delta <= math.MaxUint16 { + if cap(raw)-len(raw) >= 2 { + raw = raw[0 : len(raw)+2] + copy(raw[3:], raw[1:]) + raw[0] = mmap16 + big.PutUint16(raw[1:], uint16(sz+delta)) + return raw + } + n := make([]byte, 0, len(raw)+5) + n = AppendMapHeader(n, uint32(sz+delta)) + return append(n, raw[1:]...) + } + if cap(raw)-len(raw) >= 4 { + raw = raw[0 : len(raw)+4] + copy(raw[5:], raw[1:]) + raw[0] = mmap32 + big.PutUint32(raw[1:], uint32(sz+delta)) + return raw + } + n := make([]byte, 0, len(raw)+5) + n = AppendMapHeader(n, uint32(sz+delta)) + return append(n, raw[1:]...) + } +} diff --git a/vendor/github.com/tinylib/msgp/msgp/elsize.go b/vendor/github.com/tinylib/msgp/msgp/elsize.go new file mode 100644 index 0000000000..95762e7eeb --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/elsize.go @@ -0,0 +1,99 @@ +package msgp + +// size of every object on the wire, +// plus type information. gives us +// constant-time type information +// for traversing composite objects. +// +var sizes = [256]bytespec{ + mnil: {size: 1, extra: constsize, typ: NilType}, + mfalse: {size: 1, extra: constsize, typ: BoolType}, + mtrue: {size: 1, extra: constsize, typ: BoolType}, + mbin8: {size: 2, extra: extra8, typ: BinType}, + mbin16: {size: 3, extra: extra16, typ: BinType}, + mbin32: {size: 5, extra: extra32, typ: BinType}, + mext8: {size: 3, extra: extra8, typ: ExtensionType}, + mext16: {size: 4, extra: extra16, typ: ExtensionType}, + mext32: {size: 6, extra: extra32, typ: ExtensionType}, + mfloat32: {size: 5, extra: constsize, typ: Float32Type}, + mfloat64: {size: 9, extra: constsize, typ: Float64Type}, + muint8: {size: 2, extra: constsize, typ: UintType}, + muint16: {size: 3, extra: constsize, typ: UintType}, + muint32: {size: 5, extra: constsize, typ: UintType}, + muint64: {size: 9, extra: constsize, typ: UintType}, + mint8: {size: 2, extra: constsize, typ: IntType}, + mint16: {size: 3, extra: constsize, typ: IntType}, + mint32: {size: 5, extra: constsize, typ: IntType}, + mint64: {size: 9, extra: constsize, typ: IntType}, + mfixext1: {size: 3, extra: constsize, typ: ExtensionType}, + mfixext2: {size: 4, extra: constsize, typ: ExtensionType}, + mfixext4: {size: 6, extra: constsize, typ: ExtensionType}, + mfixext8: {size: 10, extra: constsize, typ: ExtensionType}, + mfixext16: {size: 18, extra: constsize, typ: ExtensionType}, + mstr8: {size: 2, extra: extra8, typ: StrType}, + mstr16: {size: 3, extra: extra16, typ: StrType}, + mstr32: {size: 5, extra: extra32, typ: StrType}, + marray16: {size: 3, extra: array16v, typ: ArrayType}, + marray32: {size: 5, extra: array32v, typ: ArrayType}, + mmap16: {size: 3, extra: map16v, typ: MapType}, + mmap32: {size: 5, extra: map32v, typ: MapType}, +} + +func init() { + // set up fixed fields + + // fixint + for i := mfixint; i < 0x80; i++ { + sizes[i] = bytespec{size: 1, extra: constsize, typ: IntType} + } + + // nfixint + for i := uint16(mnfixint); i < 0x100; i++ { + sizes[uint8(i)] = bytespec{size: 1, extra: constsize, typ: IntType} + } + + // fixstr gets constsize, + // since the prefix yields the size + for i := mfixstr; i < 0xc0; i++ { + sizes[i] = bytespec{size: 1 + rfixstr(i), extra: constsize, typ: StrType} + } + + // fixmap + for i := mfixmap; i < 0x90; i++ { + sizes[i] = bytespec{size: 1, extra: varmode(2 * rfixmap(i)), typ: MapType} + } + + // fixarray + for i := mfixarray; i < 0xa0; i++ { + sizes[i] = bytespec{size: 1, extra: varmode(rfixarray(i)), typ: ArrayType} + } +} + +// a valid bytespsec has +// non-zero 'size' and +// non-zero 'typ' +type bytespec struct { + size uint8 // prefix size information + extra varmode // extra size information + typ Type // type + _ byte // makes bytespec 4 bytes (yes, this matters) +} + +// size mode +// if positive, # elements for composites +type varmode int8 + +const ( + constsize varmode = 0 // constant size (size bytes + uint8(varmode) objects) + extra8 = -1 // has uint8(p[1]) extra bytes + extra16 = -2 // has be16(p[1:]) extra bytes + extra32 = -3 // has be32(p[1:]) extra bytes + map16v = -4 // use map16 + map32v = -5 // use map32 + array16v = -6 // use array16 + array32v = -7 // use array32 +) + +func getType(v byte) Type { + return sizes[v].typ +} diff --git a/vendor/github.com/tinylib/msgp/msgp/errors.go b/vendor/github.com/tinylib/msgp/msgp/errors.go new file mode 100644 index 0000000000..8f197267e4 --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/errors.go @@ -0,0 +1,157 @@ +package msgp + +import ( + "fmt" + "reflect" +) + +var ( + // ErrShortBytes is returned when the + // slice being decoded is too short to + // contain the contents of the message + ErrShortBytes error = errShort{} + + // this error is only returned + // if we reach code that should + // be unreachable + fatal error = errFatal{} +) + +// Error is the interface satisfied +// by all of the errors that originate +// from this package. +type Error interface { + error + + // Resumable returns whether + // or not the error means that + // the stream of data is malformed + // and the information is unrecoverable. + Resumable() bool +} + +type errShort struct{} + +func (e errShort) Error() string { return "msgp: too few bytes left to read object" } +func (e errShort) Resumable() bool { return false } + +type errFatal struct{} + +func (f errFatal) Error() string { return "msgp: fatal decoding error (unreachable code)" } +func (f errFatal) Resumable() bool { return false } + +// ArrayError is an error returned +// when decoding a fix-sized array +// of the wrong size +type ArrayError struct { + Wanted uint32 + Got uint32 +} + +// Error implements the error interface +func (a ArrayError) Error() string { + return fmt.Sprintf("msgp: wanted array of size %d; got %d", a.Wanted, a.Got) +} + +// Resumable is always 'true' for ArrayErrors +func (a ArrayError) Resumable() bool { return true } + +// IntOverflow is returned when a call +// would downcast an integer to a type +// with too few bits to hold its value. +type IntOverflow struct { + Value int64 // the value of the integer + FailedBitsize int // the bit size that the int64 could not fit into +} + +// Error implements the error interface +func (i IntOverflow) Error() string { + return fmt.Sprintf("msgp: %d overflows int%d", i.Value, i.FailedBitsize) +} + +// Resumable is always 'true' for overflows +func (i IntOverflow) Resumable() bool { return true } + +// UintOverflow is returned when a call +// would downcast an unsigned integer to a type +// with too few bits to hold its value +type UintOverflow struct { + Value uint64 // value of the uint + FailedBitsize int // the bit size that couldn't fit the value +} + +// Error implements the error interface +func (u UintOverflow) Error() string { + return fmt.Sprintf("msgp: %d overflows uint%d", u.Value, u.FailedBitsize) +} + +// Resumable is always 'true' for overflows +func (u UintOverflow) Resumable() bool { return true } + +// UintBelowZero is returned when a call +// would cast a signed integer below zero +// to an unsigned integer. +type UintBelowZero struct { + Value int64 // value of the incoming int +} + +// Error implements the error interface +func (u UintBelowZero) Error() string { + return fmt.Sprintf("msgp: attempted to cast int %d to unsigned", u.Value) +} + +// Resumable is always 'true' for overflows +func (u UintBelowZero) Resumable() bool { return true } + +// A TypeError is returned when a particular +// decoding method is unsuitable for decoding +// a particular MessagePack value. +type TypeError struct { + Method Type // Type expected by method + Encoded Type // Type actually encoded +} + +// Error implements the error interface +func (t TypeError) Error() string { + return fmt.Sprintf("msgp: attempted to decode type %q with method for %q", t.Encoded, t.Method) +} + +// Resumable returns 'true' for TypeErrors +func (t TypeError) Resumable() bool { return true } + +// returns either InvalidPrefixError or +// TypeError depending on whether or not +// the prefix is recognized +func badPrefix(want Type, lead byte) error { + t := sizes[lead].typ + if t == InvalidType { + return InvalidPrefixError(lead) + } + return TypeError{Method: want, Encoded: t} +} + +// InvalidPrefixError is returned when a bad encoding +// uses a prefix that is not recognized in the MessagePack standard. +// This kind of error is unrecoverable. +type InvalidPrefixError byte + +// Error implements the error interface +func (i InvalidPrefixError) Error() string { + return fmt.Sprintf("msgp: unrecognized type prefix 0x%x", byte(i)) +} + +// Resumable returns 'false' for InvalidPrefixErrors +func (i InvalidPrefixError) Resumable() bool { return false } + +// ErrUnsupportedType is returned +// when a bad argument is supplied +// to a function that takes `interface{}`. +type ErrUnsupportedType struct { + T reflect.Type +} + +// Error implements error +func (e *ErrUnsupportedType) Error() string { return fmt.Sprintf("msgp: type %q not supported", e.T) } + +// Resumable returns 'true' for ErrUnsupportedType +func (e *ErrUnsupportedType) Resumable() bool { return true } diff --git a/vendor/github.com/tinylib/msgp/msgp/extension.go b/vendor/github.com/tinylib/msgp/msgp/extension.go new file mode 100644 index 0000000000..0b31dcdb7b --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/extension.go @@ -0,0 +1,549 @@ +package msgp + +import ( + "fmt" + "math" +) + +const ( + // Complex64Extension is the extension number used for complex64 + Complex64Extension = 3 + + // Complex128Extension is the extension number used for complex128 + Complex128Extension = 4 + + // TimeExtension is the extension number used for time.Time + TimeExtension = 5 +) + +// our extensions live here +var extensionReg = make(map[int8]func() Extension) + +// RegisterExtension registers extensions so that they +// can be initialized and returned by methods that +// decode `interface{}` values. This should only +// be called during initialization. f() should return +// a newly-initialized zero value of the extension. Keep in +// mind that extensions 3, 4, and 5 are reserved for +// complex64, complex128, and time.Time, respectively, +// and that MessagePack reserves extension types from -127 to -1. +// +// For example, if you wanted to register a user-defined struct: +// +// msgp.RegisterExtension(10, func() msgp.Extension { &MyExtension{} }) +// +// RegisterExtension will panic if you call it multiple times +// with the same 'typ' argument, or if you use a reserved +// type (3, 4, or 5). +func RegisterExtension(typ int8, f func() Extension) { + switch typ { + case Complex64Extension, Complex128Extension, TimeExtension: + panic(fmt.Sprint("msgp: forbidden extension type:", typ)) + } + if _, ok := extensionReg[typ]; ok { + panic(fmt.Sprint("msgp: RegisterExtension() called with typ", typ, "more than once")) + } + extensionReg[typ] = f +} + +// ExtensionTypeError is an error type returned +// when there is a mis-match between an extension type +// and the type encoded on the wire +type ExtensionTypeError struct { + Got int8 + Want int8 +} + +// Error implements the error interface +func (e ExtensionTypeError) Error() string { + return fmt.Sprintf("msgp: error decoding extension: wanted type %d; got type %d", e.Want, e.Got) +} + +// Resumable returns 'true' for ExtensionTypeErrors +func (e ExtensionTypeError) Resumable() bool { return true } + +func errExt(got int8, wanted int8) error { + return ExtensionTypeError{Got: got, Want: wanted} +} + +// Extension is the interface fulfilled +// by types that want to define their +// own binary encoding. +type Extension interface { + // ExtensionType should return + // a int8 that identifies the concrete + // type of the extension. (Types <0 are + // officially reserved by the MessagePack + // specifications.) + ExtensionType() int8 + + // Len should return the length + // of the data to be encoded + Len() int + + // MarshalBinaryTo should copy + // the data into the supplied slice, + // assuming that the slice has length Len() + MarshalBinaryTo([]byte) error + + UnmarshalBinary([]byte) error +} + +// RawExtension implements the Extension interface +type RawExtension struct { + Data []byte + Type int8 +} + +// ExtensionType implements Extension.ExtensionType, and returns r.Type +func (r *RawExtension) ExtensionType() int8 { return r.Type } + +// Len implements Extension.Len, and returns len(r.Data) +func (r *RawExtension) Len() int { return len(r.Data) } + +// MarshalBinaryTo implements Extension.MarshalBinaryTo, +// and returns a copy of r.Data +func (r *RawExtension) MarshalBinaryTo(d []byte) error { + copy(d, r.Data) + return nil +} + +// UnmarshalBinary implements Extension.UnmarshalBinary, +// and sets r.Data to the contents of the provided slice +func (r *RawExtension) UnmarshalBinary(b []byte) error { + if cap(r.Data) >= len(b) { + r.Data = r.Data[0:len(b)] + } else { + r.Data = make([]byte, len(b)) + } + copy(r.Data, b) + return nil +} + +// WriteExtension writes an extension type to the writer +func (mw *Writer) WriteExtension(e Extension) error { + l := e.Len() + var err error + switch l { + case 0: + o, err := mw.require(3) + if err != nil { + return err + } + mw.buf[o] = mext8 + mw.buf[o+1] = 0 + mw.buf[o+2] = byte(e.ExtensionType()) + case 1: + o, err := mw.require(2) + if err != nil { + return err + } + mw.buf[o] = mfixext1 + mw.buf[o+1] = byte(e.ExtensionType()) + case 2: + o, err := mw.require(2) + if err != nil { + return err + } + mw.buf[o] = mfixext2 + mw.buf[o+1] = byte(e.ExtensionType()) + case 4: + o, err := mw.require(2) + if err != nil { + return err + } + mw.buf[o] = mfixext4 + mw.buf[o+1] = byte(e.ExtensionType()) + case 8: + o, err := mw.require(2) + if err != nil { + return err + } + mw.buf[o] = mfixext8 + mw.buf[o+1] = byte(e.ExtensionType()) + case 16: + o, err := mw.require(2) + if err != nil { + return err + } + mw.buf[o] = mfixext16 + mw.buf[o+1] = byte(e.ExtensionType()) + default: + switch { + case l < math.MaxUint8: + o, err := mw.require(3) + if err != nil { + return err + } + mw.buf[o] = mext8 + mw.buf[o+1] = byte(uint8(l)) + mw.buf[o+2] = byte(e.ExtensionType()) + case l < math.MaxUint16: + o, err := mw.require(4) + if err != nil { + return err + } + mw.buf[o] = mext16 + big.PutUint16(mw.buf[o+1:], uint16(l)) + mw.buf[o+3] = byte(e.ExtensionType()) + default: + o, err := mw.require(6) + if err != nil { + return err + } + mw.buf[o] = mext32 + big.PutUint32(mw.buf[o+1:], uint32(l)) + mw.buf[o+5] = byte(e.ExtensionType()) + } + } + // we can only write directly to the + // buffer if we're sure that it + // fits the object + if l <= mw.bufsize() { + o, err := mw.require(l) + if err != nil { + return err + } + return e.MarshalBinaryTo(mw.buf[o:]) + } + // here we create a new buffer + // just large enough for the body + // and save it as the write buffer + err = mw.flush() + if err != nil { + return err + } + buf := make([]byte, l) + err = e.MarshalBinaryTo(buf) + if err != nil { + return err + } + mw.buf = buf + mw.wloc = l + return nil +} + +// peek at the extension type, assuming the next +// kind to be read is Extension +func (m *Reader) peekExtensionType() (int8, error) { + p, err := m.R.Peek(2) + if err != nil { + return 0, err + } + spec := sizes[p[0]] + if spec.typ != ExtensionType { + return 0, badPrefix(ExtensionType, p[0]) + } + if spec.extra == constsize { + return int8(p[1]), nil + } + size := spec.size + p, err = m.R.Peek(int(size)) + if err != nil { + return 0, err + } + return int8(p[size-1]), nil +} + +// peekExtension peeks at the extension encoding type +// (must guarantee at least 1 byte in 'b') +func peekExtension(b []byte) (int8, error) { + spec := sizes[b[0]] + size := spec.size + if spec.typ != ExtensionType { + return 0, badPrefix(ExtensionType, b[0]) + } + if len(b) < int(size) { + return 0, ErrShortBytes + } + // for fixed extensions, + // the type information is in + // the second byte + if spec.extra == constsize { + return int8(b[1]), nil + } + // otherwise, it's in the last + // part of the prefix + return int8(b[size-1]), nil +} + +// ReadExtension reads the next object from the reader +// as an extension. ReadExtension will fail if the next +// object in the stream is not an extension, or if +// e.Type() is not the same as the wire type. +func (m *Reader) ReadExtension(e Extension) (err error) { + var p []byte + p, err = m.R.Peek(2) + if err != nil { + return + } + lead := p[0] + var read int + var off int + switch lead { + case mfixext1: + if int8(p[1]) != e.ExtensionType() { + err = errExt(int8(p[1]), e.ExtensionType()) + return + } + p, err = m.R.Peek(3) + if err != nil { + return + } + err = e.UnmarshalBinary(p[2:]) + if err == nil { + _, err = m.R.Skip(3) + } + return + + case mfixext2: + if int8(p[1]) != e.ExtensionType() { + err = errExt(int8(p[1]), e.ExtensionType()) + return + } + p, err = m.R.Peek(4) + if err != nil { + return + } + err = e.UnmarshalBinary(p[2:]) + if err == nil { + _, err = m.R.Skip(4) + } + return + + case mfixext4: + if int8(p[1]) != e.ExtensionType() { + err = errExt(int8(p[1]), e.ExtensionType()) + return + } + p, err = m.R.Peek(6) + if err != nil { + return + } + err = e.UnmarshalBinary(p[2:]) + if err == nil { + _, err = m.R.Skip(6) + } + return + + case mfixext8: + if int8(p[1]) != e.ExtensionType() { + err = errExt(int8(p[1]), e.ExtensionType()) + return + } + p, err = m.R.Peek(10) + if err != nil { + return + } + err = e.UnmarshalBinary(p[2:]) + if err == nil { + _, err = m.R.Skip(10) + } + return + + case mfixext16: + if int8(p[1]) != e.ExtensionType() { + err = errExt(int8(p[1]), e.ExtensionType()) + return + } + p, err = m.R.Peek(18) + if err != nil { + return + } + err = e.UnmarshalBinary(p[2:]) + if err == nil { + _, err = m.R.Skip(18) + } + return + + case mext8: + p, err = m.R.Peek(3) + if err != nil { + return + } + if int8(p[2]) != e.ExtensionType() { + err = errExt(int8(p[2]), e.ExtensionType()) + return + } + read = int(uint8(p[1])) + off = 3 + + case mext16: + p, err = m.R.Peek(4) + if err != nil { + return + } + if int8(p[3]) != e.ExtensionType() { + err = errExt(int8(p[3]), e.ExtensionType()) + return + } + read = int(big.Uint16(p[1:])) + off = 4 + + case mext32: + p, err = m.R.Peek(6) + if err != nil { + return + } + if int8(p[5]) != e.ExtensionType() { + err = errExt(int8(p[5]), e.ExtensionType()) + return + } + read = int(big.Uint32(p[1:])) + off = 6 + + default: + err = badPrefix(ExtensionType, lead) + return + } + + p, err = m.R.Peek(read + off) + if err != nil { + return + } + err = e.UnmarshalBinary(p[off:]) + if err == nil { + _, err = m.R.Skip(read + off) + } + return +} + +// AppendExtension appends a MessagePack extension to the provided slice +func AppendExtension(b []byte, e Extension) ([]byte, error) { + l := e.Len() + var o []byte + var n int + switch l { + case 0: + o, n = ensure(b, 3) + o[n] = mext8 + o[n+1] = 0 + o[n+2] = byte(e.ExtensionType()) + return o[:n+3], nil + case 1: + o, n = ensure(b, 3) + o[n] = mfixext1 + o[n+1] = byte(e.ExtensionType()) + n += 2 + case 2: + o, n = ensure(b, 4) + o[n] = mfixext2 + o[n+1] = byte(e.ExtensionType()) + n += 2 + case 4: + o, n = ensure(b, 6) + o[n] = mfixext4 + o[n+1] = byte(e.ExtensionType()) + n += 2 + case 8: + o, n = ensure(b, 10) + o[n] = mfixext8 + o[n+1] = byte(e.ExtensionType()) + n += 2 + case 16: + o, n = ensure(b, 18) + o[n] = mfixext16 + o[n+1] = byte(e.ExtensionType()) + n += 2 + default: + switch { + case l < math.MaxUint8: + o, n = ensure(b, l+3) + o[n] = mext8 + o[n+1] = byte(uint8(l)) + o[n+2] = byte(e.ExtensionType()) + n += 3 + case l < math.MaxUint16: + o, n = ensure(b, l+4) + o[n] = mext16 + big.PutUint16(o[n+1:], uint16(l)) + o[n+3] = byte(e.ExtensionType()) + n += 4 + default: + o, n = ensure(b, l+6) + o[n] = mext32 + big.PutUint32(o[n+1:], uint32(l)) + o[n+5] = byte(e.ExtensionType()) + n += 6 + } + } + return o, e.MarshalBinaryTo(o[n:]) +} + +// ReadExtensionBytes reads an extension from 'b' into 'e' +// and returns any remaining bytes. +// Possible errors: +// - ErrShortBytes ('b' not long enough) +// - ExtensionTypeErorr{} (wire type not the same as e.Type()) +// - TypeErorr{} (next object not an extension) +// - InvalidPrefixError +// - An umarshal error returned from e.UnmarshalBinary +func ReadExtensionBytes(b []byte, e Extension) ([]byte, error) { + l := len(b) + if l < 3 { + return b, ErrShortBytes + } + lead := b[0] + var ( + sz int // size of 'data' + off int // offset of 'data' + typ int8 + ) + switch lead { + case mfixext1: + typ = int8(b[1]) + sz = 1 + off = 2 + case mfixext2: + typ = int8(b[1]) + sz = 2 + off = 2 + case mfixext4: + typ = int8(b[1]) + sz = 4 + off = 2 + case mfixext8: + typ = int8(b[1]) + sz = 8 + off = 2 + case mfixext16: + typ = int8(b[1]) + sz = 16 + off = 2 + case mext8: + sz = int(uint8(b[1])) + typ = int8(b[2]) + off = 3 + if sz == 0 { + return b[3:], e.UnmarshalBinary(b[3:3]) + } + case mext16: + if l < 4 { + return b, ErrShortBytes + } + sz = int(big.Uint16(b[1:])) + typ = int8(b[3]) + off = 4 + case mext32: + if l < 6 { + return b, ErrShortBytes + } + sz = int(big.Uint32(b[1:])) + typ = int8(b[5]) + off = 6 + default: + return b, badPrefix(ExtensionType, lead) + } + + if typ != e.ExtensionType() { + return b, errExt(typ, e.ExtensionType()) + } + + // the data of the extension starts + // at 'off' and is 'sz' bytes long + if len(b[off:]) < sz { + return b, ErrShortBytes + } + tot := off + sz + return b[tot:], e.UnmarshalBinary(b[off:tot]) +} diff --git a/vendor/github.com/tinylib/msgp/msgp/file.go b/vendor/github.com/tinylib/msgp/msgp/file.go new file mode 100644 index 0000000000..8e7370ebc2 --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/file.go @@ -0,0 +1,92 @@ +// +build linux darwin dragonfly freebsd netbsd openbsd +// +build !appengine + +package msgp + +import ( + "os" + "syscall" +) + +// ReadFile reads a file into 'dst' using +// a read-only memory mapping. Consequently, +// the file must be mmap-able, and the +// Unmarshaler should never write to +// the source memory. (Methods generated +// by the msgp tool obey that constraint, but +// user-defined implementations may not.) +// +// Reading and writing through file mappings +// is only efficient for large files; small +// files are best read and written using +// the ordinary streaming interfaces. +// +func ReadFile(dst Unmarshaler, file *os.File) error { + stat, err := file.Stat() + if err != nil { + return err + } + data, err := syscall.Mmap(int(file.Fd()), 0, int(stat.Size()), syscall.PROT_READ, syscall.MAP_SHARED) + if err != nil { + return err + } + adviseRead(data) + _, err = dst.UnmarshalMsg(data) + uerr := syscall.Munmap(data) + if err == nil { + err = uerr + } + return err +} + +// MarshalSizer is the combination +// of the Marshaler and Sizer +// interfaces. +type MarshalSizer interface { + Marshaler + Sizer +} + +// WriteFile writes a file from 'src' using +// memory mapping. It overwrites the entire +// contents of the previous file. +// The mapping size is calculated +// using the `Msgsize()` method +// of 'src', so it must produce a result +// equal to or greater than the actual encoded +// size of the object. Otherwise, +// a fault (SIGBUS) will occur. +// +// Reading and writing through file mappings +// is only efficient for large files; small +// files are best read and written using +// the ordinary streaming interfaces. +// +// NOTE: The performance of this call +// is highly OS- and filesystem-dependent. +// Users should take care to test that this +// performs as expected in a production environment. +// (Linux users should run a kernel and filesystem +// that support fallocate(2) for the best results.) +func WriteFile(src MarshalSizer, file *os.File) error { + sz := src.Msgsize() + err := fallocate(file, int64(sz)) + if err != nil { + return err + } + data, err := syscall.Mmap(int(file.Fd()), 0, sz, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) + if err != nil { + return err + } + adviseWrite(data) + chunk := data[:0] + chunk, err = src.MarshalMsg(chunk) + if err != nil { + return err + } + uerr := syscall.Munmap(data) + if uerr != nil { + return uerr + } + return file.Truncate(int64(len(chunk))) +} diff --git a/vendor/github.com/tinylib/msgp/msgp/file_port.go b/vendor/github.com/tinylib/msgp/msgp/file_port.go new file mode 100644 index 0000000000..6e654dbdc2 --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/file_port.go @@ -0,0 +1,47 @@ +// +build windows appengine + +package msgp + +import ( + "io/ioutil" + "os" +) + +// MarshalSizer is the combination +// of the Marshaler and Sizer +// interfaces. +type MarshalSizer interface { + Marshaler + Sizer +} + +func ReadFile(dst Unmarshaler, file *os.File) error { + if u, ok := dst.(Decodable); ok { + return u.DecodeMsg(NewReader(file)) + } + + data, err := ioutil.ReadAll(file) + if err != nil { + return err + } + _, err = dst.UnmarshalMsg(data) + return err +} + +func WriteFile(src MarshalSizer, file *os.File) error { + if e, ok := src.(Encodable); ok { + w := NewWriter(file) + err := e.EncodeMsg(w) + if err == nil { + err = w.Flush() + } + return err + } + + raw, err := src.MarshalMsg(nil) + if err != nil { + return err + } + _, err = file.Write(raw) + return err +} diff --git a/vendor/github.com/tinylib/msgp/msgp/integers.go b/vendor/github.com/tinylib/msgp/msgp/integers.go new file mode 100644 index 0000000000..f817d77598 --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/integers.go @@ -0,0 +1,174 @@ +package msgp + +/* ---------------------------------- + integer encoding utilities + (inline-able) + + TODO(tinylib): there are faster, + albeit non-portable solutions + to the code below. implement + byteswap? + ---------------------------------- */ + +func putMint64(b []byte, i int64) { + b[0] = mint64 + b[1] = byte(i >> 56) + b[2] = byte(i >> 48) + b[3] = byte(i >> 40) + b[4] = byte(i >> 32) + b[5] = byte(i >> 24) + b[6] = byte(i >> 16) + b[7] = byte(i >> 8) + b[8] = byte(i) +} + +func getMint64(b []byte) int64 { + return (int64(b[1]) << 56) | (int64(b[2]) << 48) | + (int64(b[3]) << 40) | (int64(b[4]) << 32) | + (int64(b[5]) << 24) | (int64(b[6]) << 16) | + (int64(b[7]) << 8) | (int64(b[8])) +} + +func putMint32(b []byte, i int32) { + b[0] = mint32 + b[1] = byte(i >> 24) + b[2] = byte(i >> 16) + b[3] = byte(i >> 8) + b[4] = byte(i) +} + +func getMint32(b []byte) int32 { + return (int32(b[1]) << 24) | (int32(b[2]) << 16) | (int32(b[3]) << 8) | (int32(b[4])) +} + +func putMint16(b []byte, i int16) { + b[0] = mint16 + b[1] = byte(i >> 8) + b[2] = byte(i) +} + +func getMint16(b []byte) (i int16) { + return (int16(b[1]) << 8) | int16(b[2]) +} + +func putMint8(b []byte, i int8) { + b[0] = mint8 + b[1] = byte(i) +} + +func getMint8(b []byte) (i int8) { + return int8(b[1]) +} + +func putMuint64(b []byte, u uint64) { + b[0] = muint64 + b[1] = byte(u >> 56) + b[2] = byte(u >> 48) + b[3] = byte(u >> 40) + b[4] = byte(u >> 32) + b[5] = byte(u >> 24) + b[6] = byte(u >> 16) + b[7] = byte(u >> 8) + b[8] = byte(u) +} + +func getMuint64(b []byte) uint64 { + return (uint64(b[1]) << 56) | (uint64(b[2]) << 48) | + (uint64(b[3]) << 40) | (uint64(b[4]) << 32) | + (uint64(b[5]) << 24) | (uint64(b[6]) << 16) | + (uint64(b[7]) << 8) | (uint64(b[8])) +} + +func putMuint32(b []byte, u uint32) { + b[0] = muint32 + b[1] = byte(u >> 24) + b[2] = byte(u >> 16) + b[3] = byte(u >> 8) + b[4] = byte(u) +} + +func getMuint32(b []byte) uint32 { + return (uint32(b[1]) << 24) | (uint32(b[2]) << 16) | (uint32(b[3]) << 8) | (uint32(b[4])) +} + +func putMuint16(b []byte, u uint16) { + b[0] = muint16 + b[1] = byte(u >> 8) + b[2] = byte(u) +} + +func getMuint16(b []byte) uint16 { + return (uint16(b[1]) << 8) | uint16(b[2]) +} + +func putMuint8(b []byte, u uint8) { + b[0] = muint8 + b[1] = byte(u) +} + +func getMuint8(b []byte) uint8 { + return uint8(b[1]) +} + +func getUnix(b []byte) (sec int64, nsec int32) { + sec = (int64(b[0]) << 56) | (int64(b[1]) << 48) | + (int64(b[2]) << 40) | (int64(b[3]) << 32) | + (int64(b[4]) << 24) | (int64(b[5]) << 16) | + (int64(b[6]) << 8) | (int64(b[7])) + + nsec = (int32(b[8]) << 24) | (int32(b[9]) << 16) | (int32(b[10]) << 8) | (int32(b[11])) + return +} + +func putUnix(b []byte, sec int64, nsec int32) { + b[0] = byte(sec >> 56) + b[1] = byte(sec >> 48) + b[2] = byte(sec >> 40) + b[3] = byte(sec >> 32) + b[4] = byte(sec >> 24) + b[5] = byte(sec >> 16) + b[6] = byte(sec >> 8) + b[7] = byte(sec) + b[8] = byte(nsec >> 24) + b[9] = byte(nsec >> 16) + b[10] = byte(nsec >> 8) + b[11] = byte(nsec) +} + +/* ----------------------------- + prefix utilities + ----------------------------- */ + +// write prefix and uint8 +func prefixu8(b []byte, pre byte, sz uint8) { + b[0] = pre + b[1] = byte(sz) +} + +// write prefix and big-endian uint16 +func prefixu16(b []byte, pre byte, sz uint16) { + b[0] = pre + b[1] = byte(sz >> 8) + b[2] = byte(sz) +} + +// write prefix and big-endian uint32 +func prefixu32(b []byte, pre byte, sz uint32) { + b[0] = pre + b[1] = byte(sz >> 24) + b[2] = byte(sz >> 16) + b[3] = byte(sz >> 8) + b[4] = byte(sz) +} + +func prefixu64(b []byte, pre byte, sz uint64) { + b[0] = pre + b[1] = byte(sz >> 56) + b[2] = byte(sz >> 48) + b[3] = byte(sz >> 40) + b[4] = byte(sz >> 32) + b[5] = byte(sz >> 24) + b[6] = byte(sz >> 16) + b[7] = byte(sz >> 8) + b[8] = byte(sz) +} diff --git a/vendor/github.com/tinylib/msgp/msgp/json.go b/vendor/github.com/tinylib/msgp/msgp/json.go new file mode 100644 index 0000000000..4325860ada --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/json.go @@ -0,0 +1,542 @@ +package msgp + +import ( + "bufio" + "encoding/base64" + "encoding/json" + "io" + "strconv" + "unicode/utf8" +) + +var ( + null = []byte("null") + hex = []byte("0123456789abcdef") +) + +var defuns [_maxtype]func(jsWriter, *Reader) (int, error) + +// note: there is an initialization loop if +// this isn't set up during init() +func init() { + // since none of these functions are inline-able, + // there is not much of a penalty to the indirect + // call. however, this is best expressed as a jump-table... + defuns = [_maxtype]func(jsWriter, *Reader) (int, error){ + StrType: rwString, + BinType: rwBytes, + MapType: rwMap, + ArrayType: rwArray, + Float64Type: rwFloat64, + Float32Type: rwFloat32, + BoolType: rwBool, + IntType: rwInt, + UintType: rwUint, + NilType: rwNil, + ExtensionType: rwExtension, + Complex64Type: rwExtension, + Complex128Type: rwExtension, + TimeType: rwTime, + } +} + +// this is the interface +// used to write json +type jsWriter interface { + io.Writer + io.ByteWriter + WriteString(string) (int, error) +} + +// CopyToJSON reads MessagePack from 'src' and copies it +// as JSON to 'dst' until EOF. +func CopyToJSON(dst io.Writer, src io.Reader) (n int64, err error) { + r := NewReader(src) + n, err = r.WriteToJSON(dst) + freeR(r) + return +} + +// WriteToJSON translates MessagePack from 'r' and writes it as +// JSON to 'w' until the underlying reader returns io.EOF. It returns +// the number of bytes written, and an error if it stopped before EOF. +func (r *Reader) WriteToJSON(w io.Writer) (n int64, err error) { + var j jsWriter + var bf *bufio.Writer + if jsw, ok := w.(jsWriter); ok { + j = jsw + } else { + bf = bufio.NewWriter(w) + j = bf + } + var nn int + for err == nil { + nn, err = rwNext(j, r) + n += int64(nn) + } + if err != io.EOF { + if bf != nil { + bf.Flush() + } + return + } + err = nil + if bf != nil { + err = bf.Flush() + } + return +} + +func rwNext(w jsWriter, src *Reader) (int, error) { + t, err := src.NextType() + if err != nil { + return 0, err + } + return defuns[t](w, src) +} + +func rwMap(dst jsWriter, src *Reader) (n int, err error) { + var comma bool + var sz uint32 + var field []byte + + sz, err = src.ReadMapHeader() + if err != nil { + return + } + + if sz == 0 { + return dst.WriteString("{}") + } + + err = dst.WriteByte('{') + if err != nil { + return + } + n++ + var nn int + for i := uint32(0); i < sz; i++ { + if comma { + err = dst.WriteByte(',') + if err != nil { + return + } + n++ + } + + field, err = src.ReadMapKeyPtr() + if err != nil { + return + } + nn, err = rwquoted(dst, field) + n += nn + if err != nil { + return + } + + err = dst.WriteByte(':') + if err != nil { + return + } + n++ + nn, err = rwNext(dst, src) + n += nn + if err != nil { + return + } + if !comma { + comma = true + } + } + + err = dst.WriteByte('}') + if err != nil { + return + } + n++ + return +} + +func rwArray(dst jsWriter, src *Reader) (n int, err error) { + err = dst.WriteByte('[') + if err != nil { + return + } + var sz uint32 + var nn int + sz, err = src.ReadArrayHeader() + if err != nil { + return + } + comma := false + for i := uint32(0); i < sz; i++ { + if comma { + err = dst.WriteByte(',') + if err != nil { + return + } + n++ + } + nn, err = rwNext(dst, src) + n += nn + if err != nil { + return + } + comma = true + } + + err = dst.WriteByte(']') + if err != nil { + return + } + n++ + return +} + +func rwNil(dst jsWriter, src *Reader) (int, error) { + err := src.ReadNil() + if err != nil { + return 0, err + } + return dst.Write(null) +} + +func rwFloat32(dst jsWriter, src *Reader) (int, error) { + f, err := src.ReadFloat32() + if err != nil { + return 0, err + } + src.scratch = strconv.AppendFloat(src.scratch[:0], float64(f), 'f', -1, 64) + return dst.Write(src.scratch) +} + +func rwFloat64(dst jsWriter, src *Reader) (int, error) { + f, err := src.ReadFloat64() + if err != nil { + return 0, err + } + src.scratch = strconv.AppendFloat(src.scratch[:0], f, 'f', -1, 32) + return dst.Write(src.scratch) +} + +func rwInt(dst jsWriter, src *Reader) (int, error) { + i, err := src.ReadInt64() + if err != nil { + return 0, err + } + src.scratch = strconv.AppendInt(src.scratch[:0], i, 10) + return dst.Write(src.scratch) +} + +func rwUint(dst jsWriter, src *Reader) (int, error) { + u, err := src.ReadUint64() + if err != nil { + return 0, err + } + src.scratch = strconv.AppendUint(src.scratch[:0], u, 10) + return dst.Write(src.scratch) +} + +func rwBool(dst jsWriter, src *Reader) (int, error) { + b, err := src.ReadBool() + if err != nil { + return 0, err + } + if b { + return dst.WriteString("true") + } + return dst.WriteString("false") +} + +func rwTime(dst jsWriter, src *Reader) (int, error) { + t, err := src.ReadTime() + if err != nil { + return 0, err + } + bts, err := t.MarshalJSON() + if err != nil { + return 0, err + } + return dst.Write(bts) +} + +func rwExtension(dst jsWriter, src *Reader) (n int, err error) { + et, err := src.peekExtensionType() + if err != nil { + return 0, err + } + + // registered extensions can override + // the JSON encoding + if j, ok := extensionReg[et]; ok { + var bts []byte + e := j() + err = src.ReadExtension(e) + if err != nil { + return + } + bts, err = json.Marshal(e) + if err != nil { + return + } + return dst.Write(bts) + } + + e := RawExtension{} + e.Type = et + err = src.ReadExtension(&e) + if err != nil { + return + } + + var nn int + err = dst.WriteByte('{') + if err != nil { + return + } + n++ + + nn, err = dst.WriteString(`"type:"`) + n += nn + if err != nil { + return + } + + src.scratch = strconv.AppendInt(src.scratch[0:0], int64(e.Type), 10) + nn, err = dst.Write(src.scratch) + n += nn + if err != nil { + return + } + + nn, err = dst.WriteString(`,"data":"`) + n += nn + if err != nil { + return + } + + enc := base64.NewEncoder(base64.StdEncoding, dst) + + nn, err = enc.Write(e.Data) + n += nn + if err != nil { + return + } + err = enc.Close() + if err != nil { + return + } + nn, err = dst.WriteString(`"}`) + n += nn + return +} + +func rwString(dst jsWriter, src *Reader) (n int, err error) { + var p []byte + p, err = src.R.Peek(1) + if err != nil { + return + } + lead := p[0] + var read int + + if isfixstr(lead) { + read = int(rfixstr(lead)) + src.R.Skip(1) + goto write + } + + switch lead { + case mstr8: + p, err = src.R.Next(2) + if err != nil { + return + } + read = int(uint8(p[1])) + case mstr16: + p, err = src.R.Next(3) + if err != nil { + return + } + read = int(big.Uint16(p[1:])) + case mstr32: + p, err = src.R.Next(5) + if err != nil { + return + } + read = int(big.Uint32(p[1:])) + default: + err = badPrefix(StrType, lead) + return + } +write: + p, err = src.R.Next(read) + if err != nil { + return + } + n, err = rwquoted(dst, p) + return +} + +func rwBytes(dst jsWriter, src *Reader) (n int, err error) { + var nn int + err = dst.WriteByte('"') + if err != nil { + return + } + n++ + src.scratch, err = src.ReadBytes(src.scratch[:0]) + if err != nil { + return + } + enc := base64.NewEncoder(base64.StdEncoding, dst) + nn, err = enc.Write(src.scratch) + n += nn + if err != nil { + return + } + err = enc.Close() + if err != nil { + return + } + err = dst.WriteByte('"') + if err != nil { + return + } + n++ + return +} + +// Below (c) The Go Authors, 2009-2014 +// Subject to the BSD-style license found at http://golang.org +// +// see: encoding/json/encode.go:(*encodeState).stringbytes() +func rwquoted(dst jsWriter, s []byte) (n int, err error) { + var nn int + err = dst.WriteByte('"') + if err != nil { + return + } + n++ + start := 0 + for i := 0; i < len(s); { + if b := s[i]; b < utf8.RuneSelf { + if 0x20 <= b && b != '\\' && b != '"' && b != '<' && b != '>' && b != '&' { + i++ + continue + } + if start < i { + nn, err = dst.Write(s[start:i]) + n += nn + if err != nil { + return + } + } + switch b { + case '\\', '"': + err = dst.WriteByte('\\') + if err != nil { + return + } + n++ + err = dst.WriteByte(b) + if err != nil { + return + } + n++ + case '\n': + err = dst.WriteByte('\\') + if err != nil { + return + } + n++ + err = dst.WriteByte('n') + if err != nil { + return + } + n++ + case '\r': + err = dst.WriteByte('\\') + if err != nil { + return + } + n++ + err = dst.WriteByte('r') + if err != nil { + return + } + n++ + default: + nn, err = dst.WriteString(`\u00`) + n += nn + if err != nil { + return + } + err = dst.WriteByte(hex[b>>4]) + if err != nil { + return + } + n++ + err = dst.WriteByte(hex[b&0xF]) + if err != nil { + return + } + n++ + } + i++ + start = i + continue + } + c, size := utf8.DecodeRune(s[i:]) + if c == utf8.RuneError && size == 1 { + if start < i { + nn, err = dst.Write(s[start:i]) + n += nn + if err != nil { + return + } + nn, err = dst.WriteString(`\ufffd`) + n += nn + if err != nil { + return + } + i += size + start = i + continue + } + } + if c == '\u2028' || c == '\u2029' { + if start < i { + nn, err = dst.Write(s[start:i]) + n += nn + if err != nil { + return + } + nn, err = dst.WriteString(`\u202`) + n += nn + if err != nil { + return + } + err = dst.WriteByte(hex[c&0xF]) + if err != nil { + return + } + n++ + } + } + i += size + } + if start < len(s) { + nn, err = dst.Write(s[start:]) + n += nn + if err != nil { + return + } + } + err = dst.WriteByte('"') + if err != nil { + return + } + n++ + return +} diff --git a/vendor/github.com/tinylib/msgp/msgp/json_bytes.go b/vendor/github.com/tinylib/msgp/msgp/json_bytes.go new file mode 100644 index 0000000000..438caf5392 --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/json_bytes.go @@ -0,0 +1,363 @@ +package msgp + +import ( + "bufio" + "encoding/base64" + "encoding/json" + "io" + "strconv" + "time" +) + +var unfuns [_maxtype]func(jsWriter, []byte, []byte) ([]byte, []byte, error) + +func init() { + + // NOTE(pmh): this is best expressed as a jump table, + // but gc doesn't do that yet. revisit post-go1.5. + unfuns = [_maxtype]func(jsWriter, []byte, []byte) ([]byte, []byte, error){ + StrType: rwStringBytes, + BinType: rwBytesBytes, + MapType: rwMapBytes, + ArrayType: rwArrayBytes, + Float64Type: rwFloat64Bytes, + Float32Type: rwFloat32Bytes, + BoolType: rwBoolBytes, + IntType: rwIntBytes, + UintType: rwUintBytes, + NilType: rwNullBytes, + ExtensionType: rwExtensionBytes, + Complex64Type: rwExtensionBytes, + Complex128Type: rwExtensionBytes, + TimeType: rwTimeBytes, + } +} + +// UnmarshalAsJSON takes raw messagepack and writes +// it as JSON to 'w'. If an error is returned, the +// bytes not translated will also be returned. If +// no errors are encountered, the length of the returned +// slice will be zero. +func UnmarshalAsJSON(w io.Writer, msg []byte) ([]byte, error) { + var ( + scratch []byte + cast bool + dst jsWriter + err error + ) + if jsw, ok := w.(jsWriter); ok { + dst = jsw + cast = true + } else { + dst = bufio.NewWriterSize(w, 512) + } + for len(msg) > 0 && err == nil { + msg, scratch, err = writeNext(dst, msg, scratch) + } + if !cast && err == nil { + err = dst.(*bufio.Writer).Flush() + } + return msg, err +} + +func writeNext(w jsWriter, msg []byte, scratch []byte) ([]byte, []byte, error) { + if len(msg) < 1 { + return msg, scratch, ErrShortBytes + } + t := getType(msg[0]) + if t == InvalidType { + return msg, scratch, InvalidPrefixError(msg[0]) + } + if t == ExtensionType { + et, err := peekExtension(msg) + if err != nil { + return nil, scratch, err + } + if et == TimeExtension { + t = TimeType + } + } + return unfuns[t](w, msg, scratch) +} + +func rwArrayBytes(w jsWriter, msg []byte, scratch []byte) ([]byte, []byte, error) { + sz, msg, err := ReadArrayHeaderBytes(msg) + if err != nil { + return msg, scratch, err + } + err = w.WriteByte('[') + if err != nil { + return msg, scratch, err + } + for i := uint32(0); i < sz; i++ { + if i != 0 { + err = w.WriteByte(',') + if err != nil { + return msg, scratch, err + } + } + msg, scratch, err = writeNext(w, msg, scratch) + if err != nil { + return msg, scratch, err + } + } + err = w.WriteByte(']') + return msg, scratch, err +} + +func rwMapBytes(w jsWriter, msg []byte, scratch []byte) ([]byte, []byte, error) { + sz, msg, err := ReadMapHeaderBytes(msg) + if err != nil { + return msg, scratch, err + } + err = w.WriteByte('{') + if err != nil { + return msg, scratch, err + } + for i := uint32(0); i < sz; i++ { + if i != 0 { + err = w.WriteByte(',') + if err != nil { + return msg, scratch, err + } + } + msg, scratch, err = rwMapKeyBytes(w, msg, scratch) + if err != nil { + return msg, scratch, err + } + err = w.WriteByte(':') + if err != nil { + return msg, scratch, err + } + msg, scratch, err = writeNext(w, msg, scratch) + if err != nil { + return msg, scratch, err + } + } + err = w.WriteByte('}') + return msg, scratch, err +} + +func rwMapKeyBytes(w jsWriter, msg []byte, scratch []byte) ([]byte, []byte, error) { + msg, scratch, err := rwStringBytes(w, msg, scratch) + if err != nil { + if tperr, ok := err.(TypeError); ok && tperr.Encoded == BinType { + return rwBytesBytes(w, msg, scratch) + } + } + return msg, scratch, err +} + +func rwStringBytes(w jsWriter, msg []byte, scratch []byte) ([]byte, []byte, error) { + str, msg, err := ReadStringZC(msg) + if err != nil { + return msg, scratch, err + } + _, err = rwquoted(w, str) + return msg, scratch, err +} + +func rwBytesBytes(w jsWriter, msg []byte, scratch []byte) ([]byte, []byte, error) { + bts, msg, err := ReadBytesZC(msg) + if err != nil { + return msg, scratch, err + } + l := base64.StdEncoding.EncodedLen(len(bts)) + if cap(scratch) >= l { + scratch = scratch[0:l] + } else { + scratch = make([]byte, l) + } + base64.StdEncoding.Encode(scratch, bts) + err = w.WriteByte('"') + if err != nil { + return msg, scratch, err + } + _, err = w.Write(scratch) + if err != nil { + return msg, scratch, err + } + err = w.WriteByte('"') + return msg, scratch, err +} + +func rwNullBytes(w jsWriter, msg []byte, scratch []byte) ([]byte, []byte, error) { + msg, err := ReadNilBytes(msg) + if err != nil { + return msg, scratch, err + } + _, err = w.Write(null) + return msg, scratch, err +} + +func rwBoolBytes(w jsWriter, msg []byte, scratch []byte) ([]byte, []byte, error) { + b, msg, err := ReadBoolBytes(msg) + if err != nil { + return msg, scratch, err + } + if b { + _, err = w.WriteString("true") + return msg, scratch, err + } + _, err = w.WriteString("false") + return msg, scratch, err +} + +func rwIntBytes(w jsWriter, msg []byte, scratch []byte) ([]byte, []byte, error) { + i, msg, err := ReadInt64Bytes(msg) + if err != nil { + return msg, scratch, err + } + scratch = strconv.AppendInt(scratch[0:0], i, 10) + _, err = w.Write(scratch) + return msg, scratch, err +} + +func rwUintBytes(w jsWriter, msg []byte, scratch []byte) ([]byte, []byte, error) { + u, msg, err := ReadUint64Bytes(msg) + if err != nil { + return msg, scratch, err + } + scratch = strconv.AppendUint(scratch[0:0], u, 10) + _, err = w.Write(scratch) + return msg, scratch, err +} + +func rwFloatBytes(w jsWriter, msg []byte, f64 bool, scratch []byte) ([]byte, []byte, error) { + var f float64 + var err error + var sz int + if f64 { + sz = 64 + f, msg, err = ReadFloat64Bytes(msg) + } else { + sz = 32 + var v float32 + v, msg, err = ReadFloat32Bytes(msg) + f = float64(v) + } + if err != nil { + return msg, scratch, err + } + scratch = strconv.AppendFloat(scratch, f, 'f', -1, sz) + _, err = w.Write(scratch) + return msg, scratch, err +} + +func rwFloat32Bytes(w jsWriter, msg []byte, scratch []byte) ([]byte, []byte, error) { + var f float32 + var err error + f, msg, err = ReadFloat32Bytes(msg) + if err != nil { + return msg, scratch, err + } + scratch = strconv.AppendFloat(scratch[:0], float64(f), 'f', -1, 32) + _, err = w.Write(scratch) + return msg, scratch, err +} + +func rwFloat64Bytes(w jsWriter, msg []byte, scratch []byte) ([]byte, []byte, error) { + var f float64 + var err error + f, msg, err = ReadFloat64Bytes(msg) + if err != nil { + return msg, scratch, err + } + scratch = strconv.AppendFloat(scratch[:0], f, 'f', -1, 64) + _, err = w.Write(scratch) + return msg, scratch, err +} + +func rwTimeBytes(w jsWriter, msg []byte, scratch []byte) ([]byte, []byte, error) { + var t time.Time + var err error + t, msg, err = ReadTimeBytes(msg) + if err != nil { + return msg, scratch, err + } + bts, err := t.MarshalJSON() + if err != nil { + return msg, scratch, err + } + _, err = w.Write(bts) + return msg, scratch, err +} + +func rwExtensionBytes(w jsWriter, msg []byte, scratch []byte) ([]byte, []byte, error) { + var err error + var et int8 + et, err = peekExtension(msg) + if err != nil { + return msg, scratch, err + } + + // if it's time.Time + if et == TimeExtension { + var tm time.Time + tm, msg, err = ReadTimeBytes(msg) + if err != nil { + return msg, scratch, err + } + bts, err := tm.MarshalJSON() + if err != nil { + return msg, scratch, err + } + _, err = w.Write(bts) + return msg, scratch, err + } + + // if the extension is registered, + // use its canonical JSON form + if f, ok := extensionReg[et]; ok { + e := f() + msg, err = ReadExtensionBytes(msg, e) + if err != nil { + return msg, scratch, err + } + bts, err := json.Marshal(e) + if err != nil { + return msg, scratch, err + } + _, err = w.Write(bts) + return msg, scratch, err + } + + // otherwise, write `{"type": <num>, "data": "<base64data>"}` + r := RawExtension{} + r.Type = et + msg, err = ReadExtensionBytes(msg, &r) + if err != nil { + return msg, scratch, err + } + scratch, err = writeExt(w, r, scratch) + return msg, scratch, err +} + +func writeExt(w jsWriter, r RawExtension, scratch []byte) ([]byte, error) { + _, err := w.WriteString(`{"type":`) + if err != nil { + return scratch, err + } + scratch = strconv.AppendInt(scratch[0:0], int64(r.Type), 10) + _, err = w.Write(scratch) + if err != nil { + return scratch, err + } + _, err = w.WriteString(`,"data":"`) + if err != nil { + return scratch, err + } + l := base64.StdEncoding.EncodedLen(len(r.Data)) + if cap(scratch) >= l { + scratch = scratch[0:l] + } else { + scratch = make([]byte, l) + } + base64.StdEncoding.Encode(scratch, r.Data) + _, err = w.Write(scratch) + if err != nil { + return scratch, err + } + _, err = w.WriteString(`"}`) + return scratch, err +} diff --git a/vendor/github.com/tinylib/msgp/msgp/number.go b/vendor/github.com/tinylib/msgp/msgp/number.go new file mode 100644 index 0000000000..ad07ef9958 --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/number.go @@ -0,0 +1,267 @@ +package msgp + +import ( + "math" + "strconv" +) + +// The portable parts of the Number implementation + +// Number can be +// an int64, uint64, float32, +// or float64 internally. +// It can decode itself +// from any of the native +// messagepack number types. +// The zero-value of Number +// is Int(0). Using the equality +// operator with Number compares +// both the type and the value +// of the number. +type Number struct { + // internally, this + // is just a tagged union. + // the raw bits of the number + // are stored the same way regardless. + bits uint64 + typ Type +} + +// AsInt sets the number to an int64. +func (n *Number) AsInt(i int64) { + + // we always store int(0) + // as {0, InvalidType} in + // order to preserve + // the behavior of the == operator + if i == 0 { + n.typ = InvalidType + n.bits = 0 + return + } + + n.typ = IntType + n.bits = uint64(i) +} + +// AsUint sets the number to a uint64. +func (n *Number) AsUint(u uint64) { + n.typ = UintType + n.bits = u +} + +// AsFloat32 sets the value of the number +// to a float32. +func (n *Number) AsFloat32(f float32) { + n.typ = Float32Type + n.bits = uint64(math.Float32bits(f)) +} + +// AsFloat64 sets the value of the +// number to a float64. +func (n *Number) AsFloat64(f float64) { + n.typ = Float64Type + n.bits = math.Float64bits(f) +} + +// Int casts the number as an int64, and +// returns whether or not that was the +// underlying type. +func (n *Number) Int() (int64, bool) { + return int64(n.bits), n.typ == IntType || n.typ == InvalidType +} + +// Uint casts the number as a uint64, and returns +// whether or not that was the underlying type. +func (n *Number) Uint() (uint64, bool) { + return n.bits, n.typ == UintType +} + +// Float casts the number to a float64, and +// returns whether or not that was the underlying +// type (either a float64 or a float32). +func (n *Number) Float() (float64, bool) { + switch n.typ { + case Float32Type: + return float64(math.Float32frombits(uint32(n.bits))), true + case Float64Type: + return math.Float64frombits(n.bits), true + default: + return 0.0, false + } +} + +// Type will return one of: +// Float64Type, Float32Type, UintType, or IntType. +func (n *Number) Type() Type { + if n.typ == InvalidType { + return IntType + } + return n.typ +} + +// DecodeMsg implements msgp.Decodable +func (n *Number) DecodeMsg(r *Reader) error { + typ, err := r.NextType() + if err != nil { + return err + } + switch typ { + case Float32Type: + f, err := r.ReadFloat32() + if err != nil { + return err + } + n.AsFloat32(f) + return nil + case Float64Type: + f, err := r.ReadFloat64() + if err != nil { + return err + } + n.AsFloat64(f) + return nil + case IntType: + i, err := r.ReadInt64() + if err != nil { + return err + } + n.AsInt(i) + return nil + case UintType: + u, err := r.ReadUint64() + if err != nil { + return err + } + n.AsUint(u) + return nil + default: + return TypeError{Encoded: typ, Method: IntType} + } +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (n *Number) UnmarshalMsg(b []byte) ([]byte, error) { + typ := NextType(b) + switch typ { + case IntType: + i, o, err := ReadInt64Bytes(b) + if err != nil { + return b, err + } + n.AsInt(i) + return o, nil + case UintType: + u, o, err := ReadUint64Bytes(b) + if err != nil { + return b, err + } + n.AsUint(u) + return o, nil + case Float64Type: + f, o, err := ReadFloat64Bytes(b) + if err != nil { + return b, err + } + n.AsFloat64(f) + return o, nil + case Float32Type: + f, o, err := ReadFloat32Bytes(b) + if err != nil { + return b, err + } + n.AsFloat32(f) + return o, nil + default: + return b, TypeError{Method: IntType, Encoded: typ} + } +} + +// MarshalMsg implements msgp.Marshaler +func (n *Number) MarshalMsg(b []byte) ([]byte, error) { + switch n.typ { + case IntType: + return AppendInt64(b, int64(n.bits)), nil + case UintType: + return AppendUint64(b, uint64(n.bits)), nil + case Float64Type: + return AppendFloat64(b, math.Float64frombits(n.bits)), nil + case Float32Type: + return AppendFloat32(b, math.Float32frombits(uint32(n.bits))), nil + default: + return AppendInt64(b, 0), nil + } +} + +// EncodeMsg implements msgp.Encodable +func (n *Number) EncodeMsg(w *Writer) error { + switch n.typ { + case IntType: + return w.WriteInt64(int64(n.bits)) + case UintType: + return w.WriteUint64(n.bits) + case Float64Type: + return w.WriteFloat64(math.Float64frombits(n.bits)) + case Float32Type: + return w.WriteFloat32(math.Float32frombits(uint32(n.bits))) + default: + return w.WriteInt64(0) + } +} + +// Msgsize implements msgp.Sizer +func (n *Number) Msgsize() int { + switch n.typ { + case Float32Type: + return Float32Size + case Float64Type: + return Float64Size + case IntType: + return Int64Size + case UintType: + return Uint64Size + default: + return 1 // fixint(0) + } +} + +// MarshalJSON implements json.Marshaler +func (n *Number) MarshalJSON() ([]byte, error) { + t := n.Type() + if t == InvalidType { + return []byte{'0'}, nil + } + out := make([]byte, 0, 32) + switch t { + case Float32Type, Float64Type: + f, _ := n.Float() + return strconv.AppendFloat(out, f, 'f', -1, 64), nil + case IntType: + i, _ := n.Int() + return strconv.AppendInt(out, i, 10), nil + case UintType: + u, _ := n.Uint() + return strconv.AppendUint(out, u, 10), nil + default: + panic("(*Number).typ is invalid") + } +} + +// String implements fmt.Stringer +func (n *Number) String() string { + switch n.typ { + case InvalidType: + return "0" + case Float32Type, Float64Type: + f, _ := n.Float() + return strconv.FormatFloat(f, 'f', -1, 64) + case IntType: + i, _ := n.Int() + return strconv.FormatInt(i, 10) + case UintType: + u, _ := n.Uint() + return strconv.FormatUint(u, 10) + default: + panic("(*Number).typ is invalid") + } +} diff --git a/vendor/github.com/tinylib/msgp/msgp/purego.go b/vendor/github.com/tinylib/msgp/msgp/purego.go new file mode 100644 index 0000000000..c828f7ecad --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/purego.go @@ -0,0 +1,15 @@ +// +build purego appengine + +package msgp + +// let's just assume appengine +// uses 64-bit hardware... +const smallint = false + +func UnsafeString(b []byte) string { + return string(b) +} + +func UnsafeBytes(s string) []byte { + return []byte(s) +} diff --git a/vendor/github.com/tinylib/msgp/msgp/read.go b/vendor/github.com/tinylib/msgp/msgp/read.go new file mode 100644 index 0000000000..aa668c5731 --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/read.go @@ -0,0 +1,1358 @@ +package msgp + +import ( + "io" + "math" + "sync" + "time" + + "github.com/philhofer/fwd" +) + +// where we keep old *Readers +var readerPool = sync.Pool{New: func() interface{} { return &Reader{} }} + +// Type is a MessagePack wire type, +// including this package's built-in +// extension types. +type Type byte + +// MessagePack Types +// +// The zero value of Type +// is InvalidType. +const ( + InvalidType Type = iota + + // MessagePack built-in types + + StrType + BinType + MapType + ArrayType + Float64Type + Float32Type + BoolType + IntType + UintType + NilType + ExtensionType + + // pseudo-types provided + // by extensions + + Complex64Type + Complex128Type + TimeType + + _maxtype +) + +// String implements fmt.Stringer +func (t Type) String() string { + switch t { + case StrType: + return "str" + case BinType: + return "bin" + case MapType: + return "map" + case ArrayType: + return "array" + case Float64Type: + return "float64" + case Float32Type: + return "float32" + case BoolType: + return "bool" + case UintType: + return "uint" + case IntType: + return "int" + case ExtensionType: + return "ext" + case NilType: + return "nil" + default: + return "<invalid>" + } +} + +func freeR(m *Reader) { + readerPool.Put(m) +} + +// Unmarshaler is the interface fulfilled +// by objects that know how to unmarshal +// themselves from MessagePack. +// UnmarshalMsg unmarshals the object +// from binary, returing any leftover +// bytes and any errors encountered. +type Unmarshaler interface { + UnmarshalMsg([]byte) ([]byte, error) +} + +// Decodable is the interface fulfilled +// by objects that know how to read +// themselves from a *Reader. +type Decodable interface { + DecodeMsg(*Reader) error +} + +// Decode decodes 'd' from 'r'. +func Decode(r io.Reader, d Decodable) error { + rd := NewReader(r) + err := d.DecodeMsg(rd) + freeR(rd) + return err +} + +// NewReader returns a *Reader that +// reads from the provided reader. The +// reader will be buffered. +func NewReader(r io.Reader) *Reader { + p := readerPool.Get().(*Reader) + if p.R == nil { + p.R = fwd.NewReader(r) + } else { + p.R.Reset(r) + } + return p +} + +// NewReaderSize returns a *Reader with a buffer of the given size. +// (This is vastly preferable to passing the decoder a reader that is already buffered.) +func NewReaderSize(r io.Reader, sz int) *Reader { + return &Reader{R: fwd.NewReaderSize(r, sz)} +} + +// Reader wraps an io.Reader and provides +// methods to read MessagePack-encoded values +// from it. Readers are buffered. +type Reader struct { + // R is the buffered reader + // that the Reader uses + // to decode MessagePack. + // The Reader itself + // is stateless; all the + // buffering is done + // within R. + R *fwd.Reader + scratch []byte +} + +// Read implements `io.Reader` +func (m *Reader) Read(p []byte) (int, error) { + return m.R.Read(p) +} + +// CopyNext reads the next object from m without decoding it and writes it to w. +// It avoids unnecessary copies internally. +func (m *Reader) CopyNext(w io.Writer) (int64, error) { + sz, o, err := getNextSize(m.R) + if err != nil { + return 0, err + } + + var n int64 + // Opportunistic optimization: if we can fit the whole thing in the m.R + // buffer, then just get a pointer to that, and pass it to w.Write, + // avoiding an allocation. + if int(sz) <= m.R.BufferSize() { + var nn int + var buf []byte + buf, err = m.R.Next(int(sz)) + if err != nil { + if err == io.ErrUnexpectedEOF { + err = ErrShortBytes + } + return 0, err + } + nn, err = w.Write(buf) + n += int64(nn) + } else { + // Fall back to io.CopyN. + // May avoid allocating if w is a ReaderFrom (e.g. bytes.Buffer) + n, err = io.CopyN(w, m.R, int64(sz)) + if err == io.ErrUnexpectedEOF { + err = ErrShortBytes + } + } + if err != nil { + return n, err + } else if n < int64(sz) { + return n, io.ErrShortWrite + } + + // for maps and slices, read elements + for x := uintptr(0); x < o; x++ { + var n2 int64 + n2, err = m.CopyNext(w) + if err != nil { + return n, err + } + n += n2 + } + return n, nil +} + +// ReadFull implements `io.ReadFull` +func (m *Reader) ReadFull(p []byte) (int, error) { + return m.R.ReadFull(p) +} + +// Reset resets the underlying reader. +func (m *Reader) Reset(r io.Reader) { m.R.Reset(r) } + +// Buffered returns the number of bytes currently in the read buffer. +func (m *Reader) Buffered() int { return m.R.Buffered() } + +// BufferSize returns the capacity of the read buffer. +func (m *Reader) BufferSize() int { return m.R.BufferSize() } + +// NextType returns the next object type to be decoded. +func (m *Reader) NextType() (Type, error) { + p, err := m.R.Peek(1) + if err != nil { + return InvalidType, err + } + t := getType(p[0]) + if t == InvalidType { + return t, InvalidPrefixError(p[0]) + } + if t == ExtensionType { + v, err := m.peekExtensionType() + if err != nil { + return InvalidType, err + } + switch v { + case Complex64Extension: + return Complex64Type, nil + case Complex128Extension: + return Complex128Type, nil + case TimeExtension: + return TimeType, nil + } + } + return t, nil +} + +// IsNil returns whether or not +// the next byte is a null messagepack byte +func (m *Reader) IsNil() bool { + p, err := m.R.Peek(1) + return err == nil && p[0] == mnil +} + +// getNextSize returns the size of the next object on the wire. +// returns (obj size, obj elements, error) +// only maps and arrays have non-zero obj elements +// for maps and arrays, obj size does not include elements +// +// use uintptr b/c it's guaranteed to be large enough +// to hold whatever we can fit in memory. +func getNextSize(r *fwd.Reader) (uintptr, uintptr, error) { + b, err := r.Peek(1) + if err != nil { + return 0, 0, err + } + lead := b[0] + spec := &sizes[lead] + size, mode := spec.size, spec.extra + if size == 0 { + return 0, 0, InvalidPrefixError(lead) + } + if mode >= 0 { + return uintptr(size), uintptr(mode), nil + } + b, err = r.Peek(int(size)) + if err != nil { + return 0, 0, err + } + switch mode { + case extra8: + return uintptr(size) + uintptr(b[1]), 0, nil + case extra16: + return uintptr(size) + uintptr(big.Uint16(b[1:])), 0, nil + case extra32: + return uintptr(size) + uintptr(big.Uint32(b[1:])), 0, nil + case map16v: + return uintptr(size), 2 * uintptr(big.Uint16(b[1:])), nil + case map32v: + return uintptr(size), 2 * uintptr(big.Uint32(b[1:])), nil + case array16v: + return uintptr(size), uintptr(big.Uint16(b[1:])), nil + case array32v: + return uintptr(size), uintptr(big.Uint32(b[1:])), nil + default: + return 0, 0, fatal + } +} + +// Skip skips over the next object, regardless of +// its type. If it is an array or map, the whole array +// or map will be skipped. +func (m *Reader) Skip() error { + var ( + v uintptr // bytes + o uintptr // objects + err error + p []byte + ) + + // we can use the faster + // method if we have enough + // buffered data + if m.R.Buffered() >= 5 { + p, err = m.R.Peek(5) + if err != nil { + return err + } + v, o, err = getSize(p) + if err != nil { + return err + } + } else { + v, o, err = getNextSize(m.R) + if err != nil { + return err + } + } + + // 'v' is always non-zero + // if err == nil + _, err = m.R.Skip(int(v)) + if err != nil { + return err + } + + // for maps and slices, skip elements + for x := uintptr(0); x < o; x++ { + err = m.Skip() + if err != nil { + return err + } + } + return nil +} + +// ReadMapHeader reads the next object +// as a map header and returns the size +// of the map and the number of bytes written. +// It will return a TypeError{} if the next +// object is not a map. +func (m *Reader) ReadMapHeader() (sz uint32, err error) { + var p []byte + var lead byte + p, err = m.R.Peek(1) + if err != nil { + return + } + lead = p[0] + if isfixmap(lead) { + sz = uint32(rfixmap(lead)) + _, err = m.R.Skip(1) + return + } + switch lead { + case mmap16: + p, err = m.R.Next(3) + if err != nil { + return + } + sz = uint32(big.Uint16(p[1:])) + return + case mmap32: + p, err = m.R.Next(5) + if err != nil { + return + } + sz = big.Uint32(p[1:]) + return + default: + err = badPrefix(MapType, lead) + return + } +} + +// ReadMapKey reads either a 'str' or 'bin' field from +// the reader and returns the value as a []byte. It uses +// scratch for storage if it is large enough. +func (m *Reader) ReadMapKey(scratch []byte) ([]byte, error) { + out, err := m.ReadStringAsBytes(scratch) + if err != nil { + if tperr, ok := err.(TypeError); ok && tperr.Encoded == BinType { + return m.ReadBytes(scratch) + } + return nil, err + } + return out, nil +} + +// MapKeyPtr returns a []byte pointing to the contents +// of a valid map key. The key cannot be empty, and it +// must be shorter than the total buffer size of the +// *Reader. Additionally, the returned slice is only +// valid until the next *Reader method call. Users +// should exercise extreme care when using this +// method; writing into the returned slice may +// corrupt future reads. +func (m *Reader) ReadMapKeyPtr() ([]byte, error) { + p, err := m.R.Peek(1) + if err != nil { + return nil, err + } + lead := p[0] + var read int + if isfixstr(lead) { + read = int(rfixstr(lead)) + m.R.Skip(1) + goto fill + } + switch lead { + case mstr8, mbin8: + p, err = m.R.Next(2) + if err != nil { + return nil, err + } + read = int(p[1]) + case mstr16, mbin16: + p, err = m.R.Next(3) + if err != nil { + return nil, err + } + read = int(big.Uint16(p[1:])) + case mstr32, mbin32: + p, err = m.R.Next(5) + if err != nil { + return nil, err + } + read = int(big.Uint32(p[1:])) + default: + return nil, badPrefix(StrType, lead) + } +fill: + if read == 0 { + return nil, ErrShortBytes + } + return m.R.Next(read) +} + +// ReadArrayHeader reads the next object as an +// array header and returns the size of the array +// and the number of bytes read. +func (m *Reader) ReadArrayHeader() (sz uint32, err error) { + var lead byte + var p []byte + p, err = m.R.Peek(1) + if err != nil { + return + } + lead = p[0] + if isfixarray(lead) { + sz = uint32(rfixarray(lead)) + _, err = m.R.Skip(1) + return + } + switch lead { + case marray16: + p, err = m.R.Next(3) + if err != nil { + return + } + sz = uint32(big.Uint16(p[1:])) + return + + case marray32: + p, err = m.R.Next(5) + if err != nil { + return + } + sz = big.Uint32(p[1:]) + return + + default: + err = badPrefix(ArrayType, lead) + return + } +} + +// ReadNil reads a 'nil' MessagePack byte from the reader +func (m *Reader) ReadNil() error { + p, err := m.R.Peek(1) + if err != nil { + return err + } + if p[0] != mnil { + return badPrefix(NilType, p[0]) + } + _, err = m.R.Skip(1) + return err +} + +// ReadFloat64 reads a float64 from the reader. +// (If the value on the wire is encoded as a float32, +// it will be up-cast to a float64.) +func (m *Reader) ReadFloat64() (f float64, err error) { + var p []byte + p, err = m.R.Peek(9) + if err != nil { + // we'll allow a coversion from float32 to float64, + // since we don't lose any precision + if err == io.EOF && len(p) > 0 && p[0] == mfloat32 { + ef, err := m.ReadFloat32() + return float64(ef), err + } + return + } + if p[0] != mfloat64 { + // see above + if p[0] == mfloat32 { + ef, err := m.ReadFloat32() + return float64(ef), err + } + err = badPrefix(Float64Type, p[0]) + return + } + f = math.Float64frombits(getMuint64(p)) + _, err = m.R.Skip(9) + return +} + +// ReadFloat32 reads a float32 from the reader +func (m *Reader) ReadFloat32() (f float32, err error) { + var p []byte + p, err = m.R.Peek(5) + if err != nil { + return + } + if p[0] != mfloat32 { + err = badPrefix(Float32Type, p[0]) + return + } + f = math.Float32frombits(getMuint32(p)) + _, err = m.R.Skip(5) + return +} + +// ReadBool reads a bool from the reader +func (m *Reader) ReadBool() (b bool, err error) { + var p []byte + p, err = m.R.Peek(1) + if err != nil { + return + } + switch p[0] { + case mtrue: + b = true + case mfalse: + default: + err = badPrefix(BoolType, p[0]) + return + } + _, err = m.R.Skip(1) + return +} + +// ReadInt64 reads an int64 from the reader +func (m *Reader) ReadInt64() (i int64, err error) { + var p []byte + var lead byte + p, err = m.R.Peek(1) + if err != nil { + return + } + lead = p[0] + + if isfixint(lead) { + i = int64(rfixint(lead)) + _, err = m.R.Skip(1) + return + } else if isnfixint(lead) { + i = int64(rnfixint(lead)) + _, err = m.R.Skip(1) + return + } + + switch lead { + case mint8: + p, err = m.R.Next(2) + if err != nil { + return + } + i = int64(getMint8(p)) + return + + case muint8: + p, err = m.R.Next(2) + if err != nil { + return + } + i = int64(getMuint8(p)) + return + + case mint16: + p, err = m.R.Next(3) + if err != nil { + return + } + i = int64(getMint16(p)) + return + + case muint16: + p, err = m.R.Next(3) + if err != nil { + return + } + i = int64(getMuint16(p)) + return + + case mint32: + p, err = m.R.Next(5) + if err != nil { + return + } + i = int64(getMint32(p)) + return + + case muint32: + p, err = m.R.Next(5) + if err != nil { + return + } + i = int64(getMuint32(p)) + return + + case mint64: + p, err = m.R.Next(9) + if err != nil { + return + } + i = getMint64(p) + return + + case muint64: + p, err = m.R.Next(9) + if err != nil { + return + } + u := getMuint64(p) + if u > math.MaxInt64 { + err = UintOverflow{Value: u, FailedBitsize: 64} + return + } + i = int64(u) + return + + default: + err = badPrefix(IntType, lead) + return + } +} + +// ReadInt32 reads an int32 from the reader +func (m *Reader) ReadInt32() (i int32, err error) { + var in int64 + in, err = m.ReadInt64() + if in > math.MaxInt32 || in < math.MinInt32 { + err = IntOverflow{Value: in, FailedBitsize: 32} + return + } + i = int32(in) + return +} + +// ReadInt16 reads an int16 from the reader +func (m *Reader) ReadInt16() (i int16, err error) { + var in int64 + in, err = m.ReadInt64() + if in > math.MaxInt16 || in < math.MinInt16 { + err = IntOverflow{Value: in, FailedBitsize: 16} + return + } + i = int16(in) + return +} + +// ReadInt8 reads an int8 from the reader +func (m *Reader) ReadInt8() (i int8, err error) { + var in int64 + in, err = m.ReadInt64() + if in > math.MaxInt8 || in < math.MinInt8 { + err = IntOverflow{Value: in, FailedBitsize: 8} + return + } + i = int8(in) + return +} + +// ReadInt reads an int from the reader +func (m *Reader) ReadInt() (i int, err error) { + if smallint { + var in int32 + in, err = m.ReadInt32() + i = int(in) + return + } + var in int64 + in, err = m.ReadInt64() + i = int(in) + return +} + +// ReadUint64 reads a uint64 from the reader +func (m *Reader) ReadUint64() (u uint64, err error) { + var p []byte + var lead byte + p, err = m.R.Peek(1) + if err != nil { + return + } + lead = p[0] + if isfixint(lead) { + u = uint64(rfixint(lead)) + _, err = m.R.Skip(1) + return + } + switch lead { + case mint8: + p, err = m.R.Next(2) + if err != nil { + return + } + v := int64(getMint8(p)) + if v < 0 { + err = UintBelowZero{Value: v} + return + } + u = uint64(v) + return + + case muint8: + p, err = m.R.Next(2) + if err != nil { + return + } + u = uint64(getMuint8(p)) + return + + case mint16: + p, err = m.R.Next(3) + if err != nil { + return + } + v := int64(getMint16(p)) + if v < 0 { + err = UintBelowZero{Value: v} + return + } + u = uint64(v) + return + + case muint16: + p, err = m.R.Next(3) + if err != nil { + return + } + u = uint64(getMuint16(p)) + return + + case mint32: + p, err = m.R.Next(5) + if err != nil { + return + } + v := int64(getMint32(p)) + if v < 0 { + err = UintBelowZero{Value: v} + return + } + u = uint64(v) + return + + case muint32: + p, err = m.R.Next(5) + if err != nil { + return + } + u = uint64(getMuint32(p)) + return + + case mint64: + p, err = m.R.Next(9) + if err != nil { + return + } + v := int64(getMint64(p)) + if v < 0 { + err = UintBelowZero{Value: v} + return + } + u = uint64(v) + return + + case muint64: + p, err = m.R.Next(9) + if err != nil { + return + } + u = getMuint64(p) + return + + default: + if isnfixint(lead) { + err = UintBelowZero{Value: int64(rnfixint(lead))} + } else { + err = badPrefix(UintType, lead) + } + return + + } +} + +// ReadUint32 reads a uint32 from the reader +func (m *Reader) ReadUint32() (u uint32, err error) { + var in uint64 + in, err = m.ReadUint64() + if in > math.MaxUint32 { + err = UintOverflow{Value: in, FailedBitsize: 32} + return + } + u = uint32(in) + return +} + +// ReadUint16 reads a uint16 from the reader +func (m *Reader) ReadUint16() (u uint16, err error) { + var in uint64 + in, err = m.ReadUint64() + if in > math.MaxUint16 { + err = UintOverflow{Value: in, FailedBitsize: 16} + return + } + u = uint16(in) + return +} + +// ReadUint8 reads a uint8 from the reader +func (m *Reader) ReadUint8() (u uint8, err error) { + var in uint64 + in, err = m.ReadUint64() + if in > math.MaxUint8 { + err = UintOverflow{Value: in, FailedBitsize: 8} + return + } + u = uint8(in) + return +} + +// ReadUint reads a uint from the reader +func (m *Reader) ReadUint() (u uint, err error) { + if smallint { + var un uint32 + un, err = m.ReadUint32() + u = uint(un) + return + } + var un uint64 + un, err = m.ReadUint64() + u = uint(un) + return +} + +// ReadByte is analogous to ReadUint8. +// +// NOTE: this is *not* an implementation +// of io.ByteReader. +func (m *Reader) ReadByte() (b byte, err error) { + var in uint64 + in, err = m.ReadUint64() + if in > math.MaxUint8 { + err = UintOverflow{Value: in, FailedBitsize: 8} + return + } + b = byte(in) + return +} + +// ReadBytes reads a MessagePack 'bin' object +// from the reader and returns its value. It may +// use 'scratch' for storage if it is non-nil. +func (m *Reader) ReadBytes(scratch []byte) (b []byte, err error) { + var p []byte + var lead byte + p, err = m.R.Peek(2) + if err != nil { + return + } + lead = p[0] + var read int64 + switch lead { + case mbin8: + read = int64(p[1]) + m.R.Skip(2) + case mbin16: + p, err = m.R.Next(3) + if err != nil { + return + } + read = int64(big.Uint16(p[1:])) + case mbin32: + p, err = m.R.Next(5) + if err != nil { + return + } + read = int64(big.Uint32(p[1:])) + default: + err = badPrefix(BinType, lead) + return + } + if int64(cap(scratch)) < read { + b = make([]byte, read) + } else { + b = scratch[0:read] + } + _, err = m.R.ReadFull(b) + return +} + +// ReadBytesHeader reads the size header +// of a MessagePack 'bin' object. The user +// is responsible for dealing with the next +// 'sz' bytes from the reader in an application-specific +// way. +func (m *Reader) ReadBytesHeader() (sz uint32, err error) { + var p []byte + p, err = m.R.Peek(1) + if err != nil { + return + } + switch p[0] { + case mbin8: + p, err = m.R.Next(2) + if err != nil { + return + } + sz = uint32(p[1]) + return + case mbin16: + p, err = m.R.Next(3) + if err != nil { + return + } + sz = uint32(big.Uint16(p[1:])) + return + case mbin32: + p, err = m.R.Next(5) + if err != nil { + return + } + sz = uint32(big.Uint32(p[1:])) + return + default: + err = badPrefix(BinType, p[0]) + return + } +} + +// ReadExactBytes reads a MessagePack 'bin'-encoded +// object off of the wire into the provided slice. An +// ArrayError will be returned if the object is not +// exactly the length of the input slice. +func (m *Reader) ReadExactBytes(into []byte) error { + p, err := m.R.Peek(2) + if err != nil { + return err + } + lead := p[0] + var read int64 // bytes to read + var skip int // prefix size to skip + switch lead { + case mbin8: + read = int64(p[1]) + skip = 2 + case mbin16: + p, err = m.R.Peek(3) + if err != nil { + return err + } + read = int64(big.Uint16(p[1:])) + skip = 3 + case mbin32: + p, err = m.R.Peek(5) + if err != nil { + return err + } + read = int64(big.Uint32(p[1:])) + skip = 5 + default: + return badPrefix(BinType, lead) + } + if read != int64(len(into)) { + return ArrayError{Wanted: uint32(len(into)), Got: uint32(read)} + } + m.R.Skip(skip) + _, err = m.R.ReadFull(into) + return err +} + +// ReadStringAsBytes reads a MessagePack 'str' (utf-8) string +// and returns its value as bytes. It may use 'scratch' for storage +// if it is non-nil. +func (m *Reader) ReadStringAsBytes(scratch []byte) (b []byte, err error) { + var p []byte + var lead byte + p, err = m.R.Peek(1) + if err != nil { + return + } + lead = p[0] + var read int64 + + if isfixstr(lead) { + read = int64(rfixstr(lead)) + m.R.Skip(1) + goto fill + } + + switch lead { + case mstr8: + p, err = m.R.Next(2) + if err != nil { + return + } + read = int64(uint8(p[1])) + case mstr16: + p, err = m.R.Next(3) + if err != nil { + return + } + read = int64(big.Uint16(p[1:])) + case mstr32: + p, err = m.R.Next(5) + if err != nil { + return + } + read = int64(big.Uint32(p[1:])) + default: + err = badPrefix(StrType, lead) + return + } +fill: + if int64(cap(scratch)) < read { + b = make([]byte, read) + } else { + b = scratch[0:read] + } + _, err = m.R.ReadFull(b) + return +} + +// ReadStringHeader reads a string header +// off of the wire. The user is then responsible +// for dealing with the next 'sz' bytes from +// the reader in an application-specific manner. +func (m *Reader) ReadStringHeader() (sz uint32, err error) { + var p []byte + p, err = m.R.Peek(1) + if err != nil { + return + } + lead := p[0] + if isfixstr(lead) { + sz = uint32(rfixstr(lead)) + m.R.Skip(1) + return + } + switch lead { + case mstr8: + p, err = m.R.Next(2) + if err != nil { + return + } + sz = uint32(p[1]) + return + case mstr16: + p, err = m.R.Next(3) + if err != nil { + return + } + sz = uint32(big.Uint16(p[1:])) + return + case mstr32: + p, err = m.R.Next(5) + if err != nil { + return + } + sz = big.Uint32(p[1:]) + return + default: + err = badPrefix(StrType, lead) + return + } +} + +// ReadString reads a utf-8 string from the reader +func (m *Reader) ReadString() (s string, err error) { + var p []byte + var lead byte + var read int64 + p, err = m.R.Peek(1) + if err != nil { + return + } + lead = p[0] + + if isfixstr(lead) { + read = int64(rfixstr(lead)) + m.R.Skip(1) + goto fill + } + + switch lead { + case mstr8: + p, err = m.R.Next(2) + if err != nil { + return + } + read = int64(uint8(p[1])) + case mstr16: + p, err = m.R.Next(3) + if err != nil { + return + } + read = int64(big.Uint16(p[1:])) + case mstr32: + p, err = m.R.Next(5) + if err != nil { + return + } + read = int64(big.Uint32(p[1:])) + default: + err = badPrefix(StrType, lead) + return + } +fill: + if read == 0 { + s, err = "", nil + return + } + // reading into the memory + // that will become the string + // itself has vastly superior + // worst-case performance, because + // the reader buffer doesn't have + // to be large enough to hold the string. + // the idea here is to make it more + // difficult for someone malicious + // to cause the system to run out of + // memory by sending very large strings. + // + // NOTE: this works because the argument + // passed to (*fwd.Reader).ReadFull escapes + // to the heap; its argument may, in turn, + // be passed to the underlying reader, and + // thus escape analysis *must* conclude that + // 'out' escapes. + out := make([]byte, read) + _, err = m.R.ReadFull(out) + if err != nil { + return + } + s = UnsafeString(out) + return +} + +// ReadComplex64 reads a complex64 from the reader +func (m *Reader) ReadComplex64() (f complex64, err error) { + var p []byte + p, err = m.R.Peek(10) + if err != nil { + return + } + if p[0] != mfixext8 { + err = badPrefix(Complex64Type, p[0]) + return + } + if int8(p[1]) != Complex64Extension { + err = errExt(int8(p[1]), Complex64Extension) + return + } + f = complex(math.Float32frombits(big.Uint32(p[2:])), + math.Float32frombits(big.Uint32(p[6:]))) + _, err = m.R.Skip(10) + return +} + +// ReadComplex128 reads a complex128 from the reader +func (m *Reader) ReadComplex128() (f complex128, err error) { + var p []byte + p, err = m.R.Peek(18) + if err != nil { + return + } + if p[0] != mfixext16 { + err = badPrefix(Complex128Type, p[0]) + return + } + if int8(p[1]) != Complex128Extension { + err = errExt(int8(p[1]), Complex128Extension) + return + } + f = complex(math.Float64frombits(big.Uint64(p[2:])), + math.Float64frombits(big.Uint64(p[10:]))) + _, err = m.R.Skip(18) + return +} + +// ReadMapStrIntf reads a MessagePack map into a map[string]interface{}. +// (You must pass a non-nil map into the function.) +func (m *Reader) ReadMapStrIntf(mp map[string]interface{}) (err error) { + var sz uint32 + sz, err = m.ReadMapHeader() + if err != nil { + return + } + for key := range mp { + delete(mp, key) + } + for i := uint32(0); i < sz; i++ { + var key string + var val interface{} + key, err = m.ReadString() + if err != nil { + return + } + val, err = m.ReadIntf() + if err != nil { + return + } + mp[key] = val + } + return +} + +// ReadTime reads a time.Time object from the reader. +// The returned time's location will be set to time.Local. +func (m *Reader) ReadTime() (t time.Time, err error) { + var p []byte + p, err = m.R.Peek(15) + if err != nil { + return + } + if p[0] != mext8 || p[1] != 12 { + err = badPrefix(TimeType, p[0]) + return + } + if int8(p[2]) != TimeExtension { + err = errExt(int8(p[2]), TimeExtension) + return + } + sec, nsec := getUnix(p[3:]) + t = time.Unix(sec, int64(nsec)).Local() + _, err = m.R.Skip(15) + return +} + +// ReadIntf reads out the next object as a raw interface{}. +// Arrays are decoded as []interface{}, and maps are decoded +// as map[string]interface{}. Integers are decoded as int64 +// and unsigned integers are decoded as uint64. +func (m *Reader) ReadIntf() (i interface{}, err error) { + var t Type + t, err = m.NextType() + if err != nil { + return + } + switch t { + case BoolType: + i, err = m.ReadBool() + return + + case IntType: + i, err = m.ReadInt64() + return + + case UintType: + i, err = m.ReadUint64() + return + + case BinType: + i, err = m.ReadBytes(nil) + return + + case StrType: + i, err = m.ReadString() + return + + case Complex64Type: + i, err = m.ReadComplex64() + return + + case Complex128Type: + i, err = m.ReadComplex128() + return + + case TimeType: + i, err = m.ReadTime() + return + + case ExtensionType: + var t int8 + t, err = m.peekExtensionType() + if err != nil { + return + } + f, ok := extensionReg[t] + if ok { + e := f() + err = m.ReadExtension(e) + i = e + return + } + var e RawExtension + e.Type = t + err = m.ReadExtension(&e) + i = &e + return + + case MapType: + mp := make(map[string]interface{}) + err = m.ReadMapStrIntf(mp) + i = mp + return + + case NilType: + err = m.ReadNil() + i = nil + return + + case Float32Type: + i, err = m.ReadFloat32() + return + + case Float64Type: + i, err = m.ReadFloat64() + return + + case ArrayType: + var sz uint32 + sz, err = m.ReadArrayHeader() + + if err != nil { + return + } + out := make([]interface{}, int(sz)) + for j := range out { + out[j], err = m.ReadIntf() + if err != nil { + return + } + } + i = out + return + + default: + return nil, fatal // unreachable + } +} diff --git a/vendor/github.com/tinylib/msgp/msgp/read_bytes.go b/vendor/github.com/tinylib/msgp/msgp/read_bytes.go new file mode 100644 index 0000000000..f53f84d013 --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/read_bytes.go @@ -0,0 +1,1197 @@ +package msgp + +import ( + "bytes" + "encoding/binary" + "math" + "time" +) + +var big = binary.BigEndian + +// NextType returns the type of the next +// object in the slice. If the length +// of the input is zero, it returns +// InvalidType. +func NextType(b []byte) Type { + if len(b) == 0 { + return InvalidType + } + spec := sizes[b[0]] + t := spec.typ + if t == ExtensionType && len(b) > int(spec.size) { + var tp int8 + if spec.extra == constsize { + tp = int8(b[1]) + } else { + tp = int8(b[spec.size-1]) + } + switch tp { + case TimeExtension: + return TimeType + case Complex128Extension: + return Complex128Type + case Complex64Extension: + return Complex64Type + default: + return ExtensionType + } + } + return t +} + +// IsNil returns true if len(b)>0 and +// the leading byte is a 'nil' MessagePack +// byte; false otherwise +func IsNil(b []byte) bool { + if len(b) != 0 && b[0] == mnil { + return true + } + return false +} + +// Raw is raw MessagePack. +// Raw allows you to read and write +// data without interpreting its contents. +type Raw []byte + +// MarshalMsg implements msgp.Marshaler. +// It appends the raw contents of 'raw' +// to the provided byte slice. If 'raw' +// is 0 bytes, 'nil' will be appended instead. +func (r Raw) MarshalMsg(b []byte) ([]byte, error) { + i := len(r) + if i == 0 { + return AppendNil(b), nil + } + o, l := ensure(b, i) + copy(o[l:], []byte(r)) + return o, nil +} + +// UnmarshalMsg implements msgp.Unmarshaler. +// It sets the contents of *Raw to be the next +// object in the provided byte slice. +func (r *Raw) UnmarshalMsg(b []byte) ([]byte, error) { + l := len(b) + out, err := Skip(b) + if err != nil { + return b, err + } + rlen := l - len(out) + if IsNil(b[:rlen]) { + rlen = 0 + } + if cap(*r) < rlen { + *r = make(Raw, rlen) + } else { + *r = (*r)[0:rlen] + } + copy(*r, b[:rlen]) + return out, nil +} + +// EncodeMsg implements msgp.Encodable. +// It writes the raw bytes to the writer. +// If r is empty, it writes 'nil' instead. +func (r Raw) EncodeMsg(w *Writer) error { + if len(r) == 0 { + return w.WriteNil() + } + _, err := w.Write([]byte(r)) + return err +} + +// DecodeMsg implements msgp.Decodable. +// It sets the value of *Raw to be the +// next object on the wire. +func (r *Raw) DecodeMsg(f *Reader) error { + *r = (*r)[:0] + err := appendNext(f, (*[]byte)(r)) + if IsNil(*r) { + *r = (*r)[:0] + } + return err +} + +// Msgsize implements msgp.Sizer +func (r Raw) Msgsize() int { + l := len(r) + if l == 0 { + return 1 // for 'nil' + } + return l +} + +func appendNext(f *Reader, d *[]byte) error { + amt, o, err := getNextSize(f.R) + if err != nil { + return err + } + var i int + *d, i = ensure(*d, int(amt)) + _, err = f.R.ReadFull((*d)[i:]) + if err != nil { + return err + } + for o > 0 { + err = appendNext(f, d) + if err != nil { + return err + } + o-- + } + return nil +} + +// MarshalJSON implements json.Marshaler +func (r *Raw) MarshalJSON() ([]byte, error) { + var buf bytes.Buffer + _, err := UnmarshalAsJSON(&buf, []byte(*r)) + return buf.Bytes(), err +} + +// ReadMapHeaderBytes reads a map header size +// from 'b' and returns the remaining bytes. +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError{} (not a map) +func ReadMapHeaderBytes(b []byte) (sz uint32, o []byte, err error) { + l := len(b) + if l < 1 { + err = ErrShortBytes + return + } + + lead := b[0] + if isfixmap(lead) { + sz = uint32(rfixmap(lead)) + o = b[1:] + return + } + + switch lead { + case mmap16: + if l < 3 { + err = ErrShortBytes + return + } + sz = uint32(big.Uint16(b[1:])) + o = b[3:] + return + + case mmap32: + if l < 5 { + err = ErrShortBytes + return + } + sz = big.Uint32(b[1:]) + o = b[5:] + return + + default: + err = badPrefix(MapType, lead) + return + } +} + +// ReadMapKeyZC attempts to read a map key +// from 'b' and returns the key bytes and the remaining bytes +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError{} (not a str or bin) +func ReadMapKeyZC(b []byte) ([]byte, []byte, error) { + o, b, err := ReadStringZC(b) + if err != nil { + if tperr, ok := err.(TypeError); ok && tperr.Encoded == BinType { + return ReadBytesZC(b) + } + return nil, b, err + } + return o, b, nil +} + +// ReadArrayHeaderBytes attempts to read +// the array header size off of 'b' and return +// the size and remaining bytes. +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError{} (not an array) +func ReadArrayHeaderBytes(b []byte) (sz uint32, o []byte, err error) { + if len(b) < 1 { + return 0, nil, ErrShortBytes + } + lead := b[0] + if isfixarray(lead) { + sz = uint32(rfixarray(lead)) + o = b[1:] + return + } + + switch lead { + case marray16: + if len(b) < 3 { + err = ErrShortBytes + return + } + sz = uint32(big.Uint16(b[1:])) + o = b[3:] + return + + case marray32: + if len(b) < 5 { + err = ErrShortBytes + return + } + sz = big.Uint32(b[1:]) + o = b[5:] + return + + default: + err = badPrefix(ArrayType, lead) + return + } +} + +// ReadNilBytes tries to read a "nil" byte +// off of 'b' and return the remaining bytes. +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError{} (not a 'nil') +// - InvalidPrefixError +func ReadNilBytes(b []byte) ([]byte, error) { + if len(b) < 1 { + return nil, ErrShortBytes + } + if b[0] != mnil { + return b, badPrefix(NilType, b[0]) + } + return b[1:], nil +} + +// ReadFloat64Bytes tries to read a float64 +// from 'b' and return the value and the remaining bytes. +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError{} (not a float64) +func ReadFloat64Bytes(b []byte) (f float64, o []byte, err error) { + if len(b) < 9 { + if len(b) >= 5 && b[0] == mfloat32 { + var tf float32 + tf, o, err = ReadFloat32Bytes(b) + f = float64(tf) + return + } + err = ErrShortBytes + return + } + + if b[0] != mfloat64 { + if b[0] == mfloat32 { + var tf float32 + tf, o, err = ReadFloat32Bytes(b) + f = float64(tf) + return + } + err = badPrefix(Float64Type, b[0]) + return + } + + f = math.Float64frombits(getMuint64(b)) + o = b[9:] + return +} + +// ReadFloat32Bytes tries to read a float64 +// from 'b' and return the value and the remaining bytes. +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError{} (not a float32) +func ReadFloat32Bytes(b []byte) (f float32, o []byte, err error) { + if len(b) < 5 { + err = ErrShortBytes + return + } + + if b[0] != mfloat32 { + err = TypeError{Method: Float32Type, Encoded: getType(b[0])} + return + } + + f = math.Float32frombits(getMuint32(b)) + o = b[5:] + return +} + +// ReadBoolBytes tries to read a float64 +// from 'b' and return the value and the remaining bytes. +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError{} (not a bool) +func ReadBoolBytes(b []byte) (bool, []byte, error) { + if len(b) < 1 { + return false, b, ErrShortBytes + } + switch b[0] { + case mtrue: + return true, b[1:], nil + case mfalse: + return false, b[1:], nil + default: + return false, b, badPrefix(BoolType, b[0]) + } +} + +// ReadInt64Bytes tries to read an int64 +// from 'b' and return the value and the remaining bytes. +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError (not a int) +func ReadInt64Bytes(b []byte) (i int64, o []byte, err error) { + l := len(b) + if l < 1 { + return 0, nil, ErrShortBytes + } + + lead := b[0] + if isfixint(lead) { + i = int64(rfixint(lead)) + o = b[1:] + return + } + if isnfixint(lead) { + i = int64(rnfixint(lead)) + o = b[1:] + return + } + + switch lead { + case mint8: + if l < 2 { + err = ErrShortBytes + return + } + i = int64(getMint8(b)) + o = b[2:] + return + + case muint8: + if l < 2 { + err = ErrShortBytes + return + } + i = int64(getMuint8(b)) + o = b[2:] + return + + case mint16: + if l < 3 { + err = ErrShortBytes + return + } + i = int64(getMint16(b)) + o = b[3:] + return + + case muint16: + if l < 3 { + err = ErrShortBytes + return + } + i = int64(getMuint16(b)) + o = b[3:] + return + + case mint32: + if l < 5 { + err = ErrShortBytes + return + } + i = int64(getMint32(b)) + o = b[5:] + return + + case muint32: + if l < 5 { + err = ErrShortBytes + return + } + i = int64(getMuint32(b)) + o = b[5:] + return + + case mint64: + if l < 9 { + err = ErrShortBytes + return + } + i = int64(getMint64(b)) + o = b[9:] + return + + case muint64: + if l < 9 { + err = ErrShortBytes + return + } + u := getMuint64(b) + if u > math.MaxInt64 { + err = UintOverflow{Value: u, FailedBitsize: 64} + return + } + i = int64(u) + o = b[9:] + return + + default: + err = badPrefix(IntType, lead) + return + } +} + +// ReadInt32Bytes tries to read an int32 +// from 'b' and return the value and the remaining bytes. +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError{} (not a int) +// - IntOverflow{} (value doesn't fit in int32) +func ReadInt32Bytes(b []byte) (int32, []byte, error) { + i, o, err := ReadInt64Bytes(b) + if i > math.MaxInt32 || i < math.MinInt32 { + return 0, o, IntOverflow{Value: i, FailedBitsize: 32} + } + return int32(i), o, err +} + +// ReadInt16Bytes tries to read an int16 +// from 'b' and return the value and the remaining bytes. +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError{} (not a int) +// - IntOverflow{} (value doesn't fit in int16) +func ReadInt16Bytes(b []byte) (int16, []byte, error) { + i, o, err := ReadInt64Bytes(b) + if i > math.MaxInt16 || i < math.MinInt16 { + return 0, o, IntOverflow{Value: i, FailedBitsize: 16} + } + return int16(i), o, err +} + +// ReadInt8Bytes tries to read an int16 +// from 'b' and return the value and the remaining bytes. +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError{} (not a int) +// - IntOverflow{} (value doesn't fit in int8) +func ReadInt8Bytes(b []byte) (int8, []byte, error) { + i, o, err := ReadInt64Bytes(b) + if i > math.MaxInt8 || i < math.MinInt8 { + return 0, o, IntOverflow{Value: i, FailedBitsize: 8} + } + return int8(i), o, err +} + +// ReadIntBytes tries to read an int +// from 'b' and return the value and the remaining bytes. +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError{} (not a int) +// - IntOverflow{} (value doesn't fit in int; 32-bit platforms only) +func ReadIntBytes(b []byte) (int, []byte, error) { + if smallint { + i, b, err := ReadInt32Bytes(b) + return int(i), b, err + } + i, b, err := ReadInt64Bytes(b) + return int(i), b, err +} + +// ReadUint64Bytes tries to read a uint64 +// from 'b' and return the value and the remaining bytes. +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError{} (not a uint) +func ReadUint64Bytes(b []byte) (u uint64, o []byte, err error) { + l := len(b) + if l < 1 { + return 0, nil, ErrShortBytes + } + + lead := b[0] + if isfixint(lead) { + u = uint64(rfixint(lead)) + o = b[1:] + return + } + + switch lead { + case mint8: + if l < 2 { + err = ErrShortBytes + return + } + v := int64(getMint8(b)) + if v < 0 { + err = UintBelowZero{Value: v} + return + } + u = uint64(v) + o = b[2:] + return + + case muint8: + if l < 2 { + err = ErrShortBytes + return + } + u = uint64(getMuint8(b)) + o = b[2:] + return + + case mint16: + if l < 3 { + err = ErrShortBytes + return + } + v := int64(getMint16(b)) + if v < 0 { + err = UintBelowZero{Value: v} + return + } + u = uint64(v) + o = b[3:] + return + + case muint16: + if l < 3 { + err = ErrShortBytes + return + } + u = uint64(getMuint16(b)) + o = b[3:] + return + + case mint32: + if l < 5 { + err = ErrShortBytes + return + } + v := int64(getMint32(b)) + if v < 0 { + err = UintBelowZero{Value: v} + return + } + u = uint64(v) + o = b[5:] + return + + case muint32: + if l < 5 { + err = ErrShortBytes + return + } + u = uint64(getMuint32(b)) + o = b[5:] + return + + case mint64: + if l < 9 { + err = ErrShortBytes + return + } + v := int64(getMint64(b)) + if v < 0 { + err = UintBelowZero{Value: v} + return + } + u = uint64(v) + o = b[9:] + return + + case muint64: + if l < 9 { + err = ErrShortBytes + return + } + u = getMuint64(b) + o = b[9:] + return + + default: + if isnfixint(lead) { + err = UintBelowZero{Value: int64(rnfixint(lead))} + } else { + err = badPrefix(UintType, lead) + } + return + } +} + +// ReadUint32Bytes tries to read a uint32 +// from 'b' and return the value and the remaining bytes. +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError{} (not a uint) +// - UintOverflow{} (value too large for uint32) +func ReadUint32Bytes(b []byte) (uint32, []byte, error) { + v, o, err := ReadUint64Bytes(b) + if v > math.MaxUint32 { + return 0, nil, UintOverflow{Value: v, FailedBitsize: 32} + } + return uint32(v), o, err +} + +// ReadUint16Bytes tries to read a uint16 +// from 'b' and return the value and the remaining bytes. +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError{} (not a uint) +// - UintOverflow{} (value too large for uint16) +func ReadUint16Bytes(b []byte) (uint16, []byte, error) { + v, o, err := ReadUint64Bytes(b) + if v > math.MaxUint16 { + return 0, nil, UintOverflow{Value: v, FailedBitsize: 16} + } + return uint16(v), o, err +} + +// ReadUint8Bytes tries to read a uint8 +// from 'b' and return the value and the remaining bytes. +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError{} (not a uint) +// - UintOverflow{} (value too large for uint8) +func ReadUint8Bytes(b []byte) (uint8, []byte, error) { + v, o, err := ReadUint64Bytes(b) + if v > math.MaxUint8 { + return 0, nil, UintOverflow{Value: v, FailedBitsize: 8} + } + return uint8(v), o, err +} + +// ReadUintBytes tries to read a uint +// from 'b' and return the value and the remaining bytes. +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError{} (not a uint) +// - UintOverflow{} (value too large for uint; 32-bit platforms only) +func ReadUintBytes(b []byte) (uint, []byte, error) { + if smallint { + u, b, err := ReadUint32Bytes(b) + return uint(u), b, err + } + u, b, err := ReadUint64Bytes(b) + return uint(u), b, err +} + +// ReadByteBytes is analogous to ReadUint8Bytes +func ReadByteBytes(b []byte) (byte, []byte, error) { + return ReadUint8Bytes(b) +} + +// ReadBytesBytes reads a 'bin' object +// from 'b' and returns its vaue and +// the remaining bytes in 'b'. +// Possible errors: +// - ErrShortBytes (too few bytes) +// - TypeError{} (not a 'bin' object) +func ReadBytesBytes(b []byte, scratch []byte) (v []byte, o []byte, err error) { + return readBytesBytes(b, scratch, false) +} + +func readBytesBytes(b []byte, scratch []byte, zc bool) (v []byte, o []byte, err error) { + l := len(b) + if l < 1 { + return nil, nil, ErrShortBytes + } + + lead := b[0] + var read int + switch lead { + case mbin8: + if l < 2 { + err = ErrShortBytes + return + } + + read = int(b[1]) + b = b[2:] + + case mbin16: + if l < 3 { + err = ErrShortBytes + return + } + read = int(big.Uint16(b[1:])) + b = b[3:] + + case mbin32: + if l < 5 { + err = ErrShortBytes + return + } + read = int(big.Uint32(b[1:])) + b = b[5:] + + default: + err = badPrefix(BinType, lead) + return + } + + if len(b) < read { + err = ErrShortBytes + return + } + + // zero-copy + if zc { + v = b[0:read] + o = b[read:] + return + } + + if cap(scratch) >= read { + v = scratch[0:read] + } else { + v = make([]byte, read) + } + + o = b[copy(v, b):] + return +} + +// ReadBytesZC extracts the messagepack-encoded +// binary field without copying. The returned []byte +// points to the same memory as the input slice. +// Possible errors: +// - ErrShortBytes (b not long enough) +// - TypeError{} (object not 'bin') +func ReadBytesZC(b []byte) (v []byte, o []byte, err error) { + return readBytesBytes(b, nil, true) +} + +func ReadExactBytes(b []byte, into []byte) (o []byte, err error) { + l := len(b) + if l < 1 { + err = ErrShortBytes + return + } + + lead := b[0] + var read uint32 + var skip int + switch lead { + case mbin8: + if l < 2 { + err = ErrShortBytes + return + } + + read = uint32(b[1]) + skip = 2 + + case mbin16: + if l < 3 { + err = ErrShortBytes + return + } + read = uint32(big.Uint16(b[1:])) + skip = 3 + + case mbin32: + if l < 5 { + err = ErrShortBytes + return + } + read = uint32(big.Uint32(b[1:])) + skip = 5 + + default: + err = badPrefix(BinType, lead) + return + } + + if read != uint32(len(into)) { + err = ArrayError{Wanted: uint32(len(into)), Got: read} + return + } + + o = b[skip+copy(into, b[skip:]):] + return +} + +// ReadStringZC reads a messagepack string field +// without copying. The returned []byte points +// to the same memory as the input slice. +// Possible errors: +// - ErrShortBytes (b not long enough) +// - TypeError{} (object not 'str') +func ReadStringZC(b []byte) (v []byte, o []byte, err error) { + l := len(b) + if l < 1 { + return nil, nil, ErrShortBytes + } + + lead := b[0] + var read int + + if isfixstr(lead) { + read = int(rfixstr(lead)) + b = b[1:] + } else { + switch lead { + case mstr8: + if l < 2 { + err = ErrShortBytes + return + } + read = int(b[1]) + b = b[2:] + + case mstr16: + if l < 3 { + err = ErrShortBytes + return + } + read = int(big.Uint16(b[1:])) + b = b[3:] + + case mstr32: + if l < 5 { + err = ErrShortBytes + return + } + read = int(big.Uint32(b[1:])) + b = b[5:] + + default: + err = TypeError{Method: StrType, Encoded: getType(lead)} + return + } + } + + if len(b) < read { + err = ErrShortBytes + return + } + + v = b[0:read] + o = b[read:] + return +} + +// ReadStringBytes reads a 'str' object +// from 'b' and returns its value and the +// remaining bytes in 'b'. +// Possible errors: +// - ErrShortBytes (b not long enough) +// - TypeError{} (not 'str' type) +// - InvalidPrefixError +func ReadStringBytes(b []byte) (string, []byte, error) { + v, o, err := ReadStringZC(b) + return string(v), o, err +} + +// ReadStringAsBytes reads a 'str' object +// into a slice of bytes. 'v' is the value of +// the 'str' object, which may reside in memory +// pointed to by 'scratch.' 'o' is the remaining bytes +// in 'b.'' +// Possible errors: +// - ErrShortBytes (b not long enough) +// - TypeError{} (not 'str' type) +// - InvalidPrefixError (unknown type marker) +func ReadStringAsBytes(b []byte, scratch []byte) (v []byte, o []byte, err error) { + var tmp []byte + tmp, o, err = ReadStringZC(b) + v = append(scratch[:0], tmp...) + return +} + +// ReadComplex128Bytes reads a complex128 +// extension object from 'b' and returns the +// remaining bytes. +// Possible errors: +// - ErrShortBytes (not enough bytes in 'b') +// - TypeError{} (object not a complex128) +// - InvalidPrefixError +// - ExtensionTypeError{} (object an extension of the correct size, but not a complex128) +func ReadComplex128Bytes(b []byte) (c complex128, o []byte, err error) { + if len(b) < 18 { + err = ErrShortBytes + return + } + if b[0] != mfixext16 { + err = badPrefix(Complex128Type, b[0]) + return + } + if int8(b[1]) != Complex128Extension { + err = errExt(int8(b[1]), Complex128Extension) + return + } + c = complex(math.Float64frombits(big.Uint64(b[2:])), + math.Float64frombits(big.Uint64(b[10:]))) + o = b[18:] + return +} + +// ReadComplex64Bytes reads a complex64 +// extension object from 'b' and returns the +// remaining bytes. +// Possible errors: +// - ErrShortBytes (not enough bytes in 'b') +// - TypeError{} (object not a complex64) +// - ExtensionTypeError{} (object an extension of the correct size, but not a complex64) +func ReadComplex64Bytes(b []byte) (c complex64, o []byte, err error) { + if len(b) < 10 { + err = ErrShortBytes + return + } + if b[0] != mfixext8 { + err = badPrefix(Complex64Type, b[0]) + return + } + if b[1] != Complex64Extension { + err = errExt(int8(b[1]), Complex64Extension) + return + } + c = complex(math.Float32frombits(big.Uint32(b[2:])), + math.Float32frombits(big.Uint32(b[6:]))) + o = b[10:] + return +} + +// ReadTimeBytes reads a time.Time +// extension object from 'b' and returns the +// remaining bytes. +// Possible errors: +// - ErrShortBytes (not enough bytes in 'b') +// - TypeError{} (object not a complex64) +// - ExtensionTypeError{} (object an extension of the correct size, but not a time.Time) +func ReadTimeBytes(b []byte) (t time.Time, o []byte, err error) { + if len(b) < 15 { + err = ErrShortBytes + return + } + if b[0] != mext8 || b[1] != 12 { + err = badPrefix(TimeType, b[0]) + return + } + if int8(b[2]) != TimeExtension { + err = errExt(int8(b[2]), TimeExtension) + return + } + sec, nsec := getUnix(b[3:]) + t = time.Unix(sec, int64(nsec)).Local() + o = b[15:] + return +} + +// ReadMapStrIntfBytes reads a map[string]interface{} +// out of 'b' and returns the map and remaining bytes. +// If 'old' is non-nil, the values will be read into that map. +func ReadMapStrIntfBytes(b []byte, old map[string]interface{}) (v map[string]interface{}, o []byte, err error) { + var sz uint32 + o = b + sz, o, err = ReadMapHeaderBytes(o) + + if err != nil { + return + } + + if old != nil { + for key := range old { + delete(old, key) + } + v = old + } else { + v = make(map[string]interface{}, int(sz)) + } + + for z := uint32(0); z < sz; z++ { + if len(o) < 1 { + err = ErrShortBytes + return + } + var key []byte + key, o, err = ReadMapKeyZC(o) + if err != nil { + return + } + var val interface{} + val, o, err = ReadIntfBytes(o) + if err != nil { + return + } + v[string(key)] = val + } + return +} + +// ReadIntfBytes attempts to read +// the next object out of 'b' as a raw interface{} and +// return the remaining bytes. +func ReadIntfBytes(b []byte) (i interface{}, o []byte, err error) { + if len(b) < 1 { + err = ErrShortBytes + return + } + + k := NextType(b) + + switch k { + case MapType: + i, o, err = ReadMapStrIntfBytes(b, nil) + return + + case ArrayType: + var sz uint32 + sz, o, err = ReadArrayHeaderBytes(b) + if err != nil { + return + } + j := make([]interface{}, int(sz)) + i = j + for d := range j { + j[d], o, err = ReadIntfBytes(o) + if err != nil { + return + } + } + return + + case Float32Type: + i, o, err = ReadFloat32Bytes(b) + return + + case Float64Type: + i, o, err = ReadFloat64Bytes(b) + return + + case IntType: + i, o, err = ReadInt64Bytes(b) + return + + case UintType: + i, o, err = ReadUint64Bytes(b) + return + + case BoolType: + i, o, err = ReadBoolBytes(b) + return + + case TimeType: + i, o, err = ReadTimeBytes(b) + return + + case Complex64Type: + i, o, err = ReadComplex64Bytes(b) + return + + case Complex128Type: + i, o, err = ReadComplex128Bytes(b) + return + + case ExtensionType: + var t int8 + t, err = peekExtension(b) + if err != nil { + return + } + // use a user-defined extension, + // if it's been registered + f, ok := extensionReg[t] + if ok { + e := f() + o, err = ReadExtensionBytes(b, e) + i = e + return + } + // last resort is a raw extension + e := RawExtension{} + e.Type = int8(t) + o, err = ReadExtensionBytes(b, &e) + i = &e + return + + case NilType: + o, err = ReadNilBytes(b) + return + + case BinType: + i, o, err = ReadBytesBytes(b, nil) + return + + case StrType: + i, o, err = ReadStringBytes(b) + return + + default: + err = InvalidPrefixError(b[0]) + return + } +} + +// Skip skips the next object in 'b' and +// returns the remaining bytes. If the object +// is a map or array, all of its elements +// will be skipped. +// Possible Errors: +// - ErrShortBytes (not enough bytes in b) +// - InvalidPrefixError (bad encoding) +func Skip(b []byte) ([]byte, error) { + sz, asz, err := getSize(b) + if err != nil { + return b, err + } + if uintptr(len(b)) < sz { + return b, ErrShortBytes + } + b = b[sz:] + for asz > 0 { + b, err = Skip(b) + if err != nil { + return b, err + } + asz-- + } + return b, nil +} + +// returns (skip N bytes, skip M objects, error) +func getSize(b []byte) (uintptr, uintptr, error) { + l := len(b) + if l == 0 { + return 0, 0, ErrShortBytes + } + lead := b[0] + spec := &sizes[lead] // get type information + size, mode := spec.size, spec.extra + if size == 0 { + return 0, 0, InvalidPrefixError(lead) + } + if mode >= 0 { // fixed composites + return uintptr(size), uintptr(mode), nil + } + if l < int(size) { + return 0, 0, ErrShortBytes + } + switch mode { + case extra8: + return uintptr(size) + uintptr(b[1]), 0, nil + case extra16: + return uintptr(size) + uintptr(big.Uint16(b[1:])), 0, nil + case extra32: + return uintptr(size) + uintptr(big.Uint32(b[1:])), 0, nil + case map16v: + return uintptr(size), 2 * uintptr(big.Uint16(b[1:])), nil + case map32v: + return uintptr(size), 2 * uintptr(big.Uint32(b[1:])), nil + case array16v: + return uintptr(size), uintptr(big.Uint16(b[1:])), nil + case array32v: + return uintptr(size), uintptr(big.Uint32(b[1:])), nil + default: + return 0, 0, fatal + } +} diff --git a/vendor/github.com/tinylib/msgp/msgp/size.go b/vendor/github.com/tinylib/msgp/msgp/size.go new file mode 100644 index 0000000000..ce2f8b16ff --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/size.go @@ -0,0 +1,38 @@ +package msgp + +// The sizes provided +// are the worst-case +// encoded sizes for +// each type. For variable- +// length types ([]byte, string), +// the total encoded size is +// the prefix size plus the +// length of the object. +const ( + Int64Size = 9 + IntSize = Int64Size + UintSize = Int64Size + Int8Size = 2 + Int16Size = 3 + Int32Size = 5 + Uint8Size = 2 + ByteSize = Uint8Size + Uint16Size = 3 + Uint32Size = 5 + Uint64Size = Int64Size + Float64Size = 9 + Float32Size = 5 + Complex64Size = 10 + Complex128Size = 18 + + TimeSize = 15 + BoolSize = 1 + NilSize = 1 + + MapHeaderSize = 5 + ArrayHeaderSize = 5 + + BytesPrefixSize = 5 + StringPrefixSize = 5 + ExtensionPrefixSize = 6 +) diff --git a/vendor/github.com/tinylib/msgp/msgp/unsafe.go b/vendor/github.com/tinylib/msgp/msgp/unsafe.go new file mode 100644 index 0000000000..3978b6ff6f --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/unsafe.go @@ -0,0 +1,41 @@ +// +build !purego,!appengine + +package msgp + +import ( + "reflect" + "unsafe" +) + +// NOTE: +// all of the definition in this file +// should be repeated in appengine.go, +// but without using unsafe + +const ( + // spec says int and uint are always + // the same size, but that int/uint + // size may not be machine word size + smallint = unsafe.Sizeof(int(0)) == 4 +) + +// UnsafeString returns the byte slice as a volatile string +// THIS SHOULD ONLY BE USED BY THE CODE GENERATOR. +// THIS IS EVIL CODE. +// YOU HAVE BEEN WARNED. +func UnsafeString(b []byte) string { + sh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + return *(*string)(unsafe.Pointer(&reflect.StringHeader{Data: sh.Data, Len: sh.Len})) +} + +// UnsafeBytes returns the string as a byte slice +// THIS SHOULD ONLY BE USED BY THE CODE GENERATOR. +// THIS IS EVIL CODE. +// YOU HAVE BEEN WARNED. +func UnsafeBytes(s string) []byte { + return *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{ + Len: len(s), + Cap: len(s), + Data: (*(*reflect.StringHeader)(unsafe.Pointer(&s))).Data, + })) +} diff --git a/vendor/github.com/tinylib/msgp/msgp/write.go b/vendor/github.com/tinylib/msgp/msgp/write.go new file mode 100644 index 0000000000..da9099c2e9 --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/write.go @@ -0,0 +1,845 @@ +package msgp + +import ( + "errors" + "fmt" + "io" + "math" + "reflect" + "sync" + "time" +) + +// Sizer is an interface implemented +// by types that can estimate their +// size when MessagePack encoded. +// This interface is optional, but +// encoding/marshaling implementations +// may use this as a way to pre-allocate +// memory for serialization. +type Sizer interface { + Msgsize() int +} + +var ( + // Nowhere is an io.Writer to nowhere + Nowhere io.Writer = nwhere{} + + btsType = reflect.TypeOf(([]byte)(nil)) + writerPool = sync.Pool{ + New: func() interface{} { + return &Writer{buf: make([]byte, 2048)} + }, + } +) + +func popWriter(w io.Writer) *Writer { + wr := writerPool.Get().(*Writer) + wr.Reset(w) + return wr +} + +func pushWriter(wr *Writer) { + wr.w = nil + wr.wloc = 0 + writerPool.Put(wr) +} + +// freeW frees a writer for use +// by other processes. It is not necessary +// to call freeW on a writer. However, maintaining +// a reference to a *Writer after calling freeW on +// it will cause undefined behavior. +func freeW(w *Writer) { pushWriter(w) } + +// Require ensures that cap(old)-len(old) >= extra. +func Require(old []byte, extra int) []byte { + l := len(old) + c := cap(old) + r := l + extra + if c >= r { + return old + } else if l == 0 { + return make([]byte, 0, extra) + } + // the new size is the greater + // of double the old capacity + // and the sum of the old length + // and the number of new bytes + // necessary. + c <<= 1 + if c < r { + c = r + } + n := make([]byte, l, c) + copy(n, old) + return n +} + +// nowhere writer +type nwhere struct{} + +func (n nwhere) Write(p []byte) (int, error) { return len(p), nil } + +// Marshaler is the interface implemented +// by types that know how to marshal themselves +// as MessagePack. MarshalMsg appends the marshalled +// form of the object to the provided +// byte slice, returning the extended +// slice and any errors encountered. +type Marshaler interface { + MarshalMsg([]byte) ([]byte, error) +} + +// Encodable is the interface implemented +// by types that know how to write themselves +// as MessagePack using a *msgp.Writer. +type Encodable interface { + EncodeMsg(*Writer) error +} + +// Writer is a buffered writer +// that can be used to write +// MessagePack objects to an io.Writer. +// You must call *Writer.Flush() in order +// to flush all of the buffered data +// to the underlying writer. +type Writer struct { + w io.Writer + buf []byte + wloc int +} + +// NewWriter returns a new *Writer. +func NewWriter(w io.Writer) *Writer { + if wr, ok := w.(*Writer); ok { + return wr + } + return popWriter(w) +} + +// NewWriterSize returns a writer with a custom buffer size. +func NewWriterSize(w io.Writer, sz int) *Writer { + // we must be able to require() 18 + // contiguous bytes, so that is the + // practical minimum buffer size + if sz < 18 { + sz = 18 + } + + return &Writer{ + w: w, + buf: make([]byte, sz), + } +} + +// Encode encodes an Encodable to an io.Writer. +func Encode(w io.Writer, e Encodable) error { + wr := NewWriter(w) + err := e.EncodeMsg(wr) + if err == nil { + err = wr.Flush() + } + freeW(wr) + return err +} + +func (mw *Writer) flush() error { + if mw.wloc == 0 { + return nil + } + n, err := mw.w.Write(mw.buf[:mw.wloc]) + if err != nil { + if n > 0 { + mw.wloc = copy(mw.buf, mw.buf[n:mw.wloc]) + } + return err + } + mw.wloc = 0 + return nil +} + +// Flush flushes all of the buffered +// data to the underlying writer. +func (mw *Writer) Flush() error { return mw.flush() } + +// Buffered returns the number bytes in the write buffer +func (mw *Writer) Buffered() int { return len(mw.buf) - mw.wloc } + +func (mw *Writer) avail() int { return len(mw.buf) - mw.wloc } + +func (mw *Writer) bufsize() int { return len(mw.buf) } + +// NOTE: this should only be called with +// a number that is guaranteed to be less than +// len(mw.buf). typically, it is called with a constant. +// +// NOTE: this is a hot code path +func (mw *Writer) require(n int) (int, error) { + c := len(mw.buf) + wl := mw.wloc + if c-wl < n { + if err := mw.flush(); err != nil { + return 0, err + } + wl = mw.wloc + } + mw.wloc += n + return wl, nil +} + +func (mw *Writer) Append(b ...byte) error { + if mw.avail() < len(b) { + err := mw.flush() + if err != nil { + return err + } + } + mw.wloc += copy(mw.buf[mw.wloc:], b) + return nil +} + +// push one byte onto the buffer +// +// NOTE: this is a hot code path +func (mw *Writer) push(b byte) error { + if mw.wloc == len(mw.buf) { + if err := mw.flush(); err != nil { + return err + } + } + mw.buf[mw.wloc] = b + mw.wloc++ + return nil +} + +func (mw *Writer) prefix8(b byte, u uint8) error { + const need = 2 + if len(mw.buf)-mw.wloc < need { + if err := mw.flush(); err != nil { + return err + } + } + prefixu8(mw.buf[mw.wloc:], b, u) + mw.wloc += need + return nil +} + +func (mw *Writer) prefix16(b byte, u uint16) error { + const need = 3 + if len(mw.buf)-mw.wloc < need { + if err := mw.flush(); err != nil { + return err + } + } + prefixu16(mw.buf[mw.wloc:], b, u) + mw.wloc += need + return nil +} + +func (mw *Writer) prefix32(b byte, u uint32) error { + const need = 5 + if len(mw.buf)-mw.wloc < need { + if err := mw.flush(); err != nil { + return err + } + } + prefixu32(mw.buf[mw.wloc:], b, u) + mw.wloc += need + return nil +} + +func (mw *Writer) prefix64(b byte, u uint64) error { + const need = 9 + if len(mw.buf)-mw.wloc < need { + if err := mw.flush(); err != nil { + return err + } + } + prefixu64(mw.buf[mw.wloc:], b, u) + mw.wloc += need + return nil +} + +// Write implements io.Writer, and writes +// data directly to the buffer. +func (mw *Writer) Write(p []byte) (int, error) { + l := len(p) + if mw.avail() < l { + if err := mw.flush(); err != nil { + return 0, err + } + if l > len(mw.buf) { + return mw.w.Write(p) + } + } + mw.wloc += copy(mw.buf[mw.wloc:], p) + return l, nil +} + +// implements io.WriteString +func (mw *Writer) writeString(s string) error { + l := len(s) + if mw.avail() < l { + if err := mw.flush(); err != nil { + return err + } + if l > len(mw.buf) { + _, err := io.WriteString(mw.w, s) + return err + } + } + mw.wloc += copy(mw.buf[mw.wloc:], s) + return nil +} + +// Reset changes the underlying writer used by the Writer +func (mw *Writer) Reset(w io.Writer) { + mw.buf = mw.buf[:cap(mw.buf)] + mw.w = w + mw.wloc = 0 +} + +// WriteMapHeader writes a map header of the given +// size to the writer +func (mw *Writer) WriteMapHeader(sz uint32) error { + switch { + case sz <= 15: + return mw.push(wfixmap(uint8(sz))) + case sz <= math.MaxUint16: + return mw.prefix16(mmap16, uint16(sz)) + default: + return mw.prefix32(mmap32, sz) + } +} + +// WriteArrayHeader writes an array header of the +// given size to the writer +func (mw *Writer) WriteArrayHeader(sz uint32) error { + switch { + case sz <= 15: + return mw.push(wfixarray(uint8(sz))) + case sz <= math.MaxUint16: + return mw.prefix16(marray16, uint16(sz)) + default: + return mw.prefix32(marray32, sz) + } +} + +// WriteNil writes a nil byte to the buffer +func (mw *Writer) WriteNil() error { + return mw.push(mnil) +} + +// WriteFloat64 writes a float64 to the writer +func (mw *Writer) WriteFloat64(f float64) error { + return mw.prefix64(mfloat64, math.Float64bits(f)) +} + +// WriteFloat32 writes a float32 to the writer +func (mw *Writer) WriteFloat32(f float32) error { + return mw.prefix32(mfloat32, math.Float32bits(f)) +} + +// WriteInt64 writes an int64 to the writer +func (mw *Writer) WriteInt64(i int64) error { + if i >= 0 { + switch { + case i <= math.MaxInt8: + return mw.push(wfixint(uint8(i))) + case i <= math.MaxInt16: + return mw.prefix16(mint16, uint16(i)) + case i <= math.MaxInt32: + return mw.prefix32(mint32, uint32(i)) + default: + return mw.prefix64(mint64, uint64(i)) + } + } + switch { + case i >= -32: + return mw.push(wnfixint(int8(i))) + case i >= math.MinInt8: + return mw.prefix8(mint8, uint8(i)) + case i >= math.MinInt16: + return mw.prefix16(mint16, uint16(i)) + case i >= math.MinInt32: + return mw.prefix32(mint32, uint32(i)) + default: + return mw.prefix64(mint64, uint64(i)) + } +} + +// WriteInt8 writes an int8 to the writer +func (mw *Writer) WriteInt8(i int8) error { return mw.WriteInt64(int64(i)) } + +// WriteInt16 writes an int16 to the writer +func (mw *Writer) WriteInt16(i int16) error { return mw.WriteInt64(int64(i)) } + +// WriteInt32 writes an int32 to the writer +func (mw *Writer) WriteInt32(i int32) error { return mw.WriteInt64(int64(i)) } + +// WriteInt writes an int to the writer +func (mw *Writer) WriteInt(i int) error { return mw.WriteInt64(int64(i)) } + +// WriteUint64 writes a uint64 to the writer +func (mw *Writer) WriteUint64(u uint64) error { + switch { + case u <= (1<<7)-1: + return mw.push(wfixint(uint8(u))) + case u <= math.MaxUint8: + return mw.prefix8(muint8, uint8(u)) + case u <= math.MaxUint16: + return mw.prefix16(muint16, uint16(u)) + case u <= math.MaxUint32: + return mw.prefix32(muint32, uint32(u)) + default: + return mw.prefix64(muint64, u) + } +} + +// WriteByte is analogous to WriteUint8 +func (mw *Writer) WriteByte(u byte) error { return mw.WriteUint8(uint8(u)) } + +// WriteUint8 writes a uint8 to the writer +func (mw *Writer) WriteUint8(u uint8) error { return mw.WriteUint64(uint64(u)) } + +// WriteUint16 writes a uint16 to the writer +func (mw *Writer) WriteUint16(u uint16) error { return mw.WriteUint64(uint64(u)) } + +// WriteUint32 writes a uint32 to the writer +func (mw *Writer) WriteUint32(u uint32) error { return mw.WriteUint64(uint64(u)) } + +// WriteUint writes a uint to the writer +func (mw *Writer) WriteUint(u uint) error { return mw.WriteUint64(uint64(u)) } + +// WriteBytes writes binary as 'bin' to the writer +func (mw *Writer) WriteBytes(b []byte) error { + sz := uint32(len(b)) + var err error + switch { + case sz <= math.MaxUint8: + err = mw.prefix8(mbin8, uint8(sz)) + case sz <= math.MaxUint16: + err = mw.prefix16(mbin16, uint16(sz)) + default: + err = mw.prefix32(mbin32, sz) + } + if err != nil { + return err + } + _, err = mw.Write(b) + return err +} + +// WriteBytesHeader writes just the size header +// of a MessagePack 'bin' object. The user is responsible +// for then writing 'sz' more bytes into the stream. +func (mw *Writer) WriteBytesHeader(sz uint32) error { + switch { + case sz <= math.MaxUint8: + return mw.prefix8(mbin8, uint8(sz)) + case sz <= math.MaxUint16: + return mw.prefix16(mbin16, uint16(sz)) + default: + return mw.prefix32(mbin32, sz) + } +} + +// WriteBool writes a bool to the writer +func (mw *Writer) WriteBool(b bool) error { + if b { + return mw.push(mtrue) + } + return mw.push(mfalse) +} + +// WriteString writes a messagepack string to the writer. +// (This is NOT an implementation of io.StringWriter) +func (mw *Writer) WriteString(s string) error { + sz := uint32(len(s)) + var err error + switch { + case sz <= 31: + err = mw.push(wfixstr(uint8(sz))) + case sz <= math.MaxUint8: + err = mw.prefix8(mstr8, uint8(sz)) + case sz <= math.MaxUint16: + err = mw.prefix16(mstr16, uint16(sz)) + default: + err = mw.prefix32(mstr32, sz) + } + if err != nil { + return err + } + return mw.writeString(s) +} + +// WriteStringHeader writes just the string size +// header of a MessagePack 'str' object. The user +// is responsible for writing 'sz' more valid UTF-8 +// bytes to the stream. +func (mw *Writer) WriteStringHeader(sz uint32) error { + switch { + case sz <= 31: + return mw.push(wfixstr(uint8(sz))) + case sz <= math.MaxUint8: + return mw.prefix8(mstr8, uint8(sz)) + case sz <= math.MaxUint16: + return mw.prefix16(mstr16, uint16(sz)) + default: + return mw.prefix32(mstr32, sz) + } +} + +// WriteStringFromBytes writes a 'str' object +// from a []byte. +func (mw *Writer) WriteStringFromBytes(str []byte) error { + sz := uint32(len(str)) + var err error + switch { + case sz <= 31: + err = mw.push(wfixstr(uint8(sz))) + case sz <= math.MaxUint8: + err = mw.prefix8(mstr8, uint8(sz)) + case sz <= math.MaxUint16: + err = mw.prefix16(mstr16, uint16(sz)) + default: + err = mw.prefix32(mstr32, sz) + } + if err != nil { + return err + } + _, err = mw.Write(str) + return err +} + +// WriteComplex64 writes a complex64 to the writer +func (mw *Writer) WriteComplex64(f complex64) error { + o, err := mw.require(10) + if err != nil { + return err + } + mw.buf[o] = mfixext8 + mw.buf[o+1] = Complex64Extension + big.PutUint32(mw.buf[o+2:], math.Float32bits(real(f))) + big.PutUint32(mw.buf[o+6:], math.Float32bits(imag(f))) + return nil +} + +// WriteComplex128 writes a complex128 to the writer +func (mw *Writer) WriteComplex128(f complex128) error { + o, err := mw.require(18) + if err != nil { + return err + } + mw.buf[o] = mfixext16 + mw.buf[o+1] = Complex128Extension + big.PutUint64(mw.buf[o+2:], math.Float64bits(real(f))) + big.PutUint64(mw.buf[o+10:], math.Float64bits(imag(f))) + return nil +} + +// WriteMapStrStr writes a map[string]string to the writer +func (mw *Writer) WriteMapStrStr(mp map[string]string) (err error) { + err = mw.WriteMapHeader(uint32(len(mp))) + if err != nil { + return + } + for key, val := range mp { + err = mw.WriteString(key) + if err != nil { + return + } + err = mw.WriteString(val) + if err != nil { + return + } + } + return nil +} + +// WriteMapStrIntf writes a map[string]interface to the writer +func (mw *Writer) WriteMapStrIntf(mp map[string]interface{}) (err error) { + err = mw.WriteMapHeader(uint32(len(mp))) + if err != nil { + return + } + for key, val := range mp { + err = mw.WriteString(key) + if err != nil { + return + } + err = mw.WriteIntf(val) + if err != nil { + return + } + } + return +} + +// WriteTime writes a time.Time object to the wire. +// +// Time is encoded as Unix time, which means that +// location (time zone) data is removed from the object. +// The encoded object itself is 12 bytes: 8 bytes for +// a big-endian 64-bit integer denoting seconds +// elapsed since "zero" Unix time, followed by 4 bytes +// for a big-endian 32-bit signed integer denoting +// the nanosecond offset of the time. This encoding +// is intended to ease portability across languages. +// (Note that this is *not* the standard time.Time +// binary encoding, because its implementation relies +// heavily on the internal representation used by the +// time package.) +func (mw *Writer) WriteTime(t time.Time) error { + t = t.UTC() + o, err := mw.require(15) + if err != nil { + return err + } + mw.buf[o] = mext8 + mw.buf[o+1] = 12 + mw.buf[o+2] = TimeExtension + putUnix(mw.buf[o+3:], t.Unix(), int32(t.Nanosecond())) + return nil +} + +// WriteIntf writes the concrete type of 'v'. +// WriteIntf will error if 'v' is not one of the following: +// - A bool, float, string, []byte, int, uint, or complex +// - A map of supported types (with string keys) +// - An array or slice of supported types +// - A pointer to a supported type +// - A type that satisfies the msgp.Encodable interface +// - A type that satisfies the msgp.Extension interface +func (mw *Writer) WriteIntf(v interface{}) error { + if v == nil { + return mw.WriteNil() + } + switch v := v.(type) { + + // preferred interfaces + + case Encodable: + return v.EncodeMsg(mw) + case Extension: + return mw.WriteExtension(v) + + // concrete types + + case bool: + return mw.WriteBool(v) + case float32: + return mw.WriteFloat32(v) + case float64: + return mw.WriteFloat64(v) + case complex64: + return mw.WriteComplex64(v) + case complex128: + return mw.WriteComplex128(v) + case uint8: + return mw.WriteUint8(v) + case uint16: + return mw.WriteUint16(v) + case uint32: + return mw.WriteUint32(v) + case uint64: + return mw.WriteUint64(v) + case uint: + return mw.WriteUint(v) + case int8: + return mw.WriteInt8(v) + case int16: + return mw.WriteInt16(v) + case int32: + return mw.WriteInt32(v) + case int64: + return mw.WriteInt64(v) + case int: + return mw.WriteInt(v) + case string: + return mw.WriteString(v) + case []byte: + return mw.WriteBytes(v) + case map[string]string: + return mw.WriteMapStrStr(v) + case map[string]interface{}: + return mw.WriteMapStrIntf(v) + case time.Time: + return mw.WriteTime(v) + } + + val := reflect.ValueOf(v) + if !isSupported(val.Kind()) || !val.IsValid() { + return fmt.Errorf("msgp: type %s not supported", val) + } + + switch val.Kind() { + case reflect.Ptr: + if val.IsNil() { + return mw.WriteNil() + } + return mw.WriteIntf(val.Elem().Interface()) + case reflect.Slice: + return mw.writeSlice(val) + case reflect.Map: + return mw.writeMap(val) + } + return &ErrUnsupportedType{val.Type()} +} + +func (mw *Writer) writeMap(v reflect.Value) (err error) { + if v.Type().Key().Kind() != reflect.String { + return errors.New("msgp: map keys must be strings") + } + ks := v.MapKeys() + err = mw.WriteMapHeader(uint32(len(ks))) + if err != nil { + return + } + for _, key := range ks { + val := v.MapIndex(key) + err = mw.WriteString(key.String()) + if err != nil { + return + } + err = mw.WriteIntf(val.Interface()) + if err != nil { + return + } + } + return +} + +func (mw *Writer) writeSlice(v reflect.Value) (err error) { + // is []byte + if v.Type().ConvertibleTo(btsType) { + return mw.WriteBytes(v.Bytes()) + } + + sz := uint32(v.Len()) + err = mw.WriteArrayHeader(sz) + if err != nil { + return + } + for i := uint32(0); i < sz; i++ { + err = mw.WriteIntf(v.Index(int(i)).Interface()) + if err != nil { + return + } + } + return +} + +func (mw *Writer) writeStruct(v reflect.Value) error { + if enc, ok := v.Interface().(Encodable); ok { + return enc.EncodeMsg(mw) + } + return fmt.Errorf("msgp: unsupported type: %s", v.Type()) +} + +func (mw *Writer) writeVal(v reflect.Value) error { + if !isSupported(v.Kind()) { + return fmt.Errorf("msgp: msgp/enc: type %q not supported", v.Type()) + } + + // shortcut for nil values + if v.IsNil() { + return mw.WriteNil() + } + switch v.Kind() { + case reflect.Bool: + return mw.WriteBool(v.Bool()) + + case reflect.Float32, reflect.Float64: + return mw.WriteFloat64(v.Float()) + + case reflect.Complex64, reflect.Complex128: + return mw.WriteComplex128(v.Complex()) + + case reflect.Int, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int8: + return mw.WriteInt64(v.Int()) + + case reflect.Interface, reflect.Ptr: + if v.IsNil() { + mw.WriteNil() + } + return mw.writeVal(v.Elem()) + + case reflect.Map: + return mw.writeMap(v) + + case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint8: + return mw.WriteUint64(v.Uint()) + + case reflect.String: + return mw.WriteString(v.String()) + + case reflect.Slice, reflect.Array: + return mw.writeSlice(v) + + case reflect.Struct: + return mw.writeStruct(v) + + } + return fmt.Errorf("msgp: msgp/enc: type %q not supported", v.Type()) +} + +// is the reflect.Kind encodable? +func isSupported(k reflect.Kind) bool { + switch k { + case reflect.Func, reflect.Chan, reflect.Invalid, reflect.UnsafePointer: + return false + default: + return true + } +} + +// GuessSize guesses the size of the underlying +// value of 'i'. If the underlying value is not +// a simple builtin (or []byte), GuessSize defaults +// to 512. +func GuessSize(i interface{}) int { + if i == nil { + return NilSize + } + + switch i := i.(type) { + case Sizer: + return i.Msgsize() + case Extension: + return ExtensionPrefixSize + i.Len() + case float64: + return Float64Size + case float32: + return Float32Size + case uint8, uint16, uint32, uint64, uint: + return UintSize + case int8, int16, int32, int64, int: + return IntSize + case []byte: + return BytesPrefixSize + len(i) + case string: + return StringPrefixSize + len(i) + case complex64: + return Complex64Size + case complex128: + return Complex128Size + case bool: + return BoolSize + case map[string]interface{}: + s := MapHeaderSize + for key, val := range i { + s += StringPrefixSize + len(key) + GuessSize(val) + } + return s + case map[string]string: + s := MapHeaderSize + for key, val := range i { + s += 2*StringPrefixSize + len(key) + len(val) + } + return s + default: + return 512 + } +} diff --git a/vendor/github.com/tinylib/msgp/msgp/write_bytes.go b/vendor/github.com/tinylib/msgp/msgp/write_bytes.go new file mode 100644 index 0000000000..eaa03c46eb --- /dev/null +++ b/vendor/github.com/tinylib/msgp/msgp/write_bytes.go @@ -0,0 +1,411 @@ +package msgp + +import ( + "math" + "reflect" + "time" +) + +// ensure 'sz' extra bytes in 'b' btw len(b) and cap(b) +func ensure(b []byte, sz int) ([]byte, int) { + l := len(b) + c := cap(b) + if c-l < sz { + o := make([]byte, (2*c)+sz) // exponential growth + n := copy(o, b) + return o[:n+sz], n + } + return b[:l+sz], l +} + +// AppendMapHeader appends a map header with the +// given size to the slice +func AppendMapHeader(b []byte, sz uint32) []byte { + switch { + case sz <= 15: + return append(b, wfixmap(uint8(sz))) + + case sz <= math.MaxUint16: + o, n := ensure(b, 3) + prefixu16(o[n:], mmap16, uint16(sz)) + return o + + default: + o, n := ensure(b, 5) + prefixu32(o[n:], mmap32, sz) + return o + } +} + +// AppendArrayHeader appends an array header with +// the given size to the slice +func AppendArrayHeader(b []byte, sz uint32) []byte { + switch { + case sz <= 15: + return append(b, wfixarray(uint8(sz))) + + case sz <= math.MaxUint16: + o, n := ensure(b, 3) + prefixu16(o[n:], marray16, uint16(sz)) + return o + + default: + o, n := ensure(b, 5) + prefixu32(o[n:], marray32, sz) + return o + } +} + +// AppendNil appends a 'nil' byte to the slice +func AppendNil(b []byte) []byte { return append(b, mnil) } + +// AppendFloat64 appends a float64 to the slice +func AppendFloat64(b []byte, f float64) []byte { + o, n := ensure(b, Float64Size) + prefixu64(o[n:], mfloat64, math.Float64bits(f)) + return o +} + +// AppendFloat32 appends a float32 to the slice +func AppendFloat32(b []byte, f float32) []byte { + o, n := ensure(b, Float32Size) + prefixu32(o[n:], mfloat32, math.Float32bits(f)) + return o +} + +// AppendInt64 appends an int64 to the slice +func AppendInt64(b []byte, i int64) []byte { + if i >= 0 { + switch { + case i <= math.MaxInt8: + return append(b, wfixint(uint8(i))) + case i <= math.MaxInt16: + o, n := ensure(b, 3) + putMint16(o[n:], int16(i)) + return o + case i <= math.MaxInt32: + o, n := ensure(b, 5) + putMint32(o[n:], int32(i)) + return o + default: + o, n := ensure(b, 9) + putMint64(o[n:], i) + return o + } + } + switch { + case i >= -32: + return append(b, wnfixint(int8(i))) + case i >= math.MinInt8: + o, n := ensure(b, 2) + putMint8(o[n:], int8(i)) + return o + case i >= math.MinInt16: + o, n := ensure(b, 3) + putMint16(o[n:], int16(i)) + return o + case i >= math.MinInt32: + o, n := ensure(b, 5) + putMint32(o[n:], int32(i)) + return o + default: + o, n := ensure(b, 9) + putMint64(o[n:], i) + return o + } +} + +// AppendInt appends an int to the slice +func AppendInt(b []byte, i int) []byte { return AppendInt64(b, int64(i)) } + +// AppendInt8 appends an int8 to the slice +func AppendInt8(b []byte, i int8) []byte { return AppendInt64(b, int64(i)) } + +// AppendInt16 appends an int16 to the slice +func AppendInt16(b []byte, i int16) []byte { return AppendInt64(b, int64(i)) } + +// AppendInt32 appends an int32 to the slice +func AppendInt32(b []byte, i int32) []byte { return AppendInt64(b, int64(i)) } + +// AppendUint64 appends a uint64 to the slice +func AppendUint64(b []byte, u uint64) []byte { + switch { + case u <= (1<<7)-1: + return append(b, wfixint(uint8(u))) + + case u <= math.MaxUint8: + o, n := ensure(b, 2) + putMuint8(o[n:], uint8(u)) + return o + + case u <= math.MaxUint16: + o, n := ensure(b, 3) + putMuint16(o[n:], uint16(u)) + return o + + case u <= math.MaxUint32: + o, n := ensure(b, 5) + putMuint32(o[n:], uint32(u)) + return o + + default: + o, n := ensure(b, 9) + putMuint64(o[n:], u) + return o + + } +} + +// AppendUint appends a uint to the slice +func AppendUint(b []byte, u uint) []byte { return AppendUint64(b, uint64(u)) } + +// AppendUint8 appends a uint8 to the slice +func AppendUint8(b []byte, u uint8) []byte { return AppendUint64(b, uint64(u)) } + +// AppendByte is analogous to AppendUint8 +func AppendByte(b []byte, u byte) []byte { return AppendUint8(b, uint8(u)) } + +// AppendUint16 appends a uint16 to the slice +func AppendUint16(b []byte, u uint16) []byte { return AppendUint64(b, uint64(u)) } + +// AppendUint32 appends a uint32 to the slice +func AppendUint32(b []byte, u uint32) []byte { return AppendUint64(b, uint64(u)) } + +// AppendBytes appends bytes to the slice as MessagePack 'bin' data +func AppendBytes(b []byte, bts []byte) []byte { + sz := len(bts) + var o []byte + var n int + switch { + case sz <= math.MaxUint8: + o, n = ensure(b, 2+sz) + prefixu8(o[n:], mbin8, uint8(sz)) + n += 2 + case sz <= math.MaxUint16: + o, n = ensure(b, 3+sz) + prefixu16(o[n:], mbin16, uint16(sz)) + n += 3 + default: + o, n = ensure(b, 5+sz) + prefixu32(o[n:], mbin32, uint32(sz)) + n += 5 + } + return o[:n+copy(o[n:], bts)] +} + +// AppendBool appends a bool to the slice +func AppendBool(b []byte, t bool) []byte { + if t { + return append(b, mtrue) + } + return append(b, mfalse) +} + +// AppendString appends a string as a MessagePack 'str' to the slice +func AppendString(b []byte, s string) []byte { + sz := len(s) + var n int + var o []byte + switch { + case sz <= 31: + o, n = ensure(b, 1+sz) + o[n] = wfixstr(uint8(sz)) + n++ + case sz <= math.MaxUint8: + o, n = ensure(b, 2+sz) + prefixu8(o[n:], mstr8, uint8(sz)) + n += 2 + case sz <= math.MaxUint16: + o, n = ensure(b, 3+sz) + prefixu16(o[n:], mstr16, uint16(sz)) + n += 3 + default: + o, n = ensure(b, 5+sz) + prefixu32(o[n:], mstr32, uint32(sz)) + n += 5 + } + return o[:n+copy(o[n:], s)] +} + +// AppendStringFromBytes appends a []byte +// as a MessagePack 'str' to the slice 'b.' +func AppendStringFromBytes(b []byte, str []byte) []byte { + sz := len(str) + var n int + var o []byte + switch { + case sz <= 31: + o, n = ensure(b, 1+sz) + o[n] = wfixstr(uint8(sz)) + n++ + case sz <= math.MaxUint8: + o, n = ensure(b, 2+sz) + prefixu8(o[n:], mstr8, uint8(sz)) + n += 2 + case sz <= math.MaxUint16: + o, n = ensure(b, 3+sz) + prefixu16(o[n:], mstr16, uint16(sz)) + n += 3 + default: + o, n = ensure(b, 5+sz) + prefixu32(o[n:], mstr32, uint32(sz)) + n += 5 + } + return o[:n+copy(o[n:], str)] +} + +// AppendComplex64 appends a complex64 to the slice as a MessagePack extension +func AppendComplex64(b []byte, c complex64) []byte { + o, n := ensure(b, Complex64Size) + o[n] = mfixext8 + o[n+1] = Complex64Extension + big.PutUint32(o[n+2:], math.Float32bits(real(c))) + big.PutUint32(o[n+6:], math.Float32bits(imag(c))) + return o +} + +// AppendComplex128 appends a complex128 to the slice as a MessagePack extension +func AppendComplex128(b []byte, c complex128) []byte { + o, n := ensure(b, Complex128Size) + o[n] = mfixext16 + o[n+1] = Complex128Extension + big.PutUint64(o[n+2:], math.Float64bits(real(c))) + big.PutUint64(o[n+10:], math.Float64bits(imag(c))) + return o +} + +// AppendTime appends a time.Time to the slice as a MessagePack extension +func AppendTime(b []byte, t time.Time) []byte { + o, n := ensure(b, TimeSize) + t = t.UTC() + o[n] = mext8 + o[n+1] = 12 + o[n+2] = TimeExtension + putUnix(o[n+3:], t.Unix(), int32(t.Nanosecond())) + return o +} + +// AppendMapStrStr appends a map[string]string to the slice +// as a MessagePack map with 'str'-type keys and values +func AppendMapStrStr(b []byte, m map[string]string) []byte { + sz := uint32(len(m)) + b = AppendMapHeader(b, sz) + for key, val := range m { + b = AppendString(b, key) + b = AppendString(b, val) + } + return b +} + +// AppendMapStrIntf appends a map[string]interface{} to the slice +// as a MessagePack map with 'str'-type keys. +func AppendMapStrIntf(b []byte, m map[string]interface{}) ([]byte, error) { + sz := uint32(len(m)) + b = AppendMapHeader(b, sz) + var err error + for key, val := range m { + b = AppendString(b, key) + b, err = AppendIntf(b, val) + if err != nil { + return b, err + } + } + return b, nil +} + +// AppendIntf appends the concrete type of 'i' to the +// provided []byte. 'i' must be one of the following: +// - 'nil' +// - A bool, float, string, []byte, int, uint, or complex +// - A map[string]interface{} or map[string]string +// - A []T, where T is another supported type +// - A *T, where T is another supported type +// - A type that satisfieds the msgp.Marshaler interface +// - A type that satisfies the msgp.Extension interface +func AppendIntf(b []byte, i interface{}) ([]byte, error) { + if i == nil { + return AppendNil(b), nil + } + + // all the concrete types + // for which we have methods + switch i := i.(type) { + case Marshaler: + return i.MarshalMsg(b) + case Extension: + return AppendExtension(b, i) + case bool: + return AppendBool(b, i), nil + case float32: + return AppendFloat32(b, i), nil + case float64: + return AppendFloat64(b, i), nil + case complex64: + return AppendComplex64(b, i), nil + case complex128: + return AppendComplex128(b, i), nil + case string: + return AppendString(b, i), nil + case []byte: + return AppendBytes(b, i), nil + case int8: + return AppendInt8(b, i), nil + case int16: + return AppendInt16(b, i), nil + case int32: + return AppendInt32(b, i), nil + case int64: + return AppendInt64(b, i), nil + case int: + return AppendInt64(b, int64(i)), nil + case uint: + return AppendUint64(b, uint64(i)), nil + case uint8: + return AppendUint8(b, i), nil + case uint16: + return AppendUint16(b, i), nil + case uint32: + return AppendUint32(b, i), nil + case uint64: + return AppendUint64(b, i), nil + case time.Time: + return AppendTime(b, i), nil + case map[string]interface{}: + return AppendMapStrIntf(b, i) + case map[string]string: + return AppendMapStrStr(b, i), nil + case []interface{}: + b = AppendArrayHeader(b, uint32(len(i))) + var err error + for _, k := range i { + b, err = AppendIntf(b, k) + if err != nil { + return b, err + } + } + return b, nil + } + + var err error + v := reflect.ValueOf(i) + switch v.Kind() { + case reflect.Array, reflect.Slice: + l := v.Len() + b = AppendArrayHeader(b, uint32(l)) + for i := 0; i < l; i++ { + b, err = AppendIntf(b, v.Index(i).Interface()) + if err != nil { + return b, err + } + } + return b, nil + case reflect.Ptr: + if v.IsNil() { + return AppendNil(b), err + } + b, err = AppendIntf(b, v.Elem().Interface()) + return b, err + default: + return b, &ErrUnsupportedType{T: v.Type()} + } +} diff --git a/vendor/github.com/willf/bitset/LICENSE b/vendor/github.com/willf/bitset/LICENSE new file mode 100644 index 0000000000..59cab8a939 --- /dev/null +++ b/vendor/github.com/willf/bitset/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2014 Will Fitzgerald. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/willf/bitset/Makefile b/vendor/github.com/willf/bitset/Makefile new file mode 100644 index 0000000000..e3fd5479d2 --- /dev/null +++ b/vendor/github.com/willf/bitset/Makefile @@ -0,0 +1,197 @@ +# MAKEFILE +# +# @author Nicola Asuni <info@tecnick.com> +# @link https://github.com/willf/bitset +# ------------------------------------------------------------------------------ + +# List special make targets that are not associated with files +.PHONY: help all test format fmtcheck vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple astscan qa deps clean nuke + +# Use bash as shell (Note: Ubuntu now uses dash which doesn't support PIPESTATUS). +SHELL=/bin/bash + +# CVS path (path to the parent dir containing the project) +CVSPATH=github.com/willf + +# Project owner +OWNER=willf + +# Project vendor +VENDOR=willf + +# Project name +PROJECT=bitset + +# Project version +VERSION=$(shell cat VERSION) + +# Name of RPM or DEB package +PKGNAME=${VENDOR}-${PROJECT} + +# Current directory +CURRENTDIR=$(shell pwd) + +# GO lang path +ifneq ($(GOPATH),) + ifeq ($(findstring $(GOPATH),$(CURRENTDIR)),) + # the defined GOPATH is not valid + GOPATH= + endif +endif +ifeq ($(GOPATH),) + # extract the GOPATH + GOPATH=$(firstword $(subst /src/, ,$(CURRENTDIR))) +endif + +# --- MAKE TARGETS --- + +# Display general help about this command +help: + @echo "" + @echo "$(PROJECT) Makefile." + @echo "GOPATH=$(GOPATH)" + @echo "The following commands are available:" + @echo "" + @echo " make qa : Run all the tests" + @echo " make test : Run the unit tests" + @echo "" + @echo " make format : Format the source code" + @echo " make fmtcheck : Check if the source code has been formatted" + @echo " make vet : Check for suspicious constructs" + @echo " make lint : Check for style errors" + @echo " make coverage : Generate the coverage report" + @echo " make cyclo : Generate the cyclomatic complexity report" + @echo " make ineffassign : Detect ineffectual assignments" + @echo " make misspell : Detect commonly misspelled words in source files" + @echo " make structcheck : Find unused struct fields" + @echo " make varcheck : Find unused global variables and constants" + @echo " make errcheck : Check that error return values are used" + @echo " make gosimple : Suggest code simplifications" + @echo " make astscan : GO AST scanner" + @echo "" + @echo " make docs : Generate source code documentation" + @echo "" + @echo " make deps : Get the dependencies" + @echo " make clean : Remove any build artifact" + @echo " make nuke : Deletes any intermediate file" + @echo "" + +# Alias for help target +all: help + +# Run the unit tests +test: + @mkdir -p target/test + @mkdir -p target/report + GOPATH=$(GOPATH) \ + go test \ + -covermode=atomic \ + -bench=. \ + -race \ + -cpuprofile=target/report/cpu.out \ + -memprofile=target/report/mem.out \ + -mutexprofile=target/report/mutex.out \ + -coverprofile=target/report/coverage.out \ + -v ./... | \ + tee >(PATH=$(GOPATH)/bin:$(PATH) go-junit-report > target/test/report.xml); \ + test $${PIPESTATUS[0]} -eq 0 + +# Format the source code +format: + @find . -type f -name "*.go" -exec gofmt -s -w {} \; + +# Check if the source code has been formatted +fmtcheck: + @mkdir -p target + @find . -type f -name "*.go" -exec gofmt -s -d {} \; | tee target/format.diff + @test ! -s target/format.diff || { echo "ERROR: the source code has not been formatted - please use 'make format' or 'gofmt'"; exit 1; } + +# Check for syntax errors +vet: + GOPATH=$(GOPATH) go vet . + +# Check for style errors +lint: + GOPATH=$(GOPATH) PATH=$(GOPATH)/bin:$(PATH) golint . + +# Generate the coverage report +coverage: + @mkdir -p target/report + GOPATH=$(GOPATH) \ + go tool cover -html=target/report/coverage.out -o target/report/coverage.html + +# Report cyclomatic complexity +cyclo: + @mkdir -p target/report + GOPATH=$(GOPATH) gocyclo -avg ./ | tee target/report/cyclo.txt ; test $${PIPESTATUS[0]} -eq 0 + +# Detect ineffectual assignments +ineffassign: + @mkdir -p target/report + GOPATH=$(GOPATH) ineffassign ./ | tee target/report/ineffassign.txt ; test $${PIPESTATUS[0]} -eq 0 + +# Detect commonly misspelled words in source files +misspell: + @mkdir -p target/report + GOPATH=$(GOPATH) misspell -error ./ | tee target/report/misspell.txt ; test $${PIPESTATUS[0]} -eq 0 + +# Find unused struct fields +structcheck: + @mkdir -p target/report + GOPATH=$(GOPATH) structcheck -a ./ | tee target/report/structcheck.txt + +# Find unused global variables and constants +varcheck: + @mkdir -p target/report + GOPATH=$(GOPATH) varcheck -e ./ | tee target/report/varcheck.txt + +# Check that error return values are used +errcheck: + @mkdir -p target/report + GOPATH=$(GOPATH) errcheck ./ | tee target/report/errcheck.txt + +# Suggest code simplifications +gosimple: + @mkdir -p target/report + GOPATH=$(GOPATH) gosimple ./ | tee target/report/gosimple.txt + +# AST scanner +astscan: + @mkdir -p target/report + GOPATH=$(GOPATH) gas .//*.go | tee target/report/astscan.txt ; test $${PIPESTATUS[0]} -eq 0 + +# Generate source docs +docs: + @mkdir -p target/docs + nohup sh -c 'GOPATH=$(GOPATH) godoc -http=127.0.0.1:6060' > target/godoc_server.log 2>&1 & + wget --directory-prefix=target/docs/ --execute robots=off --retry-connrefused --recursive --no-parent --adjust-extension --page-requisites --convert-links http://127.0.0.1:6060/pkg/github.com/${VENDOR}/${PROJECT}/ ; kill -9 `lsof -ti :6060` + @echo '<html><head><meta http-equiv="refresh" content="0;./127.0.0.1:6060/pkg/'${CVSPATH}'/'${PROJECT}'/index.html"/></head><a href="./127.0.0.1:6060/pkg/'${CVSPATH}'/'${PROJECT}'/index.html">'${PKGNAME}' Documentation ...</a></html>' > target/docs/index.html + +# Alias to run all quality-assurance checks +qa: fmtcheck test vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple + +# --- INSTALL --- + +# Get the dependencies +deps: + GOPATH=$(GOPATH) go get ./... + GOPATH=$(GOPATH) go get github.com/golang/lint/golint + GOPATH=$(GOPATH) go get github.com/jstemmer/go-junit-report + GOPATH=$(GOPATH) go get github.com/axw/gocov/gocov + GOPATH=$(GOPATH) go get github.com/fzipp/gocyclo + GOPATH=$(GOPATH) go get github.com/gordonklaus/ineffassign + GOPATH=$(GOPATH) go get github.com/client9/misspell/cmd/misspell + GOPATH=$(GOPATH) go get github.com/opennota/check/cmd/structcheck + GOPATH=$(GOPATH) go get github.com/opennota/check/cmd/varcheck + GOPATH=$(GOPATH) go get github.com/kisielk/errcheck + GOPATH=$(GOPATH) go get honnef.co/go/tools/cmd/gosimple + GOPATH=$(GOPATH) go get github.com/GoASTScanner/gas + +# Remove any build artifact +clean: + GOPATH=$(GOPATH) go clean ./... + +# Deletes any intermediate file +nuke: + rm -rf ./target + GOPATH=$(GOPATH) go clean -i ./... diff --git a/vendor/github.com/willf/bitset/README.md b/vendor/github.com/willf/bitset/README.md new file mode 100644 index 0000000000..6c62b20c6c --- /dev/null +++ b/vendor/github.com/willf/bitset/README.md @@ -0,0 +1,96 @@ +# bitset + +*Go language library to map between non-negative integers and boolean values* + +[![Master Build Status](https://secure.travis-ci.org/willf/bitset.png?branch=master)](https://travis-ci.org/willf/bitset?branch=master) +[![Master Coverage Status](https://coveralls.io/repos/willf/bitset/badge.svg?branch=master&service=github)](https://coveralls.io/github/willf/bitset?branch=master) +[![Go Report Card](https://goreportcard.com/badge/github.com/willf/bitset)](https://goreportcard.com/report/github.com/willf/bitset) +[![GoDoc](https://godoc.org/github.com/willf/bitset?status.svg)](http://godoc.org/github.com/willf/bitset) + + +## Description + +Package bitset implements bitsets, a mapping between non-negative integers and boolean values. +It should be more efficient than map[uint] bool. + +It provides methods for setting, clearing, flipping, and testing individual integers. + +But it also provides set intersection, union, difference, complement, and symmetric operations, as well as tests to check whether any, all, or no bits are set, and querying a bitset's current length and number of positive bits. + +BitSets are expanded to the size of the largest set bit; the memory allocation is approximately Max bits, where Max is the largest set bit. BitSets are never shrunk. On creation, a hint can be given for the number of bits that will be used. + +Many of the methods, including Set, Clear, and Flip, return a BitSet pointer, which allows for chaining. + +### Example use: + +```go +package main + +import ( + "fmt" + "math/rand" + + "github.com/willf/bitset" +) + +func main() { + fmt.Printf("Hello from BitSet!\n") + var b bitset.BitSet + // play some Go Fish + for i := 0; i < 100; i++ { + card1 := uint(rand.Intn(52)) + card2 := uint(rand.Intn(52)) + b.Set(card1) + if b.Test(card2) { + fmt.Println("Go Fish!") + } + b.Clear(card1) + } + + // Chaining + b.Set(10).Set(11) + + for i, e := b.NextSet(0); e; i, e = b.NextSet(i + 1) { + fmt.Println("The following bit is set:", i) + } + if b.Intersection(bitset.New(100).Set(10)).Count() == 1 { + fmt.Println("Intersection works.") + } else { + fmt.Println("Intersection doesn't work???") + } +} +``` + +As an alternative to BitSets, one should check out the 'big' package, which provides a (less set-theoretical) view of bitsets. + +Godoc documentation is at: https://godoc.org/github.com/willf/bitset + + +## Implementation Note + +Go 1.9 introduced a native `math/bits` library. We provide backward compatibility to Go 1.7, which might be removed. + +It is possible that a later version will match the `math/bits` return signature for counts (which is `int`, rather than our library's `unit64`). If so, the version will be bumped. + +## Installation + +```bash +go get github.com/willf/bitset +``` + +## Contributing + +If you wish to contribute to this project, please branch and issue a pull request against master ("[GitHub Flow](https://guides.github.com/introduction/flow/)") + +This project include a Makefile that allows you to test and build the project with simple commands. +To see all available options: +```bash +make help +``` + +## Running all tests + +Before committing the code, please check if it passes all tests using (note: this will install some dependencies): +```bash +make qa +``` diff --git a/vendor/github.com/willf/bitset/VERSION b/vendor/github.com/willf/bitset/VERSION new file mode 100644 index 0000000000..781dcb07cd --- /dev/null +++ b/vendor/github.com/willf/bitset/VERSION @@ -0,0 +1 @@ +1.1.3 diff --git a/vendor/github.com/willf/bitset/bitset.go b/vendor/github.com/willf/bitset/bitset.go new file mode 100644 index 0000000000..65ef6851d1 --- /dev/null +++ b/vendor/github.com/willf/bitset/bitset.go @@ -0,0 +1,759 @@ +/* +Package bitset implements bitsets, a mapping +between non-negative integers and boolean values. It should be more +efficient than map[uint] bool. + +It provides methods for setting, clearing, flipping, and testing +individual integers. + +But it also provides set intersection, union, difference, +complement, and symmetric operations, as well as tests to +check whether any, all, or no bits are set, and querying a +bitset's current length and number of positive bits. + +BitSets are expanded to the size of the largest set bit; the +memory allocation is approximately Max bits, where Max is +the largest set bit. BitSets are never shrunk. On creation, +a hint can be given for the number of bits that will be used. + +Many of the methods, including Set,Clear, and Flip, return +a BitSet pointer, which allows for chaining. + +Example use: + + import "bitset" + var b BitSet + b.Set(10).Set(11) + if b.Test(1000) { + b.Clear(1000) + } + if B.Intersection(bitset.New(100).Set(10)).Count() > 1 { + fmt.Println("Intersection works.") + } + +As an alternative to BitSets, one should check out the 'big' package, +which provides a (less set-theoretical) view of bitsets. + +*/ +package bitset + +import ( + "bufio" + "bytes" + "encoding/base64" + "encoding/binary" + "encoding/json" + "errors" + "fmt" + "io" + "strconv" +) + +// the wordSize of a bit set +const wordSize = uint(64) + +// log2WordSize is lg(wordSize) +const log2WordSize = uint(6) + +// allBits has every bit set +const allBits uint64 = 0xffffffffffffffff + +// A BitSet is a set of bits. The zero value of a BitSet is an empty set of length 0. +type BitSet struct { + length uint + set []uint64 +} + +// Error is used to distinguish errors (panics) generated in this package. +type Error string + +// safeSet will fixup b.set to be non-nil and return the field value +func (b *BitSet) safeSet() []uint64 { + if b.set == nil { + b.set = make([]uint64, wordsNeeded(0)) + } + return b.set +} + +// From is a constructor used to create a BitSet from an array of integers +func From(buf []uint64) *BitSet { + return &BitSet{uint(len(buf)) * 64, buf} +} + +// Bytes returns the bitset as array of integers +func (b *BitSet) Bytes() []uint64 { + return b.set +} + +// wordsNeeded calculates the number of words needed for i bits +func wordsNeeded(i uint) int { + if i > (Cap() - wordSize + 1) { + return int(Cap() >> log2WordSize) + } + return int((i + (wordSize - 1)) >> log2WordSize) +} + +// New creates a new BitSet with a hint that length bits will be required +func New(length uint) (bset *BitSet) { + defer func() { + if r := recover(); r != nil { + bset = &BitSet{ + 0, + make([]uint64, 0), + } + } + }() + + bset = &BitSet{ + length, + make([]uint64, wordsNeeded(length)), + } + + return bset +} + +// Cap returns the total possible capacity, or number of bits +func Cap() uint { + return ^uint(0) +} + +// Len returns the length of the BitSet in words +func (b *BitSet) Len() uint { + return b.length +} + +// extendSetMaybe adds additional words to incorporate new bits if needed +func (b *BitSet) extendSetMaybe(i uint) { + if i >= b.length { // if we need more bits, make 'em + nsize := wordsNeeded(i + 1) + if b.set == nil { + b.set = make([]uint64, nsize) + } else if cap(b.set) >= nsize { + b.set = b.set[:nsize] // fast resize + } else if len(b.set) < nsize { + newset := make([]uint64, nsize, 2*nsize) // increase capacity 2x + copy(newset, b.set) + b.set = newset + } + b.length = i + 1 + } +} + +// Test whether bit i is set. +func (b *BitSet) Test(i uint) bool { + if i >= b.length { + return false + } + return b.set[i>>log2WordSize]&(1<<(i&(wordSize-1))) != 0 +} + +// Set bit i to 1 +func (b *BitSet) Set(i uint) *BitSet { + b.extendSetMaybe(i) + b.set[i>>log2WordSize] |= 1 << (i & (wordSize - 1)) + return b +} + +// Clear bit i to 0 +func (b *BitSet) Clear(i uint) *BitSet { + if i >= b.length { + return b + } + b.set[i>>log2WordSize] &^= 1 << (i & (wordSize - 1)) + return b +} + +// SetTo sets bit i to value +func (b *BitSet) SetTo(i uint, value bool) *BitSet { + if value { + return b.Set(i) + } + return b.Clear(i) +} + +// Flip bit at i +func (b *BitSet) Flip(i uint) *BitSet { + if i >= b.length { + return b.Set(i) + } + b.set[i>>log2WordSize] ^= 1 << (i & (wordSize - 1)) + return b +} + +// String creates a string representation of the Bitmap +func (b *BitSet) String() string { + // follows code from https://github.com/RoaringBitmap/roaring + var buffer bytes.Buffer + start := []byte("{") + buffer.Write(start) + counter := 0 + i, e := b.NextSet(0) + for e { + counter = counter + 1 + // to avoid exhausting the memory + if counter > 0x40000 { + buffer.WriteString("...") + break + } + buffer.WriteString(strconv.FormatInt(int64(i), 10)) + i, e = b.NextSet(i + 1) + if e { + buffer.WriteString(",") + } + } + buffer.WriteString("}") + return buffer.String() +} + +// NextSet returns the next bit set from the specified index, +// including possibly the current index +// along with an error code (true = valid, false = no set bit found) +// for i,e := v.NextSet(0); e; i,e = v.NextSet(i + 1) {...} +func (b *BitSet) NextSet(i uint) (uint, bool) { + x := int(i >> log2WordSize) + if x >= len(b.set) { + return 0, false + } + w := b.set[x] + w = w >> (i & (wordSize - 1)) + if w != 0 { + return i + trailingZeroes64(w), true + } + x = x + 1 + for x < len(b.set) { + if b.set[x] != 0 { + return uint(x)*wordSize + trailingZeroes64(b.set[x]), true + } + x = x + 1 + + } + return 0, false +} + +// NextSetMany returns many next bit sets from the specified index, +// including possibly the current index and up to cap(buffer). +// If the returned slice has len zero, then no more set bits were found +// +// buffer := make([]uint, 256) +// j := uint(0) +// j, buffer = bitmap.NextSetMany(j, buffer) +// for ; len(buffer) > 0; j, buffer = bitmap.NextSetMany(j,buffer) { +// for k := range buffer { +// do something with buffer[k] +// } +// j += 1 +// } +// +func (b *BitSet) NextSetMany(i uint, buffer []uint) (uint, []uint) { + myanswer := buffer[:0] + + x := int(i >> log2WordSize) + if x >= len(b.set) { + return 0, myanswer + } + w := b.set[x] + w = w >> (i & (wordSize - 1)) + base := uint(x << 6) + capacity := cap(buffer) + for len(myanswer) < capacity { + for w != 0 { + t := w & ((^w) + 1) + r := trailingZeroes64(w) + myanswer = append(myanswer, r+base) + if len(myanswer) == capacity { + goto End + } + w = w ^ t + } + x += 1 + if x == len(b.set) { + break + } + base += 64 + w = b.set[x] + } +End: + if len(myanswer) > 0 { + return myanswer[len(myanswer)-1], myanswer + } else { + return 0, myanswer + } +} + +// NextClear returns the next clear bit from the specified index, +// including possibly the current index +// along with an error code (true = valid, false = no bit found i.e. all bits are set) +func (b *BitSet) NextClear(i uint) (uint, bool) { + x := int(i >> log2WordSize) + if x >= len(b.set) { + return 0, false + } + w := b.set[x] + w = w >> (i & (wordSize - 1)) + wA := allBits >> (i & (wordSize - 1)) + index := i + trailingZeroes64(^w) + if w != wA && index < b.length { + return index, true + } + x++ + for x < len(b.set) { + index = uint(x)*wordSize + trailingZeroes64(^b.set[x]) + if b.set[x] != allBits && index < b.length { + return index, true + } + x++ + } + return 0, false +} + +// ClearAll clears the entire BitSet +func (b *BitSet) ClearAll() *BitSet { + if b != nil && b.set != nil { + for i := range b.set { + b.set[i] = 0 + } + } + return b +} + +// wordCount returns the number of words used in a bit set +func (b *BitSet) wordCount() int { + return len(b.set) +} + +// Clone this BitSet +func (b *BitSet) Clone() *BitSet { + c := New(b.length) + if b.set != nil { // Clone should not modify current object + copy(c.set, b.set) + } + return c +} + +// Copy into a destination BitSet +// Returning the size of the destination BitSet +// like array copy +func (b *BitSet) Copy(c *BitSet) (count uint) { + if c == nil { + return + } + if b.set != nil { // Copy should not modify current object + copy(c.set, b.set) + } + count = c.length + if b.length < c.length { + count = b.length + } + return +} + +// Count (number of set bits) +func (b *BitSet) Count() uint { + if b != nil && b.set != nil { + return uint(popcntSlice(b.set)) + } + return 0 +} + +// Equal tests the equvalence of two BitSets. +// False if they are of different sizes, otherwise true +// only if all the same bits are set +func (b *BitSet) Equal(c *BitSet) bool { + if c == nil { + return false + } + if b.length != c.length { + return false + } + if b.length == 0 { // if they have both length == 0, then could have nil set + return true + } + // testing for equality shoud not transform the bitset (no call to safeSet) + + for p, v := range b.set { + if c.set[p] != v { + return false + } + } + return true +} + +func panicIfNull(b *BitSet) { + if b == nil { + panic(Error("BitSet must not be null")) + } +} + +// Difference of base set and other set +// This is the BitSet equivalent of &^ (and not) +func (b *BitSet) Difference(compare *BitSet) (result *BitSet) { + panicIfNull(b) + panicIfNull(compare) + result = b.Clone() // clone b (in case b is bigger than compare) + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + for i := 0; i < l; i++ { + result.set[i] = b.set[i] &^ compare.set[i] + } + return +} + +// DifferenceCardinality computes the cardinality of the differnce +func (b *BitSet) DifferenceCardinality(compare *BitSet) uint { + panicIfNull(b) + panicIfNull(compare) + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + cnt := uint64(0) + cnt += popcntMaskSlice(b.set[:l], compare.set[:l]) + cnt += popcntSlice(b.set[l:]) + return uint(cnt) +} + +// InPlaceDifference computes the difference of base set and other set +// This is the BitSet equivalent of &^ (and not) +func (b *BitSet) InPlaceDifference(compare *BitSet) { + panicIfNull(b) + panicIfNull(compare) + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + for i := 0; i < l; i++ { + b.set[i] &^= compare.set[i] + } +} + +// Convenience function: return two bitsets ordered by +// increasing length. Note: neither can be nil +func sortByLength(a *BitSet, b *BitSet) (ap *BitSet, bp *BitSet) { + if a.length <= b.length { + ap, bp = a, b + } else { + ap, bp = b, a + } + return +} + +// Intersection of base set and other set +// This is the BitSet equivalent of & (and) +func (b *BitSet) Intersection(compare *BitSet) (result *BitSet) { + panicIfNull(b) + panicIfNull(compare) + b, compare = sortByLength(b, compare) + result = New(b.length) + for i, word := range b.set { + result.set[i] = word & compare.set[i] + } + return +} + +// IntersectionCardinality computes the cardinality of the union +func (b *BitSet) IntersectionCardinality(compare *BitSet) uint { + panicIfNull(b) + panicIfNull(compare) + b, compare = sortByLength(b, compare) + cnt := popcntAndSlice(b.set, compare.set) + return uint(cnt) +} + +// InPlaceIntersection destructively computes the intersection of +// base set and the compare set. +// This is the BitSet equivalent of & (and) +func (b *BitSet) InPlaceIntersection(compare *BitSet) { + panicIfNull(b) + panicIfNull(compare) + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + for i := 0; i < l; i++ { + b.set[i] &= compare.set[i] + } + for i := l; i < len(b.set); i++ { + b.set[i] = 0 + } + if compare.length > 0 { + b.extendSetMaybe(compare.length - 1) + } +} + +// Union of base set and other set +// This is the BitSet equivalent of | (or) +func (b *BitSet) Union(compare *BitSet) (result *BitSet) { + panicIfNull(b) + panicIfNull(compare) + b, compare = sortByLength(b, compare) + result = compare.Clone() + for i, word := range b.set { + result.set[i] = word | compare.set[i] + } + return +} + +// UnionCardinality computes the cardinality of the uniton of the base set +// and the compare set. +func (b *BitSet) UnionCardinality(compare *BitSet) uint { + panicIfNull(b) + panicIfNull(compare) + b, compare = sortByLength(b, compare) + cnt := popcntOrSlice(b.set, compare.set) + if len(compare.set) > len(b.set) { + cnt += popcntSlice(compare.set[len(b.set):]) + } + return uint(cnt) +} + +// InPlaceUnion creates the destructive union of base set and compare set. +// This is the BitSet equivalent of | (or). +func (b *BitSet) InPlaceUnion(compare *BitSet) { + panicIfNull(b) + panicIfNull(compare) + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + if compare.length > 0 { + b.extendSetMaybe(compare.length - 1) + } + for i := 0; i < l; i++ { + b.set[i] |= compare.set[i] + } + if len(compare.set) > l { + for i := l; i < len(compare.set); i++ { + b.set[i] = compare.set[i] + } + } +} + +// SymmetricDifference of base set and other set +// This is the BitSet equivalent of ^ (xor) +func (b *BitSet) SymmetricDifference(compare *BitSet) (result *BitSet) { + panicIfNull(b) + panicIfNull(compare) + b, compare = sortByLength(b, compare) + // compare is bigger, so clone it + result = compare.Clone() + for i, word := range b.set { + result.set[i] = word ^ compare.set[i] + } + return +} + +// SymmetricDifferenceCardinality computes the cardinality of the symmetric difference +func (b *BitSet) SymmetricDifferenceCardinality(compare *BitSet) uint { + panicIfNull(b) + panicIfNull(compare) + b, compare = sortByLength(b, compare) + cnt := popcntXorSlice(b.set, compare.set) + if len(compare.set) > len(b.set) { + cnt += popcntSlice(compare.set[len(b.set):]) + } + return uint(cnt) +} + +// InPlaceSymmetricDifference creates the destructive SymmetricDifference of base set and other set +// This is the BitSet equivalent of ^ (xor) +func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) { + panicIfNull(b) + panicIfNull(compare) + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + if compare.length > 0 { + b.extendSetMaybe(compare.length - 1) + } + for i := 0; i < l; i++ { + b.set[i] ^= compare.set[i] + } + if len(compare.set) > l { + for i := l; i < len(compare.set); i++ { + b.set[i] = compare.set[i] + } + } +} + +// Is the length an exact multiple of word sizes? +func (b *BitSet) isLenExactMultiple() bool { + return b.length%wordSize == 0 +} + +// Clean last word by setting unused bits to 0 +func (b *BitSet) cleanLastWord() { + if !b.isLenExactMultiple() { + b.set[len(b.set)-1] &= allBits >> (wordSize - b.length%wordSize) + } +} + +// Complement computes the (local) complement of a biset (up to length bits) +func (b *BitSet) Complement() (result *BitSet) { + panicIfNull(b) + result = New(b.length) + for i, word := range b.set { + result.set[i] = ^word + } + result.cleanLastWord() + return +} + +// All returns true if all bits are set, false otherwise. Returns true for +// empty sets. +func (b *BitSet) All() bool { + panicIfNull(b) + return b.Count() == b.length +} + +// None returns true if no bit is set, false otherwise. Retursn true for +// empty sets. +func (b *BitSet) None() bool { + panicIfNull(b) + if b != nil && b.set != nil { + for _, word := range b.set { + if word > 0 { + return false + } + } + return true + } + return true +} + +// Any returns true if any bit is set, false otherwise +func (b *BitSet) Any() bool { + panicIfNull(b) + return !b.None() +} + +// IsSuperSet returns true if this is a superset of the other set +func (b *BitSet) IsSuperSet(other *BitSet) bool { + for i, e := other.NextSet(0); e; i, e = other.NextSet(i + 1) { + if !b.Test(i) { + return false + } + } + return true +} + +// IsStrictSuperSet returns true if this is a strict superset of the other set +func (b *BitSet) IsStrictSuperSet(other *BitSet) bool { + return b.Count() > other.Count() && b.IsSuperSet(other) +} + +// DumpAsBits dumps a bit set as a string of bits +func (b *BitSet) DumpAsBits() string { + if b.set == nil { + return "." + } + buffer := bytes.NewBufferString("") + i := len(b.set) - 1 + for ; i >= 0; i-- { + fmt.Fprintf(buffer, "%064b.", b.set[i]) + } + return string(buffer.Bytes()) +} + +// BinaryStorageSize returns the binary storage requirements +func (b *BitSet) BinaryStorageSize() int { + return binary.Size(uint64(0)) + binary.Size(b.set) +} + +// WriteTo writes a BitSet to a stream +func (b *BitSet) WriteTo(stream io.Writer) (int64, error) { + length := uint64(b.length) + + // Write length + err := binary.Write(stream, binary.BigEndian, length) + if err != nil { + return 0, err + } + + // Write set + err = binary.Write(stream, binary.BigEndian, b.set) + return int64(b.BinaryStorageSize()), err +} + +// ReadFrom reads a BitSet from a stream written using WriteTo +func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) { + var length uint64 + + // Read length first + err := binary.Read(stream, binary.BigEndian, &length) + if err != nil { + return 0, err + } + newset := New(uint(length)) + + if uint64(newset.length) != length { + return 0, errors.New("Unmarshalling error: type mismatch") + } + + // Read remaining bytes as set + err = binary.Read(stream, binary.BigEndian, newset.set) + if err != nil { + return 0, err + } + + *b = *newset + return int64(b.BinaryStorageSize()), nil +} + +// MarshalBinary encodes a BitSet into a binary form and returns the result. +func (b *BitSet) MarshalBinary() ([]byte, error) { + var buf bytes.Buffer + writer := bufio.NewWriter(&buf) + + _, err := b.WriteTo(writer) + if err != nil { + return []byte{}, err + } + + err = writer.Flush() + + return buf.Bytes(), err +} + +// UnmarshalBinary decodes the binary form generated by MarshalBinary. +func (b *BitSet) UnmarshalBinary(data []byte) error { + buf := bytes.NewReader(data) + reader := bufio.NewReader(buf) + + _, err := b.ReadFrom(reader) + + return err +} + +// MarshalJSON marshals a BitSet as a JSON structure +func (b *BitSet) MarshalJSON() ([]byte, error) { + buffer := bytes.NewBuffer(make([]byte, 0, b.BinaryStorageSize())) + _, err := b.WriteTo(buffer) + if err != nil { + return nil, err + } + + // URLEncode all bytes + return json.Marshal(base64.URLEncoding.EncodeToString(buffer.Bytes())) +} + +// UnmarshalJSON unmarshals a BitSet from JSON created using MarshalJSON +func (b *BitSet) UnmarshalJSON(data []byte) error { + // Unmarshal as string + var s string + err := json.Unmarshal(data, &s) + if err != nil { + return err + } + + // URLDecode string + buf, err := base64.URLEncoding.DecodeString(s) + if err != nil { + return err + } + + _, err = b.ReadFrom(bytes.NewReader(buf)) + return err +} diff --git a/vendor/github.com/willf/bitset/popcnt.go b/vendor/github.com/willf/bitset/popcnt.go new file mode 100644 index 0000000000..76577a8382 --- /dev/null +++ b/vendor/github.com/willf/bitset/popcnt.go @@ -0,0 +1,53 @@ +package bitset + +// bit population count, take from +// https://code.google.com/p/go/issues/detail?id=4988#c11 +// credit: https://code.google.com/u/arnehormann/ +func popcount(x uint64) (n uint64) { + x -= (x >> 1) & 0x5555555555555555 + x = (x>>2)&0x3333333333333333 + x&0x3333333333333333 + x += x >> 4 + x &= 0x0f0f0f0f0f0f0f0f + x *= 0x0101010101010101 + return x >> 56 +} + +func popcntSliceGo(s []uint64) uint64 { + cnt := uint64(0) + for _, x := range s { + cnt += popcount(x) + } + return cnt +} + +func popcntMaskSliceGo(s, m []uint64) uint64 { + cnt := uint64(0) + for i := range s { + cnt += popcount(s[i] &^ m[i]) + } + return cnt +} + +func popcntAndSliceGo(s, m []uint64) uint64 { + cnt := uint64(0) + for i := range s { + cnt += popcount(s[i] & m[i]) + } + return cnt +} + +func popcntOrSliceGo(s, m []uint64) uint64 { + cnt := uint64(0) + for i := range s { + cnt += popcount(s[i] | m[i]) + } + return cnt +} + +func popcntXorSliceGo(s, m []uint64) uint64 { + cnt := uint64(0) + for i := range s { + cnt += popcount(s[i] ^ m[i]) + } + return cnt +} diff --git a/vendor/github.com/willf/bitset/popcnt_19.go b/vendor/github.com/willf/bitset/popcnt_19.go new file mode 100644 index 0000000000..fc8ff4f367 --- /dev/null +++ b/vendor/github.com/willf/bitset/popcnt_19.go @@ -0,0 +1,45 @@ +// +build go1.9 + +package bitset + +import "math/bits" + +func popcntSlice(s []uint64) uint64 { + var cnt int + for _, x := range s { + cnt += bits.OnesCount64(x) + } + return uint64(cnt) +} + +func popcntMaskSlice(s, m []uint64) uint64 { + var cnt int + for i := range s { + cnt += bits.OnesCount64(s[i] &^ m[i]) + } + return uint64(cnt) +} + +func popcntAndSlice(s, m []uint64) uint64 { + var cnt int + for i := range s { + cnt += bits.OnesCount64(s[i] & m[i]) + } + return uint64(cnt) +} + +func popcntOrSlice(s, m []uint64) uint64 { + var cnt int + for i := range s { + cnt += bits.OnesCount64(s[i] | m[i]) + } + return uint64(cnt) +} + +func popcntXorSlice(s, m []uint64) uint64 { + var cnt int + for i := range s { + cnt += bits.OnesCount64(s[i] ^ m[i]) + } + return uint64(cnt) +} diff --git a/vendor/github.com/willf/bitset/popcnt_amd64.go b/vendor/github.com/willf/bitset/popcnt_amd64.go new file mode 100644 index 0000000000..4cf64f24ad --- /dev/null +++ b/vendor/github.com/willf/bitset/popcnt_amd64.go @@ -0,0 +1,68 @@ +// +build !go1.9 +// +build amd64,!appengine + +package bitset + +// *** the following functions are defined in popcnt_amd64.s + +//go:noescape + +func hasAsm() bool + +// useAsm is a flag used to select the GO or ASM implementation of the popcnt function +var useAsm = hasAsm() + +//go:noescape + +func popcntSliceAsm(s []uint64) uint64 + +//go:noescape + +func popcntMaskSliceAsm(s, m []uint64) uint64 + +//go:noescape + +func popcntAndSliceAsm(s, m []uint64) uint64 + +//go:noescape + +func popcntOrSliceAsm(s, m []uint64) uint64 + +//go:noescape + +func popcntXorSliceAsm(s, m []uint64) uint64 + +func popcntSlice(s []uint64) uint64 { + if useAsm { + return popcntSliceAsm(s) + } + return popcntSliceGo(s) +} + +func popcntMaskSlice(s, m []uint64) uint64 { + if useAsm { + return popcntMaskSliceAsm(s, m) + } + return popcntMaskSliceGo(s, m) +} + +func popcntAndSlice(s, m []uint64) uint64 { + if useAsm { + return popcntAndSliceAsm(s, m) + } + return popcntAndSliceGo(s, m) +} + +func popcntOrSlice(s, m []uint64) uint64 { + if useAsm { + return popcntOrSliceAsm(s, m) + } + return popcntOrSliceGo(s, m) +} + +func popcntXorSlice(s, m []uint64) uint64 { + if useAsm { + return popcntXorSliceAsm(s, m) + } + return popcntXorSliceGo(s, m) +} diff --git a/vendor/github.com/willf/bitset/popcnt_amd64.s b/vendor/github.com/willf/bitset/popcnt_amd64.s new file mode 100644 index 0000000000..666c0dcc17 --- /dev/null +++ b/vendor/github.com/willf/bitset/popcnt_amd64.s @@ -0,0 +1,104 @@ +// +build !go1.9 +// +build amd64,!appengine + +TEXT ·hasAsm(SB),4,$0-1 +MOVQ $1, AX +CPUID +SHRQ $23, CX +ANDQ $1, CX +MOVB CX, ret+0(FP) +RET + +#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2 + +TEXT ·popcntSliceAsm(SB),4,$0-32 +XORQ AX, AX +MOVQ s+0(FP), SI +MOVQ s_len+8(FP), CX +TESTQ CX, CX +JZ popcntSliceEnd +popcntSliceLoop: +BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX +ADDQ DX, AX +ADDQ $8, SI +LOOP popcntSliceLoop +popcntSliceEnd: +MOVQ AX, ret+24(FP) +RET + +TEXT ·popcntMaskSliceAsm(SB),4,$0-56 +XORQ AX, AX +MOVQ s+0(FP), SI +MOVQ s_len+8(FP), CX +TESTQ CX, CX +JZ popcntMaskSliceEnd +MOVQ m+24(FP), DI +popcntMaskSliceLoop: +MOVQ (DI), DX +NOTQ DX +ANDQ (SI), DX +POPCNTQ_DX_DX +ADDQ DX, AX +ADDQ $8, SI +ADDQ $8, DI +LOOP popcntMaskSliceLoop +popcntMaskSliceEnd: +MOVQ AX, ret+48(FP) +RET + +TEXT ·popcntAndSliceAsm(SB),4,$0-56 +XORQ AX, AX +MOVQ s+0(FP), SI +MOVQ s_len+8(FP), CX +TESTQ CX, CX +JZ popcntAndSliceEnd +MOVQ m+24(FP), DI +popcntAndSliceLoop: +MOVQ (DI), DX +ANDQ (SI), DX +POPCNTQ_DX_DX +ADDQ DX, AX +ADDQ $8, SI +ADDQ $8, DI +LOOP popcntAndSliceLoop +popcntAndSliceEnd: +MOVQ AX, ret+48(FP) +RET + +TEXT ·popcntOrSliceAsm(SB),4,$0-56 +XORQ AX, AX +MOVQ s+0(FP), SI +MOVQ s_len+8(FP), CX +TESTQ CX, CX +JZ popcntOrSliceEnd +MOVQ m+24(FP), DI +popcntOrSliceLoop: +MOVQ (DI), DX +ORQ (SI), DX +POPCNTQ_DX_DX +ADDQ DX, AX +ADDQ $8, SI +ADDQ $8, DI +LOOP popcntOrSliceLoop +popcntOrSliceEnd: +MOVQ AX, ret+48(FP) +RET + +TEXT ·popcntXorSliceAsm(SB),4,$0-56 +XORQ AX, AX +MOVQ s+0(FP), SI +MOVQ s_len+8(FP), CX +TESTQ CX, CX +JZ popcntXorSliceEnd +MOVQ m+24(FP), DI +popcntXorSliceLoop: +MOVQ (DI), DX +XORQ (SI), DX +POPCNTQ_DX_DX +ADDQ DX, AX +ADDQ $8, SI +ADDQ $8, DI +LOOP popcntXorSliceLoop +popcntXorSliceEnd: +MOVQ AX, ret+48(FP) +RET diff --git a/vendor/github.com/willf/bitset/popcnt_generic.go b/vendor/github.com/willf/bitset/popcnt_generic.go new file mode 100644 index 0000000000..21e0ff7b4f --- /dev/null +++ b/vendor/github.com/willf/bitset/popcnt_generic.go @@ -0,0 +1,24 @@ +// +build !go1.9 +// +build !amd64 appengine + +package bitset + +func popcntSlice(s []uint64) uint64 { + return popcntSliceGo(s) +} + +func popcntMaskSlice(s, m []uint64) uint64 { + return popcntMaskSliceGo(s, m) +} + +func popcntAndSlice(s, m []uint64) uint64 { + return popcntAndSliceGo(s, m) +} + +func popcntOrSlice(s, m []uint64) uint64 { + return popcntOrSliceGo(s, m) +} + +func popcntXorSlice(s, m []uint64) uint64 { + return popcntXorSliceGo(s, m) +} diff --git a/vendor/github.com/willf/bitset/trailing_zeros_18.go b/vendor/github.com/willf/bitset/trailing_zeros_18.go new file mode 100644 index 0000000000..c52b61be9f --- /dev/null +++ b/vendor/github.com/willf/bitset/trailing_zeros_18.go @@ -0,0 +1,14 @@ +// +build !go1.9 + +package bitset + +var deBruijn = [...]byte{ + 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4, + 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5, + 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11, + 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6, +} + +func trailingZeroes64(v uint64) uint { + return uint(deBruijn[((v&-v)*0x03f79d71b4ca8b09)>>58]) +} diff --git a/vendor/github.com/willf/bitset/trailing_zeros_19.go b/vendor/github.com/willf/bitset/trailing_zeros_19.go new file mode 100644 index 0000000000..36a988e714 --- /dev/null +++ b/vendor/github.com/willf/bitset/trailing_zeros_19.go @@ -0,0 +1,9 @@ +// +build go1.9 + +package bitset + +import "math/bits" + +func trailingZeroes64(v uint64) uint { + return uint(bits.TrailingZeros64(v)) +} |