summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/couchbase
diff options
context:
space:
mode:
authorAntoine GIRARD <sapk@users.noreply.github.com>2018-05-19 14:49:46 +0200
committerLunny Xiao <xiaolunwen@gmail.com>2018-05-19 20:49:46 +0800
commit917b9641eca3fa1b1676ba1b4fd77a4e958ee153 (patch)
tree2caf049dfebccf5ccbc44316630a6c9220062d78 /vendor/github.com/couchbase
parent1b7cd3d0b0d3652e0660489b9c4da72619400c98 (diff)
downloadgitea-917b9641eca3fa1b1676ba1b4fd77a4e958ee153.tar.gz
gitea-917b9641eca3fa1b1676ba1b4fd77a4e958ee153.zip
Update to last common bleve (#3986)
Diffstat (limited to 'vendor/github.com/couchbase')
-rw-r--r--vendor/github.com/couchbase/vellum/CONTRIBUTING.md16
-rw-r--r--vendor/github.com/couchbase/vellum/LICENSE202
-rw-r--r--vendor/github.com/couchbase/vellum/README.md168
-rw-r--r--vendor/github.com/couchbase/vellum/automaton.go85
-rw-r--r--vendor/github.com/couchbase/vellum/builder.go453
-rw-r--r--vendor/github.com/couchbase/vellum/common.go547
-rw-r--r--vendor/github.com/couchbase/vellum/decoder_v1.go316
-rw-r--r--vendor/github.com/couchbase/vellum/encoder_v1.go227
-rw-r--r--vendor/github.com/couchbase/vellum/encoding.go87
-rw-r--r--vendor/github.com/couchbase/vellum/fst.go254
-rw-r--r--vendor/github.com/couchbase/vellum/fst_iterator.go276
-rw-r--r--vendor/github.com/couchbase/vellum/merge_iterator.go188
-rw-r--r--vendor/github.com/couchbase/vellum/pack.go55
-rw-r--r--vendor/github.com/couchbase/vellum/regexp/compile.go316
-rw-r--r--vendor/github.com/couchbase/vellum/regexp/dfa.go188
-rw-r--r--vendor/github.com/couchbase/vellum/regexp/inst.go62
-rw-r--r--vendor/github.com/couchbase/vellum/regexp/regexp.go113
-rw-r--r--vendor/github.com/couchbase/vellum/regexp/sparse.go54
-rw-r--r--vendor/github.com/couchbase/vellum/registry.go116
-rw-r--r--vendor/github.com/couchbase/vellum/transducer.go55
-rw-r--r--vendor/github.com/couchbase/vellum/utf8/utf8.go246
-rw-r--r--vendor/github.com/couchbase/vellum/vellum.go111
-rw-r--r--vendor/github.com/couchbase/vellum/vellum_mmap.go60
-rw-r--r--vendor/github.com/couchbase/vellum/vellum_nommap.go27
-rw-r--r--vendor/github.com/couchbase/vellum/writer.go92
25 files changed, 4314 insertions, 0 deletions
diff --git a/vendor/github.com/couchbase/vellum/CONTRIBUTING.md b/vendor/github.com/couchbase/vellum/CONTRIBUTING.md
new file mode 100644
index 0000000000..b85ec82b6b
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/CONTRIBUTING.md
@@ -0,0 +1,16 @@
+# Contributing to Vellum
+
+We look forward to your contributions, but ask that you first review these guidelines.
+
+### Sign the CLA
+
+As Vellum is a Couchbase project we require contributors accept the [Couchbase Contributor License Agreement](http://review.couchbase.org/static/individual_agreement.html). To sign this agreement log into the Couchbase [code review tool](http://review.couchbase.org/). The Vellum project does not use this code review tool but it is still used to track acceptance of the contributor license agreements.
+
+### Submitting a Pull Request
+
+All types of contributions are welcome, but please keep the following in mind:
+
+- If you're planning a large change, you should really discuss it in a github issue first. This helps avoid duplicate effort and spending time on something that may not be merged.
+- Existing tests should continue to pass, new tests for the contribution are nice to have.
+- All code should have gone through `go fmt`
+- All code should pass `go vet`
diff --git a/vendor/github.com/couchbase/vellum/LICENSE b/vendor/github.com/couchbase/vellum/LICENSE
new file mode 100644
index 0000000000..7a4a3ea242
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. \ No newline at end of file
diff --git a/vendor/github.com/couchbase/vellum/README.md b/vendor/github.com/couchbase/vellum/README.md
new file mode 100644
index 0000000000..0c0759a9b5
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/README.md
@@ -0,0 +1,168 @@
+# ![vellum](docs/logo.png) vellum
+
+[![Build Status](https://travis-ci.org/couchbase/vellum.svg?branch=master)](https://travis-ci.org/couchbase/vellum)
+[![Coverage Status](https://coveralls.io/repos/github/couchbase/vellum/badge.svg?branch=master)](https://coveralls.io/github/couchbase/vellum?branch=master)
+[![GoDoc](https://godoc.org/github.com/couchbase/vellum?status.svg)](https://godoc.org/github.com/couchbase/vellum)
+[![Go Report Card](https://goreportcard.com/badge/github.com/couchbase/vellum)](https://goreportcard.com/report/github.com/couchbase/vellum)
+[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+
+A Go library implementing an FST (finite state transducer) capable of:
+ - mapping between keys ([]byte) and a value (uint64)
+ - enumerating keys in lexicographic order
+
+Some additional goals of this implementation:
+ - bounded memory use while building the FST
+ - streaming out FST data while building
+ - mmap FST runtime to support very large FTSs (optional)
+
+## Usage
+
+### Building an FST
+
+To build an FST, create a new builder using the `New()` method. This method takes an `io.Writer` as an argument. As the FST is being built, data will be streamed to the writer as soon as possible. With this builder you **MUST** insert keys in lexicographic order. Inserting keys out of order will result in an error. After inserting the last key into the builder, you **MUST** call `Close()` on the builder. This will flush all remaining data to the underlying writer.
+
+In memory:
+```go
+ var buf bytes.Buffer
+ builder, err := vellum.New(&buf, nil)
+ if err != nil {
+ log.Fatal(err)
+ }
+```
+
+To disk:
+```go
+ f, err := os.Create("/tmp/vellum.fst")
+ if err != nil {
+ log.Fatal(err)
+ }
+ builder, err := vellum.New(f, nil)
+ if err != nil {
+ log.Fatal(err)
+ }
+```
+
+**MUST** insert keys in lexicographic order:
+```go
+err = builder.Insert([]byte("cat"), 1)
+if err != nil {
+ log.Fatal(err)
+}
+
+err = builder.Insert([]byte("dog"), 2)
+if err != nil {
+ log.Fatal(err)
+}
+
+err = builder.Insert([]byte("fish"), 3)
+if err != nil {
+ log.Fatal(err)
+}
+
+err = builder.Close()
+if err != nil {
+ log.Fatal(err)
+}
+```
+
+### Using an FST
+
+After closing the builder, the data can be used to instantiate an FST. If the data was written to disk, you can use the `Open()` method to mmap the file. If the data is already in memory, or you wish to load/mmap the data yourself, you can instantiate the FST with the `Load()` method.
+
+Load in memory:
+```go
+ fst, err := vellum.Load(buf.Bytes())
+ if err != nil {
+ log.Fatal(err)
+ }
+```
+
+Open from disk:
+```go
+ fst, err := vellum.Open("/tmp/vellum.fst")
+ if err != nil {
+ log.Fatal(err)
+ }
+```
+
+Get key/value:
+```go
+ val, exists, err = fst.Get([]byte("dog"))
+ if err != nil {
+ log.Fatal(err)
+ }
+ if exists {
+ fmt.Printf("contains dog with val: %d\n", val)
+ } else {
+ fmt.Printf("does not contain dog")
+ }
+```
+
+Iterate key/values:
+```go
+ itr, err := fst.Iterator(startKeyInclusive, endKeyExclusive)
+ for err == nil {
+ key, val := itr.Current()
+ fmt.Printf("contains key: %s val: %d", key, val)
+ err = itr.Next()
+ }
+ if err != nil {
+ log.Fatal(err)
+ }
+```
+
+### How does the FST get built?
+
+A full example of the implementation is beyond the scope of this README, but let's consider a small example where we want to insert 3 key/value pairs.
+
+First we insert "are" with the value 4.
+
+![step1](docs/demo1.png)
+
+Next, we insert "ate" with the value 2.
+
+![step2](docs/demo2.png)
+
+Notice how the values associated with the transitions were adjusted so that by summing them while traversing we still get the expected value.
+
+At this point, we see that state 5 looks like state 3, and state 4 looks like state 2. But, we cannot yet combine them because future inserts could change this.
+
+Now, we insert "see" with value 3. Once it has been added, we now know that states 5 and 4 can longer change. Since they are identical to 3 and 2, we replace them.
+
+![step3](docs/demo3.png)
+
+Again, we see that states 7 and 8 appear to be identical to 2 and 3.
+
+Having inserted our last key, we call `Close()` on the builder.
+
+![step4](docs/demo4.png)
+
+Now, states 7 and 8 can safely be replaced with 2 and 3.
+
+For additional information, see the references at the bottom of this document.
+
+### What does the serialized format look like?
+
+We've broken out a separate document on the [vellum disk format v1](docs/format.md).
+
+### What if I want to use this on a system that doesn't have mmap?
+
+The mmap library itself is guarded with system/architecture build tags, but we've also added an additional build tag in vellum. If you'd like to Open() a file based representation of an FST, but not use mmap, you can build the library with the `nommap` build tag. NOTE: if you do this, the entire FST will be read into memory.
+
+### Can I use this with Unicode strings?
+
+Yes, however this implementation is only aware of the byte representation you choose. In order to find matches, you must work with some canonical byte representation of the string. In the future, some encoding-aware traversals may be possible on top of the lower-level byte transitions.
+
+### How did this library come to be?
+
+In my work on the [Bleve](https://github.com/blevesearch/bleve) project I became aware of the power of the FST for many search-related tasks. The obvious starting point for such a thing in Go was the [mafsa](https://github.com/smartystreets/mafsa) project. While working with mafsa I encountered some issues. First, it did not stream data to disk while building. Second, it chose to use a rune as the fundamental unit of transition in the FST, but I felt using a byte would be more powerful in the end. My hope is that higher-level encoding-aware traversals will be possible when necessary. Finally, as I reported bugs and submitted PRs I learned that the mafsa project was mainly a research project and no longer being maintained. I wanted to build something that could be used in production. As the project advanced more and more techniques from the [BurntSushi/fst](https://github.com/BurntSushi/fst) were adapted to our implementation.
+
+## Related Work
+
+Much credit goes to two existing projects:
+ - [mafsa](https://github.com/smartystreets/mafsa)
+ - [BurntSushi/fst](https://github.com/BurntSushi/fst)
+
+Most of the original implementation here started with my digging into the internals of mafsa. As the implementation progressed, I continued to borrow ideas/approaches from the BurntSushi/fst library as well.
+
+For a great introduction to this topic, please read the blog post [Index 1,600,000,000 Keys with Automata and Rust](http://blog.burntsushi.net/transducers/)
diff --git a/vendor/github.com/couchbase/vellum/automaton.go b/vendor/github.com/couchbase/vellum/automaton.go
new file mode 100644
index 0000000000..47526595bc
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/automaton.go
@@ -0,0 +1,85 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vellum
+
+// Automaton represents the general contract of a byte-based finite automaton
+type Automaton interface {
+
+ // Start returns the start state
+ Start() int
+
+ // IsMatch returns true if and only if the state is a match
+ IsMatch(int) bool
+
+ // CanMatch returns true if and only if it is possible to reach a match
+ // in zero or more steps
+ CanMatch(int) bool
+
+ // WillAlwaysMatch returns true if and only if the current state matches
+ // and will always match no matter what steps are taken
+ WillAlwaysMatch(int) bool
+
+ // Accept returns the next state given the input to the specified state
+ Accept(int, byte) int
+}
+
+// AutomatonContains implements an generic Contains() method which works
+// on any implementation of Automaton
+func AutomatonContains(a Automaton, k []byte) bool {
+ i := 0
+ curr := a.Start()
+ for a.CanMatch(curr) && i < len(k) {
+ curr = a.Accept(curr, k[i])
+ if curr == noneAddr {
+ break
+ }
+ i++
+ }
+ if i != len(k) {
+ return false
+ }
+ return a.IsMatch(curr)
+}
+
+// AlwaysMatch is an Automaton implementation which always matches
+type AlwaysMatch struct{}
+
+// Start returns the AlwaysMatch start state
+func (m *AlwaysMatch) Start() int {
+ return 0
+}
+
+// IsMatch always returns true
+func (m *AlwaysMatch) IsMatch(int) bool {
+ return true
+}
+
+// CanMatch always returns true
+func (m *AlwaysMatch) CanMatch(int) bool {
+ return true
+}
+
+// WillAlwaysMatch always returns true
+func (m *AlwaysMatch) WillAlwaysMatch(int) bool {
+ return true
+}
+
+// Accept returns the next AlwaysMatch state
+func (m *AlwaysMatch) Accept(int, byte) int {
+ return 0
+}
+
+// creating an alwaysMatchAutomaton to avoid unnecesary repeated allocations.
+var alwaysMatchAutomaton = &AlwaysMatch{}
diff --git a/vendor/github.com/couchbase/vellum/builder.go b/vendor/github.com/couchbase/vellum/builder.go
new file mode 100644
index 0000000000..b21db98072
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/builder.go
@@ -0,0 +1,453 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vellum
+
+import (
+ "bytes"
+ "io"
+)
+
+var defaultBuilderOpts = &BuilderOpts{
+ Encoder: 1,
+ RegistryTableSize: 10000,
+ RegistryMRUSize: 2,
+}
+
+// A Builder is used to build a new FST. When possible data is
+// streamed out to the underlying Writer as soon as possible.
+type Builder struct {
+ unfinished *unfinishedNodes
+ registry *registry
+ last []byte
+ len int
+
+ lastAddr int
+
+ encoder encoder
+ opts *BuilderOpts
+
+ builderNodePool builderNodePool
+ transitionPool transitionPool
+}
+
+const noneAddr = 1
+const emptyAddr = 0
+
+// NewBuilder returns a new Builder which will stream out the
+// underlying representation to the provided Writer as the set is built.
+func newBuilder(w io.Writer, opts *BuilderOpts) (*Builder, error) {
+ if opts == nil {
+ opts = defaultBuilderOpts
+ }
+ rv := &Builder{
+ registry: newRegistry(opts.RegistryTableSize, opts.RegistryMRUSize),
+ opts: opts,
+ lastAddr: noneAddr,
+ }
+ rv.unfinished = newUnfinishedNodes(&rv.builderNodePool)
+
+ var err error
+ rv.encoder, err = loadEncoder(opts.Encoder, w)
+ if err != nil {
+ return nil, err
+ }
+ err = rv.encoder.start()
+ if err != nil {
+ return nil, err
+ }
+ return rv, nil
+}
+
+func (b *Builder) Reset(w io.Writer) error {
+ b.transitionPool.reset()
+ b.builderNodePool.reset()
+ b.unfinished.Reset(&b.builderNodePool)
+ b.registry.Reset()
+ b.lastAddr = noneAddr
+ b.encoder.reset(w)
+ b.last = nil
+ b.len = 0
+
+ err := b.encoder.start()
+ if err != nil {
+ return err
+ }
+ return nil
+}
+
+// Insert the provided value to the set being built.
+// NOTE: values must be inserted in lexicographical order.
+func (b *Builder) Insert(key []byte, val uint64) error {
+ // ensure items are added in lexicographic order
+ if bytes.Compare(key, b.last) < 0 {
+ return ErrOutOfOrder
+ }
+ if len(key) == 0 {
+ b.len = 1
+ b.unfinished.setRootOutput(val)
+ return nil
+ }
+
+ prefixLen, out := b.unfinished.findCommonPrefixAndSetOutput(key, val)
+ b.len++
+ err := b.compileFrom(prefixLen)
+ if err != nil {
+ return err
+ }
+ b.copyLastKey(key)
+ b.unfinished.addSuffix(key[prefixLen:], out, &b.builderNodePool)
+
+ return nil
+}
+
+func (b *Builder) copyLastKey(key []byte) {
+ if b.last == nil {
+ b.last = make([]byte, 0, 64)
+ } else {
+ b.last = b.last[:0]
+ }
+ b.last = append(b.last, key...)
+}
+
+// Close MUST be called after inserting all values.
+func (b *Builder) Close() error {
+ err := b.compileFrom(0)
+ if err != nil {
+ return err
+ }
+ root := b.unfinished.popRoot()
+ rootAddr, err := b.compile(root)
+ if err != nil {
+ return err
+ }
+ return b.encoder.finish(b.len, rootAddr)
+}
+
+func (b *Builder) compileFrom(iState int) error {
+ addr := noneAddr
+ for iState+1 < len(b.unfinished.stack) {
+ var node *builderNode
+ if addr == noneAddr {
+ node = b.unfinished.popEmpty()
+ } else {
+ node = b.unfinished.popFreeze(addr, &b.transitionPool)
+ }
+ var err error
+ addr, err = b.compile(node)
+ if err != nil {
+ return nil
+ }
+ }
+ b.unfinished.topLastFreeze(addr, &b.transitionPool)
+ return nil
+}
+
+func (b *Builder) compile(node *builderNode) (int, error) {
+ if node.final && len(node.trans) == 0 &&
+ node.finalOutput == 0 {
+ return 0, nil
+ }
+ found, addr, entry := b.registry.entry(node)
+ if found {
+ return addr, nil
+ }
+ addr, err := b.encoder.encodeState(node, b.lastAddr)
+ if err != nil {
+ return 0, err
+ }
+
+ b.lastAddr = addr
+ entry.addr = addr
+ return addr, nil
+}
+
+type unfinishedNodes struct {
+ stack []*builderNodeUnfinished
+
+ // cache allocates a reasonable number of builderNodeUnfinished
+ // objects up front and tries to keep reusing them
+ // because the main data structure is a stack, we assume the
+ // same access pattern, and don't track items separately
+ // this means calls get() and pushXYZ() must be paired,
+ // as well as calls put() and popXYZ()
+ cache []builderNodeUnfinished
+}
+
+func (u *unfinishedNodes) Reset(p *builderNodePool) {
+ u.stack = u.stack[:0]
+ for i := 0; i < len(u.cache); i++ {
+ u.cache[i] = builderNodeUnfinished{}
+ }
+ u.pushEmpty(false, p)
+}
+
+func newUnfinishedNodes(p *builderNodePool) *unfinishedNodes {
+ rv := &unfinishedNodes{
+ stack: make([]*builderNodeUnfinished, 0, 64),
+ cache: make([]builderNodeUnfinished, 64),
+ }
+ rv.pushEmpty(false, p)
+ return rv
+}
+
+// get new builderNodeUnfinished, reusing cache if possible
+func (u *unfinishedNodes) get() *builderNodeUnfinished {
+ if len(u.stack) < len(u.cache) {
+ return &u.cache[len(u.stack)]
+ }
+ // full now allocate a new one
+ return &builderNodeUnfinished{}
+}
+
+// return builderNodeUnfinished, clearing it for reuse
+func (u *unfinishedNodes) put() {
+ if len(u.stack) >= len(u.cache) {
+ return
+ // do nothing, not part of cache
+ }
+ u.cache[len(u.stack)] = builderNodeUnfinished{}
+}
+
+func (u *unfinishedNodes) findCommonPrefixAndSetOutput(key []byte,
+ out uint64) (int, uint64) {
+ var i int
+ for i < len(key) {
+ if i >= len(u.stack) {
+ break
+ }
+ var addPrefix uint64
+ if !u.stack[i].hasLastT {
+ break
+ }
+ if u.stack[i].lastIn == key[i] {
+ commonPre := outputPrefix(u.stack[i].lastOut, out)
+ addPrefix = outputSub(u.stack[i].lastOut, commonPre)
+ out = outputSub(out, commonPre)
+ u.stack[i].lastOut = commonPre
+ i++
+ } else {
+ break
+ }
+
+ if addPrefix != 0 {
+ u.stack[i].addOutputPrefix(addPrefix)
+ }
+ }
+
+ return i, out
+}
+
+func (u *unfinishedNodes) pushEmpty(final bool, p *builderNodePool) {
+ next := u.get()
+ next.node = p.alloc()
+ next.node.final = final
+ u.stack = append(u.stack, next)
+}
+
+func (u *unfinishedNodes) popRoot() *builderNode {
+ l := len(u.stack)
+ var unfinished *builderNodeUnfinished
+ u.stack, unfinished = u.stack[:l-1], u.stack[l-1]
+ rv := unfinished.node
+ u.put()
+ return rv
+}
+
+func (u *unfinishedNodes) popFreeze(addr int, tp *transitionPool) *builderNode {
+ l := len(u.stack)
+ var unfinished *builderNodeUnfinished
+ u.stack, unfinished = u.stack[:l-1], u.stack[l-1]
+ unfinished.lastCompiled(addr, tp)
+ rv := unfinished.node
+ u.put()
+ return rv
+}
+
+func (u *unfinishedNodes) popEmpty() *builderNode {
+ l := len(u.stack)
+ var unfinished *builderNodeUnfinished
+ u.stack, unfinished = u.stack[:l-1], u.stack[l-1]
+ rv := unfinished.node
+ u.put()
+ return rv
+}
+
+func (u *unfinishedNodes) setRootOutput(out uint64) {
+ u.stack[0].node.final = true
+ u.stack[0].node.finalOutput = out
+}
+
+func (u *unfinishedNodes) topLastFreeze(addr int, tp *transitionPool) {
+ last := len(u.stack) - 1
+ u.stack[last].lastCompiled(addr, tp)
+}
+
+func (u *unfinishedNodes) addSuffix(bs []byte, out uint64, p *builderNodePool) {
+ if len(bs) == 0 {
+ return
+ }
+ last := len(u.stack) - 1
+ u.stack[last].hasLastT = true
+ u.stack[last].lastIn = bs[0]
+ u.stack[last].lastOut = out
+ for _, b := range bs[1:] {
+ next := u.get()
+ next.node = p.alloc()
+ next.hasLastT = true
+ next.lastIn = b
+ next.lastOut = 0
+ u.stack = append(u.stack, next)
+ }
+ u.pushEmpty(true, p)
+}
+
+type builderNodeUnfinished struct {
+ node *builderNode
+ lastOut uint64
+ lastIn byte
+ hasLastT bool
+}
+
+func (b *builderNodeUnfinished) lastCompiled(addr int, tp *transitionPool) {
+ if b.hasLastT {
+ transIn := b.lastIn
+ transOut := b.lastOut
+ b.hasLastT = false
+ b.lastOut = 0
+ trans := tp.alloc()
+ trans.in = transIn
+ trans.out = transOut
+ trans.addr = addr
+ b.node.trans = append(b.node.trans, trans)
+ }
+}
+
+func (b *builderNodeUnfinished) addOutputPrefix(prefix uint64) {
+ if b.node.final {
+ b.node.finalOutput = outputCat(prefix, b.node.finalOutput)
+ }
+ for _, t := range b.node.trans {
+ t.out = outputCat(prefix, t.out)
+ }
+ if b.hasLastT {
+ b.lastOut = outputCat(prefix, b.lastOut)
+ }
+}
+
+type builderNode struct {
+ finalOutput uint64
+ trans []*transition
+ final bool
+}
+
+func (n *builderNode) equiv(o *builderNode) bool {
+ if n.final != o.final {
+ return false
+ }
+ if n.finalOutput != o.finalOutput {
+ return false
+ }
+ if len(n.trans) != len(o.trans) {
+ return false
+ }
+ for i, ntrans := range n.trans {
+ otrans := o.trans[i]
+ if ntrans.in != otrans.in {
+ return false
+ }
+ if ntrans.addr != otrans.addr {
+ return false
+ }
+ if ntrans.out != otrans.out {
+ return false
+ }
+ }
+ return true
+}
+
+type transition struct {
+ out uint64
+ addr int
+ in byte
+}
+
+func outputPrefix(l, r uint64) uint64 {
+ if l < r {
+ return l
+ }
+ return r
+}
+
+func outputSub(l, r uint64) uint64 {
+ return l - r
+}
+
+func outputCat(l, r uint64) uint64 {
+ return l + r
+}
+
+// the next builderNode to alloc() will be all[nextOuter][nextInner]
+type builderNodePool struct {
+ all [][]builderNode
+ nextOuter int
+ nextInner int
+}
+
+func (p *builderNodePool) reset() {
+ p.nextOuter = 0
+ p.nextInner = 0
+}
+
+func (p *builderNodePool) alloc() *builderNode {
+ if p.nextOuter >= len(p.all) {
+ p.all = append(p.all, make([]builderNode, 256))
+ }
+ rv := &p.all[p.nextOuter][p.nextInner]
+ p.nextInner += 1
+ if p.nextInner >= len(p.all[p.nextOuter]) {
+ p.nextOuter += 1
+ p.nextInner = 0
+ }
+ rv.finalOutput = 0
+ rv.trans = rv.trans[:0]
+ rv.final = false
+ return rv
+}
+
+// the next transition to alloc() will be all[nextOuter][nextInner]
+type transitionPool struct {
+ all [][]transition
+ nextOuter int
+ nextInner int
+}
+
+func (p *transitionPool) reset() {
+ p.nextOuter = 0
+ p.nextInner = 0
+}
+
+func (p *transitionPool) alloc() *transition {
+ if p.nextOuter >= len(p.all) {
+ p.all = append(p.all, make([]transition, 256))
+ }
+ rv := &p.all[p.nextOuter][p.nextInner]
+ p.nextInner += 1
+ if p.nextInner >= len(p.all[p.nextOuter]) {
+ p.nextOuter += 1
+ p.nextInner = 0
+ }
+ *rv = transition{}
+ return rv
+}
diff --git a/vendor/github.com/couchbase/vellum/common.go b/vendor/github.com/couchbase/vellum/common.go
new file mode 100644
index 0000000000..cd3e6a0d0b
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/common.go
@@ -0,0 +1,547 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vellum
+
+const maxCommon = 1<<6 - 1
+
+func encodeCommon(in byte) byte {
+ val := byte((int(commonInputs[in]) + 1) % 256)
+ if val > maxCommon {
+ return 0
+ }
+ return val
+}
+
+func decodeCommon(in byte) byte {
+ return commonInputsInv[in-1]
+}
+
+var commonInputs = []byte{
+ 84, // '\x00'
+ 85, // '\x01'
+ 86, // '\x02'
+ 87, // '\x03'
+ 88, // '\x04'
+ 89, // '\x05'
+ 90, // '\x06'
+ 91, // '\x07'
+ 92, // '\x08'
+ 93, // '\t'
+ 94, // '\n'
+ 95, // '\x0b'
+ 96, // '\x0c'
+ 97, // '\r'
+ 98, // '\x0e'
+ 99, // '\x0f'
+ 100, // '\x10'
+ 101, // '\x11'
+ 102, // '\x12'
+ 103, // '\x13'
+ 104, // '\x14'
+ 105, // '\x15'
+ 106, // '\x16'
+ 107, // '\x17'
+ 108, // '\x18'
+ 109, // '\x19'
+ 110, // '\x1a'
+ 111, // '\x1b'
+ 112, // '\x1c'
+ 113, // '\x1d'
+ 114, // '\x1e'
+ 115, // '\x1f'
+ 116, // ' '
+ 80, // '!'
+ 117, // '"'
+ 118, // '#'
+ 79, // '$'
+ 39, // '%'
+ 30, // '&'
+ 81, // "'"
+ 75, // '('
+ 74, // ')'
+ 82, // '*'
+ 57, // '+'
+ 66, // ','
+ 16, // '-'
+ 12, // '.'
+ 2, // '/'
+ 19, // '0'
+ 20, // '1'
+ 21, // '2'
+ 27, // '3'
+ 32, // '4'
+ 29, // '5'
+ 35, // '6'
+ 36, // '7'
+ 37, // '8'
+ 34, // '9'
+ 24, // ':'
+ 73, // ';'
+ 119, // '<'
+ 23, // '='
+ 120, // '>'
+ 40, // '?'
+ 83, // '@'
+ 44, // 'A'
+ 48, // 'B'
+ 42, // 'C'
+ 43, // 'D'
+ 49, // 'E'
+ 46, // 'F'
+ 62, // 'G'
+ 61, // 'H'
+ 47, // 'I'
+ 69, // 'J'
+ 68, // 'K'
+ 58, // 'L'
+ 56, // 'M'
+ 55, // 'N'
+ 59, // 'O'
+ 51, // 'P'
+ 72, // 'Q'
+ 54, // 'R'
+ 45, // 'S'
+ 52, // 'T'
+ 64, // 'U'
+ 65, // 'V'
+ 63, // 'W'
+ 71, // 'X'
+ 67, // 'Y'
+ 70, // 'Z'
+ 77, // '['
+ 121, // '\\'
+ 78, // ']'
+ 122, // '^'
+ 31, // '_'
+ 123, // '`'
+ 4, // 'a'
+ 25, // 'b'
+ 9, // 'c'
+ 17, // 'd'
+ 1, // 'e'
+ 26, // 'f'
+ 22, // 'g'
+ 13, // 'h'
+ 7, // 'i'
+ 50, // 'j'
+ 38, // 'k'
+ 14, // 'l'
+ 15, // 'm'
+ 10, // 'n'
+ 3, // 'o'
+ 8, // 'p'
+ 60, // 'q'
+ 6, // 'r'
+ 5, // 's'
+ 0, // 't'
+ 18, // 'u'
+ 33, // 'v'
+ 11, // 'w'
+ 41, // 'x'
+ 28, // 'y'
+ 53, // 'z'
+ 124, // '{'
+ 125, // '|'
+ 126, // '}'
+ 76, // '~'
+ 127, // '\x7f'
+ 128, // '\x80'
+ 129, // '\x81'
+ 130, // '\x82'
+ 131, // '\x83'
+ 132, // '\x84'
+ 133, // '\x85'
+ 134, // '\x86'
+ 135, // '\x87'
+ 136, // '\x88'
+ 137, // '\x89'
+ 138, // '\x8a'
+ 139, // '\x8b'
+ 140, // '\x8c'
+ 141, // '\x8d'
+ 142, // '\x8e'
+ 143, // '\x8f'
+ 144, // '\x90'
+ 145, // '\x91'
+ 146, // '\x92'
+ 147, // '\x93'
+ 148, // '\x94'
+ 149, // '\x95'
+ 150, // '\x96'
+ 151, // '\x97'
+ 152, // '\x98'
+ 153, // '\x99'
+ 154, // '\x9a'
+ 155, // '\x9b'
+ 156, // '\x9c'
+ 157, // '\x9d'
+ 158, // '\x9e'
+ 159, // '\x9f'
+ 160, // '\xa0'
+ 161, // '¡'
+ 162, // '¢'
+ 163, // '£'
+ 164, // '¤'
+ 165, // '¥'
+ 166, // '¦'
+ 167, // '§'
+ 168, // '¨'
+ 169, // '©'
+ 170, // 'ª'
+ 171, // '«'
+ 172, // '¬'
+ 173, // '\xad'
+ 174, // '®'
+ 175, // '¯'
+ 176, // '°'
+ 177, // '±'
+ 178, // '²'
+ 179, // '³'
+ 180, // '´'
+ 181, // 'µ'
+ 182, // '¶'
+ 183, // '·'
+ 184, // '¸'
+ 185, // '¹'
+ 186, // 'º'
+ 187, // '»'
+ 188, // '¼'
+ 189, // '½'
+ 190, // '¾'
+ 191, // '¿'
+ 192, // 'À'
+ 193, // 'Á'
+ 194, // 'Â'
+ 195, // 'Ã'
+ 196, // 'Ä'
+ 197, // 'Å'
+ 198, // 'Æ'
+ 199, // 'Ç'
+ 200, // 'È'
+ 201, // 'É'
+ 202, // 'Ê'
+ 203, // 'Ë'
+ 204, // 'Ì'
+ 205, // 'Í'
+ 206, // 'Î'
+ 207, // 'Ï'
+ 208, // 'Ð'
+ 209, // 'Ñ'
+ 210, // 'Ò'
+ 211, // 'Ó'
+ 212, // 'Ô'
+ 213, // 'Õ'
+ 214, // 'Ö'
+ 215, // '×'
+ 216, // 'Ø'
+ 217, // 'Ù'
+ 218, // 'Ú'
+ 219, // 'Û'
+ 220, // 'Ü'
+ 221, // 'Ý'
+ 222, // 'Þ'
+ 223, // 'ß'
+ 224, // 'à'
+ 225, // 'á'
+ 226, // 'â'
+ 227, // 'ã'
+ 228, // 'ä'
+ 229, // 'å'
+ 230, // 'æ'
+ 231, // 'ç'
+ 232, // 'è'
+ 233, // 'é'
+ 234, // 'ê'
+ 235, // 'ë'
+ 236, // 'ì'
+ 237, // 'í'
+ 238, // 'î'
+ 239, // 'ï'
+ 240, // 'ð'
+ 241, // 'ñ'
+ 242, // 'ò'
+ 243, // 'ó'
+ 244, // 'ô'
+ 245, // 'õ'
+ 246, // 'ö'
+ 247, // '÷'
+ 248, // 'ø'
+ 249, // 'ù'
+ 250, // 'ú'
+ 251, // 'û'
+ 252, // 'ü'
+ 253, // 'ý'
+ 254, // 'þ'
+ 255, // 'ÿ'
+}
+
+var commonInputsInv = []byte{
+ 't',
+ 'e',
+ '/',
+ 'o',
+ 'a',
+ 's',
+ 'r',
+ 'i',
+ 'p',
+ 'c',
+ 'n',
+ 'w',
+ '.',
+ 'h',
+ 'l',
+ 'm',
+ '-',
+ 'd',
+ 'u',
+ '0',
+ '1',
+ '2',
+ 'g',
+ '=',
+ ':',
+ 'b',
+ 'f',
+ '3',
+ 'y',
+ '5',
+ '&',
+ '_',
+ '4',
+ 'v',
+ '9',
+ '6',
+ '7',
+ '8',
+ 'k',
+ '%',
+ '?',
+ 'x',
+ 'C',
+ 'D',
+ 'A',
+ 'S',
+ 'F',
+ 'I',
+ 'B',
+ 'E',
+ 'j',
+ 'P',
+ 'T',
+ 'z',
+ 'R',
+ 'N',
+ 'M',
+ '+',
+ 'L',
+ 'O',
+ 'q',
+ 'H',
+ 'G',
+ 'W',
+ 'U',
+ 'V',
+ ',',
+ 'Y',
+ 'K',
+ 'J',
+ 'Z',
+ 'X',
+ 'Q',
+ ';',
+ ')',
+ '(',
+ '~',
+ '[',
+ ']',
+ '$',
+ '!',
+ '\'',
+ '*',
+ '@',
+ '\x00',
+ '\x01',
+ '\x02',
+ '\x03',
+ '\x04',
+ '\x05',
+ '\x06',
+ '\x07',
+ '\x08',
+ '\t',
+ '\n',
+ '\x0b',
+ '\x0c',
+ '\r',
+ '\x0e',
+ '\x0f',
+ '\x10',
+ '\x11',
+ '\x12',
+ '\x13',
+ '\x14',
+ '\x15',
+ '\x16',
+ '\x17',
+ '\x18',
+ '\x19',
+ '\x1a',
+ '\x1b',
+ '\x1c',
+ '\x1d',
+ '\x1e',
+ '\x1f',
+ ' ',
+ '"',
+ '#',
+ '<',
+ '>',
+ '\\',
+ '^',
+ '`',
+ '{',
+ '|',
+ '}',
+ '\x7f',
+ '\x80',
+ '\x81',
+ '\x82',
+ '\x83',
+ '\x84',
+ '\x85',
+ '\x86',
+ '\x87',
+ '\x88',
+ '\x89',
+ '\x8a',
+ '\x8b',
+ '\x8c',
+ '\x8d',
+ '\x8e',
+ '\x8f',
+ '\x90',
+ '\x91',
+ '\x92',
+ '\x93',
+ '\x94',
+ '\x95',
+ '\x96',
+ '\x97',
+ '\x98',
+ '\x99',
+ '\x9a',
+ '\x9b',
+ '\x9c',
+ '\x9d',
+ '\x9e',
+ '\x9f',
+ '\xa0',
+ '\xa1',
+ '\xa2',
+ '\xa3',
+ '\xa4',
+ '\xa5',
+ '\xa6',
+ '\xa7',
+ '\xa8',
+ '\xa9',
+ '\xaa',
+ '\xab',
+ '\xac',
+ '\xad',
+ '\xae',
+ '\xaf',
+ '\xb0',
+ '\xb1',
+ '\xb2',
+ '\xb3',
+ '\xb4',
+ '\xb5',
+ '\xb6',
+ '\xb7',
+ '\xb8',
+ '\xb9',
+ '\xba',
+ '\xbb',
+ '\xbc',
+ '\xbd',
+ '\xbe',
+ '\xbf',
+ '\xc0',
+ '\xc1',
+ '\xc2',
+ '\xc3',
+ '\xc4',
+ '\xc5',
+ '\xc6',
+ '\xc7',
+ '\xc8',
+ '\xc9',
+ '\xca',
+ '\xcb',
+ '\xcc',
+ '\xcd',
+ '\xce',
+ '\xcf',
+ '\xd0',
+ '\xd1',
+ '\xd2',
+ '\xd3',
+ '\xd4',
+ '\xd5',
+ '\xd6',
+ '\xd7',
+ '\xd8',
+ '\xd9',
+ '\xda',
+ '\xdb',
+ '\xdc',
+ '\xdd',
+ '\xde',
+ '\xdf',
+ '\xe0',
+ '\xe1',
+ '\xe2',
+ '\xe3',
+ '\xe4',
+ '\xe5',
+ '\xe6',
+ '\xe7',
+ '\xe8',
+ '\xe9',
+ '\xea',
+ '\xeb',
+ '\xec',
+ '\xed',
+ '\xee',
+ '\xef',
+ '\xf0',
+ '\xf1',
+ '\xf2',
+ '\xf3',
+ '\xf4',
+ '\xf5',
+ '\xf6',
+ '\xf7',
+ '\xf8',
+ '\xf9',
+ '\xfa',
+ '\xfb',
+ '\xfc',
+ '\xfd',
+ '\xfe',
+ '\xff',
+}
diff --git a/vendor/github.com/couchbase/vellum/decoder_v1.go b/vendor/github.com/couchbase/vellum/decoder_v1.go
new file mode 100644
index 0000000000..5a0ea68871
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/decoder_v1.go
@@ -0,0 +1,316 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vellum
+
+import (
+ "bytes"
+ "encoding/binary"
+ "fmt"
+ "strconv"
+)
+
+func init() {
+ registerDecoder(versionV1, func(data []byte) decoder {
+ return newDecoderV1(data)
+ })
+}
+
+type decoderV1 struct {
+ data []byte
+ root uint64
+ len uint64
+}
+
+func newDecoderV1(data []byte) *decoderV1 {
+ return &decoderV1{
+ data: data,
+ }
+}
+
+func (d *decoderV1) getRoot() int {
+ if len(d.data) < footerSizeV1 {
+ return noneAddr
+ }
+ footer := d.data[len(d.data)-footerSizeV1:]
+ root := binary.LittleEndian.Uint64(footer[8:])
+ return int(root)
+}
+
+func (d *decoderV1) getLen() int {
+ if len(d.data) < footerSizeV1 {
+ return 0
+ }
+ footer := d.data[len(d.data)-footerSizeV1:]
+ dlen := binary.LittleEndian.Uint64(footer)
+ return int(dlen)
+}
+
+func (d *decoderV1) stateAt(addr int, prealloc fstState) (fstState, error) {
+ state, ok := prealloc.(*fstStateV1)
+ if ok && state != nil {
+ *state = fstStateV1{} // clear the struct
+ } else {
+ state = &fstStateV1{}
+ }
+ err := state.at(d.data, addr)
+ if err != nil {
+ return nil, err
+ }
+ return state, nil
+}
+
+type fstStateV1 struct {
+ data []byte
+ top int
+ bottom int
+ numTrans int
+
+ // single trans only
+ singleTransChar byte
+ singleTransNext bool
+ singleTransAddr uint64
+ singleTransOut uint64
+
+ // shared
+ transSize int
+ outSize int
+
+ // multiple trans only
+ final bool
+ transTop int
+ transBottom int
+ destTop int
+ destBottom int
+ outTop int
+ outBottom int
+ outFinal int
+}
+
+func (f *fstStateV1) isEncodedSingle() bool {
+ if f.data[f.top]>>7 > 0 {
+ return true
+ }
+ return false
+}
+
+func (f *fstStateV1) at(data []byte, addr int) error {
+ f.data = data
+ if addr == emptyAddr {
+ return f.atZero()
+ } else if addr == noneAddr {
+ return f.atNone()
+ }
+ if addr > len(data) || addr < 16 {
+ return fmt.Errorf("invalid address %d/%d", addr, len(data))
+ }
+ f.top = addr
+ f.bottom = addr
+ if f.isEncodedSingle() {
+ return f.atSingle(data, addr)
+ }
+ return f.atMulti(data, addr)
+}
+
+func (f *fstStateV1) atZero() error {
+ f.top = 0
+ f.bottom = 1
+ f.numTrans = 0
+ f.final = true
+ f.outFinal = 0
+ return nil
+}
+
+func (f *fstStateV1) atNone() error {
+ f.top = 0
+ f.bottom = 1
+ f.numTrans = 0
+ f.final = false
+ f.outFinal = 0
+ return nil
+}
+
+func (f *fstStateV1) atSingle(data []byte, addr int) error {
+ // handle single transition case
+ f.numTrans = 1
+ f.singleTransNext = data[f.top]&transitionNext > 0
+ f.singleTransChar = data[f.top] & maxCommon
+ if f.singleTransChar == 0 {
+ f.bottom-- // extra byte for uncommon
+ f.singleTransChar = data[f.bottom]
+ } else {
+ f.singleTransChar = decodeCommon(f.singleTransChar)
+ }
+ if f.singleTransNext {
+ // now we know the bottom, can compute next addr
+ f.singleTransAddr = uint64(f.bottom - 1)
+ f.singleTransOut = 0
+ } else {
+ f.bottom-- // extra byte with pack sizes
+ f.transSize, f.outSize = decodePackSize(data[f.bottom])
+ f.bottom -= f.transSize // exactly one trans
+ f.singleTransAddr = readPackedUint(data[f.bottom : f.bottom+f.transSize])
+ if f.outSize > 0 {
+ f.bottom -= f.outSize // exactly one out (could be length 0 though)
+ f.singleTransOut = readPackedUint(data[f.bottom : f.bottom+f.outSize])
+ } else {
+ f.singleTransOut = 0
+ }
+ // need to wait till we know bottom
+ if f.singleTransAddr != 0 {
+ f.singleTransAddr = uint64(f.bottom) - f.singleTransAddr
+ }
+ }
+ return nil
+}
+
+func (f *fstStateV1) atMulti(data []byte, addr int) error {
+ // handle multiple transitions case
+ f.final = data[f.top]&stateFinal > 0
+ f.numTrans = int(data[f.top] & maxNumTrans)
+ if f.numTrans == 0 {
+ f.bottom-- // extra byte for number of trans
+ f.numTrans = int(data[f.bottom])
+ if f.numTrans == 1 {
+ // can't really be 1 here, this is special case that means 256
+ f.numTrans = 256
+ }
+ }
+ f.bottom-- // extra byte with pack sizes
+ f.transSize, f.outSize = decodePackSize(data[f.bottom])
+
+ f.transTop = f.bottom
+ f.bottom -= f.numTrans // one byte for each transition
+ f.transBottom = f.bottom
+
+ f.destTop = f.bottom
+ f.bottom -= f.numTrans * f.transSize
+ f.destBottom = f.bottom
+
+ if f.outSize > 0 {
+ f.outTop = f.bottom
+ f.bottom -= f.numTrans * f.outSize
+ f.outBottom = f.bottom
+ if f.final {
+ f.bottom -= f.outSize
+ f.outFinal = f.bottom
+ }
+ }
+ return nil
+}
+
+func (f *fstStateV1) Address() int {
+ return f.top
+}
+
+func (f *fstStateV1) Final() bool {
+ return f.final
+}
+
+func (f *fstStateV1) FinalOutput() uint64 {
+ if f.numTrans > 0 && f.final && f.outSize > 0 {
+ return readPackedUint(f.data[f.outFinal : f.outFinal+f.outSize])
+ }
+ return 0
+}
+
+func (f *fstStateV1) NumTransitions() int {
+ return f.numTrans
+}
+
+func (f *fstStateV1) TransitionAt(i int) byte {
+ if f.isEncodedSingle() {
+ return f.singleTransChar
+ }
+ transitionKeys := f.data[f.transBottom:f.transTop]
+ return transitionKeys[f.numTrans-i-1]
+}
+
+func (f *fstStateV1) TransitionFor(b byte) (int, int, uint64) {
+ if f.isEncodedSingle() {
+ if f.singleTransChar == b {
+ return 0, int(f.singleTransAddr), f.singleTransOut
+ }
+ return -1, noneAddr, 0
+ }
+ transitionKeys := f.data[f.transBottom:f.transTop]
+ pos := bytes.IndexByte(transitionKeys, b)
+ if pos < 0 {
+ return -1, noneAddr, 0
+ }
+ transDests := f.data[f.destBottom:f.destTop]
+ dest := int(readPackedUint(transDests[pos*f.transSize : pos*f.transSize+f.transSize]))
+ if dest > 0 {
+ // convert delta
+ dest = f.bottom - dest
+ }
+ transVals := f.data[f.outBottom:f.outTop]
+ var out uint64
+ if f.outSize > 0 {
+ out = readPackedUint(transVals[pos*f.outSize : pos*f.outSize+f.outSize])
+ }
+ return f.numTrans - pos - 1, dest, out
+}
+
+func (f *fstStateV1) String() string {
+ rv := ""
+ rv += fmt.Sprintf("State: %d (%#x)", f.top, f.top)
+ if f.final {
+ rv += " final"
+ fout := f.FinalOutput()
+ if fout != 0 {
+ rv += fmt.Sprintf(" (%d)", fout)
+ }
+ }
+ rv += "\n"
+ rv += fmt.Sprintf("Data: % x\n", f.data[f.bottom:f.top+1])
+
+ for i := 0; i < f.numTrans; i++ {
+ transChar := f.TransitionAt(i)
+ _, transDest, transOut := f.TransitionFor(transChar)
+ rv += fmt.Sprintf(" - %d (%#x) '%s' ---> %d (%#x) with output: %d", transChar, transChar, string(transChar), transDest, transDest, transOut)
+ rv += "\n"
+ }
+ if f.numTrans == 0 {
+ rv += "\n"
+ }
+ return rv
+}
+
+func (f *fstStateV1) DotString(num int) string {
+ rv := ""
+ label := fmt.Sprintf("%d", num)
+ final := ""
+ if f.final {
+ final = ",peripheries=2"
+ }
+ rv += fmt.Sprintf(" %d [label=\"%s\"%s];\n", f.top, label, final)
+
+ for i := 0; i < f.numTrans; i++ {
+ transChar := f.TransitionAt(i)
+ _, transDest, transOut := f.TransitionFor(transChar)
+ out := ""
+ if transOut != 0 {
+ out = fmt.Sprintf("/%d", transOut)
+ }
+ rv += fmt.Sprintf(" %d -> %d [label=\"%s%s\"];\n", f.top, transDest, escapeInput(transChar), out)
+ }
+
+ return rv
+}
+
+func escapeInput(b byte) string {
+ x := strconv.AppendQuoteRune(nil, rune(b))
+ return string(x[1:(len(x) - 1)])
+}
diff --git a/vendor/github.com/couchbase/vellum/encoder_v1.go b/vendor/github.com/couchbase/vellum/encoder_v1.go
new file mode 100644
index 0000000000..0651fc8614
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/encoder_v1.go
@@ -0,0 +1,227 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vellum
+
+import (
+ "encoding/binary"
+ "fmt"
+ "io"
+)
+
+const versionV1 = 1
+const oneTransition = 1 << 7
+const transitionNext = 1 << 6
+const stateFinal = 1 << 6
+const footerSizeV1 = 16
+
+func init() {
+ registerEncoder(versionV1, func(w io.Writer) encoder {
+ return newEncoderV1(w)
+ })
+}
+
+type encoderV1 struct {
+ bw *writer
+}
+
+func newEncoderV1(w io.Writer) *encoderV1 {
+ return &encoderV1{
+ bw: newWriter(w),
+ }
+}
+
+func (e *encoderV1) reset(w io.Writer) {
+ e.bw.Reset(w)
+}
+
+func (e *encoderV1) start() error {
+ header := make([]byte, headerSize)
+ binary.LittleEndian.PutUint64(header, versionV1)
+ binary.LittleEndian.PutUint64(header[8:], uint64(0)) // type
+ n, err := e.bw.Write(header)
+ if err != nil {
+ return err
+ }
+ if n != headerSize {
+ return fmt.Errorf("short write of header %d/%d", n, headerSize)
+ }
+ return nil
+}
+
+func (e *encoderV1) encodeState(s *builderNode, lastAddr int) (int, error) {
+ if len(s.trans) == 0 && s.final && s.finalOutput == 0 {
+ return 0, nil
+ } else if len(s.trans) != 1 || s.final {
+ return e.encodeStateMany(s)
+ } else if !s.final && s.trans[0].out == 0 && s.trans[0].addr == lastAddr {
+ return e.encodeStateOneFinish(s, transitionNext)
+ }
+ return e.encodeStateOne(s)
+}
+
+func (e *encoderV1) encodeStateOne(s *builderNode) (int, error) {
+ start := uint64(e.bw.counter)
+ outPackSize := 0
+ if s.trans[0].out != 0 {
+ outPackSize = packedSize(s.trans[0].out)
+ err := e.bw.WritePackedUintIn(s.trans[0].out, outPackSize)
+ if err != nil {
+ return 0, err
+ }
+ }
+ delta := deltaAddr(start, uint64(s.trans[0].addr))
+ transPackSize := packedSize(delta)
+ err := e.bw.WritePackedUintIn(delta, transPackSize)
+ if err != nil {
+ return 0, err
+ }
+
+ packSize := encodePackSize(transPackSize, outPackSize)
+ err = e.bw.WriteByte(packSize)
+ if err != nil {
+ return 0, err
+ }
+
+ return e.encodeStateOneFinish(s, 0)
+}
+
+func (e *encoderV1) encodeStateOneFinish(s *builderNode, next byte) (int, error) {
+ enc := encodeCommon(s.trans[0].in)
+
+ // not a common input
+ if enc == 0 {
+ err := e.bw.WriteByte(s.trans[0].in)
+ if err != nil {
+ return 0, err
+ }
+ }
+ err := e.bw.WriteByte(oneTransition | next | enc)
+ if err != nil {
+ return 0, err
+ }
+
+ return e.bw.counter - 1, nil
+}
+
+func (e *encoderV1) encodeStateMany(s *builderNode) (int, error) {
+ start := uint64(e.bw.counter)
+ transPackSize := 0
+ outPackSize := packedSize(s.finalOutput)
+ anyOutputs := s.finalOutput != 0
+ for i := range s.trans {
+ delta := deltaAddr(start, uint64(s.trans[i].addr))
+ tsize := packedSize(delta)
+ if tsize > transPackSize {
+ transPackSize = tsize
+ }
+ osize := packedSize(s.trans[i].out)
+ if osize > outPackSize {
+ outPackSize = osize
+ }
+ anyOutputs = anyOutputs || s.trans[i].out != 0
+ }
+ if !anyOutputs {
+ outPackSize = 0
+ }
+
+ if anyOutputs {
+ // output final value
+ if s.final {
+ err := e.bw.WritePackedUintIn(s.finalOutput, outPackSize)
+ if err != nil {
+ return 0, err
+ }
+ }
+ // output transition values (in reverse)
+ for j := len(s.trans) - 1; j >= 0; j-- {
+ err := e.bw.WritePackedUintIn(s.trans[j].out, outPackSize)
+ if err != nil {
+ return 0, err
+ }
+ }
+ }
+
+ // output transition dests (in reverse)
+ for j := len(s.trans) - 1; j >= 0; j-- {
+ delta := deltaAddr(start, uint64(s.trans[j].addr))
+ err := e.bw.WritePackedUintIn(delta, transPackSize)
+ if err != nil {
+ return 0, err
+ }
+ }
+
+ // output transition keys (in reverse)
+ for j := len(s.trans) - 1; j >= 0; j-- {
+ err := e.bw.WriteByte(s.trans[j].in)
+ if err != nil {
+ return 0, err
+ }
+ }
+
+ packSize := encodePackSize(transPackSize, outPackSize)
+ err := e.bw.WriteByte(packSize)
+ if err != nil {
+ return 0, err
+ }
+
+ numTrans := encodeNumTrans(len(s.trans))
+
+ // if number of transitions wont fit in edge header byte
+ // write out separately
+ if numTrans == 0 {
+ if len(s.trans) == 256 {
+ // this wouldn't fit in single byte, but reuse value 1
+ // which would have always fit in the edge header instead
+ err = e.bw.WriteByte(1)
+ if err != nil {
+ return 0, err
+ }
+ } else {
+ err = e.bw.WriteByte(byte(len(s.trans)))
+ if err != nil {
+ return 0, err
+ }
+ }
+ }
+
+ // finally write edge header
+ if s.final {
+ numTrans |= stateFinal
+ }
+ err = e.bw.WriteByte(numTrans)
+ if err != nil {
+ return 0, err
+ }
+
+ return e.bw.counter - 1, nil
+}
+
+func (e *encoderV1) finish(count, rootAddr int) error {
+ footer := make([]byte, footerSizeV1)
+ binary.LittleEndian.PutUint64(footer, uint64(count)) // root addr
+ binary.LittleEndian.PutUint64(footer[8:], uint64(rootAddr)) // root addr
+ n, err := e.bw.Write(footer)
+ if err != nil {
+ return err
+ }
+ if n != footerSizeV1 {
+ return fmt.Errorf("short write of footer %d/%d", n, footerSizeV1)
+ }
+ err = e.bw.Flush()
+ if err != nil {
+ return err
+ }
+ return nil
+}
diff --git a/vendor/github.com/couchbase/vellum/encoding.go b/vendor/github.com/couchbase/vellum/encoding.go
new file mode 100644
index 0000000000..988d486499
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/encoding.go
@@ -0,0 +1,87 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vellum
+
+import (
+ "encoding/binary"
+ "fmt"
+ "io"
+)
+
+const headerSize = 16
+
+type encoderConstructor func(w io.Writer) encoder
+type decoderConstructor func([]byte) decoder
+
+var encoders = map[int]encoderConstructor{}
+var decoders = map[int]decoderConstructor{}
+
+type encoder interface {
+ start() error
+ encodeState(s *builderNode, addr int) (int, error)
+ finish(count, rootAddr int) error
+ reset(w io.Writer)
+}
+
+func loadEncoder(ver int, w io.Writer) (encoder, error) {
+ if cons, ok := encoders[ver]; ok {
+ return cons(w), nil
+ }
+ return nil, fmt.Errorf("no encoder for version %d registered", ver)
+}
+
+func registerEncoder(ver int, cons encoderConstructor) {
+ encoders[ver] = cons
+}
+
+type decoder interface {
+ getRoot() int
+ getLen() int
+ stateAt(addr int, prealloc fstState) (fstState, error)
+}
+
+func loadDecoder(ver int, data []byte) (decoder, error) {
+ if cons, ok := decoders[ver]; ok {
+ return cons(data), nil
+ }
+ return nil, fmt.Errorf("no decoder for version %d registered", ver)
+}
+
+func registerDecoder(ver int, cons decoderConstructor) {
+ decoders[ver] = cons
+}
+
+func decodeHeader(header []byte) (ver int, typ int, err error) {
+ if len(header) < headerSize {
+ err = fmt.Errorf("invalid header < 16 bytes")
+ return
+ }
+ ver = int(binary.LittleEndian.Uint64(header[0:8]))
+ typ = int(binary.LittleEndian.Uint64(header[8:16]))
+ return
+}
+
+// fstState represents a state inside the FTS runtime
+// It is the main contract between the FST impl and the decoder
+// The FST impl should work only with this interface, while only the decoder
+// impl knows the physical representation.
+type fstState interface {
+ Address() int
+ Final() bool
+ FinalOutput() uint64
+ NumTransitions() int
+ TransitionFor(b byte) (int, int, uint64)
+ TransitionAt(i int) byte
+}
diff --git a/vendor/github.com/couchbase/vellum/fst.go b/vendor/github.com/couchbase/vellum/fst.go
new file mode 100644
index 0000000000..ecc528395c
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/fst.go
@@ -0,0 +1,254 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vellum
+
+import (
+ "io"
+
+ "github.com/willf/bitset"
+)
+
+// FST is an in-memory representation of a finite state transducer,
+// capable of returning the uint64 value associated with
+// each []byte key stored, as well as enumerating all of the keys
+// in order.
+type FST struct {
+ f io.Closer
+ ver int
+ len int
+ typ int
+ data []byte
+ decoder decoder
+}
+
+func new(data []byte, f io.Closer) (rv *FST, err error) {
+ rv = &FST{
+ data: data,
+ f: f,
+ }
+
+ rv.ver, rv.typ, err = decodeHeader(data)
+ if err != nil {
+ return nil, err
+ }
+
+ rv.decoder, err = loadDecoder(rv.ver, rv.data)
+ if err != nil {
+ return nil, err
+ }
+
+ rv.len = rv.decoder.getLen()
+
+ return rv, nil
+}
+
+// Contains returns true if this FST contains the specified key.
+func (f *FST) Contains(val []byte) (bool, error) {
+ _, exists, err := f.Get(val)
+ return exists, err
+}
+
+// Get returns the value associated with the key. NOTE: a value of zero
+// does not imply the key does not exist, you must consult the second
+// return value as well.
+func (f *FST) Get(input []byte) (uint64, bool, error) {
+ return f.get(input, nil)
+}
+
+func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) {
+ var total uint64
+ curr := f.decoder.getRoot()
+ state, err := f.decoder.stateAt(curr, prealloc)
+ if err != nil {
+ return 0, false, err
+ }
+ for i := range input {
+ _, curr, output := state.TransitionFor(input[i])
+ if curr == noneAddr {
+ return 0, false, nil
+ }
+
+ state, err = f.decoder.stateAt(curr, state)
+ if err != nil {
+ return 0, false, err
+ }
+
+ total += output
+ }
+
+ if state.Final() {
+ total += state.FinalOutput()
+ return total, true, nil
+ }
+ return 0, false, nil
+}
+
+// Version returns the encoding version used by this FST instance.
+func (f *FST) Version() int {
+ return f.ver
+}
+
+// Len returns the number of entries in this FST instance.
+func (f *FST) Len() int {
+ return f.len
+}
+
+// Type returns the type of this FST instance.
+func (f *FST) Type() int {
+ return f.typ
+}
+
+// Close will unmap any mmap'd data (if managed by vellum) and it will close
+// the backing file (if managed by vellum). You MUST call Close() for any
+// FST instance that is created.
+func (f *FST) Close() error {
+ if f.f != nil {
+ err := f.f.Close()
+ if err != nil {
+ return err
+ }
+ }
+ f.data = nil
+ f.decoder = nil
+ return nil
+}
+
+// Start returns the start state of this Automaton
+func (f *FST) Start() int {
+ return f.decoder.getRoot()
+}
+
+// IsMatch returns if this state is a matching state in this Automaton
+func (f *FST) IsMatch(addr int) bool {
+ match, _ := f.IsMatchWithVal(addr)
+ return match
+}
+
+// CanMatch returns if this state can ever transition to a matching state
+// in this Automaton
+func (f *FST) CanMatch(addr int) bool {
+ if addr == noneAddr {
+ return false
+ }
+ return true
+}
+
+// WillAlwaysMatch returns if from this state the Automaton will always
+// be in a matching state
+func (f *FST) WillAlwaysMatch(int) bool {
+ return false
+}
+
+// Accept returns the next state for this Automaton on input of byte b
+func (f *FST) Accept(addr int, b byte) int {
+ next, _ := f.AcceptWithVal(addr, b)
+ return next
+}
+
+// IsMatchWithVal returns if this state is a matching state in this Automaton
+// and also returns the final output value for this state
+func (f *FST) IsMatchWithVal(addr int) (bool, uint64) {
+ s, err := f.decoder.stateAt(addr, nil)
+ if err != nil {
+ return false, 0
+ }
+ return s.Final(), s.FinalOutput()
+}
+
+// AcceptWithVal returns the next state for this Automaton on input of byte b
+// and also returns the output value for the transition
+func (f *FST) AcceptWithVal(addr int, b byte) (int, uint64) {
+ s, err := f.decoder.stateAt(addr, nil)
+ if err != nil {
+ return noneAddr, 0
+ }
+ _, next, output := s.TransitionFor(b)
+ return next, output
+}
+
+// Iterator returns a new Iterator capable of enumerating the key/value pairs
+// between the provided startKeyInclusive and endKeyExclusive.
+func (f *FST) Iterator(startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
+ return newIterator(f, startKeyInclusive, endKeyExclusive, nil)
+}
+
+// Search returns a new Iterator capable of enumerating the key/value pairs
+// between the provided startKeyInclusive and endKeyExclusive that also
+// satisfy the provided automaton.
+func (f *FST) Search(aut Automaton, startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
+ return newIterator(f, startKeyInclusive, endKeyExclusive, aut)
+}
+
+// Debug is only intended for debug purposes, it simply asks the underlying
+// decoder visit each state, and pass it to the provided callback.
+func (f *FST) Debug(callback func(int, interface{}) error) error {
+
+ addr := f.decoder.getRoot()
+ set := bitset.New(uint(addr))
+ stack := addrStack{addr}
+
+ stateNumber := 0
+ stack, addr = stack[:len(stack)-1], stack[len(stack)-1]
+ for addr != noneAddr {
+ if set.Test(uint(addr)) {
+ stack, addr = stack.Pop()
+ continue
+ }
+ set.Set(uint(addr))
+ state, err := f.decoder.stateAt(addr, nil)
+ if err != nil {
+ return err
+ }
+ err = callback(stateNumber, state)
+ if err != nil {
+ return err
+ }
+ for i := 0; i < state.NumTransitions(); i++ {
+ tchar := state.TransitionAt(i)
+ _, dest, _ := state.TransitionFor(tchar)
+ stack = append(stack, dest)
+ }
+ stateNumber++
+ stack, addr = stack.Pop()
+ }
+
+ return nil
+}
+
+type addrStack []int
+
+func (a addrStack) Pop() (addrStack, int) {
+ l := len(a)
+ if l < 1 {
+ return a, noneAddr
+ }
+ return a[:l-1], a[l-1]
+}
+
+// Reader() returns a Reader instance that a single thread may use to
+// retrieve data from the FST
+func (f *FST) Reader() (*Reader, error) {
+ return &Reader{f: f}, nil
+}
+
+// A Reader is meant for a single threaded use
+type Reader struct {
+ f *FST
+ prealloc fstStateV1
+}
+
+func (r *Reader) Get(input []byte) (uint64, bool, error) {
+ return r.f.get(input, &r.prealloc)
+}
diff --git a/vendor/github.com/couchbase/vellum/fst_iterator.go b/vendor/github.com/couchbase/vellum/fst_iterator.go
new file mode 100644
index 0000000000..389ac64aab
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/fst_iterator.go
@@ -0,0 +1,276 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vellum
+
+import (
+ "bytes"
+)
+
+// Iterator represents a means of visity key/value pairs in order.
+type Iterator interface {
+
+ // Current() returns the key/value pair currently pointed to.
+ // The []byte of the key is ONLY guaranteed to be valid until
+ // another call to Next/Seek/Close. If you need it beyond that
+ // point you MUST make a copy.
+ Current() ([]byte, uint64)
+
+ // Next() advances the iterator to the next key/value pair.
+ // If no more key/value pairs exist, ErrIteratorDone is returned.
+ Next() error
+
+ // Seek() advances the iterator the specified key, or the next key
+ // if it does not exist.
+ // If no keys exist after that point, ErrIteratorDone is returned.
+ Seek(key []byte) error
+
+ // Reset resets the Iterator' internal state to allow for iterator
+ // reuse (e.g. pooling).
+ Reset(f *FST, startKeyInclusive, endKeyExclusive []byte, aut Automaton) error
+
+ // Close() frees any resources held by this iterator.
+ Close() error
+}
+
+// FSTIterator is a structure for iterating key/value pairs in this FST in
+// lexicographic order. Iterators should be constructed with the FSTIterator
+// method on the parent FST structure.
+type FSTIterator struct {
+ f *FST
+ aut Automaton
+
+ startKeyInclusive []byte
+ endKeyExclusive []byte
+
+ statesStack []fstState
+ keysStack []byte
+ keysPosStack []int
+ valsStack []uint64
+ autStatesStack []int
+
+ nextStart []byte
+}
+
+func newIterator(f *FST, startKeyInclusive, endKeyExclusive []byte,
+ aut Automaton) (*FSTIterator, error) {
+
+ rv := &FSTIterator{}
+ err := rv.Reset(f, startKeyInclusive, endKeyExclusive, aut)
+ if err != nil {
+ return nil, err
+ }
+ return rv, nil
+}
+
+// Reset resets the Iterator' internal state to allow for iterator
+// reuse (e.g. pooling).
+func (i *FSTIterator) Reset(f *FST, startKeyInclusive, endKeyExclusive []byte, aut Automaton) error {
+ if aut == nil {
+ aut = alwaysMatchAutomaton
+ }
+
+ i.f = f
+ i.startKeyInclusive = startKeyInclusive
+ i.endKeyExclusive = endKeyExclusive
+ i.aut = aut
+
+ return i.pointTo(startKeyInclusive)
+}
+
+// pointTo attempts to point us to the specified location
+func (i *FSTIterator) pointTo(key []byte) error {
+
+ // tried to seek before start
+ if bytes.Compare(key, i.startKeyInclusive) < 0 {
+ key = i.startKeyInclusive
+ }
+
+ // trid to see past end
+ if i.endKeyExclusive != nil && bytes.Compare(key, i.endKeyExclusive) > 0 {
+ key = i.endKeyExclusive
+ }
+
+ // reset any state, pointTo always starts over
+ i.statesStack = i.statesStack[:0]
+ i.keysStack = i.keysStack[:0]
+ i.keysPosStack = i.keysPosStack[:0]
+ i.valsStack = i.valsStack[:0]
+ i.autStatesStack = i.autStatesStack[:0]
+
+ root, err := i.f.decoder.stateAt(i.f.decoder.getRoot(), nil)
+ if err != nil {
+ return err
+ }
+
+ autStart := i.aut.Start()
+
+ maxQ := -1
+ // root is always part of the path
+ i.statesStack = append(i.statesStack, root)
+ i.autStatesStack = append(i.autStatesStack, autStart)
+ for j := 0; j < len(key); j++ {
+ curr := i.statesStack[len(i.statesStack)-1]
+ autCurr := i.autStatesStack[len(i.autStatesStack)-1]
+
+ pos, nextAddr, nextVal := curr.TransitionFor(key[j])
+ if nextAddr == noneAddr {
+ // needed transition doesn't exist
+ // find last trans before the one we needed
+ for q := 0; q < curr.NumTransitions(); q++ {
+ if curr.TransitionAt(q) < key[j] {
+ maxQ = q
+ }
+ }
+ break
+ }
+ autNext := i.aut.Accept(autCurr, key[j])
+
+ next, err := i.f.decoder.stateAt(nextAddr, nil)
+ if err != nil {
+ return err
+ }
+
+ i.statesStack = append(i.statesStack, next)
+ i.keysStack = append(i.keysStack, key[j])
+ i.keysPosStack = append(i.keysPosStack, pos)
+ i.valsStack = append(i.valsStack, nextVal)
+ i.autStatesStack = append(i.autStatesStack, autNext)
+ continue
+ }
+
+ if !i.statesStack[len(i.statesStack)-1].Final() || !i.aut.IsMatch(i.autStatesStack[len(i.autStatesStack)-1]) || bytes.Compare(i.keysStack, key) < 0 {
+ return i.next(maxQ)
+ }
+
+ return nil
+}
+
+// Current returns the key and value currently pointed to by the iterator.
+// If the iterator is not pointing at a valid value (because Iterator/Next/Seek)
+// returned an error previously, it may return nil,0.
+func (i *FSTIterator) Current() ([]byte, uint64) {
+ curr := i.statesStack[len(i.statesStack)-1]
+ if curr.Final() {
+ var total uint64
+ for _, v := range i.valsStack {
+ total += v
+ }
+ total += curr.FinalOutput()
+ return i.keysStack, total
+ }
+ return nil, 0
+}
+
+// Next advances this iterator to the next key/value pair. If there is none
+// or the advancement goes beyond the configured endKeyExclusive, then
+// ErrIteratorDone is returned.
+func (i *FSTIterator) Next() error {
+ return i.next(-1)
+}
+
+func (i *FSTIterator) next(lastOffset int) error {
+
+ // remember where we started
+ if cap(i.nextStart) < len(i.keysStack) {
+ i.nextStart = make([]byte, len(i.keysStack))
+ } else {
+ i.nextStart = i.nextStart[0:len(i.keysStack)]
+ }
+ copy(i.nextStart, i.keysStack)
+
+ for true {
+ curr := i.statesStack[len(i.statesStack)-1]
+ autCurr := i.autStatesStack[len(i.autStatesStack)-1]
+
+ if curr.Final() && i.aut.IsMatch(autCurr) &&
+ bytes.Compare(i.keysStack, i.nextStart) > 0 {
+ // in final state greater than start key
+ return nil
+ }
+
+ nextOffset := lastOffset + 1
+ if nextOffset < curr.NumTransitions() {
+ t := curr.TransitionAt(nextOffset)
+ autNext := i.aut.Accept(autCurr, t)
+ if i.aut.CanMatch(autNext) {
+ pos, nextAddr, v := curr.TransitionFor(t)
+
+ // the next slot in the statesStack might have an
+ // fstState instance that we can reuse
+ var nextPrealloc fstState
+ if len(i.statesStack) < cap(i.statesStack) {
+ nextPrealloc = i.statesStack[0:cap(i.statesStack)][len(i.statesStack)]
+ }
+
+ // push onto stack
+ next, err := i.f.decoder.stateAt(nextAddr, nextPrealloc)
+ if err != nil {
+ return err
+ }
+ i.statesStack = append(i.statesStack, next)
+ i.keysStack = append(i.keysStack, t)
+ i.keysPosStack = append(i.keysPosStack, pos)
+ i.valsStack = append(i.valsStack, v)
+ i.autStatesStack = append(i.autStatesStack, autNext)
+ lastOffset = -1
+
+ // check to see if new keystack might have gone too far
+ if i.endKeyExclusive != nil && bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 {
+ return ErrIteratorDone
+ }
+ } else {
+ lastOffset = nextOffset
+ }
+
+ continue
+ }
+
+ if len(i.statesStack) > 1 {
+ // no transitions, and still room to pop
+ i.statesStack = i.statesStack[:len(i.statesStack)-1]
+ i.keysStack = i.keysStack[:len(i.keysStack)-1]
+ lastOffset = i.keysPosStack[len(i.keysPosStack)-1]
+
+ i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-1]
+ i.valsStack = i.valsStack[:len(i.valsStack)-1]
+ i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1]
+ continue
+ } else {
+ // stack len is 1 (root), can't go back further, we're done
+ break
+ }
+
+ }
+
+ return ErrIteratorDone
+}
+
+// Seek advances this iterator to the specified key/value pair. If this key
+// is not in the FST, Current() will return the next largest key. If this
+// seek operation would go past the last key, or outside the configured
+// startKeyInclusive/endKeyExclusive then ErrIteratorDone is returned.
+func (i *FSTIterator) Seek(key []byte) error {
+ err := i.pointTo(key)
+ if err != nil {
+ return err
+ }
+ return nil
+}
+
+// Close will free any resources held by this iterator.
+func (i *FSTIterator) Close() error {
+ // at the moment we don't do anything, but wanted this for API completeness
+ return nil
+}
diff --git a/vendor/github.com/couchbase/vellum/merge_iterator.go b/vendor/github.com/couchbase/vellum/merge_iterator.go
new file mode 100644
index 0000000000..f00f7783e1
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/merge_iterator.go
@@ -0,0 +1,188 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vellum
+
+import (
+ "bytes"
+)
+
+// MergeFunc is used to choose the new value for a key when merging a slice
+// of iterators, and the same key is observed with multiple values.
+// Values presented to the MergeFunc will be in the same order as the
+// original slice creating the MergeIterator. This allows some MergeFunc
+// implementations to prioritize one iterator over another.
+type MergeFunc func([]uint64) uint64
+
+// MergeIterator implements the Iterator interface by traversing a slice
+// of iterators and merging the contents of them. If the same key exists
+// in mulitipe underlying iterators, a user-provided MergeFunc will be
+// invoked to choose the new value.
+type MergeIterator struct {
+ itrs []Iterator
+ f MergeFunc
+ currKs [][]byte
+ currVs []uint64
+
+ lowK []byte
+ lowV uint64
+ lowIdxs []int
+
+ mergeV []uint64
+}
+
+// NewMergeIterator creates a new MergeIterator over the provided slice of
+// Iterators and with the specified MergeFunc to resolve duplicate keys.
+func NewMergeIterator(itrs []Iterator, f MergeFunc) (*MergeIterator, error) {
+ rv := &MergeIterator{
+ itrs: itrs,
+ f: f,
+ currKs: make([][]byte, len(itrs)),
+ currVs: make([]uint64, len(itrs)),
+ lowIdxs: make([]int, 0, len(itrs)),
+ mergeV: make([]uint64, 0, len(itrs)),
+ }
+ rv.init()
+ if rv.lowK == nil {
+ return rv, ErrIteratorDone
+ }
+ return rv, nil
+}
+
+func (m *MergeIterator) init() {
+ for i, itr := range m.itrs {
+ m.currKs[i], m.currVs[i] = itr.Current()
+ }
+ m.updateMatches()
+}
+
+func (m *MergeIterator) updateMatches() {
+ if len(m.itrs) < 1 {
+ return
+ }
+ m.lowK = m.currKs[0]
+ m.lowIdxs = m.lowIdxs[:0]
+ m.lowIdxs = append(m.lowIdxs, 0)
+ for i := 1; i < len(m.itrs); i++ {
+ if m.currKs[i] == nil {
+ continue
+ }
+ cmp := bytes.Compare(m.currKs[i], m.lowK)
+ if m.lowK == nil || cmp < 0 {
+ // reached a new low
+ m.lowK = m.currKs[i]
+ m.lowIdxs = m.lowIdxs[:0]
+ m.lowIdxs = append(m.lowIdxs, i)
+ } else if cmp == 0 {
+ m.lowIdxs = append(m.lowIdxs, i)
+ }
+ }
+ if len(m.lowIdxs) > 1 {
+ // merge multiple values
+ m.mergeV = m.mergeV[:0]
+ for _, vi := range m.lowIdxs {
+ m.mergeV = append(m.mergeV, m.currVs[vi])
+ }
+ m.lowV = m.f(m.mergeV)
+ } else if len(m.lowIdxs) == 1 {
+ m.lowV = m.currVs[m.lowIdxs[0]]
+ }
+}
+
+// Current returns the key and value currently pointed to by this iterator.
+// If the iterator is not pointing at a valid value (because Iterator/Next/Seek)
+// returned an error previously, it may return nil,0.
+func (m *MergeIterator) Current() ([]byte, uint64) {
+ return m.lowK, m.lowV
+}
+
+// Next advances this iterator to the next key/value pair. If there is none,
+// then ErrIteratorDone is returned.
+func (m *MergeIterator) Next() error {
+ // move all the current low iterators to next
+ for _, vi := range m.lowIdxs {
+ err := m.itrs[vi].Next()
+ if err != nil && err != ErrIteratorDone {
+ return err
+ }
+ m.currKs[vi], m.currVs[vi] = m.itrs[vi].Current()
+ }
+ m.updateMatches()
+ if m.lowK == nil {
+ return ErrIteratorDone
+ }
+ return nil
+}
+
+// Seek advances this iterator to the specified key/value pair. If this key
+// is not in the FST, Current() will return the next largest key. If this
+// seek operation would go past the last key, then ErrIteratorDone is returned.
+func (m *MergeIterator) Seek(key []byte) error {
+ for i := range m.itrs {
+ err := m.itrs[i].Seek(key)
+ if err != nil && err != ErrIteratorDone {
+ return err
+ }
+ }
+ m.updateMatches()
+ if m.lowK == nil {
+ return ErrIteratorDone
+ }
+ return nil
+}
+
+// Close will attempt to close all the underlying Iterators. If any errors
+// are encountered, the first will be returned.
+func (m *MergeIterator) Close() error {
+ var rv error
+ for i := range m.itrs {
+ // close all iterators, return first error if any
+ err := m.itrs[i].Close()
+ if rv == nil {
+ rv = err
+ }
+ }
+ return rv
+}
+
+// MergeMin chooses the minimum value
+func MergeMin(vals []uint64) uint64 {
+ rv := vals[0]
+ for _, v := range vals[1:] {
+ if v < rv {
+ rv = v
+ }
+ }
+ return rv
+}
+
+// MergeMax chooses the maximum value
+func MergeMax(vals []uint64) uint64 {
+ rv := vals[0]
+ for _, v := range vals[1:] {
+ if v > rv {
+ rv = v
+ }
+ }
+ return rv
+}
+
+// MergeSum sums the values
+func MergeSum(vals []uint64) uint64 {
+ rv := vals[0]
+ for _, v := range vals[1:] {
+ rv += v
+ }
+ return rv
+}
diff --git a/vendor/github.com/couchbase/vellum/pack.go b/vendor/github.com/couchbase/vellum/pack.go
new file mode 100644
index 0000000000..78f3dcd588
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/pack.go
@@ -0,0 +1,55 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vellum
+
+func deltaAddr(base, trans uint64) uint64 {
+ // transition dest of 0 is special case
+ if trans == 0 {
+ return 0
+ }
+ return base - trans
+}
+
+const packOutMask = 1<<4 - 1
+
+func encodePackSize(transSize, outSize int) byte {
+ var rv byte
+ rv = byte(transSize << 4)
+ rv |= byte(outSize)
+ return rv
+}
+
+func decodePackSize(pack byte) (transSize int, packSize int) {
+ transSize = int(pack >> 4)
+ packSize = int(pack & packOutMask)
+ return
+}
+
+const maxNumTrans = 1<<6 - 1
+
+func encodeNumTrans(n int) byte {
+ if n <= maxNumTrans {
+ return byte(n)
+ }
+ return 0
+}
+
+func readPackedUint(data []byte) (rv uint64) {
+ for i := range data {
+ shifted := uint64(data[i]) << uint(i*8)
+ rv |= shifted
+ }
+ return
+}
diff --git a/vendor/github.com/couchbase/vellum/regexp/compile.go b/vendor/github.com/couchbase/vellum/regexp/compile.go
new file mode 100644
index 0000000000..6922b749db
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/regexp/compile.go
@@ -0,0 +1,316 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package regexp
+
+import (
+ "regexp/syntax"
+ "unicode"
+
+ "github.com/couchbase/vellum/utf8"
+)
+
+type compiler struct {
+ sizeLimit uint
+ insts prog
+}
+
+func newCompiler(sizeLimit uint) *compiler {
+ return &compiler{
+ sizeLimit: sizeLimit,
+ }
+}
+
+func (c *compiler) compile(ast *syntax.Regexp) (prog, error) {
+ err := c.c(ast)
+ if err != nil {
+ return nil, err
+ }
+ c.insts = append(c.insts, &inst{
+ op: OpMatch,
+ })
+ return c.insts, nil
+}
+
+func (c *compiler) c(ast *syntax.Regexp) error {
+ if ast.Flags&syntax.NonGreedy > 1 {
+ return ErrNoLazy
+ }
+
+ switch ast.Op {
+ case syntax.OpEndLine, syntax.OpBeginLine,
+ syntax.OpBeginText, syntax.OpEndText:
+ return ErrNoEmpty
+ case syntax.OpWordBoundary, syntax.OpNoWordBoundary:
+ return ErrNoWordBoundary
+ case syntax.OpEmptyMatch:
+ return nil
+ case syntax.OpLiteral:
+ for _, r := range ast.Rune {
+ if ast.Flags&syntax.FoldCase > 0 {
+ next := syntax.Regexp{
+ Op: syntax.OpCharClass,
+ Flags: ast.Flags & syntax.FoldCase,
+ Rune0: [2]rune{r, r},
+ }
+ next.Rune = next.Rune0[0:2]
+ return c.c(&next)
+ }
+ seqs, err := utf8.NewSequences(r, r)
+ if err != nil {
+ return err
+ }
+ for _, seq := range seqs {
+ c.compileUtf8Ranges(seq)
+ }
+ }
+ case syntax.OpAnyChar:
+ next := syntax.Regexp{
+ Op: syntax.OpCharClass,
+ Flags: ast.Flags & syntax.FoldCase,
+ Rune0: [2]rune{0, unicode.MaxRune},
+ }
+ next.Rune = next.Rune0[:2]
+ return c.c(&next)
+ case syntax.OpAnyCharNotNL:
+ next := syntax.Regexp{
+ Op: syntax.OpCharClass,
+ Flags: ast.Flags & syntax.FoldCase,
+ Rune: []rune{0, 0x09, 0x0B, unicode.MaxRune},
+ }
+ return c.c(&next)
+ case syntax.OpCharClass:
+ return c.compileClass(ast)
+ case syntax.OpCapture:
+ return c.c(ast.Sub[0])
+ case syntax.OpConcat:
+ for _, sub := range ast.Sub {
+ err := c.c(sub)
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+ case syntax.OpAlternate:
+ if len(ast.Sub) == 0 {
+ return nil
+ }
+ jmpsToEnd := []uint{}
+
+ // does not handle last entry
+ for i := 0; i < len(ast.Sub)-1; i++ {
+ sub := ast.Sub[i]
+ split := c.emptySplit()
+ j1 := c.top()
+ err := c.c(sub)
+ if err != nil {
+ return err
+ }
+ jmpsToEnd = append(jmpsToEnd, c.emptyJump())
+ j2 := c.top()
+ c.setSplit(split, j1, j2)
+ }
+ // handle last entry
+ err := c.c(ast.Sub[len(ast.Sub)-1])
+ if err != nil {
+ return err
+ }
+ end := uint(len(c.insts))
+ for _, jmpToEnd := range jmpsToEnd {
+ c.setJump(jmpToEnd, end)
+ }
+ case syntax.OpQuest:
+ split := c.emptySplit()
+ j1 := c.top()
+ err := c.c(ast.Sub[0])
+ if err != nil {
+ return err
+ }
+ j2 := c.top()
+ c.setSplit(split, j1, j2)
+
+ case syntax.OpStar:
+ j1 := c.top()
+ split := c.emptySplit()
+ j2 := c.top()
+ err := c.c(ast.Sub[0])
+ if err != nil {
+ return err
+ }
+ jmp := c.emptyJump()
+ j3 := uint(len(c.insts))
+
+ c.setJump(jmp, j1)
+ c.setSplit(split, j2, j3)
+
+ case syntax.OpPlus:
+ j1 := c.top()
+ err := c.c(ast.Sub[0])
+ if err != nil {
+ return err
+ }
+ split := c.emptySplit()
+ j2 := c.top()
+ c.setSplit(split, j1, j2)
+
+ case syntax.OpRepeat:
+ if ast.Max == -1 {
+ for i := 0; i < ast.Min; i++ {
+ err := c.c(ast.Sub[0])
+ if err != nil {
+ return err
+ }
+ }
+ next := syntax.Regexp{
+ Op: syntax.OpStar,
+ Flags: ast.Flags,
+ Sub: ast.Sub,
+ Sub0: ast.Sub0,
+ Rune: ast.Rune,
+ Rune0: ast.Rune0,
+ }
+ return c.c(&next)
+ }
+ for i := 0; i < ast.Min; i++ {
+ err := c.c(ast.Sub[0])
+ if err != nil {
+ return err
+ }
+ }
+ var splits, starts []uint
+ for i := ast.Min; i < ast.Max; i++ {
+ splits = append(splits, c.emptySplit())
+ starts = append(starts, uint(len(c.insts)))
+ err := c.c(ast.Sub[0])
+ if err != nil {
+ return err
+ }
+ }
+ end := uint(len(c.insts))
+ for i := 0; i < len(splits); i++ {
+ c.setSplit(splits[i], starts[i], end)
+ }
+
+ }
+
+ return c.checkSize()
+}
+
+func (c *compiler) checkSize() error {
+ if uint(len(c.insts)*instSize) > c.sizeLimit {
+ return ErrCompiledTooBig
+ }
+ return nil
+}
+
+func (c *compiler) compileClass(ast *syntax.Regexp) error {
+ if len(ast.Rune) == 0 {
+ return nil
+ }
+ var jmps []uint
+
+ // does not do last pair
+ for i := 0; i < len(ast.Rune)-2; i += 2 {
+ rstart := ast.Rune[i]
+ rend := ast.Rune[i+1]
+
+ split := c.emptySplit()
+ j1 := c.top()
+ err := c.compileClassRange(rstart, rend)
+ if err != nil {
+ return err
+ }
+ jmps = append(jmps, c.emptyJump())
+ j2 := c.top()
+ c.setSplit(split, j1, j2)
+ }
+ // handle last pair
+ rstart := ast.Rune[len(ast.Rune)-2]
+ rend := ast.Rune[len(ast.Rune)-1]
+ err := c.compileClassRange(rstart, rend)
+ if err != nil {
+ return err
+ }
+ end := c.top()
+ for _, jmp := range jmps {
+ c.setJump(jmp, end)
+ }
+ return nil
+}
+
+func (c *compiler) compileClassRange(startR, endR rune) error {
+ seqs, err := utf8.NewSequences(startR, endR)
+ if err != nil {
+ return err
+ }
+ var jmps []uint
+
+ // does not do last entry
+ for i := 0; i < len(seqs)-1; i++ {
+ seq := seqs[i]
+ split := c.emptySplit()
+ j1 := c.top()
+ c.compileUtf8Ranges(seq)
+ jmps = append(jmps, c.emptyJump())
+ j2 := c.top()
+ c.setSplit(split, j1, j2)
+ }
+ // handle last entry
+ c.compileUtf8Ranges(seqs[len(seqs)-1])
+ end := c.top()
+ for _, jmp := range jmps {
+ c.setJump(jmp, end)
+ }
+
+ return nil
+}
+
+func (c *compiler) compileUtf8Ranges(seq utf8.Sequence) {
+ for _, r := range seq {
+ c.insts = append(c.insts, &inst{
+ op: OpRange,
+ rangeStart: r.Start,
+ rangeEnd: r.End,
+ })
+ }
+}
+
+func (c *compiler) emptySplit() uint {
+ c.insts = append(c.insts, &inst{
+ op: OpSplit,
+ })
+ return c.top() - 1
+}
+
+func (c *compiler) emptyJump() uint {
+ c.insts = append(c.insts, &inst{
+ op: OpJmp,
+ })
+ return c.top() - 1
+}
+
+func (c *compiler) setSplit(i, pc1, pc2 uint) {
+ split := c.insts[i]
+ split.splitA = pc1
+ split.splitB = pc2
+}
+
+func (c *compiler) setJump(i, pc uint) {
+ jmp := c.insts[i]
+ jmp.to = pc
+}
+
+func (c *compiler) top() uint {
+ return uint(len(c.insts))
+}
diff --git a/vendor/github.com/couchbase/vellum/regexp/dfa.go b/vendor/github.com/couchbase/vellum/regexp/dfa.go
new file mode 100644
index 0000000000..9864606b6a
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/regexp/dfa.go
@@ -0,0 +1,188 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package regexp
+
+import (
+ "encoding/binary"
+ "fmt"
+)
+
+// StateLimit is the maximum number of states allowed
+const StateLimit = 10000
+
+// ErrTooManyStates is returned if you attempt to build a Levenshtein
+// automaton which requries too many states.
+var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states",
+ StateLimit)
+
+type dfaBuilder struct {
+ dfa *dfa
+ cache map[string]int
+ keyBuf []byte
+}
+
+func newDfaBuilder(insts prog) *dfaBuilder {
+ d := &dfaBuilder{
+ dfa: &dfa{
+ insts: insts,
+ states: make([]*state, 0, 16),
+ },
+ cache: make(map[string]int, 1024),
+ }
+ // add 0 state that is invalid
+ d.dfa.states = append(d.dfa.states, &state{
+ next: make([]int, 256),
+ match: false,
+ })
+ return d
+}
+
+func (d *dfaBuilder) build() (*dfa, error) {
+ cur := newSparseSet(uint(len(d.dfa.insts)))
+ next := newSparseSet(uint(len(d.dfa.insts)))
+
+ d.dfa.add(cur, 0)
+ states := intStack{d.cachedState(cur)}
+ seen := make(map[int]struct{})
+ var s int
+ states, s = states.Pop()
+ for s != 0 {
+ for b := 0; b < 256; b++ {
+ ns := d.runState(cur, next, s, byte(b))
+ if ns != 0 {
+ if _, ok := seen[ns]; !ok {
+ seen[ns] = struct{}{}
+ states = states.Push(ns)
+ }
+ }
+ if len(d.dfa.states) > StateLimit {
+ return nil, ErrTooManyStates
+ }
+ }
+ states, s = states.Pop()
+ }
+ return d.dfa, nil
+}
+
+func (d *dfaBuilder) runState(cur, next *sparseSet, state int, b byte) int {
+ cur.Clear()
+ for _, ip := range d.dfa.states[state].insts {
+ cur.Add(ip)
+ }
+ d.dfa.run(cur, next, b)
+ nextState := d.cachedState(next)
+ d.dfa.states[state].next[b] = nextState
+ return nextState
+}
+
+func instsKey(insts []uint, buf []byte) []byte {
+ if cap(buf) < 8*len(insts) {
+ buf = make([]byte, 8*len(insts))
+ } else {
+ buf = buf[0 : 8*len(insts)]
+ }
+ for i, inst := range insts {
+ binary.LittleEndian.PutUint64(buf[i*8:], uint64(inst))
+ }
+ return buf
+}
+
+func (d *dfaBuilder) cachedState(set *sparseSet) int {
+ var insts []uint
+ var isMatch bool
+ for i := uint(0); i < uint(set.Len()); i++ {
+ ip := set.Get(i)
+ switch d.dfa.insts[ip].op {
+ case OpRange:
+ insts = append(insts, ip)
+ case OpMatch:
+ isMatch = true
+ insts = append(insts, ip)
+ }
+ }
+ if len(insts) == 0 {
+ return 0
+ }
+ d.keyBuf = instsKey(insts, d.keyBuf)
+ v, ok := d.cache[string(d.keyBuf)]
+ if ok {
+ return v
+ }
+ d.dfa.states = append(d.dfa.states, &state{
+ insts: insts,
+ next: make([]int, 256),
+ match: isMatch,
+ })
+ newV := len(d.dfa.states) - 1
+ d.cache[string(d.keyBuf)] = newV
+ return newV
+}
+
+type dfa struct {
+ insts prog
+ states []*state
+}
+
+func (d *dfa) add(set *sparseSet, ip uint) {
+ if set.Contains(ip) {
+ return
+ }
+ set.Add(ip)
+ switch d.insts[ip].op {
+ case OpJmp:
+ d.add(set, d.insts[ip].to)
+ case OpSplit:
+ d.add(set, d.insts[ip].splitA)
+ d.add(set, d.insts[ip].splitB)
+ }
+}
+
+func (d *dfa) run(from, to *sparseSet, b byte) bool {
+ to.Clear()
+ var isMatch bool
+ for i := uint(0); i < uint(from.Len()); i++ {
+ ip := from.Get(i)
+ switch d.insts[ip].op {
+ case OpMatch:
+ isMatch = true
+ case OpRange:
+ if d.insts[ip].rangeStart <= b &&
+ b <= d.insts[ip].rangeEnd {
+ d.add(to, ip+1)
+ }
+ }
+ }
+ return isMatch
+}
+
+type state struct {
+ insts []uint
+ next []int
+ match bool
+}
+
+type intStack []int
+
+func (s intStack) Push(v int) intStack {
+ return append(s, v)
+}
+
+func (s intStack) Pop() (intStack, int) {
+ l := len(s)
+ if l < 1 {
+ return s, 0
+ }
+ return s[:l-1], s[l-1]
+}
diff --git a/vendor/github.com/couchbase/vellum/regexp/inst.go b/vendor/github.com/couchbase/vellum/regexp/inst.go
new file mode 100644
index 0000000000..61cbf2f333
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/regexp/inst.go
@@ -0,0 +1,62 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package regexp
+
+import "fmt"
+
+// instOp represents a instruction operation
+type instOp int
+
+// the enumeration of operations
+const (
+ OpMatch instOp = iota
+ OpJmp
+ OpSplit
+ OpRange
+)
+
+// instSize is the approxmiate size of the an inst struct in bytes
+const instSize = 40
+
+type inst struct {
+ op instOp
+ to uint
+ splitA uint
+ splitB uint
+ rangeStart byte
+ rangeEnd byte
+}
+
+func (i *inst) String() string {
+ switch i.op {
+ case OpJmp:
+ return fmt.Sprintf("JMP: %d", i.to)
+ case OpSplit:
+ return fmt.Sprintf("SPLIT: %d - %d", i.splitA, i.splitB)
+ case OpRange:
+ return fmt.Sprintf("RANGE: %x - %x", i.rangeStart, i.rangeEnd)
+ }
+ return "MATCH"
+}
+
+type prog []*inst
+
+func (p prog) String() string {
+ rv := "\n"
+ for i, pi := range p {
+ rv += fmt.Sprintf("%d %v\n", i, pi)
+ }
+ return rv
+}
diff --git a/vendor/github.com/couchbase/vellum/regexp/regexp.go b/vendor/github.com/couchbase/vellum/regexp/regexp.go
new file mode 100644
index 0000000000..ed0e7823e1
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/regexp/regexp.go
@@ -0,0 +1,113 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package regexp
+
+import (
+ "fmt"
+ "regexp/syntax"
+)
+
+// ErrNoEmpty returned when "zero width assertions" are used
+var ErrNoEmpty = fmt.Errorf("zero width assertions not allowed")
+
+// ErrNoWordBoundary returned when word boundaries are used
+var ErrNoWordBoundary = fmt.Errorf("word boundaries are not allowed")
+
+// ErrNoBytes returned when byte literals are used
+var ErrNoBytes = fmt.Errorf("byte literals are not allowed")
+
+// ErrNoLazy returned when lazy quantifiers are used
+var ErrNoLazy = fmt.Errorf("lazy quantifiers are not allowed")
+
+// ErrCompiledTooBig returned when regular expression parses into
+// too many instructions
+var ErrCompiledTooBig = fmt.Errorf("too many instructions")
+
+// Regexp implements the vellum.Automaton interface for matcing a user
+// specified regular expression.
+type Regexp struct {
+ orig string
+ dfa *dfa
+}
+
+// NewRegexp creates a new Regular Expression automaton with the specified
+// expression. By default it is limited to approximately 10MB for the
+// compiled finite state automaton. If this size is exceeded,
+// ErrCompiledTooBig will be returned.
+func New(expr string) (*Regexp, error) {
+ return NewWithLimit(expr, 10*(1<<20))
+}
+
+// NewRegexpWithLimit creates a new Regular Expression automaton with
+// the specified expression. The size of the compiled finite state
+// automaton exceeds the user specified size, ErrCompiledTooBig will be
+// returned.
+func NewWithLimit(expr string, size uint) (*Regexp, error) {
+ parsed, err := syntax.Parse(expr, syntax.Perl)
+ if err != nil {
+ return nil, err
+ }
+ compiler := newCompiler(size)
+ insts, err := compiler.compile(parsed)
+ if err != nil {
+ return nil, err
+ }
+ dfaBuilder := newDfaBuilder(insts)
+ dfa, err := dfaBuilder.build()
+ if err != nil {
+ return nil, err
+ }
+ return &Regexp{
+ orig: expr,
+ dfa: dfa,
+ }, nil
+}
+
+// Start returns the start state of this automaton.
+func (r *Regexp) Start() int {
+ return 1
+}
+
+// IsMatch returns if the specified state is a matching state.
+func (r *Regexp) IsMatch(s int) bool {
+ if s < len(r.dfa.states) {
+ return r.dfa.states[s].match
+ }
+ return false
+}
+
+// CanMatch returns if the specified state can ever transition to a matching
+// state.
+func (r *Regexp) CanMatch(s int) bool {
+ if s < len(r.dfa.states) && s > 0 {
+ return true
+ }
+ return false
+}
+
+// WillAlwaysMatch returns if the specified state will always end in a
+// matching state.
+func (r *Regexp) WillAlwaysMatch(int) bool {
+ return false
+}
+
+// Accept returns the new state, resulting from the transite byte b
+// when currently in the state s.
+func (r *Regexp) Accept(s int, b byte) int {
+ if s < len(r.dfa.states) {
+ return r.dfa.states[s].next[b]
+ }
+ return 0
+}
diff --git a/vendor/github.com/couchbase/vellum/regexp/sparse.go b/vendor/github.com/couchbase/vellum/regexp/sparse.go
new file mode 100644
index 0000000000..7afbfceba6
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/regexp/sparse.go
@@ -0,0 +1,54 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package regexp
+
+type sparseSet struct {
+ dense []uint
+ sparse []uint
+ size uint
+}
+
+func newSparseSet(size uint) *sparseSet {
+ return &sparseSet{
+ dense: make([]uint, size),
+ sparse: make([]uint, size),
+ size: 0,
+ }
+}
+
+func (s *sparseSet) Len() int {
+ return int(s.size)
+}
+
+func (s *sparseSet) Add(ip uint) uint {
+ i := s.size
+ s.dense[i] = ip
+ s.sparse[ip] = i
+ s.size++
+ return i
+}
+
+func (s *sparseSet) Get(i uint) uint {
+ return s.dense[i]
+}
+
+func (s *sparseSet) Contains(ip uint) bool {
+ i := s.sparse[ip]
+ return i < s.size && s.dense[i] == ip
+}
+
+func (s *sparseSet) Clear() {
+ s.size = 0
+}
diff --git a/vendor/github.com/couchbase/vellum/registry.go b/vendor/github.com/couchbase/vellum/registry.go
new file mode 100644
index 0000000000..3721a7c9c3
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/registry.go
@@ -0,0 +1,116 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vellum
+
+import (
+ "hash"
+ "hash/fnv"
+)
+
+type registryCell struct {
+ addr int
+ node *builderNode
+}
+
+type registry struct {
+ table []registryCell
+ tableSize uint
+ mruSize uint
+ hasher hash.Hash64
+}
+
+func newRegistry(tableSize, mruSize int) *registry {
+ nsize := tableSize * mruSize
+ rv := &registry{
+ table: make([]registryCell, nsize),
+ tableSize: uint(tableSize),
+ mruSize: uint(mruSize),
+ hasher: fnv.New64a(),
+ }
+ return rv
+}
+
+func (r *registry) Reset() {
+ for i := 0; i < len(r.table); i++ {
+ r.table[i] = registryCell{}
+ }
+ r.hasher.Reset()
+}
+
+func (r *registry) entry(node *builderNode) (bool, int, *registryCell) {
+ if len(r.table) == 0 {
+ return false, 0, nil
+ }
+ bucket := r.hash(node)
+ start := r.mruSize * uint(bucket)
+ end := start + r.mruSize
+ rc := registryCache(r.table[start:end])
+ return rc.entry(node)
+}
+
+const fnvPrime = 1099511628211
+
+func (r *registry) hash(b *builderNode) int {
+ var final uint64
+ if b.final {
+ final = 1
+ }
+
+ var h uint64 = 14695981039346656037
+ h = (h ^ final) * fnvPrime
+ h = (h ^ b.finalOutput) * fnvPrime
+ for _, t := range b.trans {
+ h = (h ^ uint64(t.in)) * fnvPrime
+ h = (h ^ t.out) * fnvPrime
+ h = (h ^ uint64(t.addr)) * fnvPrime
+ }
+ return int(h % uint64(r.tableSize))
+}
+
+type registryCache []registryCell
+
+func (r registryCache) entry(node *builderNode) (bool, int, *registryCell) {
+ if len(r) == 1 {
+ if r[0].node != nil && r[0].node.equiv(node) {
+ return true, r[0].addr, nil
+ }
+ r[0].node = node
+ return false, 0, &r[0]
+ }
+ for i := range r {
+ if r[i].node != nil && r[i].node.equiv(node) {
+ addr := r[i].addr
+ r.promote(i)
+ return true, addr, nil
+ }
+ }
+ // no match
+ last := len(r) - 1
+ r[last].node = node // discard LRU
+ r.promote(last)
+ return false, 0, &r[0]
+
+}
+
+func (r registryCache) promote(i int) {
+ for i > 0 {
+ r.swap(i-1, i)
+ i--
+ }
+}
+
+func (r registryCache) swap(i, j int) {
+ r[i], r[j] = r[j], r[i]
+}
diff --git a/vendor/github.com/couchbase/vellum/transducer.go b/vendor/github.com/couchbase/vellum/transducer.go
new file mode 100644
index 0000000000..753c422d57
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/transducer.go
@@ -0,0 +1,55 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vellum
+
+// Transducer represents the general contract of a byte-based finite transducer
+type Transducer interface {
+
+ // all transducers are also automatons
+ Automaton
+
+ // IsMatchWithValue returns true if and only if the state is a match
+ // additionally it returns a states final value (if any)
+ IsMatchWithVal(int) (bool, uint64)
+
+ // Accept returns the next state given the input to the specified state
+ // additionally it returns the value associated with the transition
+ AcceptWithVal(int, byte) (int, uint64)
+}
+
+// TransducerGet implements an generic Get() method which works
+// on any implementation of Transducer
+// The caller MUST check the boolean return value for a match.
+// Zero is a valid value regardless of match status,
+// and if it is NOT a match, the value collected so far is returned.
+func TransducerGet(t Transducer, k []byte) (bool, uint64) {
+ var total uint64
+ i := 0
+ curr := t.Start()
+ for t.CanMatch(curr) && i < len(k) {
+ var transVal uint64
+ curr, transVal = t.AcceptWithVal(curr, k[i])
+ if curr == noneAddr {
+ break
+ }
+ total += transVal
+ i++
+ }
+ if i != len(k) {
+ return false, total
+ }
+ match, finalVal := t.IsMatchWithVal(curr)
+ return match, total + finalVal
+}
diff --git a/vendor/github.com/couchbase/vellum/utf8/utf8.go b/vendor/github.com/couchbase/vellum/utf8/utf8.go
new file mode 100644
index 0000000000..47dbe9d1c5
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/utf8/utf8.go
@@ -0,0 +1,246 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package utf8
+
+import (
+ "fmt"
+ "unicode/utf8"
+)
+
+// Sequences is a collection of Sequence
+type Sequences []Sequence
+
+// NewSequences constructs a collection of Sequence which describe the
+// byte ranges covered between the start and end runes.
+func NewSequences(start, end rune) (Sequences, error) {
+ var rv Sequences
+
+ var rangeStack rangeStack
+ rangeStack = rangeStack.Push(&scalarRange{start, end})
+
+ rangeStack, r := rangeStack.Pop()
+TOP:
+ for r != nil {
+ INNER:
+ for {
+ r1, r2 := r.split()
+ if r1 != nil {
+ rangeStack = rangeStack.Push(&scalarRange{r2.start, r2.end})
+ r.start = r1.start
+ r.end = r1.end
+ continue INNER
+ }
+ if !r.valid() {
+ rangeStack, r = rangeStack.Pop()
+ continue TOP
+ }
+ for i := 1; i < utf8.UTFMax; i++ {
+ max := maxScalarValue(i)
+ if r.start <= max && max < r.end {
+ rangeStack = rangeStack.Push(&scalarRange{max + 1, r.end})
+ r.end = max
+ continue INNER
+ }
+ }
+ asciiRange := r.ascii()
+ if asciiRange != nil {
+ rv = append(rv, Sequence{
+ asciiRange,
+ })
+ rangeStack, r = rangeStack.Pop()
+ continue TOP
+ }
+ for i := uint(1); i < utf8.UTFMax; i++ {
+ m := rune((1 << (6 * i)) - 1)
+ if (r.start & ^m) != (r.end & ^m) {
+ if (r.start & m) != 0 {
+ rangeStack = rangeStack.Push(&scalarRange{(r.start | m) + 1, r.end})
+ r.end = r.start | m
+ continue INNER
+ }
+ if (r.end & m) != m {
+ rangeStack = rangeStack.Push(&scalarRange{r.end & ^m, r.end})
+ r.end = (r.end & ^m) - 1
+ continue INNER
+ }
+ }
+ }
+ start := make([]byte, utf8.UTFMax)
+ end := make([]byte, utf8.UTFMax)
+ n, m := r.encode(start, end)
+ seq, err := SequenceFromEncodedRange(start[0:n], end[0:m])
+ if err != nil {
+ return nil, err
+ }
+ rv = append(rv, seq)
+ rangeStack, r = rangeStack.Pop()
+ continue TOP
+ }
+ }
+
+ return rv, nil
+}
+
+// Sequence is a collection of *Range
+type Sequence []*Range
+
+// SequenceFromEncodedRange creates sequence from the encoded bytes
+func SequenceFromEncodedRange(start, end []byte) (Sequence, error) {
+ if len(start) != len(end) {
+ return nil, fmt.Errorf("byte slices must be the same length")
+ }
+ switch len(start) {
+ case 2:
+ return Sequence{
+ &Range{start[0], end[0]},
+ &Range{start[1], end[1]},
+ }, nil
+ case 3:
+ return Sequence{
+ &Range{start[0], end[0]},
+ &Range{start[1], end[1]},
+ &Range{start[2], end[2]},
+ }, nil
+ case 4:
+ return Sequence{
+ &Range{start[0], end[0]},
+ &Range{start[1], end[1]},
+ &Range{start[2], end[2]},
+ &Range{start[3], end[3]},
+ }, nil
+ }
+
+ return nil, fmt.Errorf("invalid encoded byte length")
+}
+
+// Matches checks to see if the provided byte slice matches the Sequence
+func (u Sequence) Matches(bytes []byte) bool {
+ if len(bytes) < len(u) {
+ return false
+ }
+ for i := 0; i < len(u); i++ {
+ if !u[i].matches(bytes[i]) {
+ return false
+ }
+ }
+ return true
+}
+
+func (u Sequence) String() string {
+ switch len(u) {
+ case 1:
+ return fmt.Sprintf("%v", u[0])
+ case 2:
+ return fmt.Sprintf("%v%v", u[0], u[1])
+ case 3:
+ return fmt.Sprintf("%v%v%v", u[0], u[1], u[2])
+ case 4:
+ return fmt.Sprintf("%v%v%v%v", u[0], u[1], u[2], u[3])
+ default:
+ return fmt.Sprintf("invalid utf8 sequence")
+ }
+}
+
+// Range describes a single range of byte values
+type Range struct {
+ Start byte
+ End byte
+}
+
+func (u Range) matches(b byte) bool {
+ if u.Start <= b && b <= u.End {
+ return true
+ }
+ return false
+}
+
+func (u Range) String() string {
+ if u.Start == u.End {
+ return fmt.Sprintf("[%X]", u.Start)
+ }
+ return fmt.Sprintf("[%X-%X]", u.Start, u.End)
+}
+
+type scalarRange struct {
+ start rune
+ end rune
+}
+
+func (s *scalarRange) String() string {
+ return fmt.Sprintf("ScalarRange(%d,%d)", s.start, s.end)
+}
+
+// split this scalar range if it overlaps with a surrogate codepoint
+func (s *scalarRange) split() (*scalarRange, *scalarRange) {
+ if s.start < 0xe000 && s.end > 0xd7ff {
+ return &scalarRange{
+ start: s.start,
+ end: 0xd7ff,
+ },
+ &scalarRange{
+ start: 0xe000,
+ end: s.end,
+ }
+ }
+ return nil, nil
+}
+
+func (s *scalarRange) valid() bool {
+ return s.start <= s.end
+}
+
+func (s *scalarRange) ascii() *Range {
+ if s.valid() && s.end <= 0x7f {
+ return &Range{
+ Start: byte(s.start),
+ End: byte(s.end),
+ }
+ }
+ return nil
+}
+
+// start and end MUST have capacity for utf8.UTFMax bytes
+func (s *scalarRange) encode(start, end []byte) (int, int) {
+ n := utf8.EncodeRune(start, s.start)
+ m := utf8.EncodeRune(end, s.end)
+ return n, m
+}
+
+type rangeStack []*scalarRange
+
+func (s rangeStack) Push(v *scalarRange) rangeStack {
+ return append(s, v)
+}
+
+func (s rangeStack) Pop() (rangeStack, *scalarRange) {
+ l := len(s)
+ if l < 1 {
+ return s, nil
+ }
+ return s[:l-1], s[l-1]
+}
+
+func maxScalarValue(nbytes int) rune {
+ switch nbytes {
+ case 1:
+ return 0x007f
+ case 2:
+ return 0x07FF
+ case 3:
+ return 0xFFFF
+ default:
+ return 0x10FFFF
+ }
+}
diff --git a/vendor/github.com/couchbase/vellum/vellum.go b/vendor/github.com/couchbase/vellum/vellum.go
new file mode 100644
index 0000000000..b2537b3f00
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/vellum.go
@@ -0,0 +1,111 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+Package vellum is a library for building, serializing and executing an FST (finite
+state transducer).
+
+There are two distinct phases, building an FST and using it.
+
+When building an FST, you insert keys ([]byte) and their associated value
+(uint64). Insert operations MUST be done in lexicographic order. While
+building the FST, data is streamed to an underlying Writer. At the conclusion
+of building, you MUST call Close() on the builder.
+
+After completion of the build phase, you can either Open() the FST if you
+serialized it to disk. Alternatively, if you already have the bytes in
+memory, you can use Load(). By default, Open() will use mmap to avoid loading
+the entire file into memory.
+
+Once the FST is ready, you can use the Contains() method to see if a keys is
+in the FST. You can use the Get() method to see if a key is in the FST and
+retrieve it's associated value. And, you can use the Iterator method to
+enumerate key/value pairs within a specified range.
+
+*/
+package vellum
+
+import (
+ "errors"
+ "io"
+)
+
+// ErrOutOfOrder is returned when values are not inserted in
+// lexicographic order.
+var ErrOutOfOrder = errors.New("values not inserted in lexicographic order")
+
+// ErrIteratorDone is returned by Iterator/Next/Seek methods when the
+// Current() value pointed to by the iterator is greater than the last
+// key in this FST, or outside the configured startKeyInclusive/endKeyExclusive
+// range of the Iterator.
+var ErrIteratorDone = errors.New("iterator-done")
+
+// BuilderOpts is a structure to let advanced users customize the behavior
+// of the builder and some aspects of the generated FST.
+type BuilderOpts struct {
+ Encoder int
+ RegistryTableSize int
+ RegistryMRUSize int
+}
+
+// New returns a new Builder which will stream out the
+// underlying representation to the provided Writer as the set is built.
+func New(w io.Writer, opts *BuilderOpts) (*Builder, error) {
+ return newBuilder(w, opts)
+}
+
+// Open loads the FST stored in the provided path
+func Open(path string) (*FST, error) {
+ return open(path)
+}
+
+// Load will return the FST represented by the provided byte slice.
+func Load(data []byte) (*FST, error) {
+ return new(data, nil)
+}
+
+// Merge will iterate through the provided Iterators, merge duplicate keys
+// with the provided MergeFunc, and build a new FST to the provided Writer.
+func Merge(w io.Writer, opts *BuilderOpts, itrs []Iterator, f MergeFunc) error {
+ builder, err := New(w, opts)
+ if err != nil {
+ return err
+ }
+
+ itr, err := NewMergeIterator(itrs, f)
+ for err == nil {
+ k, v := itr.Current()
+ err = builder.Insert(k, v)
+ if err != nil {
+ return err
+ }
+ err = itr.Next()
+ }
+
+ if err != nil && err != ErrIteratorDone {
+ return err
+ }
+
+ err = itr.Close()
+ if err != nil {
+ return err
+ }
+
+ err = builder.Close()
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
diff --git a/vendor/github.com/couchbase/vellum/vellum_mmap.go b/vendor/github.com/couchbase/vellum/vellum_mmap.go
new file mode 100644
index 0000000000..5acd2f4707
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/vellum_mmap.go
@@ -0,0 +1,60 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !nommap
+
+package vellum
+
+import (
+ "os"
+
+ mmap "github.com/edsrzf/mmap-go"
+)
+
+type mmapWrapper struct {
+ f *os.File
+ mm mmap.MMap
+}
+
+func (m *mmapWrapper) Close() (err error) {
+ if m.mm != nil {
+ err = m.mm.Unmap()
+ }
+ // try to close file even if unmap failed
+ if m.f != nil {
+ err2 := m.f.Close()
+ if err == nil {
+ // try to return first error
+ err = err2
+ }
+ }
+ return
+}
+
+func open(path string) (*FST, error) {
+ f, err := os.Open(path)
+ if err != nil {
+ return nil, err
+ }
+ mm, err := mmap.Map(f, mmap.RDONLY, 0)
+ if err != nil {
+ // mmap failed, try to close the file
+ _ = f.Close()
+ return nil, err
+ }
+ return new(mm, &mmapWrapper{
+ f: f,
+ mm: mm,
+ })
+}
diff --git a/vendor/github.com/couchbase/vellum/vellum_nommap.go b/vendor/github.com/couchbase/vellum/vellum_nommap.go
new file mode 100644
index 0000000000..e985272872
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/vellum_nommap.go
@@ -0,0 +1,27 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build nommap
+
+package vellum
+
+import "io/ioutil"
+
+func open(path string) (*FST, error) {
+ data, err := ioutil.ReadFile(string)
+ if err != nil {
+ return nil, err
+ }
+ return new(data, nil)
+}
diff --git a/vendor/github.com/couchbase/vellum/writer.go b/vendor/github.com/couchbase/vellum/writer.go
new file mode 100644
index 0000000000..d655d47f7f
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/writer.go
@@ -0,0 +1,92 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vellum
+
+import (
+ "bufio"
+ "io"
+)
+
+// A writer is a buffered writer used by vellum. It counts how many bytes have
+// been written and has some convenience methods used for encoding the data.
+type writer struct {
+ w *bufio.Writer
+ counter int
+}
+
+func newWriter(w io.Writer) *writer {
+ return &writer{
+ w: bufio.NewWriter(w),
+ }
+}
+
+func (w *writer) Reset(newWriter io.Writer) {
+ w.w.Reset(newWriter)
+ w.counter = 0
+}
+
+func (w *writer) WriteByte(c byte) error {
+ err := w.w.WriteByte(c)
+ if err != nil {
+ return err
+ }
+ w.counter++
+ return nil
+}
+
+func (w *writer) Write(p []byte) (int, error) {
+ n, err := w.w.Write(p)
+ w.counter += n
+ return n, err
+}
+
+func (w *writer) Flush() error {
+ return w.w.Flush()
+}
+
+func (w *writer) WritePackedUintIn(v uint64, n int) error {
+ for shift := uint(0); shift < uint(n*8); shift += 8 {
+ err := w.WriteByte(byte(v >> shift))
+ if err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func (w *writer) WritePackedUint(v uint64) error {
+ n := packedSize(v)
+ return w.WritePackedUintIn(v, n)
+}
+
+func packedSize(n uint64) int {
+ if n < 1<<8 {
+ return 1
+ } else if n < 1<<16 {
+ return 2
+ } else if n < 1<<24 {
+ return 3
+ } else if n < 1<<32 {
+ return 4
+ } else if n < 1<<40 {
+ return 5
+ } else if n < 1<<48 {
+ return 6
+ } else if n < 1<<56 {
+ return 7
+ }
+ return 8
+}