aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/couchbase
diff options
context:
space:
mode:
authorLunny Xiao <xiaolunwen@gmail.com>2019-02-18 08:50:26 +0800
committertechknowlogick <matti@mdranta.net>2019-02-17 19:50:26 -0500
commita380cfd8e03148a05859a7496d235fa14bde4796 (patch)
tree9ef2f4b66804e73e242d0d07fd30769898a0ca23 /vendor/github.com/couchbase
parent11e316654e523bd668a20e1e6a95da3f5b9b22de (diff)
downloadgitea-a380cfd8e03148a05859a7496d235fa14bde4796.tar.gz
gitea-a380cfd8e03148a05859a7496d235fa14bde4796.zip
Update bleve dependency to latest master revision (#6100)
* update bleve to master b17287a86f6cac923a5d886e10618df994eeb54b6724eac2e3b8dde89cfbe3a2 * remove unused pkg from dep file * change bleve from master to recent revision
Diffstat (limited to 'vendor/github.com/couchbase')
-rw-r--r--vendor/github.com/couchbase/vellum/automaton.go2
-rw-r--r--vendor/github.com/couchbase/vellum/builder.go159
-rw-r--r--vendor/github.com/couchbase/vellum/decoder_v1.go4
-rw-r--r--vendor/github.com/couchbase/vellum/fst.go50
-rw-r--r--vendor/github.com/couchbase/vellum/fst_iterator.go131
-rw-r--r--vendor/github.com/couchbase/vellum/levenshtein2/LICENSE203
-rw-r--r--vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go125
-rw-r--r--vendor/github.com/couchbase/vellum/levenshtein2/dfa.go250
-rw-r--r--vendor/github.com/couchbase/vellum/levenshtein2/levenshtein.go64
-rw-r--r--vendor/github.com/couchbase/vellum/levenshtein2/levenshtein_nfa.go292
-rw-r--r--vendor/github.com/couchbase/vellum/levenshtein2/parametric_dfa.go349
-rw-r--r--vendor/github.com/couchbase/vellum/regexp/compile.go79
-rw-r--r--vendor/github.com/couchbase/vellum/regexp/dfa.go38
-rw-r--r--vendor/github.com/couchbase/vellum/regexp/inst.go2
-rw-r--r--vendor/github.com/couchbase/vellum/regexp/regexp.go10
-rw-r--r--vendor/github.com/couchbase/vellum/registry.go36
-rw-r--r--vendor/github.com/couchbase/vellum/utf8/utf8.go98
17 files changed, 1637 insertions, 255 deletions
diff --git a/vendor/github.com/couchbase/vellum/automaton.go b/vendor/github.com/couchbase/vellum/automaton.go
index 47526595bc..70398f2d47 100644
--- a/vendor/github.com/couchbase/vellum/automaton.go
+++ b/vendor/github.com/couchbase/vellum/automaton.go
@@ -81,5 +81,5 @@ func (m *AlwaysMatch) Accept(int, byte) int {
return 0
}
-// creating an alwaysMatchAutomaton to avoid unnecesary repeated allocations.
+// creating an alwaysMatchAutomaton to avoid unnecessary repeated allocations.
var alwaysMatchAutomaton = &AlwaysMatch{}
diff --git a/vendor/github.com/couchbase/vellum/builder.go b/vendor/github.com/couchbase/vellum/builder.go
index b21db98072..f793329575 100644
--- a/vendor/github.com/couchbase/vellum/builder.go
+++ b/vendor/github.com/couchbase/vellum/builder.go
@@ -38,8 +38,7 @@ type Builder struct {
encoder encoder
opts *BuilderOpts
- builderNodePool builderNodePool
- transitionPool transitionPool
+ builderNodePool *builderNodePool
}
const noneAddr = 1
@@ -51,12 +50,14 @@ func newBuilder(w io.Writer, opts *BuilderOpts) (*Builder, error) {
if opts == nil {
opts = defaultBuilderOpts
}
+ builderNodePool := &builderNodePool{}
rv := &Builder{
- registry: newRegistry(opts.RegistryTableSize, opts.RegistryMRUSize),
- opts: opts,
- lastAddr: noneAddr,
+ unfinished: newUnfinishedNodes(builderNodePool),
+ registry: newRegistry(builderNodePool, opts.RegistryTableSize, opts.RegistryMRUSize),
+ builderNodePool: builderNodePool,
+ opts: opts,
+ lastAddr: noneAddr,
}
- rv.unfinished = newUnfinishedNodes(&rv.builderNodePool)
var err error
rv.encoder, err = loadEncoder(opts.Encoder, w)
@@ -71,9 +72,7 @@ func newBuilder(w io.Writer, opts *BuilderOpts) (*Builder, error) {
}
func (b *Builder) Reset(w io.Writer) error {
- b.transitionPool.reset()
- b.builderNodePool.reset()
- b.unfinished.Reset(&b.builderNodePool)
+ b.unfinished.Reset()
b.registry.Reset()
b.lastAddr = noneAddr
b.encoder.reset(w)
@@ -107,7 +106,7 @@ func (b *Builder) Insert(key []byte, val uint64) error {
return err
}
b.copyLastKey(key)
- b.unfinished.addSuffix(key[prefixLen:], out, &b.builderNodePool)
+ b.unfinished.addSuffix(key[prefixLen:], out)
return nil
}
@@ -142,7 +141,7 @@ func (b *Builder) compileFrom(iState int) error {
if addr == noneAddr {
node = b.unfinished.popEmpty()
} else {
- node = b.unfinished.popFreeze(addr, &b.transitionPool)
+ node = b.unfinished.popFreeze(addr)
}
var err error
addr, err = b.compile(node)
@@ -150,7 +149,7 @@ func (b *Builder) compileFrom(iState int) error {
return nil
}
}
- b.unfinished.topLastFreeze(addr, &b.transitionPool)
+ b.unfinished.topLastFreeze(addr)
return nil
}
@@ -183,22 +182,25 @@ type unfinishedNodes struct {
// this means calls get() and pushXYZ() must be paired,
// as well as calls put() and popXYZ()
cache []builderNodeUnfinished
+
+ builderNodePool *builderNodePool
}
-func (u *unfinishedNodes) Reset(p *builderNodePool) {
+func (u *unfinishedNodes) Reset() {
u.stack = u.stack[:0]
for i := 0; i < len(u.cache); i++ {
u.cache[i] = builderNodeUnfinished{}
}
- u.pushEmpty(false, p)
+ u.pushEmpty(false)
}
func newUnfinishedNodes(p *builderNodePool) *unfinishedNodes {
rv := &unfinishedNodes{
- stack: make([]*builderNodeUnfinished, 0, 64),
- cache: make([]builderNodeUnfinished, 64),
+ stack: make([]*builderNodeUnfinished, 0, 64),
+ cache: make([]builderNodeUnfinished, 64),
+ builderNodePool: p,
}
- rv.pushEmpty(false, p)
+ rv.pushEmpty(false)
return rv
}
@@ -249,9 +251,9 @@ func (u *unfinishedNodes) findCommonPrefixAndSetOutput(key []byte,
return i, out
}
-func (u *unfinishedNodes) pushEmpty(final bool, p *builderNodePool) {
+func (u *unfinishedNodes) pushEmpty(final bool) {
next := u.get()
- next.node = p.alloc()
+ next.node = u.builderNodePool.Get()
next.node.final = final
u.stack = append(u.stack, next)
}
@@ -265,11 +267,11 @@ func (u *unfinishedNodes) popRoot() *builderNode {
return rv
}
-func (u *unfinishedNodes) popFreeze(addr int, tp *transitionPool) *builderNode {
+func (u *unfinishedNodes) popFreeze(addr int) *builderNode {
l := len(u.stack)
var unfinished *builderNodeUnfinished
u.stack, unfinished = u.stack[:l-1], u.stack[l-1]
- unfinished.lastCompiled(addr, tp)
+ unfinished.lastCompiled(addr)
rv := unfinished.node
u.put()
return rv
@@ -289,12 +291,12 @@ func (u *unfinishedNodes) setRootOutput(out uint64) {
u.stack[0].node.finalOutput = out
}
-func (u *unfinishedNodes) topLastFreeze(addr int, tp *transitionPool) {
+func (u *unfinishedNodes) topLastFreeze(addr int) {
last := len(u.stack) - 1
- u.stack[last].lastCompiled(addr, tp)
+ u.stack[last].lastCompiled(addr)
}
-func (u *unfinishedNodes) addSuffix(bs []byte, out uint64, p *builderNodePool) {
+func (u *unfinishedNodes) addSuffix(bs []byte, out uint64) {
if len(bs) == 0 {
return
}
@@ -304,13 +306,13 @@ func (u *unfinishedNodes) addSuffix(bs []byte, out uint64, p *builderNodePool) {
u.stack[last].lastOut = out
for _, b := range bs[1:] {
next := u.get()
- next.node = p.alloc()
+ next.node = u.builderNodePool.Get()
next.hasLastT = true
next.lastIn = b
next.lastOut = 0
u.stack = append(u.stack, next)
}
- u.pushEmpty(true, p)
+ u.pushEmpty(true)
}
type builderNodeUnfinished struct {
@@ -320,17 +322,17 @@ type builderNodeUnfinished struct {
hasLastT bool
}
-func (b *builderNodeUnfinished) lastCompiled(addr int, tp *transitionPool) {
+func (b *builderNodeUnfinished) lastCompiled(addr int) {
if b.hasLastT {
transIn := b.lastIn
transOut := b.lastOut
b.hasLastT = false
b.lastOut = 0
- trans := tp.alloc()
- trans.in = transIn
- trans.out = transOut
- trans.addr = addr
- b.node.trans = append(b.node.trans, trans)
+ b.node.trans = append(b.node.trans, transition{
+ in: transIn,
+ out: transOut,
+ addr: addr,
+ })
}
}
@@ -338,8 +340,8 @@ func (b *builderNodeUnfinished) addOutputPrefix(prefix uint64) {
if b.node.final {
b.node.finalOutput = outputCat(prefix, b.node.finalOutput)
}
- for _, t := range b.node.trans {
- t.out = outputCat(prefix, t.out)
+ for i := range b.node.trans {
+ b.node.trans[i].out = outputCat(prefix, b.node.trans[i].out)
}
if b.hasLastT {
b.lastOut = outputCat(prefix, b.lastOut)
@@ -348,8 +350,22 @@ func (b *builderNodeUnfinished) addOutputPrefix(prefix uint64) {
type builderNode struct {
finalOutput uint64
- trans []*transition
+ trans []transition
final bool
+
+ // intrusive linked list
+ next *builderNode
+}
+
+// reset resets the receiver builderNode to a re-usable state.
+func (n *builderNode) reset() {
+ n.final = false
+ n.finalOutput = 0
+ for i := range n.trans {
+ n.trans[i] = emptyTransition
+ }
+ n.trans = n.trans[:0]
+ n.next = nil
}
func (n *builderNode) equiv(o *builderNode) bool {
@@ -377,6 +393,8 @@ func (n *builderNode) equiv(o *builderNode) bool {
return true
}
+var emptyTransition = transition{}
+
type transition struct {
out uint64
addr int
@@ -398,56 +416,37 @@ func outputCat(l, r uint64) uint64 {
return l + r
}
-// the next builderNode to alloc() will be all[nextOuter][nextInner]
+// builderNodePool pools builderNodes using a singly linked list.
+//
+// NB: builderNode lifecycle is described by the following interactions -
+// +------------------------+ +----------------------+
+// | Unfinished Nodes | Transfer once | Registry |
+// |(not frozen builderNode)|-----builderNode is ------->| (frozen builderNode) |
+// +------------------------+ marked frozen +----------------------+
+// ^ |
+// | |
+// | Put()
+// | Get() on +-------------------+ when
+// +-new char--------| builderNode Pool |<-----------evicted
+// +-------------------+
type builderNodePool struct {
- all [][]builderNode
- nextOuter int
- nextInner int
-}
-
-func (p *builderNodePool) reset() {
- p.nextOuter = 0
- p.nextInner = 0
+ head *builderNode
}
-func (p *builderNodePool) alloc() *builderNode {
- if p.nextOuter >= len(p.all) {
- p.all = append(p.all, make([]builderNode, 256))
+func (p *builderNodePool) Get() *builderNode {
+ if p.head == nil {
+ return &builderNode{}
}
- rv := &p.all[p.nextOuter][p.nextInner]
- p.nextInner += 1
- if p.nextInner >= len(p.all[p.nextOuter]) {
- p.nextOuter += 1
- p.nextInner = 0
- }
- rv.finalOutput = 0
- rv.trans = rv.trans[:0]
- rv.final = false
- return rv
+ head := p.head
+ p.head = p.head.next
+ return head
}
-// the next transition to alloc() will be all[nextOuter][nextInner]
-type transitionPool struct {
- all [][]transition
- nextOuter int
- nextInner int
-}
-
-func (p *transitionPool) reset() {
- p.nextOuter = 0
- p.nextInner = 0
-}
-
-func (p *transitionPool) alloc() *transition {
- if p.nextOuter >= len(p.all) {
- p.all = append(p.all, make([]transition, 256))
- }
- rv := &p.all[p.nextOuter][p.nextInner]
- p.nextInner += 1
- if p.nextInner >= len(p.all[p.nextOuter]) {
- p.nextOuter += 1
- p.nextInner = 0
+func (p *builderNodePool) Put(v *builderNode) {
+ if v == nil {
+ return
}
- *rv = transition{}
- return rv
+ v.reset()
+ v.next = p.head
+ p.head = v
}
diff --git a/vendor/github.com/couchbase/vellum/decoder_v1.go b/vendor/github.com/couchbase/vellum/decoder_v1.go
index 5a0ea68871..d56e61db58 100644
--- a/vendor/github.com/couchbase/vellum/decoder_v1.go
+++ b/vendor/github.com/couchbase/vellum/decoder_v1.go
@@ -29,8 +29,6 @@ func init() {
type decoderV1 struct {
data []byte
- root uint64
- len uint64
}
func newDecoderV1(data []byte) *decoderV1 {
@@ -219,7 +217,7 @@ func (f *fstStateV1) Final() bool {
}
func (f *fstStateV1) FinalOutput() uint64 {
- if f.numTrans > 0 && f.final && f.outSize > 0 {
+ if f.final && f.outSize > 0 {
return readPackedUint(f.data[f.outFinal : f.outFinal+f.outSize])
}
return 0
diff --git a/vendor/github.com/couchbase/vellum/fst.go b/vendor/github.com/couchbase/vellum/fst.go
index ecc528395c..64ee21a410 100644
--- a/vendor/github.com/couchbase/vellum/fst.go
+++ b/vendor/github.com/couchbase/vellum/fst.go
@@ -74,8 +74,8 @@ func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) {
if err != nil {
return 0, false, err
}
- for i := range input {
- _, curr, output := state.TransitionFor(input[i])
+ for _, c := range input {
+ _, curr, output := state.TransitionFor(c)
if curr == noneAddr {
return 0, false, nil
}
@@ -243,6 +243,52 @@ func (f *FST) Reader() (*Reader, error) {
return &Reader{f: f}, nil
}
+func (f *FST) GetMinKey() ([]byte, error) {
+ var rv []byte
+
+ curr := f.decoder.getRoot()
+ state, err := f.decoder.stateAt(curr, nil)
+ if err != nil {
+ return nil, err
+ }
+
+ for !state.Final() {
+ nextTrans := state.TransitionAt(0)
+ _, curr, _ = state.TransitionFor(nextTrans)
+ state, err = f.decoder.stateAt(curr, state)
+ if err != nil {
+ return nil, err
+ }
+
+ rv = append(rv, nextTrans)
+ }
+
+ return rv, nil
+}
+
+func (f *FST) GetMaxKey() ([]byte, error) {
+ var rv []byte
+
+ curr := f.decoder.getRoot()
+ state, err := f.decoder.stateAt(curr, nil)
+ if err != nil {
+ return nil, err
+ }
+
+ for state.NumTransitions() > 0 {
+ nextTrans := state.TransitionAt(state.NumTransitions() - 1)
+ _, curr, _ = state.TransitionFor(nextTrans)
+ state, err = f.decoder.stateAt(curr, state)
+ if err != nil {
+ return nil, err
+ }
+
+ rv = append(rv, nextTrans)
+ }
+
+ return rv, nil
+}
+
// A Reader is meant for a single threaded use
type Reader struct {
f *FST
diff --git a/vendor/github.com/couchbase/vellum/fst_iterator.go b/vendor/github.com/couchbase/vellum/fst_iterator.go
index 389ac64aab..eb731395b2 100644
--- a/vendor/github.com/couchbase/vellum/fst_iterator.go
+++ b/vendor/github.com/couchbase/vellum/fst_iterator.go
@@ -76,7 +76,8 @@ func newIterator(f *FST, startKeyInclusive, endKeyExclusive []byte,
// Reset resets the Iterator' internal state to allow for iterator
// reuse (e.g. pooling).
-func (i *FSTIterator) Reset(f *FST, startKeyInclusive, endKeyExclusive []byte, aut Automaton) error {
+func (i *FSTIterator) Reset(f *FST,
+ startKeyInclusive, endKeyExclusive []byte, aut Automaton) error {
if aut == nil {
aut = alwaysMatchAutomaton
}
@@ -91,14 +92,14 @@ func (i *FSTIterator) Reset(f *FST, startKeyInclusive, endKeyExclusive []byte, a
// pointTo attempts to point us to the specified location
func (i *FSTIterator) pointTo(key []byte) error {
-
// tried to seek before start
if bytes.Compare(key, i.startKeyInclusive) < 0 {
key = i.startKeyInclusive
}
- // trid to see past end
- if i.endKeyExclusive != nil && bytes.Compare(key, i.endKeyExclusive) > 0 {
+ // tried to seek past end
+ if i.endKeyExclusive != nil &&
+ bytes.Compare(key, i.endKeyExclusive) > 0 {
key = i.endKeyExclusive
}
@@ -121,21 +122,23 @@ func (i *FSTIterator) pointTo(key []byte) error {
i.statesStack = append(i.statesStack, root)
i.autStatesStack = append(i.autStatesStack, autStart)
for j := 0; j < len(key); j++ {
+ keyJ := key[j]
curr := i.statesStack[len(i.statesStack)-1]
autCurr := i.autStatesStack[len(i.autStatesStack)-1]
- pos, nextAddr, nextVal := curr.TransitionFor(key[j])
+ pos, nextAddr, nextVal := curr.TransitionFor(keyJ)
if nextAddr == noneAddr {
// needed transition doesn't exist
// find last trans before the one we needed
- for q := 0; q < curr.NumTransitions(); q++ {
- if curr.TransitionAt(q) < key[j] {
+ for q := curr.NumTransitions() - 1; q >= 0; q-- {
+ if curr.TransitionAt(q) < keyJ {
maxQ = q
+ break
}
}
break
}
- autNext := i.aut.Accept(autCurr, key[j])
+ autNext := i.aut.Accept(autCurr, keyJ)
next, err := i.f.decoder.stateAt(nextAddr, nil)
if err != nil {
@@ -143,14 +146,16 @@ func (i *FSTIterator) pointTo(key []byte) error {
}
i.statesStack = append(i.statesStack, next)
- i.keysStack = append(i.keysStack, key[j])
+ i.keysStack = append(i.keysStack, keyJ)
i.keysPosStack = append(i.keysPosStack, pos)
i.valsStack = append(i.valsStack, nextVal)
i.autStatesStack = append(i.autStatesStack, autNext)
continue
}
- if !i.statesStack[len(i.statesStack)-1].Final() || !i.aut.IsMatch(i.autStatesStack[len(i.autStatesStack)-1]) || bytes.Compare(i.keysStack, key) < 0 {
+ if !i.statesStack[len(i.statesStack)-1].Final() ||
+ !i.aut.IsMatch(i.autStatesStack[len(i.autStatesStack)-1]) ||
+ bytes.Compare(i.keysStack, key) < 0 {
return i.next(maxQ)
}
@@ -181,15 +186,12 @@ func (i *FSTIterator) Next() error {
}
func (i *FSTIterator) next(lastOffset int) error {
-
// remember where we started
- if cap(i.nextStart) < len(i.keysStack) {
- i.nextStart = make([]byte, len(i.keysStack))
- } else {
- i.nextStart = i.nextStart[0:len(i.keysStack)]
- }
- copy(i.nextStart, i.keysStack)
+ i.nextStart = append(i.nextStart[:0], i.keysStack...)
+ nextOffset := lastOffset + 1
+
+OUTER:
for true {
curr := i.statesStack[len(i.statesStack)-1]
autCurr := i.autStatesStack[len(i.autStatesStack)-1]
@@ -200,58 +202,62 @@ func (i *FSTIterator) next(lastOffset int) error {
return nil
}
- nextOffset := lastOffset + 1
- if nextOffset < curr.NumTransitions() {
+ numTrans := curr.NumTransitions()
+
+ INNER:
+ for nextOffset < numTrans {
t := curr.TransitionAt(nextOffset)
autNext := i.aut.Accept(autCurr, t)
- if i.aut.CanMatch(autNext) {
- pos, nextAddr, v := curr.TransitionFor(t)
-
- // the next slot in the statesStack might have an
- // fstState instance that we can reuse
- var nextPrealloc fstState
- if len(i.statesStack) < cap(i.statesStack) {
- nextPrealloc = i.statesStack[0:cap(i.statesStack)][len(i.statesStack)]
- }
+ if !i.aut.CanMatch(autNext) {
+ nextOffset += 1
+ continue INNER
+ }
- // push onto stack
- next, err := i.f.decoder.stateAt(nextAddr, nextPrealloc)
- if err != nil {
- return err
- }
- i.statesStack = append(i.statesStack, next)
- i.keysStack = append(i.keysStack, t)
- i.keysPosStack = append(i.keysPosStack, pos)
- i.valsStack = append(i.valsStack, v)
- i.autStatesStack = append(i.autStatesStack, autNext)
- lastOffset = -1
-
- // check to see if new keystack might have gone too far
- if i.endKeyExclusive != nil && bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 {
- return ErrIteratorDone
- }
- } else {
- lastOffset = nextOffset
+ pos, nextAddr, v := curr.TransitionFor(t)
+
+ // the next slot in the statesStack might have an
+ // fstState instance that we can reuse
+ var nextPrealloc fstState
+ if len(i.statesStack) < cap(i.statesStack) {
+ nextPrealloc = i.statesStack[0:cap(i.statesStack)][len(i.statesStack)]
}
- continue
+ // push onto stack
+ next, err := i.f.decoder.stateAt(nextAddr, nextPrealloc)
+ if err != nil {
+ return err
+ }
+
+ i.statesStack = append(i.statesStack, next)
+ i.keysStack = append(i.keysStack, t)
+ i.keysPosStack = append(i.keysPosStack, pos)
+ i.valsStack = append(i.valsStack, v)
+ i.autStatesStack = append(i.autStatesStack, autNext)
+
+ // check to see if new keystack might have gone too far
+ if i.endKeyExclusive != nil &&
+ bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 {
+ return ErrIteratorDone
+ }
+
+ nextOffset = 0
+ continue OUTER
}
- if len(i.statesStack) > 1 {
- // no transitions, and still room to pop
- i.statesStack = i.statesStack[:len(i.statesStack)-1]
- i.keysStack = i.keysStack[:len(i.keysStack)-1]
- lastOffset = i.keysPosStack[len(i.keysPosStack)-1]
-
- i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-1]
- i.valsStack = i.valsStack[:len(i.valsStack)-1]
- i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1]
- continue
- } else {
+ if len(i.statesStack) <= 1 {
// stack len is 1 (root), can't go back further, we're done
break
}
+ // no transitions, and still room to pop
+ i.statesStack = i.statesStack[:len(i.statesStack)-1]
+ i.keysStack = i.keysStack[:len(i.keysStack)-1]
+
+ nextOffset = i.keysPosStack[len(i.keysPosStack)-1] + 1
+
+ i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-1]
+ i.valsStack = i.valsStack[:len(i.valsStack)-1]
+ i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1]
}
return ErrIteratorDone
@@ -262,15 +268,12 @@ func (i *FSTIterator) next(lastOffset int) error {
// seek operation would go past the last key, or outside the configured
// startKeyInclusive/endKeyExclusive then ErrIteratorDone is returned.
func (i *FSTIterator) Seek(key []byte) error {
- err := i.pointTo(key)
- if err != nil {
- return err
- }
- return nil
+ return i.pointTo(key)
}
// Close will free any resources held by this iterator.
func (i *FSTIterator) Close() error {
- // at the moment we don't do anything, but wanted this for API completeness
+ // at the moment we don't do anything,
+ // but wanted this for API completeness
return nil
}
diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/LICENSE b/vendor/github.com/couchbase/vellum/levenshtein2/LICENSE
new file mode 100644
index 0000000000..6b0b1270ff
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/levenshtein2/LICENSE
@@ -0,0 +1,203 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go b/vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go
new file mode 100644
index 0000000000..4bf64fef2e
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go
@@ -0,0 +1,125 @@
+// Copyright (c) 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package levenshtein2
+
+import (
+ "fmt"
+ "sort"
+ "unicode/utf8"
+)
+
+type FullCharacteristicVector []uint32
+
+func (fcv FullCharacteristicVector) shiftAndMask(offset, mask uint32) uint32 {
+ bucketID := offset / 32
+ align := offset - bucketID*32
+ if align == 0 {
+ return fcv[bucketID] & mask
+ }
+ left := fcv[bucketID] >> align
+ right := fcv[bucketID+1] << (32 - align)
+ return (left | right) & mask
+}
+
+type tuple struct {
+ char rune
+ fcv FullCharacteristicVector
+}
+
+type sortRunes []rune
+
+func (s sortRunes) Less(i, j int) bool {
+ return s[i] < s[j]
+}
+
+func (s sortRunes) Swap(i, j int) {
+ s[i], s[j] = s[j], s[i]
+}
+
+func (s sortRunes) Len() int {
+ return len(s)
+}
+
+func sortRune(r []rune) []rune {
+ sort.Sort(sortRunes(r))
+ return r
+}
+
+type Alphabet struct {
+ charset []tuple
+ index uint32
+}
+
+func (a *Alphabet) resetNext() {
+ a.index = 0
+}
+
+func (a *Alphabet) next() (rune, FullCharacteristicVector, error) {
+ if int(a.index) >= len(a.charset) {
+ return 0, nil, fmt.Errorf("eof")
+ }
+
+ rv := a.charset[a.index]
+ a.index++
+ return rv.char, rv.fcv, nil
+}
+
+func dedupe(in string) string {
+ lookUp := make(map[rune]struct{}, len(in))
+ var rv string
+ for len(in) > 0 {
+ r, size := utf8.DecodeRuneInString(in)
+ in = in[size:]
+ if _, ok := lookUp[r]; !ok {
+ rv += string(r)
+ lookUp[r] = struct{}{}
+ }
+ }
+ return rv
+}
+
+func queryChars(qChars string) Alphabet {
+ chars := dedupe(qChars)
+ inChars := sortRune([]rune(chars))
+ charsets := make([]tuple, 0, len(inChars))
+
+ for _, c := range inChars {
+ tempChars := qChars
+ var bits []uint32
+ for len(tempChars) > 0 {
+ var chunk string
+ if len(tempChars) > 32 {
+ chunk = tempChars[0:32]
+ tempChars = tempChars[32:]
+ } else {
+ chunk = tempChars
+ tempChars = tempChars[:0]
+ }
+
+ chunkBits := uint32(0)
+ bit := uint32(1)
+ for _, chr := range chunk {
+ if chr == c {
+ chunkBits |= bit
+ }
+ bit <<= 1
+ }
+ bits = append(bits, chunkBits)
+ }
+ bits = append(bits, 0)
+ charsets = append(charsets, tuple{char: c, fcv: FullCharacteristicVector(bits)})
+ }
+ return Alphabet{charset: charsets}
+}
diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/dfa.go b/vendor/github.com/couchbase/vellum/levenshtein2/dfa.go
new file mode 100644
index 0000000000..e82a780a52
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/levenshtein2/dfa.go
@@ -0,0 +1,250 @@
+// Copyright (c) 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package levenshtein2
+
+import (
+ "fmt"
+ "math"
+)
+
+const SinkState = uint32(0)
+
+type DFA struct {
+ transitions [][256]uint32
+ distances []Distance
+ initState int
+ ed uint8
+}
+
+/// Returns the initial state
+func (d *DFA) initialState() int {
+ return d.initState
+}
+
+/// Returns the Levenshtein distance associated to the
+/// current state.
+func (d *DFA) distance(stateId int) Distance {
+ return d.distances[stateId]
+}
+
+/// Returns the number of states in the `DFA`.
+func (d *DFA) numStates() int {
+ return len(d.transitions)
+}
+
+/// Returns the destination state reached after consuming a given byte.
+func (d *DFA) transition(fromState int, b uint8) int {
+ return int(d.transitions[fromState][b])
+}
+
+func (d *DFA) eval(bytes []uint8) Distance {
+ state := d.initialState()
+
+ for _, b := range bytes {
+ state = d.transition(state, b)
+ }
+
+ return d.distance(state)
+}
+
+func (d *DFA) Start() int {
+ return int(d.initialState())
+}
+
+func (d *DFA) IsMatch(state int) bool {
+ if _, ok := d.distance(state).(Exact); ok {
+ return true
+ }
+ return false
+}
+
+func (d *DFA) CanMatch(state int) bool {
+ return state > 0 && state < d.numStates()
+}
+
+func (d *DFA) Accept(state int, b byte) int {
+ return int(d.transition(state, b))
+}
+
+// WillAlwaysMatch returns if the specified state will always end in a
+// matching state.
+func (d *DFA) WillAlwaysMatch(state int) bool {
+ return false
+}
+
+func fill(dest []uint32, val uint32) {
+ for i := range dest {
+ dest[i] = val
+ }
+}
+
+func fillTransitions(dest *[256]uint32, val uint32) {
+ for i := range dest {
+ dest[i] = val
+ }
+}
+
+type Utf8DFAStateBuilder struct {
+ dfaBuilder *Utf8DFABuilder
+ stateID uint32
+ defaultSuccessor []uint32
+}
+
+func (sb *Utf8DFAStateBuilder) addTransitionID(fromStateID uint32, b uint8,
+ toStateID uint32) {
+ sb.dfaBuilder.transitions[fromStateID][b] = toStateID
+}
+
+func (sb *Utf8DFAStateBuilder) addTransition(in rune, toStateID uint32) {
+ fromStateID := sb.stateID
+ chars := []byte(string(in))
+ lastByte := chars[len(chars)-1]
+
+ for i, ch := range chars[:len(chars)-1] {
+ remNumBytes := len(chars) - i - 1
+ defaultSuccessor := sb.defaultSuccessor[remNumBytes]
+ intermediateStateID := sb.dfaBuilder.transitions[fromStateID][ch]
+
+ if intermediateStateID == defaultSuccessor {
+ intermediateStateID = sb.dfaBuilder.allocate()
+ fillTransitions(&sb.dfaBuilder.transitions[intermediateStateID],
+ sb.defaultSuccessor[remNumBytes-1])
+ }
+
+ sb.addTransitionID(fromStateID, ch, intermediateStateID)
+ fromStateID = intermediateStateID
+ }
+
+ toStateIDDecoded := sb.dfaBuilder.getOrAllocate(original(toStateID))
+ sb.addTransitionID(fromStateID, lastByte, toStateIDDecoded)
+}
+
+type Utf8StateId uint32
+
+func original(stateId uint32) Utf8StateId {
+ return predecessor(stateId, 0)
+}
+
+func predecessor(stateId uint32, numSteps uint8) Utf8StateId {
+ return Utf8StateId(stateId*4 + uint32(numSteps))
+}
+
+// Utf8DFABuilder makes it possible to define a DFA
+// that takes unicode character, and build a `DFA`
+// that operates on utf-8 encoded
+type Utf8DFABuilder struct {
+ index []uint32
+ distances []Distance
+ transitions [][256]uint32
+ initialState uint32
+ numStates uint32
+ maxNumStates uint32
+}
+
+func withMaxStates(maxStates uint32) *Utf8DFABuilder {
+ rv := &Utf8DFABuilder{
+ index: make([]uint32, maxStates*2+100),
+ distances: make([]Distance, 0, maxStates),
+ transitions: make([][256]uint32, 0, maxStates),
+ maxNumStates: maxStates,
+ }
+
+ for i := range rv.index {
+ rv.index[i] = math.MaxUint32
+ }
+
+ return rv
+}
+
+func (dfab *Utf8DFABuilder) allocate() uint32 {
+ newState := dfab.numStates
+ dfab.numStates++
+
+ dfab.distances = append(dfab.distances, Atleast{d: 255})
+ dfab.transitions = append(dfab.transitions, [256]uint32{})
+
+ return newState
+}
+
+func (dfab *Utf8DFABuilder) getOrAllocate(state Utf8StateId) uint32 {
+ if int(state) >= cap(dfab.index) {
+ cloneIndex := make([]uint32, int(state)*2)
+ copy(cloneIndex, dfab.index)
+ dfab.index = cloneIndex
+ }
+ if dfab.index[state] != math.MaxUint32 {
+ return dfab.index[state]
+ }
+
+ nstate := dfab.allocate()
+ dfab.index[state] = nstate
+
+ return nstate
+}
+
+func (dfab *Utf8DFABuilder) setInitialState(iState uint32) {
+ decodedID := dfab.getOrAllocate(original(iState))
+ dfab.initialState = decodedID
+}
+
+func (dfab *Utf8DFABuilder) build(ed uint8) *DFA {
+ return &DFA{
+ transitions: dfab.transitions,
+ distances: dfab.distances,
+ initState: int(dfab.initialState),
+ ed: ed,
+ }
+}
+
+func (dfab *Utf8DFABuilder) addState(state, default_suc_orig uint32,
+ distance Distance) (*Utf8DFAStateBuilder, error) {
+ if state > dfab.maxNumStates {
+ return nil, fmt.Errorf("State id is larger than maxNumStates")
+ }
+
+ stateID := dfab.getOrAllocate(original(state))
+ dfab.distances[stateID] = distance
+
+ defaultSuccID := dfab.getOrAllocate(original(default_suc_orig))
+ // creates a chain of states of predecessors of `default_suc_orig`.
+ // Accepting k-bytes (whatever the bytes are) from `predecessor_states[k-1]`
+ // leads to the `default_suc_orig` state.
+ predecessorStates := []uint32{defaultSuccID,
+ defaultSuccID,
+ defaultSuccID,
+ defaultSuccID}
+
+ for numBytes := uint8(1); numBytes < 4; numBytes++ {
+ predecessorState := predecessor(default_suc_orig, numBytes)
+ predecessorStateID := dfab.getOrAllocate(predecessorState)
+ predecessorStates[numBytes] = predecessorStateID
+ succ := predecessorStates[numBytes-1]
+ fillTransitions(&dfab.transitions[predecessorStateID], succ)
+ }
+
+ // 1-byte encoded chars.
+ fill(dfab.transitions[stateID][0:192], predecessorStates[0])
+ // 2-bytes encoded chars.
+ fill(dfab.transitions[stateID][192:224], predecessorStates[1])
+ // 3-bytes encoded chars.
+ fill(dfab.transitions[stateID][224:240], predecessorStates[2])
+ // 4-bytes encoded chars.
+ fill(dfab.transitions[stateID][240:256], predecessorStates[3])
+
+ return &Utf8DFAStateBuilder{
+ dfaBuilder: dfab,
+ stateID: stateID,
+ defaultSuccessor: predecessorStates}, nil
+}
diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/levenshtein.go b/vendor/github.com/couchbase/vellum/levenshtein2/levenshtein.go
new file mode 100644
index 0000000000..1ca0aaa65b
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/levenshtein2/levenshtein.go
@@ -0,0 +1,64 @@
+// Copyright (c) 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package levenshtein2
+
+import "fmt"
+
+// StateLimit is the maximum number of states allowed
+const StateLimit = 10000
+
+// ErrTooManyStates is returned if you attempt to build a Levenshtein
+// automaton which requires too many states.
+var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states",
+ StateLimit)
+
+// LevenshteinAutomatonBuilder wraps a precomputed
+// datastructure that allows to produce small (but not minimal) DFA.
+type LevenshteinAutomatonBuilder struct {
+ pDfa *ParametricDFA
+}
+
+// NewLevenshteinAutomatonBuilder creates a
+// reusable, threadsafe Levenshtein automaton builder.
+// `maxDistance` - maximum distance considered by the automaton.
+// `transposition` - assign a distance of 1 for transposition
+//
+// Building this automaton builder is computationally intensive.
+// While it takes only a few milliseconds for `d=2`, it grows
+// exponentially with `d`. It is only reasonable to `d <= 5`.
+func NewLevenshteinAutomatonBuilder(maxDistance uint8,
+ transposition bool) (*LevenshteinAutomatonBuilder, error) {
+ lnfa := newLevenshtein(maxDistance, transposition)
+
+ pdfa, err := fromNfa(lnfa)
+ if err != nil {
+ return nil, err
+ }
+
+ return &LevenshteinAutomatonBuilder{pDfa: pdfa}, nil
+}
+
+// BuildDfa builds the levenshtein automaton for serving
+// queries with a given edit distance.
+func (lab *LevenshteinAutomatonBuilder) BuildDfa(query string,
+ fuzziness uint8) (*DFA, error) {
+ return lab.pDfa.buildDfa(query, fuzziness, false)
+}
+
+// MaxDistance returns the MaxEdit distance supported by the
+// LevenshteinAutomatonBuilder builder.
+func (lab *LevenshteinAutomatonBuilder) MaxDistance() uint8 {
+ return lab.pDfa.maxDistance
+}
diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/levenshtein_nfa.go b/vendor/github.com/couchbase/vellum/levenshtein2/levenshtein_nfa.go
new file mode 100644
index 0000000000..bed9b99d56
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/levenshtein2/levenshtein_nfa.go
@@ -0,0 +1,292 @@
+// Copyright (c) 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package levenshtein2
+
+import (
+ "math"
+ "sort"
+)
+
+/// Levenshtein Distance computed by a Levenshtein Automaton.
+///
+/// Levenshtein automata can only compute the exact Levenshtein distance
+/// up to a given `max_distance`.
+///
+/// Over this distance, the automaton will invariably
+/// return `Distance::AtLeast(max_distance + 1)`.
+type Distance interface {
+ distance() uint8
+}
+
+type Exact struct {
+ d uint8
+}
+
+func (e Exact) distance() uint8 {
+ return e.d
+}
+
+type Atleast struct {
+ d uint8
+}
+
+func (a Atleast) distance() uint8 {
+ return a.d
+}
+
+func characteristicVector(query []rune, c rune) uint64 {
+ chi := uint64(0)
+ for i := 0; i < len(query); i++ {
+ if query[i] == c {
+ chi |= 1 << uint64(i)
+ }
+ }
+ return chi
+}
+
+type NFAState struct {
+ Offset uint32
+ Distance uint8
+ InTranspose bool
+}
+
+type NFAStates []NFAState
+
+func (ns NFAStates) Len() int {
+ return len(ns)
+}
+
+func (ns NFAStates) Less(i, j int) bool {
+ if ns[i].Offset != ns[j].Offset {
+ return ns[i].Offset < ns[j].Offset
+ }
+
+ if ns[i].Distance != ns[j].Distance {
+ return ns[i].Distance < ns[j].Distance
+ }
+
+ return !ns[i].InTranspose && ns[j].InTranspose
+}
+
+func (ns NFAStates) Swap(i, j int) {
+ ns[i], ns[j] = ns[j], ns[i]
+}
+
+func (ns *NFAState) imply(other NFAState) bool {
+ transposeImply := ns.InTranspose
+ if !other.InTranspose {
+ transposeImply = !other.InTranspose
+ }
+
+ deltaOffset := ns.Offset - other.Offset
+ if ns.Offset < other.Offset {
+ deltaOffset = other.Offset - ns.Offset
+ }
+
+ if transposeImply {
+ return uint32(other.Distance) >= (uint32(ns.Distance) + deltaOffset)
+ }
+
+ return uint32(other.Distance) > (uint32(ns.Distance) + deltaOffset)
+}
+
+type MultiState struct {
+ states []NFAState
+}
+
+func (ms *MultiState) States() []NFAState {
+ return ms.states
+}
+
+func (ms *MultiState) Clear() {
+ ms.states = ms.states[:0]
+}
+
+func newMultiState() *MultiState {
+ return &MultiState{states: make([]NFAState, 0)}
+}
+
+func (ms *MultiState) normalize() uint32 {
+ minOffset := uint32(math.MaxUint32)
+
+ for _, s := range ms.states {
+ if s.Offset < minOffset {
+ minOffset = s.Offset
+ }
+ }
+ if minOffset == uint32(math.MaxUint32) {
+ minOffset = 0
+ }
+
+ for i := 0; i < len(ms.states); i++ {
+ ms.states[i].Offset -= minOffset
+ }
+
+ sort.Sort(NFAStates(ms.states))
+
+ return minOffset
+}
+
+func (ms *MultiState) addStates(nState NFAState) {
+
+ for _, s := range ms.states {
+ if s.imply(nState) {
+ return
+ }
+ }
+
+ i := 0
+ for i < len(ms.states) {
+ if nState.imply(ms.states[i]) {
+ ms.states = append(ms.states[:i], ms.states[i+1:]...)
+ } else {
+ i++
+ }
+ }
+ ms.states = append(ms.states, nState)
+
+}
+
+func extractBit(bitset uint64, pos uint8) bool {
+ shift := bitset >> pos
+ bit := shift & 1
+ return bit == uint64(1)
+}
+
+func dist(left, right uint32) uint32 {
+ if left > right {
+ return left - right
+ }
+ return right - left
+}
+
+type LevenshteinNFA struct {
+ mDistance uint8
+ damerau bool
+}
+
+func newLevenshtein(maxD uint8, transposition bool) *LevenshteinNFA {
+ return &LevenshteinNFA{mDistance: maxD,
+ damerau: transposition,
+ }
+}
+
+func (la *LevenshteinNFA) maxDistance() uint8 {
+ return la.mDistance
+}
+
+func (la *LevenshteinNFA) msDiameter() uint8 {
+ return 2*la.mDistance + 1
+}
+
+func (la *LevenshteinNFA) initialStates() *MultiState {
+ ms := MultiState{}
+ nfaState := NFAState{}
+ ms.addStates(nfaState)
+ return &ms
+}
+
+func (la *LevenshteinNFA) multistateDistance(ms *MultiState,
+ queryLen uint32) Distance {
+ minDistance := Atleast{d: la.mDistance + 1}
+ for _, s := range ms.states {
+ t := s.Distance + uint8(dist(queryLen, s.Offset))
+ if t <= uint8(la.mDistance) {
+ if minDistance.distance() > t {
+ minDistance.d = t
+ }
+ }
+ }
+
+ if minDistance.distance() == la.mDistance+1 {
+ return Atleast{d: la.mDistance + 1}
+ }
+
+ return minDistance
+}
+
+func (la *LevenshteinNFA) simpleTransition(state NFAState,
+ symbol uint64, ms *MultiState) {
+
+ if state.Distance < la.mDistance {
+ // insertion
+ ms.addStates(NFAState{Offset: state.Offset,
+ Distance: state.Distance + 1,
+ InTranspose: false})
+
+ // substitution
+ ms.addStates(NFAState{Offset: state.Offset + 1,
+ Distance: state.Distance + 1,
+ InTranspose: false})
+
+ n := la.mDistance + 1 - state.Distance
+ for d := uint8(1); d < n; d++ {
+ if extractBit(symbol, d) {
+ // for d > 0, as many deletion and character match
+ ms.addStates(NFAState{Offset: state.Offset + 1 + uint32(d),
+ Distance: state.Distance + d,
+ InTranspose: false})
+ }
+ }
+
+ if la.damerau && extractBit(symbol, 1) {
+ ms.addStates(NFAState{
+ Offset: state.Offset,
+ Distance: state.Distance + 1,
+ InTranspose: true})
+ }
+
+ }
+
+ if extractBit(symbol, 0) {
+ ms.addStates(NFAState{Offset: state.Offset + 1,
+ Distance: state.Distance,
+ InTranspose: false})
+ }
+
+ if state.InTranspose && extractBit(symbol, 0) {
+ ms.addStates(NFAState{Offset: state.Offset + 2,
+ Distance: state.Distance,
+ InTranspose: false})
+ }
+
+}
+
+func (la *LevenshteinNFA) transition(cState *MultiState,
+ dState *MultiState, scv uint64) {
+ dState.Clear()
+ mask := (uint64(1) << la.msDiameter()) - uint64(1)
+
+ for _, state := range cState.states {
+ cv := (scv >> state.Offset) & mask
+ la.simpleTransition(state, cv, dState)
+ }
+
+ sort.Sort(NFAStates(dState.states))
+}
+
+func (la *LevenshteinNFA) computeDistance(query, other []rune) Distance {
+ cState := la.initialStates()
+ nState := newMultiState()
+
+ for _, i := range other {
+ nState.Clear()
+ chi := characteristicVector(query, i)
+ la.transition(cState, nState, chi)
+ cState, nState = nState, cState
+ }
+
+ return la.multistateDistance(cState, uint32(len(query)))
+}
diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/parametric_dfa.go b/vendor/github.com/couchbase/vellum/levenshtein2/parametric_dfa.go
new file mode 100644
index 0000000000..ebd9311959
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/levenshtein2/parametric_dfa.go
@@ -0,0 +1,349 @@
+// Copyright (c) 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package levenshtein2
+
+import (
+ "crypto/md5"
+ "encoding/json"
+ "fmt"
+ "math"
+)
+
+type ParametricState struct {
+ shapeID uint32
+ offset uint32
+}
+
+func newParametricState() ParametricState {
+ return ParametricState{}
+}
+
+func (ps *ParametricState) isDeadEnd() bool {
+ return ps.shapeID == 0
+}
+
+type Transition struct {
+ destShapeID uint32
+ deltaOffset uint32
+}
+
+func (t *Transition) apply(state ParametricState) ParametricState {
+ ps := ParametricState{
+ shapeID: t.destShapeID}
+ // don't need any offset if we are in the dead state,
+ // this ensures we have only one dead state.
+ if t.destShapeID != 0 {
+ ps.offset = state.offset + t.deltaOffset
+ }
+
+ return ps
+}
+
+type ParametricStateIndex struct {
+ stateIndex []uint32
+ stateQueue []ParametricState
+ numOffsets uint32
+}
+
+func newParametricStateIndex(queryLen,
+ numParamState uint32) ParametricStateIndex {
+ numOffsets := queryLen + 1
+ if numParamState == 0 {
+ numParamState = numOffsets
+ }
+ maxNumStates := numParamState * numOffsets
+ psi := ParametricStateIndex{
+ stateIndex: make([]uint32, maxNumStates),
+ stateQueue: make([]ParametricState, 0, 150),
+ numOffsets: numOffsets,
+ }
+
+ for i := uint32(0); i < maxNumStates; i++ {
+ psi.stateIndex[i] = math.MaxUint32
+ }
+ return psi
+}
+
+func (psi *ParametricStateIndex) numStates() int {
+ return len(psi.stateQueue)
+}
+
+func (psi *ParametricStateIndex) maxNumStates() int {
+ return len(psi.stateIndex)
+}
+
+func (psi *ParametricStateIndex) get(stateID uint32) ParametricState {
+ return psi.stateQueue[stateID]
+}
+
+func (psi *ParametricStateIndex) getOrAllocate(ps ParametricState) uint32 {
+ bucket := ps.shapeID*psi.numOffsets + ps.offset
+ if bucket < uint32(len(psi.stateIndex)) &&
+ psi.stateIndex[bucket] != math.MaxUint32 {
+ return psi.stateIndex[bucket]
+ }
+ nState := uint32(len(psi.stateQueue))
+ psi.stateQueue = append(psi.stateQueue, ps)
+
+ psi.stateIndex[bucket] = nState
+ return nState
+}
+
+type ParametricDFA struct {
+ distance []uint8
+ transitions []Transition
+ maxDistance uint8
+ transitionStride uint32
+ diameter uint32
+}
+
+func (pdfa *ParametricDFA) initialState() ParametricState {
+ return ParametricState{shapeID: 1}
+}
+
+// Returns true iff whatever characters come afterward,
+// we will never reach a shorter distance
+func (pdfa *ParametricDFA) isPrefixSink(state ParametricState, queryLen uint32) bool {
+ if state.isDeadEnd() {
+ return true
+ }
+
+ remOffset := queryLen - state.offset
+ if remOffset < pdfa.diameter {
+ stateDistances := pdfa.distance[pdfa.diameter*state.shapeID:]
+ prefixDistance := stateDistances[remOffset]
+ if prefixDistance > pdfa.maxDistance {
+ return false
+ }
+
+ for _, d := range stateDistances {
+ if d < prefixDistance {
+ return false
+ }
+ }
+ return true
+ }
+ return false
+}
+
+func (pdfa *ParametricDFA) numStates() int {
+ return len(pdfa.transitions) / int(pdfa.transitionStride)
+}
+
+func min(x, y uint32) uint32 {
+ if x < y {
+ return x
+ }
+ return y
+}
+
+func (pdfa *ParametricDFA) transition(state ParametricState,
+ chi uint32) Transition {
+ return pdfa.transitions[pdfa.transitionStride*state.shapeID+chi]
+}
+
+func (pdfa *ParametricDFA) getDistance(state ParametricState,
+ qLen uint32) Distance {
+ remainingOffset := qLen - state.offset
+ if state.isDeadEnd() || remainingOffset >= pdfa.diameter {
+ return Atleast{d: pdfa.maxDistance + 1}
+ }
+ dist := pdfa.distance[int(pdfa.diameter*state.shapeID)+int(remainingOffset)]
+ if dist > pdfa.maxDistance {
+ return Atleast{d: dist}
+ }
+ return Exact{d: dist}
+}
+
+func (pdfa *ParametricDFA) computeDistance(left, right string) Distance {
+ state := pdfa.initialState()
+ leftChars := []rune(left)
+ for _, chr := range []rune(right) {
+ start := state.offset
+ stop := min(start+pdfa.diameter, uint32(len(leftChars)))
+ chi := characteristicVector(leftChars[start:stop], chr)
+ transition := pdfa.transition(state, uint32(chi))
+ state = transition.apply(state)
+ if state.isDeadEnd() {
+ return Atleast{d: pdfa.maxDistance + 1}
+ }
+ }
+ return pdfa.getDistance(state, uint32(len(left)))
+}
+
+func (pdfa *ParametricDFA) buildDfa(query string, distance uint8,
+ prefix bool) (*DFA, error) {
+ qLen := uint32(len([]rune(query)))
+ alphabet := queryChars(query)
+
+ psi := newParametricStateIndex(qLen, uint32(pdfa.numStates()))
+ maxNumStates := psi.maxNumStates()
+ deadEndStateID := psi.getOrAllocate(newParametricState())
+ if deadEndStateID != 0 {
+ return nil, fmt.Errorf("Invalid dead end state")
+ }
+
+ initialStateID := psi.getOrAllocate(pdfa.initialState())
+ dfaBuilder := withMaxStates(uint32(maxNumStates))
+ mask := uint32((1 << pdfa.diameter) - 1)
+
+ var stateID int
+ for stateID = 0; stateID < StateLimit; stateID++ {
+ if stateID == psi.numStates() {
+ break
+ }
+ state := psi.get(uint32(stateID))
+ if prefix && pdfa.isPrefixSink(state, qLen) {
+ distance := pdfa.getDistance(state, qLen)
+ dfaBuilder.addState(uint32(stateID), uint32(stateID), distance)
+ } else {
+ transition := pdfa.transition(state, 0)
+ defSuccessor := transition.apply(state)
+ defSuccessorID := psi.getOrAllocate(defSuccessor)
+ distance := pdfa.getDistance(state, qLen)
+ stateBuilder, err := dfaBuilder.addState(uint32(stateID), defSuccessorID, distance)
+
+ if err != nil {
+ return nil, fmt.Errorf("parametric_dfa: buildDfa, err: %v", err)
+ }
+
+ alphabet.resetNext()
+ chr, cv, err := alphabet.next()
+ for err == nil {
+ chi := cv.shiftAndMask(state.offset, mask)
+
+ transition := pdfa.transition(state, chi)
+
+ destState := transition.apply(state)
+
+ destStateID := psi.getOrAllocate(destState)
+
+ stateBuilder.addTransition(chr, destStateID)
+
+ chr, cv, err = alphabet.next()
+ }
+ }
+ }
+
+ if stateID == StateLimit {
+ return nil, ErrTooManyStates
+ }
+
+ dfaBuilder.setInitialState(initialStateID)
+ return dfaBuilder.build(distance), nil
+}
+
+func fromNfa(nfa *LevenshteinNFA) (*ParametricDFA, error) {
+ lookUp := newHash()
+ lookUp.getOrAllocate(*newMultiState())
+ initialState := nfa.initialStates()
+ lookUp.getOrAllocate(*initialState)
+
+ maxDistance := nfa.maxDistance()
+ msDiameter := nfa.msDiameter()
+
+ numChi := 1 << msDiameter
+ chiValues := make([]uint64, numChi)
+ for i := 0; i < numChi; i++ {
+ chiValues[i] = uint64(i)
+ }
+
+ transitions := make([]Transition, 0, numChi*int(msDiameter))
+ var stateID int
+ for stateID = 0; stateID < StateLimit; stateID++ {
+ if stateID == len(lookUp.items) {
+ break
+ }
+
+ for _, chi := range chiValues {
+ destMs := newMultiState()
+
+ ms := lookUp.getFromID(stateID)
+
+ nfa.transition(ms, destMs, chi)
+
+ translation := destMs.normalize()
+
+ destID := lookUp.getOrAllocate(*destMs)
+
+ transitions = append(transitions, Transition{
+ destShapeID: uint32(destID),
+ deltaOffset: translation,
+ })
+ }
+ }
+
+ if stateID == StateLimit {
+ return nil, ErrTooManyStates
+ }
+
+ ns := len(lookUp.items)
+ diameter := int(msDiameter)
+
+ distances := make([]uint8, 0, diameter*ns)
+ for stateID := 0; stateID < ns; stateID++ {
+ ms := lookUp.getFromID(stateID)
+ for offset := 0; offset < diameter; offset++ {
+ dist := nfa.multistateDistance(ms, uint32(offset))
+ distances = append(distances, dist.distance())
+ }
+ }
+
+ return &ParametricDFA{
+ diameter: uint32(msDiameter),
+ transitions: transitions,
+ maxDistance: maxDistance,
+ transitionStride: uint32(numChi),
+ distance: distances,
+ }, nil
+}
+
+type hash struct {
+ index map[[16]byte]int
+ items []MultiState
+}
+
+func newHash() *hash {
+ return &hash{
+ index: make(map[[16]byte]int, 100),
+ items: make([]MultiState, 0, 100),
+ }
+}
+
+func (h *hash) getOrAllocate(m MultiState) int {
+ size := len(h.items)
+ var exists bool
+ var pos int
+ md5 := getHash(&m)
+ if pos, exists = h.index[md5]; !exists {
+ h.index[md5] = size
+ pos = size
+ h.items = append(h.items, m)
+ }
+ return pos
+}
+
+func (h *hash) getFromID(id int) *MultiState {
+ return &h.items[id]
+}
+
+func getHash(ms *MultiState) [16]byte {
+ msBytes := []byte{}
+ for _, state := range ms.states {
+ jsonBytes, _ := json.Marshal(&state)
+ msBytes = append(msBytes, jsonBytes...)
+ }
+ return md5.Sum(msBytes)
+}
diff --git a/vendor/github.com/couchbase/vellum/regexp/compile.go b/vendor/github.com/couchbase/vellum/regexp/compile.go
index 6922b749db..55280164c7 100644
--- a/vendor/github.com/couchbase/vellum/regexp/compile.go
+++ b/vendor/github.com/couchbase/vellum/regexp/compile.go
@@ -18,17 +18,27 @@ import (
"regexp/syntax"
"unicode"
+ unicode_utf8 "unicode/utf8"
+
"github.com/couchbase/vellum/utf8"
)
type compiler struct {
sizeLimit uint
insts prog
+ instsPool []inst
+
+ sequences utf8.Sequences
+ rangeStack utf8.RangeStack
+ startBytes []byte
+ endBytes []byte
}
func newCompiler(sizeLimit uint) *compiler {
return &compiler{
- sizeLimit: sizeLimit,
+ sizeLimit: sizeLimit,
+ startBytes: make([]byte, unicode_utf8.UTFMax),
+ endBytes: make([]byte, unicode_utf8.UTFMax),
}
}
@@ -37,13 +47,13 @@ func (c *compiler) compile(ast *syntax.Regexp) (prog, error) {
if err != nil {
return nil, err
}
- c.insts = append(c.insts, &inst{
- op: OpMatch,
- })
+ inst := c.allocInst()
+ inst.op = OpMatch
+ c.insts = append(c.insts, inst)
return c.insts, nil
}
-func (c *compiler) c(ast *syntax.Regexp) error {
+func (c *compiler) c(ast *syntax.Regexp) (err error) {
if ast.Flags&syntax.NonGreedy > 1 {
return ErrNoLazy
}
@@ -67,11 +77,12 @@ func (c *compiler) c(ast *syntax.Regexp) error {
next.Rune = next.Rune0[0:2]
return c.c(&next)
}
- seqs, err := utf8.NewSequences(r, r)
+ c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
+ r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
if err != nil {
return err
}
- for _, seq := range seqs {
+ for _, seq := range c.sequences {
c.compileUtf8Ranges(seq)
}
}
@@ -106,8 +117,7 @@ func (c *compiler) c(ast *syntax.Regexp) error {
if len(ast.Sub) == 0 {
return nil
}
- jmpsToEnd := []uint{}
-
+ jmpsToEnd := make([]uint, 0, len(ast.Sub)-1)
// does not handle last entry
for i := 0; i < len(ast.Sub)-1; i++ {
sub := ast.Sub[i]
@@ -188,7 +198,8 @@ func (c *compiler) c(ast *syntax.Regexp) error {
return err
}
}
- var splits, starts []uint
+ splits := make([]uint, 0, ast.Max-ast.Min)
+ starts := make([]uint, 0, ast.Max-ast.Min)
for i := ast.Min; i < ast.Max; i++ {
splits = append(splits, c.emptySplit())
starts = append(starts, uint(len(c.insts)))
@@ -218,8 +229,7 @@ func (c *compiler) compileClass(ast *syntax.Regexp) error {
if len(ast.Rune) == 0 {
return nil
}
- var jmps []uint
-
+ jmps := make([]uint, 0, len(ast.Rune)-2)
// does not do last pair
for i := 0; i < len(ast.Rune)-2; i += 2 {
rstart := ast.Rune[i]
@@ -249,16 +259,16 @@ func (c *compiler) compileClass(ast *syntax.Regexp) error {
return nil
}
-func (c *compiler) compileClassRange(startR, endR rune) error {
- seqs, err := utf8.NewSequences(startR, endR)
+func (c *compiler) compileClassRange(startR, endR rune) (err error) {
+ c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
+ startR, endR, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
if err != nil {
return err
}
- var jmps []uint
-
+ jmps := make([]uint, 0, len(c.sequences)-1)
// does not do last entry
- for i := 0; i < len(seqs)-1; i++ {
- seq := seqs[i]
+ for i := 0; i < len(c.sequences)-1; i++ {
+ seq := c.sequences[i]
split := c.emptySplit()
j1 := c.top()
c.compileUtf8Ranges(seq)
@@ -267,7 +277,7 @@ func (c *compiler) compileClassRange(startR, endR rune) error {
c.setSplit(split, j1, j2)
}
// handle last entry
- c.compileUtf8Ranges(seqs[len(seqs)-1])
+ c.compileUtf8Ranges(c.sequences[len(c.sequences)-1])
end := c.top()
for _, jmp := range jmps {
c.setJump(jmp, end)
@@ -278,25 +288,25 @@ func (c *compiler) compileClassRange(startR, endR rune) error {
func (c *compiler) compileUtf8Ranges(seq utf8.Sequence) {
for _, r := range seq {
- c.insts = append(c.insts, &inst{
- op: OpRange,
- rangeStart: r.Start,
- rangeEnd: r.End,
- })
+ inst := c.allocInst()
+ inst.op = OpRange
+ inst.rangeStart = r.Start
+ inst.rangeEnd = r.End
+ c.insts = append(c.insts, inst)
}
}
func (c *compiler) emptySplit() uint {
- c.insts = append(c.insts, &inst{
- op: OpSplit,
- })
+ inst := c.allocInst()
+ inst.op = OpSplit
+ c.insts = append(c.insts, inst)
return c.top() - 1
}
func (c *compiler) emptyJump() uint {
- c.insts = append(c.insts, &inst{
- op: OpJmp,
- })
+ inst := c.allocInst()
+ inst.op = OpJmp
+ c.insts = append(c.insts, inst)
return c.top() - 1
}
@@ -314,3 +324,12 @@ func (c *compiler) setJump(i, pc uint) {
func (c *compiler) top() uint {
return uint(len(c.insts))
}
+
+func (c *compiler) allocInst() *inst {
+ if len(c.instsPool) <= 0 {
+ c.instsPool = make([]inst, 16)
+ }
+ inst := &c.instsPool[0]
+ c.instsPool = c.instsPool[1:]
+ return inst
+}
diff --git a/vendor/github.com/couchbase/vellum/regexp/dfa.go b/vendor/github.com/couchbase/vellum/regexp/dfa.go
index 9864606b6a..7e6fb29dac 100644
--- a/vendor/github.com/couchbase/vellum/regexp/dfa.go
+++ b/vendor/github.com/couchbase/vellum/regexp/dfa.go
@@ -23,7 +23,7 @@ import (
const StateLimit = 10000
// ErrTooManyStates is returned if you attempt to build a Levenshtein
-// automaton which requries too many states.
+// automaton which requires too many states.
var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states",
StateLimit)
@@ -37,12 +37,12 @@ func newDfaBuilder(insts prog) *dfaBuilder {
d := &dfaBuilder{
dfa: &dfa{
insts: insts,
- states: make([]*state, 0, 16),
+ states: make([]state, 0, 16),
},
cache: make(map[string]int, 1024),
}
// add 0 state that is invalid
- d.dfa.states = append(d.dfa.states, &state{
+ d.dfa.states = append(d.dfa.states, state{
next: make([]int, 256),
match: false,
})
@@ -54,13 +54,15 @@ func (d *dfaBuilder) build() (*dfa, error) {
next := newSparseSet(uint(len(d.dfa.insts)))
d.dfa.add(cur, 0)
- states := intStack{d.cachedState(cur)}
+ ns, instsReuse := d.cachedState(cur, nil)
+ states := intStack{ns}
seen := make(map[int]struct{})
var s int
states, s = states.Pop()
for s != 0 {
for b := 0; b < 256; b++ {
- ns := d.runState(cur, next, s, byte(b))
+ var ns int
+ ns, instsReuse = d.runState(cur, next, s, byte(b), instsReuse)
if ns != 0 {
if _, ok := seen[ns]; !ok {
seen[ns] = struct{}{}
@@ -76,15 +78,17 @@ func (d *dfaBuilder) build() (*dfa, error) {
return d.dfa, nil
}
-func (d *dfaBuilder) runState(cur, next *sparseSet, state int, b byte) int {
+func (d *dfaBuilder) runState(cur, next *sparseSet, state int, b byte, instsReuse []uint) (
+ int, []uint) {
cur.Clear()
for _, ip := range d.dfa.states[state].insts {
cur.Add(ip)
}
d.dfa.run(cur, next, b)
- nextState := d.cachedState(next)
+ var nextState int
+ nextState, instsReuse = d.cachedState(next, instsReuse)
d.dfa.states[state].next[b] = nextState
- return nextState
+ return nextState, instsReuse
}
func instsKey(insts []uint, buf []byte) []byte {
@@ -99,8 +103,12 @@ func instsKey(insts []uint, buf []byte) []byte {
return buf
}
-func (d *dfaBuilder) cachedState(set *sparseSet) int {
- var insts []uint
+func (d *dfaBuilder) cachedState(set *sparseSet,
+ instsReuse []uint) (int, []uint) {
+ insts := instsReuse[:0]
+ if cap(insts) == 0 {
+ insts = make([]uint, 0, set.Len())
+ }
var isMatch bool
for i := uint(0); i < uint(set.Len()); i++ {
ip := set.Get(i)
@@ -113,26 +121,26 @@ func (d *dfaBuilder) cachedState(set *sparseSet) int {
}
}
if len(insts) == 0 {
- return 0
+ return 0, insts
}
d.keyBuf = instsKey(insts, d.keyBuf)
v, ok := d.cache[string(d.keyBuf)]
if ok {
- return v
+ return v, insts
}
- d.dfa.states = append(d.dfa.states, &state{
+ d.dfa.states = append(d.dfa.states, state{
insts: insts,
next: make([]int, 256),
match: isMatch,
})
newV := len(d.dfa.states) - 1
d.cache[string(d.keyBuf)] = newV
- return newV
+ return newV, nil
}
type dfa struct {
insts prog
- states []*state
+ states []state
}
func (d *dfa) add(set *sparseSet, ip uint) {
diff --git a/vendor/github.com/couchbase/vellum/regexp/inst.go b/vendor/github.com/couchbase/vellum/regexp/inst.go
index 61cbf2f333..36f2e602df 100644
--- a/vendor/github.com/couchbase/vellum/regexp/inst.go
+++ b/vendor/github.com/couchbase/vellum/regexp/inst.go
@@ -27,7 +27,7 @@ const (
OpRange
)
-// instSize is the approxmiate size of the an inst struct in bytes
+// instSize is the approximate size of the an inst struct in bytes
const instSize = 40
type inst struct {
diff --git a/vendor/github.com/couchbase/vellum/regexp/regexp.go b/vendor/github.com/couchbase/vellum/regexp/regexp.go
index ed0e7823e1..920ddc3708 100644
--- a/vendor/github.com/couchbase/vellum/regexp/regexp.go
+++ b/vendor/github.com/couchbase/vellum/regexp/regexp.go
@@ -35,6 +35,8 @@ var ErrNoLazy = fmt.Errorf("lazy quantifiers are not allowed")
// too many instructions
var ErrCompiledTooBig = fmt.Errorf("too many instructions")
+var DefaultLimit = uint(10 * (1 << 20))
+
// Regexp implements the vellum.Automaton interface for matcing a user
// specified regular expression.
type Regexp struct {
@@ -47,7 +49,7 @@ type Regexp struct {
// compiled finite state automaton. If this size is exceeded,
// ErrCompiledTooBig will be returned.
func New(expr string) (*Regexp, error) {
- return NewWithLimit(expr, 10*(1<<20))
+ return NewWithLimit(expr, DefaultLimit)
}
// NewRegexpWithLimit creates a new Regular Expression automaton with
@@ -59,6 +61,10 @@ func NewWithLimit(expr string, size uint) (*Regexp, error) {
if err != nil {
return nil, err
}
+ return NewParsedWithLimit(expr, parsed, size)
+}
+
+func NewParsedWithLimit(expr string, parsed *syntax.Regexp, size uint) (*Regexp, error) {
compiler := newCompiler(size)
insts, err := compiler.compile(parsed)
if err != nil {
@@ -103,7 +109,7 @@ func (r *Regexp) WillAlwaysMatch(int) bool {
return false
}
-// Accept returns the new state, resulting from the transite byte b
+// Accept returns the new state, resulting from the transition byte b
// when currently in the state s.
func (r *Regexp) Accept(s int, b byte) int {
if s < len(r.dfa.states) {
diff --git a/vendor/github.com/couchbase/vellum/registry.go b/vendor/github.com/couchbase/vellum/registry.go
index 3721a7c9c3..f5b9b4d59c 100644
--- a/vendor/github.com/couchbase/vellum/registry.go
+++ b/vendor/github.com/couchbase/vellum/registry.go
@@ -14,39 +14,35 @@
package vellum
-import (
- "hash"
- "hash/fnv"
-)
-
type registryCell struct {
addr int
node *builderNode
}
type registry struct {
- table []registryCell
- tableSize uint
- mruSize uint
- hasher hash.Hash64
+ builderNodePool *builderNodePool
+ table []registryCell
+ tableSize uint
+ mruSize uint
}
-func newRegistry(tableSize, mruSize int) *registry {
+func newRegistry(p *builderNodePool, tableSize, mruSize int) *registry {
nsize := tableSize * mruSize
rv := &registry{
- table: make([]registryCell, nsize),
- tableSize: uint(tableSize),
- mruSize: uint(mruSize),
- hasher: fnv.New64a(),
+ builderNodePool: p,
+ table: make([]registryCell, nsize),
+ tableSize: uint(tableSize),
+ mruSize: uint(mruSize),
}
return rv
}
func (r *registry) Reset() {
- for i := 0; i < len(r.table); i++ {
- r.table[i] = registryCell{}
+ var empty registryCell
+ for i := range r.table {
+ r.builderNodePool.Put(r.table[i].node)
+ r.table[i] = empty
}
- r.hasher.Reset()
}
func (r *registry) entry(node *builderNode) (bool, int, *registryCell) {
@@ -57,7 +53,7 @@ func (r *registry) entry(node *builderNode) (bool, int, *registryCell) {
start := r.mruSize * uint(bucket)
end := start + r.mruSize
rc := registryCache(r.table[start:end])
- return rc.entry(node)
+ return rc.entry(node, r.builderNodePool)
}
const fnvPrime = 1099511628211
@@ -81,11 +77,12 @@ func (r *registry) hash(b *builderNode) int {
type registryCache []registryCell
-func (r registryCache) entry(node *builderNode) (bool, int, *registryCell) {
+func (r registryCache) entry(node *builderNode, pool *builderNodePool) (bool, int, *registryCell) {
if len(r) == 1 {
if r[0].node != nil && r[0].node.equiv(node) {
return true, r[0].addr, nil
}
+ pool.Put(r[0].node)
r[0].node = node
return false, 0, &r[0]
}
@@ -98,6 +95,7 @@ func (r registryCache) entry(node *builderNode) (bool, int, *registryCell) {
}
// no match
last := len(r) - 1
+ pool.Put(r[last].node)
r[last].node = node // discard LRU
r.promote(last)
return false, 0, &r[0]
diff --git a/vendor/github.com/couchbase/vellum/utf8/utf8.go b/vendor/github.com/couchbase/vellum/utf8/utf8.go
index 47dbe9d1c5..54e23b937c 100644
--- a/vendor/github.com/couchbase/vellum/utf8/utf8.go
+++ b/vendor/github.com/couchbase/vellum/utf8/utf8.go
@@ -25,19 +25,39 @@ type Sequences []Sequence
// NewSequences constructs a collection of Sequence which describe the
// byte ranges covered between the start and end runes.
func NewSequences(start, end rune) (Sequences, error) {
- var rv Sequences
+ rv, _, err := NewSequencesPrealloc(start, end, nil, nil, nil, nil)
+ return rv, err
+}
+
+func NewSequencesPrealloc(start, end rune,
+ preallocSequences Sequences,
+ preallocRangeStack RangeStack,
+ preallocStartBytes, preallocEndBytes []byte) (Sequences, RangeStack, error) {
+ rv := preallocSequences[:0]
+
+ startBytes := preallocStartBytes
+ if cap(startBytes) < utf8.UTFMax {
+ startBytes = make([]byte, utf8.UTFMax)
+ }
+ startBytes = startBytes[:utf8.UTFMax]
- var rangeStack rangeStack
- rangeStack = rangeStack.Push(&scalarRange{start, end})
+ endBytes := preallocEndBytes
+ if cap(endBytes) < utf8.UTFMax {
+ endBytes = make([]byte, utf8.UTFMax)
+ }
+ endBytes = endBytes[:utf8.UTFMax]
+
+ rangeStack := preallocRangeStack[:0]
+ rangeStack = rangeStack.Push(scalarRange{start, end})
rangeStack, r := rangeStack.Pop()
TOP:
- for r != nil {
+ for r != nilScalarRange {
INNER:
for {
r1, r2 := r.split()
- if r1 != nil {
- rangeStack = rangeStack.Push(&scalarRange{r2.start, r2.end})
+ if r1 != nilScalarRange {
+ rangeStack = rangeStack.Push(scalarRange{r2.start, r2.end})
r.start = r1.start
r.end = r1.end
continue INNER
@@ -49,13 +69,13 @@ TOP:
for i := 1; i < utf8.UTFMax; i++ {
max := maxScalarValue(i)
if r.start <= max && max < r.end {
- rangeStack = rangeStack.Push(&scalarRange{max + 1, r.end})
+ rangeStack = rangeStack.Push(scalarRange{max + 1, r.end})
r.end = max
continue INNER
}
}
asciiRange := r.ascii()
- if asciiRange != nil {
+ if asciiRange != nilRange {
rv = append(rv, Sequence{
asciiRange,
})
@@ -66,23 +86,21 @@ TOP:
m := rune((1 << (6 * i)) - 1)
if (r.start & ^m) != (r.end & ^m) {
if (r.start & m) != 0 {
- rangeStack = rangeStack.Push(&scalarRange{(r.start | m) + 1, r.end})
+ rangeStack = rangeStack.Push(scalarRange{(r.start | m) + 1, r.end})
r.end = r.start | m
continue INNER
}
if (r.end & m) != m {
- rangeStack = rangeStack.Push(&scalarRange{r.end & ^m, r.end})
+ rangeStack = rangeStack.Push(scalarRange{r.end & ^m, r.end})
r.end = (r.end & ^m) - 1
continue INNER
}
}
}
- start := make([]byte, utf8.UTFMax)
- end := make([]byte, utf8.UTFMax)
- n, m := r.encode(start, end)
- seq, err := SequenceFromEncodedRange(start[0:n], end[0:m])
+ n, m := r.encode(startBytes, endBytes)
+ seq, err := SequenceFromEncodedRange(startBytes[0:n], endBytes[0:m])
if err != nil {
- return nil, err
+ return nil, nil, err
}
rv = append(rv, seq)
rangeStack, r = rangeStack.Pop()
@@ -90,11 +108,11 @@ TOP:
}
}
- return rv, nil
+ return rv, rangeStack, nil
}
-// Sequence is a collection of *Range
-type Sequence []*Range
+// Sequence is a collection of Range
+type Sequence []Range
// SequenceFromEncodedRange creates sequence from the encoded bytes
func SequenceFromEncodedRange(start, end []byte) (Sequence, error) {
@@ -104,21 +122,21 @@ func SequenceFromEncodedRange(start, end []byte) (Sequence, error) {
switch len(start) {
case 2:
return Sequence{
- &Range{start[0], end[0]},
- &Range{start[1], end[1]},
+ Range{start[0], end[0]},
+ Range{start[1], end[1]},
}, nil
case 3:
return Sequence{
- &Range{start[0], end[0]},
- &Range{start[1], end[1]},
- &Range{start[2], end[2]},
+ Range{start[0], end[0]},
+ Range{start[1], end[1]},
+ Range{start[2], end[2]},
}, nil
case 4:
return Sequence{
- &Range{start[0], end[0]},
- &Range{start[1], end[1]},
- &Range{start[2], end[2]},
- &Range{start[3], end[3]},
+ Range{start[0], end[0]},
+ Range{start[1], end[1]},
+ Range{start[2], end[2]},
+ Range{start[3], end[3]},
}, nil
}
@@ -159,6 +177,8 @@ type Range struct {
End byte
}
+var nilRange = Range{0xff, 0}
+
func (u Range) matches(b byte) bool {
if u.Start <= b && b <= u.End {
return true
@@ -178,37 +198,39 @@ type scalarRange struct {
end rune
}
+var nilScalarRange = scalarRange{0xffff, 0}
+
func (s *scalarRange) String() string {
return fmt.Sprintf("ScalarRange(%d,%d)", s.start, s.end)
}
// split this scalar range if it overlaps with a surrogate codepoint
-func (s *scalarRange) split() (*scalarRange, *scalarRange) {
+func (s *scalarRange) split() (scalarRange, scalarRange) {
if s.start < 0xe000 && s.end > 0xd7ff {
- return &scalarRange{
+ return scalarRange{
start: s.start,
end: 0xd7ff,
},
- &scalarRange{
+ scalarRange{
start: 0xe000,
end: s.end,
}
}
- return nil, nil
+ return nilScalarRange, nilScalarRange
}
func (s *scalarRange) valid() bool {
return s.start <= s.end
}
-func (s *scalarRange) ascii() *Range {
+func (s *scalarRange) ascii() Range {
if s.valid() && s.end <= 0x7f {
- return &Range{
+ return Range{
Start: byte(s.start),
End: byte(s.end),
}
}
- return nil
+ return nilRange
}
// start and end MUST have capacity for utf8.UTFMax bytes
@@ -218,16 +240,16 @@ func (s *scalarRange) encode(start, end []byte) (int, int) {
return n, m
}
-type rangeStack []*scalarRange
+type RangeStack []scalarRange
-func (s rangeStack) Push(v *scalarRange) rangeStack {
+func (s RangeStack) Push(v scalarRange) RangeStack {
return append(s, v)
}
-func (s rangeStack) Pop() (rangeStack, *scalarRange) {
+func (s RangeStack) Pop() (RangeStack, scalarRange) {
l := len(s)
if l < 1 {
- return s, nil
+ return s, nilScalarRange
}
return s[:l-1], s[l-1]
}