summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/couchbase/vellum/fst.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/couchbase/vellum/fst.go')
-rw-r--r--vendor/github.com/couchbase/vellum/fst.go254
1 files changed, 254 insertions, 0 deletions
diff --git a/vendor/github.com/couchbase/vellum/fst.go b/vendor/github.com/couchbase/vellum/fst.go
new file mode 100644
index 0000000000..ecc528395c
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/fst.go
@@ -0,0 +1,254 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vellum
+
+import (
+ "io"
+
+ "github.com/willf/bitset"
+)
+
+// FST is an in-memory representation of a finite state transducer,
+// capable of returning the uint64 value associated with
+// each []byte key stored, as well as enumerating all of the keys
+// in order.
+type FST struct {
+ f io.Closer
+ ver int
+ len int
+ typ int
+ data []byte
+ decoder decoder
+}
+
+func new(data []byte, f io.Closer) (rv *FST, err error) {
+ rv = &FST{
+ data: data,
+ f: f,
+ }
+
+ rv.ver, rv.typ, err = decodeHeader(data)
+ if err != nil {
+ return nil, err
+ }
+
+ rv.decoder, err = loadDecoder(rv.ver, rv.data)
+ if err != nil {
+ return nil, err
+ }
+
+ rv.len = rv.decoder.getLen()
+
+ return rv, nil
+}
+
+// Contains returns true if this FST contains the specified key.
+func (f *FST) Contains(val []byte) (bool, error) {
+ _, exists, err := f.Get(val)
+ return exists, err
+}
+
+// Get returns the value associated with the key. NOTE: a value of zero
+// does not imply the key does not exist, you must consult the second
+// return value as well.
+func (f *FST) Get(input []byte) (uint64, bool, error) {
+ return f.get(input, nil)
+}
+
+func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) {
+ var total uint64
+ curr := f.decoder.getRoot()
+ state, err := f.decoder.stateAt(curr, prealloc)
+ if err != nil {
+ return 0, false, err
+ }
+ for i := range input {
+ _, curr, output := state.TransitionFor(input[i])
+ if curr == noneAddr {
+ return 0, false, nil
+ }
+
+ state, err = f.decoder.stateAt(curr, state)
+ if err != nil {
+ return 0, false, err
+ }
+
+ total += output
+ }
+
+ if state.Final() {
+ total += state.FinalOutput()
+ return total, true, nil
+ }
+ return 0, false, nil
+}
+
+// Version returns the encoding version used by this FST instance.
+func (f *FST) Version() int {
+ return f.ver
+}
+
+// Len returns the number of entries in this FST instance.
+func (f *FST) Len() int {
+ return f.len
+}
+
+// Type returns the type of this FST instance.
+func (f *FST) Type() int {
+ return f.typ
+}
+
+// Close will unmap any mmap'd data (if managed by vellum) and it will close
+// the backing file (if managed by vellum). You MUST call Close() for any
+// FST instance that is created.
+func (f *FST) Close() error {
+ if f.f != nil {
+ err := f.f.Close()
+ if err != nil {
+ return err
+ }
+ }
+ f.data = nil
+ f.decoder = nil
+ return nil
+}
+
+// Start returns the start state of this Automaton
+func (f *FST) Start() int {
+ return f.decoder.getRoot()
+}
+
+// IsMatch returns if this state is a matching state in this Automaton
+func (f *FST) IsMatch(addr int) bool {
+ match, _ := f.IsMatchWithVal(addr)
+ return match
+}
+
+// CanMatch returns if this state can ever transition to a matching state
+// in this Automaton
+func (f *FST) CanMatch(addr int) bool {
+ if addr == noneAddr {
+ return false
+ }
+ return true
+}
+
+// WillAlwaysMatch returns if from this state the Automaton will always
+// be in a matching state
+func (f *FST) WillAlwaysMatch(int) bool {
+ return false
+}
+
+// Accept returns the next state for this Automaton on input of byte b
+func (f *FST) Accept(addr int, b byte) int {
+ next, _ := f.AcceptWithVal(addr, b)
+ return next
+}
+
+// IsMatchWithVal returns if this state is a matching state in this Automaton
+// and also returns the final output value for this state
+func (f *FST) IsMatchWithVal(addr int) (bool, uint64) {
+ s, err := f.decoder.stateAt(addr, nil)
+ if err != nil {
+ return false, 0
+ }
+ return s.Final(), s.FinalOutput()
+}
+
+// AcceptWithVal returns the next state for this Automaton on input of byte b
+// and also returns the output value for the transition
+func (f *FST) AcceptWithVal(addr int, b byte) (int, uint64) {
+ s, err := f.decoder.stateAt(addr, nil)
+ if err != nil {
+ return noneAddr, 0
+ }
+ _, next, output := s.TransitionFor(b)
+ return next, output
+}
+
+// Iterator returns a new Iterator capable of enumerating the key/value pairs
+// between the provided startKeyInclusive and endKeyExclusive.
+func (f *FST) Iterator(startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
+ return newIterator(f, startKeyInclusive, endKeyExclusive, nil)
+}
+
+// Search returns a new Iterator capable of enumerating the key/value pairs
+// between the provided startKeyInclusive and endKeyExclusive that also
+// satisfy the provided automaton.
+func (f *FST) Search(aut Automaton, startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
+ return newIterator(f, startKeyInclusive, endKeyExclusive, aut)
+}
+
+// Debug is only intended for debug purposes, it simply asks the underlying
+// decoder visit each state, and pass it to the provided callback.
+func (f *FST) Debug(callback func(int, interface{}) error) error {
+
+ addr := f.decoder.getRoot()
+ set := bitset.New(uint(addr))
+ stack := addrStack{addr}
+
+ stateNumber := 0
+ stack, addr = stack[:len(stack)-1], stack[len(stack)-1]
+ for addr != noneAddr {
+ if set.Test(uint(addr)) {
+ stack, addr = stack.Pop()
+ continue
+ }
+ set.Set(uint(addr))
+ state, err := f.decoder.stateAt(addr, nil)
+ if err != nil {
+ return err
+ }
+ err = callback(stateNumber, state)
+ if err != nil {
+ return err
+ }
+ for i := 0; i < state.NumTransitions(); i++ {
+ tchar := state.TransitionAt(i)
+ _, dest, _ := state.TransitionFor(tchar)
+ stack = append(stack, dest)
+ }
+ stateNumber++
+ stack, addr = stack.Pop()
+ }
+
+ return nil
+}
+
+type addrStack []int
+
+func (a addrStack) Pop() (addrStack, int) {
+ l := len(a)
+ if l < 1 {
+ return a, noneAddr
+ }
+ return a[:l-1], a[l-1]
+}
+
+// Reader() returns a Reader instance that a single thread may use to
+// retrieve data from the FST
+func (f *FST) Reader() (*Reader, error) {
+ return &Reader{f: f}, nil
+}
+
+// A Reader is meant for a single threaded use
+type Reader struct {
+ f *FST
+ prealloc fstStateV1
+}
+
+func (r *Reader) Get(input []byte) (uint64, bool, error) {
+ return r.f.get(input, &r.prealloc)
+}