diff options
Diffstat (limited to 'vendor/github.com/couchbase/vellum/fst.go')
-rw-r--r-- | vendor/github.com/couchbase/vellum/fst.go | 254 |
1 files changed, 254 insertions, 0 deletions
diff --git a/vendor/github.com/couchbase/vellum/fst.go b/vendor/github.com/couchbase/vellum/fst.go new file mode 100644 index 0000000000..ecc528395c --- /dev/null +++ b/vendor/github.com/couchbase/vellum/fst.go @@ -0,0 +1,254 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vellum + +import ( + "io" + + "github.com/willf/bitset" +) + +// FST is an in-memory representation of a finite state transducer, +// capable of returning the uint64 value associated with +// each []byte key stored, as well as enumerating all of the keys +// in order. +type FST struct { + f io.Closer + ver int + len int + typ int + data []byte + decoder decoder +} + +func new(data []byte, f io.Closer) (rv *FST, err error) { + rv = &FST{ + data: data, + f: f, + } + + rv.ver, rv.typ, err = decodeHeader(data) + if err != nil { + return nil, err + } + + rv.decoder, err = loadDecoder(rv.ver, rv.data) + if err != nil { + return nil, err + } + + rv.len = rv.decoder.getLen() + + return rv, nil +} + +// Contains returns true if this FST contains the specified key. +func (f *FST) Contains(val []byte) (bool, error) { + _, exists, err := f.Get(val) + return exists, err +} + +// Get returns the value associated with the key. NOTE: a value of zero +// does not imply the key does not exist, you must consult the second +// return value as well. +func (f *FST) Get(input []byte) (uint64, bool, error) { + return f.get(input, nil) +} + +func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) { + var total uint64 + curr := f.decoder.getRoot() + state, err := f.decoder.stateAt(curr, prealloc) + if err != nil { + return 0, false, err + } + for i := range input { + _, curr, output := state.TransitionFor(input[i]) + if curr == noneAddr { + return 0, false, nil + } + + state, err = f.decoder.stateAt(curr, state) + if err != nil { + return 0, false, err + } + + total += output + } + + if state.Final() { + total += state.FinalOutput() + return total, true, nil + } + return 0, false, nil +} + +// Version returns the encoding version used by this FST instance. +func (f *FST) Version() int { + return f.ver +} + +// Len returns the number of entries in this FST instance. +func (f *FST) Len() int { + return f.len +} + +// Type returns the type of this FST instance. +func (f *FST) Type() int { + return f.typ +} + +// Close will unmap any mmap'd data (if managed by vellum) and it will close +// the backing file (if managed by vellum). You MUST call Close() for any +// FST instance that is created. +func (f *FST) Close() error { + if f.f != nil { + err := f.f.Close() + if err != nil { + return err + } + } + f.data = nil + f.decoder = nil + return nil +} + +// Start returns the start state of this Automaton +func (f *FST) Start() int { + return f.decoder.getRoot() +} + +// IsMatch returns if this state is a matching state in this Automaton +func (f *FST) IsMatch(addr int) bool { + match, _ := f.IsMatchWithVal(addr) + return match +} + +// CanMatch returns if this state can ever transition to a matching state +// in this Automaton +func (f *FST) CanMatch(addr int) bool { + if addr == noneAddr { + return false + } + return true +} + +// WillAlwaysMatch returns if from this state the Automaton will always +// be in a matching state +func (f *FST) WillAlwaysMatch(int) bool { + return false +} + +// Accept returns the next state for this Automaton on input of byte b +func (f *FST) Accept(addr int, b byte) int { + next, _ := f.AcceptWithVal(addr, b) + return next +} + +// IsMatchWithVal returns if this state is a matching state in this Automaton +// and also returns the final output value for this state +func (f *FST) IsMatchWithVal(addr int) (bool, uint64) { + s, err := f.decoder.stateAt(addr, nil) + if err != nil { + return false, 0 + } + return s.Final(), s.FinalOutput() +} + +// AcceptWithVal returns the next state for this Automaton on input of byte b +// and also returns the output value for the transition +func (f *FST) AcceptWithVal(addr int, b byte) (int, uint64) { + s, err := f.decoder.stateAt(addr, nil) + if err != nil { + return noneAddr, 0 + } + _, next, output := s.TransitionFor(b) + return next, output +} + +// Iterator returns a new Iterator capable of enumerating the key/value pairs +// between the provided startKeyInclusive and endKeyExclusive. +func (f *FST) Iterator(startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) { + return newIterator(f, startKeyInclusive, endKeyExclusive, nil) +} + +// Search returns a new Iterator capable of enumerating the key/value pairs +// between the provided startKeyInclusive and endKeyExclusive that also +// satisfy the provided automaton. +func (f *FST) Search(aut Automaton, startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) { + return newIterator(f, startKeyInclusive, endKeyExclusive, aut) +} + +// Debug is only intended for debug purposes, it simply asks the underlying +// decoder visit each state, and pass it to the provided callback. +func (f *FST) Debug(callback func(int, interface{}) error) error { + + addr := f.decoder.getRoot() + set := bitset.New(uint(addr)) + stack := addrStack{addr} + + stateNumber := 0 + stack, addr = stack[:len(stack)-1], stack[len(stack)-1] + for addr != noneAddr { + if set.Test(uint(addr)) { + stack, addr = stack.Pop() + continue + } + set.Set(uint(addr)) + state, err := f.decoder.stateAt(addr, nil) + if err != nil { + return err + } + err = callback(stateNumber, state) + if err != nil { + return err + } + for i := 0; i < state.NumTransitions(); i++ { + tchar := state.TransitionAt(i) + _, dest, _ := state.TransitionFor(tchar) + stack = append(stack, dest) + } + stateNumber++ + stack, addr = stack.Pop() + } + + return nil +} + +type addrStack []int + +func (a addrStack) Pop() (addrStack, int) { + l := len(a) + if l < 1 { + return a, noneAddr + } + return a[:l-1], a[l-1] +} + +// Reader() returns a Reader instance that a single thread may use to +// retrieve data from the FST +func (f *FST) Reader() (*Reader, error) { + return &Reader{f: f}, nil +} + +// A Reader is meant for a single threaded use +type Reader struct { + f *FST + prealloc fstStateV1 +} + +func (r *Reader) Get(input []byte) (uint64, bool, error) { + return r.f.get(input, &r.prealloc) +} |