// Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package vellum import ( "io" "github.com/willf/bitset" ) // FST is an in-memory representation of a finite state transducer, // capable of returning the uint64 value associated with // each []byte key stored, as well as enumerating all of the keys // in order. type FST struct { f io.Closer ver int len int typ int data []byte decoder decoder } func new(data []byte, f io.Closer) (rv *FST, err error) { rv = &FST{ data: data, f: f, } rv.ver, rv.typ, err = decodeHeader(data) if err != nil { return nil, err } rv.decoder, err = loadDecoder(rv.ver, rv.data) if err != nil { return nil, err } rv.len = rv.decoder.getLen() return rv, nil } // Contains returns true if this FST contains the specified key. func (f *FST) Contains(val []byte) (bool, error) { _, exists, err := f.Get(val) return exists, err } // Get returns the value associated with the key. NOTE: a value of zero // does not imply the key does not exist, you must consult the second // return value as well. func (f *FST) Get(input []byte) (uint64, bool, error) { return f.get(input, nil) } func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) { var total uint64 curr := f.decoder.getRoot() state, err := f.decoder.stateAt(curr, prealloc) if err != nil { return 0, false, err } for _, c := range input { _, curr, output := state.TransitionFor(c) if curr == noneAddr { return 0, false, nil } state, err = f.decoder.stateAt(curr, state) if err != nil { return 0, false, err } total += output } if state.Final() { total += state.FinalOutput() return total, true, nil } return 0, false, nil } // Version returns the encoding version used by this FST instance. func (f *FST) Version() int { return f.ver } // Len returns the number of entries in this FST instance. func (f *FST) Len() int { return f.len } // Type returns the type of this FST instance. func (f *FST) Type() int { return f.typ } // Close will unmap any mmap'd data (if managed by vellum) and it will close // the backing file (if managed by vellum). You MUST call Close() for any // FST instance that is created. func (f *FST) Close() error { if f.f != nil { err := f.f.Close() if err != nil { return err } } f.data = nil f.decoder = nil return nil } // Start returns the start state of this Automaton func (f *FST) Start() int { return f.decoder.getRoot() } // IsMatch returns if this state is a matching state in this Automaton func (f *FST) IsMatch(addr int) bool { match, _ := f.IsMatchWithVal(addr) return match } // CanMatch returns if this state can ever transition to a matching state // in this Automaton func (f *FST) CanMatch(addr int) bool { if addr == noneAddr { return false } return true } // WillAlwaysMatch returns if from this state the Automaton will always // be in a matching state func (f *FST) WillAlwaysMatch(int) bool { return false } // Accept returns the next state for this Automaton on input of byte b func (f *FST) Accept(addr int, b byte) int { next, _ := f.AcceptWithVal(addr, b) return next } // IsMatchWithVal returns if this state is a matching state in this Automaton // and also returns the final output value for this state func (f *FST) IsMatchWithVal(addr int) (bool, uint64) { s, err := f.decoder.stateAt(addr, nil) if err != nil { return false, 0 } return s.Final(), s.FinalOutput() } // AcceptWithVal returns the next state for this Automaton on input of byte b // and also returns the output value for the transition func (f *FST) AcceptWithVal(addr int, b byte) (int, uint64) { s, err := f.decoder.stateAt(addr, nil) if err != nil { return noneAddr, 0 } _, next, output := s.TransitionFor(b) return next, output } // Iterator returns a new Iterator capable of enumerating the key/value pairs // between the provided startKeyInclusive and endKeyExclusive. func (f *FST) Iterator(startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) { return newIterator(f, startKeyInclusive, endKeyExclusive, nil) } // Search returns a new Iterator capable of enumerating the key/value pairs // between the provided startKeyInclusive and endKeyExclusive that also // satisfy the provided automaton. func (f *FST) Search(aut Automaton, startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) { return newIterator(f, startKeyInclusive, endKeyExclusive, aut) } // Debug is only intended for debug purposes, it simply asks the underlying // decoder visit each state, and pass it to the provided callback. func (f *FST) Debug(callback func(int, interface{}) error) error { addr := f.decoder.getRoot() set := bitset.New(uint(addr)) stack := addrStack{addr} stateNumber := 0 stack, addr = stack[:len(stack)-1], stack[len(stack)-1] for addr != noneAddr { if set.Test(uint(addr)) { stack, addr = stack.Pop() continue } set.Set(uint(addr)) state, err := f.decoder.stateAt(addr, nil) if err != nil { return err } err = callback(stateNumber, state) if err != nil { return err } for i := 0; i < state.NumTransitions(); i++ { tchar := state.TransitionAt(i) _, dest, _ := state.TransitionFor(tchar) stack = append(stack, dest) } stateNumber++ stack, addr = stack.Pop() } return nil } type addrStack []int func (a addrStack) Pop() (addrStack, int) { l := len(a) if l < 1 { return a, noneAddr } return a[:l-1], a[l-1] } // Reader() returns a Reader instance that a single thread may use to // retrieve data from the FST func (f *FST) Reader() (*Reader, error) { return &Reader{f: f}, nil } func (f *FST) GetMinKey() ([]byte, error) { var rv []byte curr := f.decoder.getRoot() state, err := f.decoder.stateAt(curr, nil) if err != nil { return nil, err } for !state.Final() { nextTrans := state.TransitionAt(0) _, curr, _ = state.TransitionFor(nextTrans) state, err = f.decoder.stateAt(curr, state) if err != nil { return nil, err } rv = append(rv, nextTrans) } return rv, nil } func (f *FST) GetMaxKey() ([]byte, error) { var rv []byte curr := f.decoder.getRoot() state, err := f.decoder.stateAt(curr, nil) if err != nil { return nil, err } for state.NumTransitions() > 0 { nextTrans := state.TransitionAt(state.NumTransitions() - 1) _, curr, _ = state.TransitionFor(nextTrans) state, err = f.decoder.stateAt(curr, state) if err != nil { return nil, err } rv = append(rv, nextTrans) } return rv, nil } // A Reader is meant for a single threaded use type Reader struct { f *FST prealloc fstStateV1 } func (r *Reader) Get(input []byte) (uint64, bool, error) { return r.f.get(input, &r.prealloc) }