You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

fst.go 7.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. // Copyright (c) 2017 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package vellum
  15. import (
  16. "io"
  17. "github.com/willf/bitset"
  18. )
  19. // FST is an in-memory representation of a finite state transducer,
  20. // capable of returning the uint64 value associated with
  21. // each []byte key stored, as well as enumerating all of the keys
  22. // in order.
  23. type FST struct {
  24. f io.Closer
  25. ver int
  26. len int
  27. typ int
  28. data []byte
  29. decoder decoder
  30. }
  31. func new(data []byte, f io.Closer) (rv *FST, err error) {
  32. rv = &FST{
  33. data: data,
  34. f: f,
  35. }
  36. rv.ver, rv.typ, err = decodeHeader(data)
  37. if err != nil {
  38. return nil, err
  39. }
  40. rv.decoder, err = loadDecoder(rv.ver, rv.data)
  41. if err != nil {
  42. return nil, err
  43. }
  44. rv.len = rv.decoder.getLen()
  45. return rv, nil
  46. }
  47. // Contains returns true if this FST contains the specified key.
  48. func (f *FST) Contains(val []byte) (bool, error) {
  49. _, exists, err := f.Get(val)
  50. return exists, err
  51. }
  52. // Get returns the value associated with the key. NOTE: a value of zero
  53. // does not imply the key does not exist, you must consult the second
  54. // return value as well.
  55. func (f *FST) Get(input []byte) (uint64, bool, error) {
  56. return f.get(input, nil)
  57. }
  58. func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) {
  59. var total uint64
  60. curr := f.decoder.getRoot()
  61. state, err := f.decoder.stateAt(curr, prealloc)
  62. if err != nil {
  63. return 0, false, err
  64. }
  65. for _, c := range input {
  66. _, curr, output := state.TransitionFor(c)
  67. if curr == noneAddr {
  68. return 0, false, nil
  69. }
  70. state, err = f.decoder.stateAt(curr, state)
  71. if err != nil {
  72. return 0, false, err
  73. }
  74. total += output
  75. }
  76. if state.Final() {
  77. total += state.FinalOutput()
  78. return total, true, nil
  79. }
  80. return 0, false, nil
  81. }
  82. // Version returns the encoding version used by this FST instance.
  83. func (f *FST) Version() int {
  84. return f.ver
  85. }
  86. // Len returns the number of entries in this FST instance.
  87. func (f *FST) Len() int {
  88. return f.len
  89. }
  90. // Type returns the type of this FST instance.
  91. func (f *FST) Type() int {
  92. return f.typ
  93. }
  94. // Close will unmap any mmap'd data (if managed by vellum) and it will close
  95. // the backing file (if managed by vellum). You MUST call Close() for any
  96. // FST instance that is created.
  97. func (f *FST) Close() error {
  98. if f.f != nil {
  99. err := f.f.Close()
  100. if err != nil {
  101. return err
  102. }
  103. }
  104. f.data = nil
  105. f.decoder = nil
  106. return nil
  107. }
  108. // Start returns the start state of this Automaton
  109. func (f *FST) Start() int {
  110. return f.decoder.getRoot()
  111. }
  112. // IsMatch returns if this state is a matching state in this Automaton
  113. func (f *FST) IsMatch(addr int) bool {
  114. match, _ := f.IsMatchWithVal(addr)
  115. return match
  116. }
  117. // CanMatch returns if this state can ever transition to a matching state
  118. // in this Automaton
  119. func (f *FST) CanMatch(addr int) bool {
  120. if addr == noneAddr {
  121. return false
  122. }
  123. return true
  124. }
  125. // WillAlwaysMatch returns if from this state the Automaton will always
  126. // be in a matching state
  127. func (f *FST) WillAlwaysMatch(int) bool {
  128. return false
  129. }
  130. // Accept returns the next state for this Automaton on input of byte b
  131. func (f *FST) Accept(addr int, b byte) int {
  132. next, _ := f.AcceptWithVal(addr, b)
  133. return next
  134. }
  135. // IsMatchWithVal returns if this state is a matching state in this Automaton
  136. // and also returns the final output value for this state
  137. func (f *FST) IsMatchWithVal(addr int) (bool, uint64) {
  138. s, err := f.decoder.stateAt(addr, nil)
  139. if err != nil {
  140. return false, 0
  141. }
  142. return s.Final(), s.FinalOutput()
  143. }
  144. // AcceptWithVal returns the next state for this Automaton on input of byte b
  145. // and also returns the output value for the transition
  146. func (f *FST) AcceptWithVal(addr int, b byte) (int, uint64) {
  147. s, err := f.decoder.stateAt(addr, nil)
  148. if err != nil {
  149. return noneAddr, 0
  150. }
  151. _, next, output := s.TransitionFor(b)
  152. return next, output
  153. }
  154. // Iterator returns a new Iterator capable of enumerating the key/value pairs
  155. // between the provided startKeyInclusive and endKeyExclusive.
  156. func (f *FST) Iterator(startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
  157. return newIterator(f, startKeyInclusive, endKeyExclusive, nil)
  158. }
  159. // Search returns a new Iterator capable of enumerating the key/value pairs
  160. // between the provided startKeyInclusive and endKeyExclusive that also
  161. // satisfy the provided automaton.
  162. func (f *FST) Search(aut Automaton, startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
  163. return newIterator(f, startKeyInclusive, endKeyExclusive, aut)
  164. }
  165. // Debug is only intended for debug purposes, it simply asks the underlying
  166. // decoder visit each state, and pass it to the provided callback.
  167. func (f *FST) Debug(callback func(int, interface{}) error) error {
  168. addr := f.decoder.getRoot()
  169. set := bitset.New(uint(addr))
  170. stack := addrStack{addr}
  171. stateNumber := 0
  172. stack, addr = stack[:len(stack)-1], stack[len(stack)-1]
  173. for addr != noneAddr {
  174. if set.Test(uint(addr)) {
  175. stack, addr = stack.Pop()
  176. continue
  177. }
  178. set.Set(uint(addr))
  179. state, err := f.decoder.stateAt(addr, nil)
  180. if err != nil {
  181. return err
  182. }
  183. err = callback(stateNumber, state)
  184. if err != nil {
  185. return err
  186. }
  187. for i := 0; i < state.NumTransitions(); i++ {
  188. tchar := state.TransitionAt(i)
  189. _, dest, _ := state.TransitionFor(tchar)
  190. stack = append(stack, dest)
  191. }
  192. stateNumber++
  193. stack, addr = stack.Pop()
  194. }
  195. return nil
  196. }
  197. type addrStack []int
  198. func (a addrStack) Pop() (addrStack, int) {
  199. l := len(a)
  200. if l < 1 {
  201. return a, noneAddr
  202. }
  203. return a[:l-1], a[l-1]
  204. }
  205. // Reader() returns a Reader instance that a single thread may use to
  206. // retrieve data from the FST
  207. func (f *FST) Reader() (*Reader, error) {
  208. return &Reader{f: f}, nil
  209. }
  210. func (f *FST) GetMinKey() ([]byte, error) {
  211. var rv []byte
  212. curr := f.decoder.getRoot()
  213. state, err := f.decoder.stateAt(curr, nil)
  214. if err != nil {
  215. return nil, err
  216. }
  217. for !state.Final() {
  218. nextTrans := state.TransitionAt(0)
  219. _, curr, _ = state.TransitionFor(nextTrans)
  220. state, err = f.decoder.stateAt(curr, state)
  221. if err != nil {
  222. return nil, err
  223. }
  224. rv = append(rv, nextTrans)
  225. }
  226. return rv, nil
  227. }
  228. func (f *FST) GetMaxKey() ([]byte, error) {
  229. var rv []byte
  230. curr := f.decoder.getRoot()
  231. state, err := f.decoder.stateAt(curr, nil)
  232. if err != nil {
  233. return nil, err
  234. }
  235. for state.NumTransitions() > 0 {
  236. nextTrans := state.TransitionAt(state.NumTransitions() - 1)
  237. _, curr, _ = state.TransitionFor(nextTrans)
  238. state, err = f.decoder.stateAt(curr, state)
  239. if err != nil {
  240. return nil, err
  241. }
  242. rv = append(rv, nextTrans)
  243. }
  244. return rv, nil
  245. }
  246. // A Reader is meant for a single threaded use
  247. type Reader struct {
  248. f *FST
  249. prealloc fstStateV1
  250. }
  251. func (r *Reader) Get(input []byte) (uint64, bool, error) {
  252. return r.f.get(input, &r.prealloc)
  253. }