You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

fst_iterator.go 7.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. // Copyright (c) 2017 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package vellum
  15. import (
  16. "bytes"
  17. )
// Iterator represents a means of visiting key/value pairs in order.
type Iterator interface {

	// Current() returns the key/value pair currently pointed to.
	// The []byte of the key is ONLY guaranteed to be valid until
	// another call to Next/Seek/Close. If you need it beyond that
	// point you MUST make a copy.
	Current() ([]byte, uint64)

	// Next() advances the iterator to the next key/value pair.
	// If no more key/value pairs exist, ErrIteratorDone is returned.
	Next() error

	// Seek() advances the iterator to the specified key, or to the next
	// key if the specified key does not exist.
	// If no keys exist after that point, ErrIteratorDone is returned.
	Seek(key []byte) error

	// Reset resets the Iterator's internal state to allow for iterator
	// reuse (e.g. pooling).
	Reset(f *FST, startKeyInclusive, endKeyExclusive []byte, aut Automaton) error

	// Close() frees any resources held by this iterator.
	Close() error
}
// FSTIterator is a structure for iterating key/value pairs in this FST in
// lexicographic order. Iterators should be constructed with the FSTIterator
// method on the parent FST structure.
//
// The iterator keeps the path from the root to its current position as a set
// of parallel stacks that are pushed and popped together while traversing.
type FSTIterator struct {
	// f is the FST being iterated; its decoder is used to load states.
	f *FST
	// aut filters keys; Reset substitutes alwaysMatchAutomaton when nil is given.
	aut Automaton

	// startKeyInclusive and endKeyExclusive bound the iteration. They are
	// stored as passed to Reset (not copied).
	startKeyInclusive []byte
	endKeyExclusive   []byte

	// statesStack holds the FST states on the current path; the root is
	// always the first element.
	statesStack []fstState
	// keysStack holds the transition bytes taken so far, i.e. the current key.
	keysStack []byte
	// keysPosStack holds, for each byte in keysStack, the position reported
	// by TransitionFor for that transition in its parent state. It is used
	// as the resume offset when popping back to the parent.
	keysPosStack []int
	// valsStack holds the output value of each transition taken; Current
	// sums these together with the final output of the top state.
	valsStack []uint64
	// autStatesStack holds the automaton state for each entry in
	// statesStack, beginning with aut.Start().
	autStatesStack []int

	// nextStart is a reusable buffer holding a copy of keysStack taken at the
	// beginning of next(), so only keys strictly greater than it are returned.
	nextStart []byte
}
  53. func newIterator(f *FST, startKeyInclusive, endKeyExclusive []byte,
  54. aut Automaton) (*FSTIterator, error) {
  55. rv := &FSTIterator{}
  56. err := rv.Reset(f, startKeyInclusive, endKeyExclusive, aut)
  57. if err != nil {
  58. return nil, err
  59. }
  60. return rv, nil
  61. }
  62. // Reset resets the Iterator' internal state to allow for iterator
  63. // reuse (e.g. pooling).
  64. func (i *FSTIterator) Reset(f *FST, startKeyInclusive, endKeyExclusive []byte, aut Automaton) error {
  65. if aut == nil {
  66. aut = alwaysMatchAutomaton
  67. }
  68. i.f = f
  69. i.startKeyInclusive = startKeyInclusive
  70. i.endKeyExclusive = endKeyExclusive
  71. i.aut = aut
  72. return i.pointTo(startKeyInclusive)
  73. }
  74. // pointTo attempts to point us to the specified location
  75. func (i *FSTIterator) pointTo(key []byte) error {
  76. // tried to seek before start
  77. if bytes.Compare(key, i.startKeyInclusive) < 0 {
  78. key = i.startKeyInclusive
  79. }
  80. // trid to see past end
  81. if i.endKeyExclusive != nil && bytes.Compare(key, i.endKeyExclusive) > 0 {
  82. key = i.endKeyExclusive
  83. }
  84. // reset any state, pointTo always starts over
  85. i.statesStack = i.statesStack[:0]
  86. i.keysStack = i.keysStack[:0]
  87. i.keysPosStack = i.keysPosStack[:0]
  88. i.valsStack = i.valsStack[:0]
  89. i.autStatesStack = i.autStatesStack[:0]
  90. root, err := i.f.decoder.stateAt(i.f.decoder.getRoot(), nil)
  91. if err != nil {
  92. return err
  93. }
  94. autStart := i.aut.Start()
  95. maxQ := -1
  96. // root is always part of the path
  97. i.statesStack = append(i.statesStack, root)
  98. i.autStatesStack = append(i.autStatesStack, autStart)
  99. for j := 0; j < len(key); j++ {
  100. curr := i.statesStack[len(i.statesStack)-1]
  101. autCurr := i.autStatesStack[len(i.autStatesStack)-1]
  102. pos, nextAddr, nextVal := curr.TransitionFor(key[j])
  103. if nextAddr == noneAddr {
  104. // needed transition doesn't exist
  105. // find last trans before the one we needed
  106. for q := 0; q < curr.NumTransitions(); q++ {
  107. if curr.TransitionAt(q) < key[j] {
  108. maxQ = q
  109. }
  110. }
  111. break
  112. }
  113. autNext := i.aut.Accept(autCurr, key[j])
  114. next, err := i.f.decoder.stateAt(nextAddr, nil)
  115. if err != nil {
  116. return err
  117. }
  118. i.statesStack = append(i.statesStack, next)
  119. i.keysStack = append(i.keysStack, key[j])
  120. i.keysPosStack = append(i.keysPosStack, pos)
  121. i.valsStack = append(i.valsStack, nextVal)
  122. i.autStatesStack = append(i.autStatesStack, autNext)
  123. continue
  124. }
  125. if !i.statesStack[len(i.statesStack)-1].Final() || !i.aut.IsMatch(i.autStatesStack[len(i.autStatesStack)-1]) || bytes.Compare(i.keysStack, key) < 0 {
  126. return i.next(maxQ)
  127. }
  128. return nil
  129. }
  130. // Current returns the key and value currently pointed to by the iterator.
  131. // If the iterator is not pointing at a valid value (because Iterator/Next/Seek)
  132. // returned an error previously, it may return nil,0.
  133. func (i *FSTIterator) Current() ([]byte, uint64) {
  134. curr := i.statesStack[len(i.statesStack)-1]
  135. if curr.Final() {
  136. var total uint64
  137. for _, v := range i.valsStack {
  138. total += v
  139. }
  140. total += curr.FinalOutput()
  141. return i.keysStack, total
  142. }
  143. return nil, 0
  144. }
  145. // Next advances this iterator to the next key/value pair. If there is none
  146. // or the advancement goes beyond the configured endKeyExclusive, then
  147. // ErrIteratorDone is returned.
  148. func (i *FSTIterator) Next() error {
  149. return i.next(-1)
  150. }
  151. func (i *FSTIterator) next(lastOffset int) error {
  152. // remember where we started
  153. if cap(i.nextStart) < len(i.keysStack) {
  154. i.nextStart = make([]byte, len(i.keysStack))
  155. } else {
  156. i.nextStart = i.nextStart[0:len(i.keysStack)]
  157. }
  158. copy(i.nextStart, i.keysStack)
  159. for true {
  160. curr := i.statesStack[len(i.statesStack)-1]
  161. autCurr := i.autStatesStack[len(i.autStatesStack)-1]
  162. if curr.Final() && i.aut.IsMatch(autCurr) &&
  163. bytes.Compare(i.keysStack, i.nextStart) > 0 {
  164. // in final state greater than start key
  165. return nil
  166. }
  167. nextOffset := lastOffset + 1
  168. if nextOffset < curr.NumTransitions() {
  169. t := curr.TransitionAt(nextOffset)
  170. autNext := i.aut.Accept(autCurr, t)
  171. if i.aut.CanMatch(autNext) {
  172. pos, nextAddr, v := curr.TransitionFor(t)
  173. // the next slot in the statesStack might have an
  174. // fstState instance that we can reuse
  175. var nextPrealloc fstState
  176. if len(i.statesStack) < cap(i.statesStack) {
  177. nextPrealloc = i.statesStack[0:cap(i.statesStack)][len(i.statesStack)]
  178. }
  179. // push onto stack
  180. next, err := i.f.decoder.stateAt(nextAddr, nextPrealloc)
  181. if err != nil {
  182. return err
  183. }
  184. i.statesStack = append(i.statesStack, next)
  185. i.keysStack = append(i.keysStack, t)
  186. i.keysPosStack = append(i.keysPosStack, pos)
  187. i.valsStack = append(i.valsStack, v)
  188. i.autStatesStack = append(i.autStatesStack, autNext)
  189. lastOffset = -1
  190. // check to see if new keystack might have gone too far
  191. if i.endKeyExclusive != nil && bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 {
  192. return ErrIteratorDone
  193. }
  194. } else {
  195. lastOffset = nextOffset
  196. }
  197. continue
  198. }
  199. if len(i.statesStack) > 1 {
  200. // no transitions, and still room to pop
  201. i.statesStack = i.statesStack[:len(i.statesStack)-1]
  202. i.keysStack = i.keysStack[:len(i.keysStack)-1]
  203. lastOffset = i.keysPosStack[len(i.keysPosStack)-1]
  204. i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-1]
  205. i.valsStack = i.valsStack[:len(i.valsStack)-1]
  206. i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1]
  207. continue
  208. } else {
  209. // stack len is 1 (root), can't go back further, we're done
  210. break
  211. }
  212. }
  213. return ErrIteratorDone
  214. }
  215. // Seek advances this iterator to the specified key/value pair. If this key
  216. // is not in the FST, Current() will return the next largest key. If this
  217. // seek operation would go past the last key, or outside the configured
  218. // startKeyInclusive/endKeyExclusive then ErrIteratorDone is returned.
  219. func (i *FSTIterator) Seek(key []byte) error {
  220. err := i.pointTo(key)
  221. if err != nil {
  222. return err
  223. }
  224. return nil
  225. }
  226. // Close will free any resources held by this iterator.
  227. func (i *FSTIterator) Close() error {
  228. // at the moment we don't do anything, but wanted this for API completeness
  229. return nil
  230. }