You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

fst_iterator.go 8.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. // Copyright (c) 2017 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package vellum
  15. import (
  16. "bytes"
  17. )
  18. // Iterator represents a means of visiting key/value pairs in order.
  19. type Iterator interface {
  20. // Current() returns the key/value pair currently pointed to.
  21. // The []byte of the key is ONLY guaranteed to be valid until
  22. // another call to Next/Seek/Close. If you need it beyond that
  23. // point you MUST make a copy.
  24. Current() ([]byte, uint64)
  25. // Next() advances the iterator to the next key/value pair.
  26. // If no more key/value pairs exist, ErrIteratorDone is returned.
  27. Next() error
  28. // Seek() advances the iterator the specified key, or the next key
  29. // if it does not exist.
  30. // If no keys exist after that point, ErrIteratorDone is returned.
  31. Seek(key []byte) error
  32. // Reset resets the Iterator' internal state to allow for iterator
  33. // reuse (e.g. pooling).
  34. Reset(f *FST, startKeyInclusive, endKeyExclusive []byte, aut Automaton) error
  35. // Close() frees any resources held by this iterator.
  36. Close() error
  37. }
  38. // FSTIterator is a structure for iterating key/value pairs in this FST in
  39. // lexicographic order. Iterators should be constructed with the FSTIterator
  40. // method on the parent FST structure.
  41. type FSTIterator struct {
  42. f *FST
  43. aut Automaton
  44. startKeyInclusive []byte
  45. endKeyExclusive []byte
  46. statesStack []fstState
  47. keysStack []byte
  48. keysPosStack []int
  49. valsStack []uint64
  50. autStatesStack []int
  51. nextStart []byte
  52. }
  53. func newIterator(f *FST, startKeyInclusive, endKeyExclusive []byte,
  54. aut Automaton) (*FSTIterator, error) {
  55. rv := &FSTIterator{}
  56. err := rv.Reset(f, startKeyInclusive, endKeyExclusive, aut)
  57. if err != nil {
  58. return nil, err
  59. }
  60. return rv, nil
  61. }
  62. // Reset resets the Iterator' internal state to allow for iterator
  63. // reuse (e.g. pooling).
  64. func (i *FSTIterator) Reset(f *FST,
  65. startKeyInclusive, endKeyExclusive []byte, aut Automaton) error {
  66. if aut == nil {
  67. aut = alwaysMatchAutomaton
  68. }
  69. i.f = f
  70. i.startKeyInclusive = startKeyInclusive
  71. i.endKeyExclusive = endKeyExclusive
  72. i.aut = aut
  73. return i.pointTo(startKeyInclusive)
  74. }
  75. // pointTo attempts to point us to the specified location
  76. func (i *FSTIterator) pointTo(key []byte) error {
  77. // tried to seek before start
  78. if bytes.Compare(key, i.startKeyInclusive) < 0 {
  79. key = i.startKeyInclusive
  80. }
  81. // tried to see past end
  82. if i.endKeyExclusive != nil &&
  83. bytes.Compare(key, i.endKeyExclusive) > 0 {
  84. key = i.endKeyExclusive
  85. }
  86. // reset any state, pointTo always starts over
  87. i.statesStack = i.statesStack[:0]
  88. i.keysStack = i.keysStack[:0]
  89. i.keysPosStack = i.keysPosStack[:0]
  90. i.valsStack = i.valsStack[:0]
  91. i.autStatesStack = i.autStatesStack[:0]
  92. root, err := i.f.decoder.stateAt(i.f.decoder.getRoot(), nil)
  93. if err != nil {
  94. return err
  95. }
  96. autStart := i.aut.Start()
  97. maxQ := -1
  98. // root is always part of the path
  99. i.statesStack = append(i.statesStack, root)
  100. i.autStatesStack = append(i.autStatesStack, autStart)
  101. for j := 0; j < len(key); j++ {
  102. keyJ := key[j]
  103. curr := i.statesStack[len(i.statesStack)-1]
  104. autCurr := i.autStatesStack[len(i.autStatesStack)-1]
  105. pos, nextAddr, nextVal := curr.TransitionFor(keyJ)
  106. if nextAddr == noneAddr {
  107. // needed transition doesn't exist
  108. // find last trans before the one we needed
  109. for q := curr.NumTransitions() - 1; q >= 0; q-- {
  110. if curr.TransitionAt(q) < keyJ {
  111. maxQ = q
  112. break
  113. }
  114. }
  115. break
  116. }
  117. autNext := i.aut.Accept(autCurr, keyJ)
  118. next, err := i.f.decoder.stateAt(nextAddr, nil)
  119. if err != nil {
  120. return err
  121. }
  122. i.statesStack = append(i.statesStack, next)
  123. i.keysStack = append(i.keysStack, keyJ)
  124. i.keysPosStack = append(i.keysPosStack, pos)
  125. i.valsStack = append(i.valsStack, nextVal)
  126. i.autStatesStack = append(i.autStatesStack, autNext)
  127. continue
  128. }
  129. if !i.statesStack[len(i.statesStack)-1].Final() ||
  130. !i.aut.IsMatch(i.autStatesStack[len(i.autStatesStack)-1]) ||
  131. bytes.Compare(i.keysStack, key) < 0 {
  132. return i.next(maxQ)
  133. }
  134. return nil
  135. }
  136. // Current returns the key and value currently pointed to by the iterator.
  137. // If the iterator is not pointing at a valid value (because Iterator/Next/Seek)
  138. // returned an error previously, it may return nil,0.
  139. func (i *FSTIterator) Current() ([]byte, uint64) {
  140. curr := i.statesStack[len(i.statesStack)-1]
  141. if curr.Final() {
  142. var total uint64
  143. for _, v := range i.valsStack {
  144. total += v
  145. }
  146. total += curr.FinalOutput()
  147. return i.keysStack, total
  148. }
  149. return nil, 0
  150. }
  151. // Next advances this iterator to the next key/value pair. If there is none
  152. // or the advancement goes beyond the configured endKeyExclusive, then
  153. // ErrIteratorDone is returned.
  154. func (i *FSTIterator) Next() error {
  155. return i.next(-1)
  156. }
  157. func (i *FSTIterator) next(lastOffset int) error {
  158. // remember where we started with keysStack in this next() call
  159. i.nextStart = append(i.nextStart[:0], i.keysStack...)
  160. nextOffset := lastOffset + 1
  161. allowCompare := false
  162. OUTER:
  163. for true {
  164. curr := i.statesStack[len(i.statesStack)-1]
  165. autCurr := i.autStatesStack[len(i.autStatesStack)-1]
  166. if curr.Final() && i.aut.IsMatch(autCurr) && allowCompare {
  167. // check to see if new keystack might have gone too far
  168. if i.endKeyExclusive != nil &&
  169. bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 {
  170. return ErrIteratorDone
  171. }
  172. cmp := bytes.Compare(i.keysStack, i.nextStart)
  173. if cmp > 0 {
  174. // in final state greater than start key
  175. return nil
  176. }
  177. }
  178. numTrans := curr.NumTransitions()
  179. INNER:
  180. for nextOffset < numTrans {
  181. t := curr.TransitionAt(nextOffset)
  182. autNext := i.aut.Accept(autCurr, t)
  183. if !i.aut.CanMatch(autNext) {
  184. // TODO: potential optimization to skip nextOffset
  185. // forwards more directly to something that the
  186. // automaton likes rather than a linear scan?
  187. nextOffset += 1
  188. continue INNER
  189. }
  190. pos, nextAddr, v := curr.TransitionFor(t)
  191. // the next slot in the statesStack might have an
  192. // fstState instance that we can reuse
  193. var nextPrealloc fstState
  194. if len(i.statesStack) < cap(i.statesStack) {
  195. nextPrealloc = i.statesStack[0:cap(i.statesStack)][len(i.statesStack)]
  196. }
  197. // push onto stack
  198. next, err := i.f.decoder.stateAt(nextAddr, nextPrealloc)
  199. if err != nil {
  200. return err
  201. }
  202. i.statesStack = append(i.statesStack, next)
  203. i.keysStack = append(i.keysStack, t)
  204. i.keysPosStack = append(i.keysPosStack, pos)
  205. i.valsStack = append(i.valsStack, v)
  206. i.autStatesStack = append(i.autStatesStack, autNext)
  207. nextOffset = 0
  208. allowCompare = true
  209. continue OUTER
  210. }
  211. // no more transitions, so need to backtrack and stack pop
  212. if len(i.statesStack) <= 1 {
  213. // stack len is 1 (root), can't go back further, we're done
  214. break
  215. }
  216. // if the top of the stack represents a linear chain of states
  217. // (i.e., a suffix of nodes linked by single transitions),
  218. // then optimize by popping the suffix in one shot without
  219. // going back all the way to the OUTER loop
  220. var popNum int
  221. for j := len(i.statesStack) - 1; j > 0; j-- {
  222. if j == 1 || i.statesStack[j].NumTransitions() != 1 {
  223. popNum = len(i.statesStack) - 1 - j
  224. break
  225. }
  226. }
  227. if popNum < 1 { // always pop at least 1 entry from the stacks
  228. popNum = 1
  229. }
  230. nextOffset = i.keysPosStack[len(i.keysPosStack)-popNum] + 1
  231. allowCompare = false
  232. i.statesStack = i.statesStack[:len(i.statesStack)-popNum]
  233. i.keysStack = i.keysStack[:len(i.keysStack)-popNum]
  234. i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-popNum]
  235. i.valsStack = i.valsStack[:len(i.valsStack)-popNum]
  236. i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-popNum]
  237. }
  238. return ErrIteratorDone
  239. }
  240. // Seek advances this iterator to the specified key/value pair. If this key
  241. // is not in the FST, Current() will return the next largest key. If this
  242. // seek operation would go past the last key, or outside the configured
  243. // startKeyInclusive/endKeyExclusive then ErrIteratorDone is returned.
  244. func (i *FSTIterator) Seek(key []byte) error {
  245. return i.pointTo(key)
  246. }
  247. // Close will free any resources held by this iterator.
  248. func (i *FSTIterator) Close() error {
  249. // at the moment we don't do anything,
  250. // but wanted this for API completeness
  251. return nil
  252. }