You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

dict.go 6.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. // Copyright (c) 2017 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package zap
  15. import (
  16. "bytes"
  17. "fmt"
  18. "github.com/RoaringBitmap/roaring"
  19. "github.com/blevesearch/bleve/index"
  20. "github.com/blevesearch/bleve/index/scorch/segment"
  21. "github.com/couchbase/vellum"
  22. )
  23. // Dictionary is the zap representation of the term dictionary
  24. type Dictionary struct {
  25. sb *SegmentBase
  26. field string
  27. fieldID uint16
  28. fst *vellum.FST
  29. fstReader *vellum.Reader
  30. }
  31. // PostingsList returns the postings list for the specified term
  32. func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap,
  33. prealloc segment.PostingsList) (segment.PostingsList, error) {
  34. var preallocPL *PostingsList
  35. pl, ok := prealloc.(*PostingsList)
  36. if ok && pl != nil {
  37. preallocPL = pl
  38. }
  39. return d.postingsList(term, except, preallocPL)
  40. }
  41. func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
  42. if d.fstReader == nil {
  43. if rv == nil || rv == emptyPostingsList {
  44. return emptyPostingsList, nil
  45. }
  46. return d.postingsListInit(rv, except), nil
  47. }
  48. postingsOffset, exists, err := d.fstReader.Get(term)
  49. if err != nil {
  50. return nil, fmt.Errorf("vellum err: %v", err)
  51. }
  52. if !exists {
  53. if rv == nil || rv == emptyPostingsList {
  54. return emptyPostingsList, nil
  55. }
  56. return d.postingsListInit(rv, except), nil
  57. }
  58. return d.postingsListFromOffset(postingsOffset, except, rv)
  59. }
  60. func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
  61. rv = d.postingsListInit(rv, except)
  62. err := rv.read(postingsOffset, d)
  63. if err != nil {
  64. return nil, err
  65. }
  66. return rv, nil
  67. }
  68. func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList {
  69. if rv == nil || rv == emptyPostingsList {
  70. rv = &PostingsList{}
  71. } else {
  72. postings := rv.postings
  73. if postings != nil {
  74. postings.Clear()
  75. }
  76. *rv = PostingsList{} // clear the struct
  77. rv.postings = postings
  78. }
  79. rv.sb = d.sb
  80. rv.except = except
  81. return rv
  82. }
  83. func (d *Dictionary) Contains(key []byte) (bool, error) {
  84. return d.fst.Contains(key)
  85. }
  86. // Iterator returns an iterator for this dictionary
  87. func (d *Dictionary) Iterator() segment.DictionaryIterator {
  88. rv := &DictionaryIterator{
  89. d: d,
  90. }
  91. if d.fst != nil {
  92. itr, err := d.fst.Iterator(nil, nil)
  93. if err == nil {
  94. rv.itr = itr
  95. } else if err != vellum.ErrIteratorDone {
  96. rv.err = err
  97. }
  98. }
  99. return rv
  100. }
  101. // PrefixIterator returns an iterator which only visits terms having the
  102. // the specified prefix
  103. func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator {
  104. rv := &DictionaryIterator{
  105. d: d,
  106. }
  107. kBeg := []byte(prefix)
  108. kEnd := segment.IncrementBytes(kBeg)
  109. if d.fst != nil {
  110. itr, err := d.fst.Iterator(kBeg, kEnd)
  111. if err == nil {
  112. rv.itr = itr
  113. } else if err != vellum.ErrIteratorDone {
  114. rv.err = err
  115. }
  116. }
  117. return rv
  118. }
  119. // RangeIterator returns an iterator which only visits terms between the
  120. // start and end terms. NOTE: bleve.index API specifies the end is inclusive.
  121. func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator {
  122. rv := &DictionaryIterator{
  123. d: d,
  124. }
  125. // need to increment the end position to be inclusive
  126. var endBytes []byte
  127. if len(end) > 0 {
  128. endBytes = []byte(end)
  129. if endBytes[len(endBytes)-1] < 0xff {
  130. endBytes[len(endBytes)-1]++
  131. } else {
  132. endBytes = append(endBytes, 0xff)
  133. }
  134. }
  135. if d.fst != nil {
  136. itr, err := d.fst.Iterator([]byte(start), endBytes)
  137. if err == nil {
  138. rv.itr = itr
  139. } else if err != vellum.ErrIteratorDone {
  140. rv.err = err
  141. }
  142. }
  143. return rv
  144. }
  145. // AutomatonIterator returns an iterator which only visits terms
  146. // having the the vellum automaton and start/end key range
  147. func (d *Dictionary) AutomatonIterator(a vellum.Automaton,
  148. startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator {
  149. rv := &DictionaryIterator{
  150. d: d,
  151. }
  152. if d.fst != nil {
  153. itr, err := d.fst.Search(a, startKeyInclusive, endKeyExclusive)
  154. if err == nil {
  155. rv.itr = itr
  156. } else if err != vellum.ErrIteratorDone {
  157. rv.err = err
  158. }
  159. }
  160. return rv
  161. }
  162. func (d *Dictionary) OnlyIterator(onlyTerms [][]byte,
  163. includeCount bool) segment.DictionaryIterator {
  164. rv := &DictionaryIterator{
  165. d: d,
  166. omitCount: !includeCount,
  167. }
  168. var buf bytes.Buffer
  169. builder, err := vellum.New(&buf, nil)
  170. if err != nil {
  171. rv.err = err
  172. return rv
  173. }
  174. for _, term := range onlyTerms {
  175. err = builder.Insert(term, 0)
  176. if err != nil {
  177. rv.err = err
  178. return rv
  179. }
  180. }
  181. err = builder.Close()
  182. if err != nil {
  183. rv.err = err
  184. return rv
  185. }
  186. onlyFST, err := vellum.Load(buf.Bytes())
  187. if err != nil {
  188. rv.err = err
  189. return rv
  190. }
  191. itr, err := d.fst.Search(onlyFST, nil, nil)
  192. if err == nil {
  193. rv.itr = itr
  194. } else if err != vellum.ErrIteratorDone {
  195. rv.err = err
  196. }
  197. return rv
  198. }
  199. // DictionaryIterator is an iterator for term dictionary
  200. type DictionaryIterator struct {
  201. d *Dictionary
  202. itr vellum.Iterator
  203. err error
  204. tmp PostingsList
  205. entry index.DictEntry
  206. omitCount bool
  207. }
  208. // Next returns the next entry in the dictionary
  209. func (i *DictionaryIterator) Next() (*index.DictEntry, error) {
  210. if i.err != nil && i.err != vellum.ErrIteratorDone {
  211. return nil, i.err
  212. } else if i.itr == nil || i.err == vellum.ErrIteratorDone {
  213. return nil, nil
  214. }
  215. term, postingsOffset := i.itr.Current()
  216. i.entry.Term = string(term)
  217. if !i.omitCount {
  218. i.err = i.tmp.read(postingsOffset, i.d)
  219. if i.err != nil {
  220. return nil, i.err
  221. }
  222. i.entry.Count = i.tmp.Count()
  223. }
  224. i.err = i.itr.Next()
  225. return &i.entry, nil
  226. }