You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

dict.go 4.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. // Copyright (c) 2017 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package zap
  15. import (
  16. "fmt"
  17. "github.com/RoaringBitmap/roaring"
  18. index "github.com/blevesearch/bleve_index_api"
  19. segment "github.com/blevesearch/scorch_segment_api/v2"
  20. "github.com/blevesearch/vellum"
  21. )
  22. // Dictionary is the zap representation of the term dictionary
  23. type Dictionary struct {
  24. sb *SegmentBase
  25. field string
  26. fieldID uint16
  27. fst *vellum.FST
  28. fstReader *vellum.Reader
  29. }
  30. // represents an immutable, empty dictionary
  31. var emptyDictionary = &Dictionary{}
  32. // PostingsList returns the postings list for the specified term
  33. func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap,
  34. prealloc segment.PostingsList) (segment.PostingsList, error) {
  35. var preallocPL *PostingsList
  36. pl, ok := prealloc.(*PostingsList)
  37. if ok && pl != nil {
  38. preallocPL = pl
  39. }
  40. return d.postingsList(term, except, preallocPL)
  41. }
  42. func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
  43. if d.fstReader == nil {
  44. if rv == nil || rv == emptyPostingsList {
  45. return emptyPostingsList, nil
  46. }
  47. return d.postingsListInit(rv, except), nil
  48. }
  49. postingsOffset, exists, err := d.fstReader.Get(term)
  50. if err != nil {
  51. return nil, fmt.Errorf("vellum err: %v", err)
  52. }
  53. if !exists {
  54. if rv == nil || rv == emptyPostingsList {
  55. return emptyPostingsList, nil
  56. }
  57. return d.postingsListInit(rv, except), nil
  58. }
  59. return d.postingsListFromOffset(postingsOffset, except, rv)
  60. }
  61. func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
  62. rv = d.postingsListInit(rv, except)
  63. err := rv.read(postingsOffset, d)
  64. if err != nil {
  65. return nil, err
  66. }
  67. return rv, nil
  68. }
  69. func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList {
  70. if rv == nil || rv == emptyPostingsList {
  71. rv = &PostingsList{}
  72. } else {
  73. postings := rv.postings
  74. if postings != nil {
  75. postings.Clear()
  76. }
  77. *rv = PostingsList{} // clear the struct
  78. rv.postings = postings
  79. }
  80. rv.sb = d.sb
  81. rv.except = except
  82. return rv
  83. }
  84. func (d *Dictionary) Contains(key []byte) (bool, error) {
  85. if d.fst != nil {
  86. return d.fst.Contains(key)
  87. }
  88. return false, nil
  89. }
  90. // AutomatonIterator returns an iterator which only visits terms
  91. // having the the vellum automaton and start/end key range
  92. func (d *Dictionary) AutomatonIterator(a segment.Automaton,
  93. startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator {
  94. if d.fst != nil {
  95. rv := &DictionaryIterator{
  96. d: d,
  97. }
  98. itr, err := d.fst.Search(a, startKeyInclusive, endKeyExclusive)
  99. if err == nil {
  100. rv.itr = itr
  101. } else if err != vellum.ErrIteratorDone {
  102. rv.err = err
  103. }
  104. return rv
  105. }
  106. return emptyDictionaryIterator
  107. }
  108. // DictionaryIterator is an iterator for term dictionary
  109. type DictionaryIterator struct {
  110. d *Dictionary
  111. itr vellum.Iterator
  112. err error
  113. tmp PostingsList
  114. entry index.DictEntry
  115. omitCount bool
  116. }
  117. var emptyDictionaryIterator = &DictionaryIterator{}
  118. // Next returns the next entry in the dictionary
  119. func (i *DictionaryIterator) Next() (*index.DictEntry, error) {
  120. if i.err != nil && i.err != vellum.ErrIteratorDone {
  121. return nil, i.err
  122. } else if i.itr == nil || i.err == vellum.ErrIteratorDone {
  123. return nil, nil
  124. }
  125. term, postingsOffset := i.itr.Current()
  126. i.entry.Term = string(term)
  127. if !i.omitCount {
  128. i.err = i.tmp.read(postingsOffset, i.d)
  129. if i.err != nil {
  130. return nil, i.err
  131. }
  132. i.entry.Count = i.tmp.Count()
  133. }
  134. i.err = i.itr.Next()
  135. return &i.entry, nil
  136. }