You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. // Copyright (c) 2017 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package zap
import (
	"fmt"
	stdregexp "regexp"

	"github.com/RoaringBitmap/roaring"
	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/index/scorch/segment"
	"github.com/couchbase/vellum"
	"github.com/couchbase/vellum/regexp"
)
  23. // Dictionary is the zap representation of the term dictionary
  24. type Dictionary struct {
  25. sb *SegmentBase
  26. field string
  27. fieldID uint16
  28. fst *vellum.FST
  29. }
  30. // PostingsList returns the postings list for the specified term
  31. func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) {
  32. return d.postingsList([]byte(term), except, nil)
  33. }
  34. func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
  35. if d.fst == nil {
  36. return d.postingsListInit(rv, except), nil
  37. }
  38. postingsOffset, exists, err := d.fst.Get(term)
  39. if err != nil {
  40. return nil, fmt.Errorf("vellum err: %v", err)
  41. }
  42. if !exists {
  43. return d.postingsListInit(rv, except), nil
  44. }
  45. return d.postingsListFromOffset(postingsOffset, except, rv)
  46. }
  47. func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
  48. rv = d.postingsListInit(rv, except)
  49. err := rv.read(postingsOffset, d)
  50. if err != nil {
  51. return nil, err
  52. }
  53. return rv, nil
  54. }
  55. func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList {
  56. if rv == nil {
  57. rv = &PostingsList{}
  58. } else {
  59. *rv = PostingsList{} // clear the struct
  60. }
  61. rv.sb = d.sb
  62. rv.except = except
  63. return rv
  64. }
  65. // Iterator returns an iterator for this dictionary
  66. func (d *Dictionary) Iterator() segment.DictionaryIterator {
  67. rv := &DictionaryIterator{
  68. d: d,
  69. }
  70. if d.fst != nil {
  71. itr, err := d.fst.Iterator(nil, nil)
  72. if err == nil {
  73. rv.itr = itr
  74. }
  75. }
  76. return rv
  77. }
  78. // PrefixIterator returns an iterator which only visits terms having the
  79. // the specified prefix
  80. func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator {
  81. rv := &DictionaryIterator{
  82. d: d,
  83. }
  84. if d.fst != nil {
  85. r, err := regexp.New(prefix + ".*")
  86. if err == nil {
  87. itr, err := d.fst.Search(r, nil, nil)
  88. if err == nil {
  89. rv.itr = itr
  90. }
  91. }
  92. }
  93. return rv
  94. }
  95. // RangeIterator returns an iterator which only visits terms between the
  96. // start and end terms. NOTE: bleve.index API specifies the end is inclusive.
  97. func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator {
  98. rv := &DictionaryIterator{
  99. d: d,
  100. }
  101. // need to increment the end position to be inclusive
  102. endBytes := []byte(end)
  103. if endBytes[len(endBytes)-1] < 0xff {
  104. endBytes[len(endBytes)-1]++
  105. } else {
  106. endBytes = append(endBytes, 0xff)
  107. }
  108. if d.fst != nil {
  109. itr, err := d.fst.Iterator([]byte(start), endBytes)
  110. if err == nil {
  111. rv.itr = itr
  112. }
  113. }
  114. return rv
  115. }
  116. // DictionaryIterator is an iterator for term dictionary
  117. type DictionaryIterator struct {
  118. d *Dictionary
  119. itr vellum.Iterator
  120. err error
  121. tmp PostingsList
  122. }
  123. // Next returns the next entry in the dictionary
  124. func (i *DictionaryIterator) Next() (*index.DictEntry, error) {
  125. if i.itr == nil || i.err == vellum.ErrIteratorDone {
  126. return nil, nil
  127. } else if i.err != nil {
  128. return nil, i.err
  129. }
  130. term, postingsOffset := i.itr.Current()
  131. i.err = i.tmp.read(postingsOffset, i.d)
  132. if i.err != nil {
  133. return nil, i.err
  134. }
  135. rv := &index.DictEntry{
  136. Term: string(term),
  137. Count: i.tmp.Count(),
  138. }
  139. i.err = i.itr.Next()
  140. return rv, nil
  141. }