You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

snapshot_index_tfr.go 5.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. // Copyright (c) 2017 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package scorch
  15. import (
  16. "bytes"
  17. "fmt"
  18. "reflect"
  19. "sync/atomic"
  20. "github.com/blevesearch/bleve/v2/size"
  21. index "github.com/blevesearch/bleve_index_api"
  22. segment "github.com/blevesearch/scorch_segment_api/v2"
  23. )
  24. var reflectStaticSizeIndexSnapshotTermFieldReader int
  25. func init() {
  26. var istfr IndexSnapshotTermFieldReader
  27. reflectStaticSizeIndexSnapshotTermFieldReader = int(reflect.TypeOf(istfr).Size())
  28. }
  29. type IndexSnapshotTermFieldReader struct {
  30. term []byte
  31. field string
  32. snapshot *IndexSnapshot
  33. dicts []segment.TermDictionary
  34. postings []segment.PostingsList
  35. iterators []segment.PostingsIterator
  36. segmentOffset int
  37. includeFreq bool
  38. includeNorm bool
  39. includeTermVectors bool
  40. currPosting segment.Posting
  41. currID index.IndexInternalID
  42. recycle bool
  43. }
  44. func (i *IndexSnapshotTermFieldReader) Size() int {
  45. sizeInBytes := reflectStaticSizeIndexSnapshotTermFieldReader + size.SizeOfPtr +
  46. len(i.term) +
  47. len(i.field) +
  48. len(i.currID)
  49. for _, entry := range i.postings {
  50. sizeInBytes += entry.Size()
  51. }
  52. for _, entry := range i.iterators {
  53. sizeInBytes += entry.Size()
  54. }
  55. if i.currPosting != nil {
  56. sizeInBytes += i.currPosting.Size()
  57. }
  58. return sizeInBytes
  59. }
  60. func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
  61. rv := preAlloced
  62. if rv == nil {
  63. rv = &index.TermFieldDoc{}
  64. }
  65. // find the next hit
  66. for i.segmentOffset < len(i.iterators) {
  67. next, err := i.iterators[i.segmentOffset].Next()
  68. if err != nil {
  69. return nil, err
  70. }
  71. if next != nil {
  72. // make segment number into global number by adding offset
  73. globalOffset := i.snapshot.offsets[i.segmentOffset]
  74. nnum := next.Number()
  75. rv.ID = docNumberToBytes(rv.ID, nnum+globalOffset)
  76. i.postingToTermFieldDoc(next, rv)
  77. i.currID = rv.ID
  78. i.currPosting = next
  79. return rv, nil
  80. }
  81. i.segmentOffset++
  82. }
  83. return nil, nil
  84. }
  85. func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Posting, rv *index.TermFieldDoc) {
  86. if i.includeFreq {
  87. rv.Freq = next.Frequency()
  88. }
  89. if i.includeNorm {
  90. rv.Norm = next.Norm()
  91. }
  92. if i.includeTermVectors {
  93. locs := next.Locations()
  94. if cap(rv.Vectors) < len(locs) {
  95. rv.Vectors = make([]*index.TermFieldVector, len(locs))
  96. backing := make([]index.TermFieldVector, len(locs))
  97. for i := range backing {
  98. rv.Vectors[i] = &backing[i]
  99. }
  100. }
  101. rv.Vectors = rv.Vectors[:len(locs)]
  102. for i, loc := range locs {
  103. *rv.Vectors[i] = index.TermFieldVector{
  104. Start: loc.Start(),
  105. End: loc.End(),
  106. Pos: loc.Pos(),
  107. ArrayPositions: loc.ArrayPositions(),
  108. Field: loc.Field(),
  109. }
  110. }
  111. }
  112. }
  113. func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
  114. // FIXME do something better
  115. // for now, if we need to seek backwards, then restart from the beginning
  116. if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 {
  117. i2, err := i.snapshot.TermFieldReader(i.term, i.field,
  118. i.includeFreq, i.includeNorm, i.includeTermVectors)
  119. if err != nil {
  120. return nil, err
  121. }
  122. // close the current term field reader before replacing it with a new one
  123. _ = i.Close()
  124. *i = *(i2.(*IndexSnapshotTermFieldReader))
  125. }
  126. num, err := docInternalToNumber(ID)
  127. if err != nil {
  128. return nil, fmt.Errorf("error converting to doc number % x - %v", ID, err)
  129. }
  130. segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num)
  131. if segIndex >= len(i.snapshot.segment) {
  132. return nil, fmt.Errorf("computed segment index %d out of bounds %d",
  133. segIndex, len(i.snapshot.segment))
  134. }
  135. // skip directly to the target segment
  136. i.segmentOffset = segIndex
  137. next, err := i.iterators[i.segmentOffset].Advance(ldocNum)
  138. if err != nil {
  139. return nil, err
  140. }
  141. if next == nil {
  142. // we jumped directly to the segment that should have contained it
  143. // but it wasn't there, so reuse Next() which should correctly
  144. // get the next hit after it (we moved i.segmentOffset)
  145. return i.Next(preAlloced)
  146. }
  147. if preAlloced == nil {
  148. preAlloced = &index.TermFieldDoc{}
  149. }
  150. preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+
  151. i.snapshot.offsets[segIndex])
  152. i.postingToTermFieldDoc(next, preAlloced)
  153. i.currID = preAlloced.ID
  154. i.currPosting = next
  155. return preAlloced, nil
  156. }
  157. func (i *IndexSnapshotTermFieldReader) Count() uint64 {
  158. var rv uint64
  159. for _, posting := range i.postings {
  160. rv += posting.Count()
  161. }
  162. return rv
  163. }
  164. func (i *IndexSnapshotTermFieldReader) Close() error {
  165. if i.snapshot != nil {
  166. atomic.AddUint64(&i.snapshot.parent.stats.TotTermSearchersFinished, uint64(1))
  167. i.snapshot.recycleTermFieldReader(i)
  168. }
  169. return nil
  170. }