You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

posting.go 24KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908
  1. // Copyright (c) 2017 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package zap
  15. import (
  16. "encoding/binary"
  17. "fmt"
  18. "math"
  19. "reflect"
  20. "github.com/RoaringBitmap/roaring"
  21. segment "github.com/blevesearch/scorch_segment_api/v2"
  22. )
  23. var reflectStaticSizePostingsList int
  24. var reflectStaticSizePostingsIterator int
  25. var reflectStaticSizePosting int
  26. var reflectStaticSizeLocation int
  27. func init() {
  28. var pl PostingsList
  29. reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size())
  30. var pi PostingsIterator
  31. reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size())
  32. var p Posting
  33. reflectStaticSizePosting = int(reflect.TypeOf(p).Size())
  34. var l Location
  35. reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
  36. }
  37. // FST or vellum value (uint64) encoding is determined by the top two
  38. // highest-order or most significant bits...
  39. //
  40. // encoding : MSB
  41. // name : 63 62 61...to...bit #0 (LSB)
  42. // ----------+---+---+---------------------------------------------------
  43. // general : 0 | 0 | 62-bits of postingsOffset.
  44. // ~ : 0 | 1 | reserved for future.
  45. // 1-hit : 1 | 0 | 31-bits of positive float31 norm | 31-bits docNum.
  46. // ~ : 1 | 1 | reserved for future.
  47. //
  48. // Encoding "general" is able to handle all cases, where the
  49. // postingsOffset points to more information about the postings for
  50. // the term.
  51. //
  52. // Encoding "1-hit" is used to optimize a commonly seen case when a
  53. // term has only a single hit. For example, a term in the _id field
  54. // will have only 1 hit. The "1-hit" encoding is used for a term
  55. // in a field when...
  56. //
  57. // - term vector info is disabled for that field;
  58. // - and, the term appears in only a single doc for that field;
  59. // - and, the term's freq is exactly 1 in that single doc for that field;
  60. // - and, the docNum must fit into 31-bits;
  61. //
  62. // Otherwise, the "general" encoding is used instead.
  63. //
  64. // In the "1-hit" encoding, the field in that single doc may have
  65. // other terms, which is supported in the "1-hit" encoding by the
  66. // positive float31 norm.
  67. const FSTValEncodingMask = uint64(0xc000000000000000)
  68. const FSTValEncodingGeneral = uint64(0x0000000000000000)
  69. const FSTValEncoding1Hit = uint64(0x8000000000000000)
  70. func FSTValEncode1Hit(docNum uint64, normBits uint64) uint64 {
  71. return FSTValEncoding1Hit | ((mask31Bits & normBits) << 31) | (mask31Bits & docNum)
  72. }
  73. func FSTValDecode1Hit(v uint64) (docNum uint64, normBits uint64) {
  74. return (mask31Bits & v), (mask31Bits & (v >> 31))
  75. }
  76. const mask31Bits = uint64(0x000000007fffffff)
  77. func under32Bits(x uint64) bool {
  78. return x <= mask31Bits
  79. }
  80. const DocNum1HitFinished = math.MaxUint64
  81. var NormBits1Hit = uint64(math.Float32bits(float32(1)))
// PostingsList is an in-memory representation of a postings list
type PostingsList struct {
	// sb is the segment whose mem bytes hold the encoded postings data.
	sb *SegmentBase
	// postingsOffset is the raw value handed to read(); for the "1-hit"
	// encoding it carries the packed docNum/norm rather than an offset.
	postingsOffset uint64
	// freqOffset and locOffset are the positions in sb.mem of the freq/norm
	// and location chunk-offset tables, as decoded by read().
	freqOffset uint64
	locOffset  uint64
	// postings holds every docNum on this list ("general" encoding only).
	postings *roaring.Bitmap
	// except, when non-nil, holds docNums that Count and iterators leave out.
	except *roaring.Bitmap

	// when normBits1Hit != 0, then this postings list came from a
	// 1-hit encoding, and only the docNum1Hit & normBits1Hit apply
	docNum1Hit   uint64
	normBits1Hit uint64
}

// represents an immutable, empty postings list
var emptyPostingsList = &PostingsList{}
  97. func (p *PostingsList) Size() int {
  98. sizeInBytes := reflectStaticSizePostingsList + SizeOfPtr
  99. if p.except != nil {
  100. sizeInBytes += int(p.except.GetSizeInBytes())
  101. }
  102. return sizeInBytes
  103. }
  104. func (p *PostingsList) OrInto(receiver *roaring.Bitmap) {
  105. if p.normBits1Hit != 0 {
  106. receiver.Add(uint32(p.docNum1Hit))
  107. return
  108. }
  109. if p.postings != nil {
  110. receiver.Or(p.postings)
  111. }
  112. }
  113. // Iterator returns an iterator for this postings list
  114. func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool,
  115. prealloc segment.PostingsIterator) segment.PostingsIterator {
  116. if p.normBits1Hit == 0 && p.postings == nil {
  117. return emptyPostingsIterator
  118. }
  119. var preallocPI *PostingsIterator
  120. pi, ok := prealloc.(*PostingsIterator)
  121. if ok && pi != nil {
  122. preallocPI = pi
  123. }
  124. if preallocPI == emptyPostingsIterator {
  125. preallocPI = nil
  126. }
  127. return p.iterator(includeFreq, includeNorm, includeLocs, preallocPI)
  128. }
  129. func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
  130. rv *PostingsIterator) *PostingsIterator {
  131. if rv == nil {
  132. rv = &PostingsIterator{}
  133. } else {
  134. freqNormReader := rv.freqNormReader
  135. if freqNormReader != nil {
  136. freqNormReader.Reset([]byte(nil))
  137. }
  138. locReader := rv.locReader
  139. if locReader != nil {
  140. locReader.Reset([]byte(nil))
  141. }
  142. freqChunkOffsets := rv.freqChunkOffsets[:0]
  143. locChunkOffsets := rv.locChunkOffsets[:0]
  144. nextLocs := rv.nextLocs[:0]
  145. nextSegmentLocs := rv.nextSegmentLocs[:0]
  146. buf := rv.buf
  147. *rv = PostingsIterator{} // clear the struct
  148. rv.freqNormReader = freqNormReader
  149. rv.locReader = locReader
  150. rv.freqChunkOffsets = freqChunkOffsets
  151. rv.locChunkOffsets = locChunkOffsets
  152. rv.nextLocs = nextLocs
  153. rv.nextSegmentLocs = nextSegmentLocs
  154. rv.buf = buf
  155. }
  156. rv.postings = p
  157. rv.includeFreqNorm = includeFreq || includeNorm || includeLocs
  158. rv.includeLocs = includeLocs
  159. if p.normBits1Hit != 0 {
  160. // "1-hit" encoding
  161. rv.docNum1Hit = p.docNum1Hit
  162. rv.normBits1Hit = p.normBits1Hit
  163. if p.except != nil && p.except.Contains(uint32(rv.docNum1Hit)) {
  164. rv.docNum1Hit = DocNum1HitFinished
  165. }
  166. return rv
  167. }
  168. // "general" encoding, check if empty
  169. if p.postings == nil {
  170. return rv
  171. }
  172. var n uint64
  173. var read int
  174. // prepare the freq chunk details
  175. if rv.includeFreqNorm {
  176. var numFreqChunks uint64
  177. numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
  178. n += uint64(read)
  179. if cap(rv.freqChunkOffsets) >= int(numFreqChunks) {
  180. rv.freqChunkOffsets = rv.freqChunkOffsets[:int(numFreqChunks)]
  181. } else {
  182. rv.freqChunkOffsets = make([]uint64, int(numFreqChunks))
  183. }
  184. for i := 0; i < int(numFreqChunks); i++ {
  185. rv.freqChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
  186. n += uint64(read)
  187. }
  188. rv.freqChunkStart = p.freqOffset + n
  189. }
  190. // prepare the loc chunk details
  191. if rv.includeLocs {
  192. n = 0
  193. var numLocChunks uint64
  194. numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
  195. n += uint64(read)
  196. if cap(rv.locChunkOffsets) >= int(numLocChunks) {
  197. rv.locChunkOffsets = rv.locChunkOffsets[:int(numLocChunks)]
  198. } else {
  199. rv.locChunkOffsets = make([]uint64, int(numLocChunks))
  200. }
  201. for i := 0; i < int(numLocChunks); i++ {
  202. rv.locChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
  203. n += uint64(read)
  204. }
  205. rv.locChunkStart = p.locOffset + n
  206. }
  207. rv.all = p.postings.Iterator()
  208. if p.except != nil {
  209. rv.ActualBM = roaring.AndNot(p.postings, p.except)
  210. rv.Actual = rv.ActualBM.Iterator()
  211. } else {
  212. rv.ActualBM = p.postings
  213. rv.Actual = rv.all // Optimize to use same iterator for all & Actual.
  214. }
  215. return rv
  216. }
  217. // Count returns the number of items on this postings list
  218. func (p *PostingsList) Count() uint64 {
  219. var n, e uint64
  220. if p.normBits1Hit != 0 {
  221. n = 1
  222. if p.except != nil && p.except.Contains(uint32(p.docNum1Hit)) {
  223. e = 1
  224. }
  225. } else if p.postings != nil {
  226. n = p.postings.GetCardinality()
  227. if p.except != nil {
  228. e = p.postings.AndCardinality(p.except)
  229. }
  230. }
  231. return n - e
  232. }
  233. func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
  234. rv.postingsOffset = postingsOffset
  235. // handle "1-hit" encoding special case
  236. if rv.postingsOffset&FSTValEncodingMask == FSTValEncoding1Hit {
  237. return rv.init1Hit(postingsOffset)
  238. }
  239. // read the location of the freq/norm details
  240. var n uint64
  241. var read int
  242. rv.freqOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+binary.MaxVarintLen64])
  243. n += uint64(read)
  244. rv.locOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
  245. n += uint64(read)
  246. var postingsLen uint64
  247. postingsLen, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
  248. n += uint64(read)
  249. roaringBytes := d.sb.mem[postingsOffset+n : postingsOffset+n+postingsLen]
  250. if rv.postings == nil {
  251. rv.postings = roaring.NewBitmap()
  252. }
  253. _, err := rv.postings.FromBuffer(roaringBytes)
  254. if err != nil {
  255. return fmt.Errorf("error loading roaring bitmap: %v", err)
  256. }
  257. return nil
  258. }
  259. func (rv *PostingsList) init1Hit(fstVal uint64) error {
  260. docNum, normBits := FSTValDecode1Hit(fstVal)
  261. rv.docNum1Hit = docNum
  262. rv.normBits1Hit = normBits
  263. return nil
  264. }
// PostingsIterator provides a way to iterate through the postings list
type PostingsIterator struct {
	// postings is the list being iterated; nil for iterators built by
	// PostingsIteratorFromBitmap or PostingsIteratorFrom1Hit.
	postings *PostingsList
	// all iterates every docNum in the postings bitmap, including excluded
	// ones; Actual iterates ActualBM, the docNums that are really returned.
	all      roaring.IntPeekable
	Actual   roaring.IntPeekable
	ActualBM *roaring.Bitmap

	// currChunk is the index of the chunk most recently loaded by loadChunk;
	// currChunkFreqNorm / currChunkLoc are that chunk's bytes.
	currChunk         uint32
	currChunkFreqNorm []byte
	currChunkLoc      []byte

	// uvarint readers positioned within the current chunk's bytes
	freqNormReader *memUvarintReader
	locReader      *memUvarintReader

	// decoded chunk-offset tables, and the position in the segment memory
	// of the first byte after each table
	freqChunkOffsets []uint64
	freqChunkStart   uint64

	locChunkOffsets []uint64
	locChunkStart   uint64

	next            Posting            // reused across Next() calls
	nextLocs        []Location         // reused across Next() calls
	nextSegmentLocs []segment.Location // reused across Next() calls

	// 1-hit state; normBits1Hit != 0 marks a 1-hit iterator, and docNum1Hit
	// becomes DocNum1HitFinished once the hit has been consumed
	docNum1Hit   uint64
	normBits1Hit uint64

	// buf is scratch space used by nextBytes for the 1-hit encoding
	buf []byte

	includeFreqNorm bool
	includeLocs     bool
}

// emptyPostingsIterator is the shared iterator returned for postings lists
// that have no data.
var emptyPostingsIterator = &PostingsIterator{}
  290. func (i *PostingsIterator) Size() int {
  291. sizeInBytes := reflectStaticSizePostingsIterator + SizeOfPtr +
  292. len(i.currChunkFreqNorm) +
  293. len(i.currChunkLoc) +
  294. len(i.freqChunkOffsets)*SizeOfUint64 +
  295. len(i.locChunkOffsets)*SizeOfUint64 +
  296. i.next.Size()
  297. for _, entry := range i.nextLocs {
  298. sizeInBytes += entry.Size()
  299. }
  300. return sizeInBytes
  301. }
  302. func (i *PostingsIterator) loadChunk(chunk int) error {
  303. if i.includeFreqNorm {
  304. if chunk >= len(i.freqChunkOffsets) {
  305. return fmt.Errorf("tried to load freq chunk that doesn't exist %d/(%d)",
  306. chunk, len(i.freqChunkOffsets))
  307. }
  308. end, start := i.freqChunkStart, i.freqChunkStart
  309. s, e := readChunkBoundary(chunk, i.freqChunkOffsets)
  310. start += s
  311. end += e
  312. i.currChunkFreqNorm = i.postings.sb.mem[start:end]
  313. if i.freqNormReader == nil {
  314. i.freqNormReader = newMemUvarintReader(i.currChunkFreqNorm)
  315. } else {
  316. i.freqNormReader.Reset(i.currChunkFreqNorm)
  317. }
  318. }
  319. if i.includeLocs {
  320. if chunk >= len(i.locChunkOffsets) {
  321. return fmt.Errorf("tried to load loc chunk that doesn't exist %d/(%d)",
  322. chunk, len(i.locChunkOffsets))
  323. }
  324. end, start := i.locChunkStart, i.locChunkStart
  325. s, e := readChunkBoundary(chunk, i.locChunkOffsets)
  326. start += s
  327. end += e
  328. i.currChunkLoc = i.postings.sb.mem[start:end]
  329. if i.locReader == nil {
  330. i.locReader = newMemUvarintReader(i.currChunkLoc)
  331. } else {
  332. i.locReader.Reset(i.currChunkLoc)
  333. }
  334. }
  335. i.currChunk = uint32(chunk)
  336. return nil
  337. }
  338. func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) {
  339. if i.normBits1Hit != 0 {
  340. return 1, i.normBits1Hit, false, nil
  341. }
  342. freqHasLocs, err := i.freqNormReader.ReadUvarint()
  343. if err != nil {
  344. return 0, 0, false, fmt.Errorf("error reading frequency: %v", err)
  345. }
  346. freq, hasLocs := decodeFreqHasLocs(freqHasLocs)
  347. normBits, err := i.freqNormReader.ReadUvarint()
  348. if err != nil {
  349. return 0, 0, false, fmt.Errorf("error reading norm: %v", err)
  350. }
  351. return freq, normBits, hasLocs, nil
  352. }
  353. func (i *PostingsIterator) skipFreqNormReadHasLocs() (bool, error) {
  354. if i.normBits1Hit != 0 {
  355. return false, nil
  356. }
  357. freqHasLocs, err := i.freqNormReader.ReadUvarint()
  358. if err != nil {
  359. return false, fmt.Errorf("error reading freqHasLocs: %v", err)
  360. }
  361. i.freqNormReader.SkipUvarint() // Skip normBits.
  362. return freqHasLocs&0x01 != 0, nil // See decodeFreqHasLocs() / hasLocs.
  363. }
  364. func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 {
  365. rv := freq << 1
  366. if hasLocs {
  367. rv = rv | 0x01 // 0'th LSB encodes whether there are locations
  368. }
  369. return rv
  370. }
  371. func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) {
  372. freq := freqHasLocs >> 1
  373. hasLocs := freqHasLocs&0x01 != 0
  374. return freq, hasLocs
  375. }
  376. // readLocation processes all the integers on the stream representing a single
  377. // location.
  378. func (i *PostingsIterator) readLocation(l *Location) error {
  379. // read off field
  380. fieldID, err := i.locReader.ReadUvarint()
  381. if err != nil {
  382. return fmt.Errorf("error reading location field: %v", err)
  383. }
  384. // read off pos
  385. pos, err := i.locReader.ReadUvarint()
  386. if err != nil {
  387. return fmt.Errorf("error reading location pos: %v", err)
  388. }
  389. // read off start
  390. start, err := i.locReader.ReadUvarint()
  391. if err != nil {
  392. return fmt.Errorf("error reading location start: %v", err)
  393. }
  394. // read off end
  395. end, err := i.locReader.ReadUvarint()
  396. if err != nil {
  397. return fmt.Errorf("error reading location end: %v", err)
  398. }
  399. // read off num array pos
  400. numArrayPos, err := i.locReader.ReadUvarint()
  401. if err != nil {
  402. return fmt.Errorf("error reading location num array pos: %v", err)
  403. }
  404. l.field = i.postings.sb.fieldsInv[fieldID]
  405. l.pos = pos
  406. l.start = start
  407. l.end = end
  408. if cap(l.ap) < int(numArrayPos) {
  409. l.ap = make([]uint64, int(numArrayPos))
  410. } else {
  411. l.ap = l.ap[:int(numArrayPos)]
  412. }
  413. // read off array positions
  414. for k := 0; k < int(numArrayPos); k++ {
  415. ap, err := i.locReader.ReadUvarint()
  416. if err != nil {
  417. return fmt.Errorf("error reading array position: %v", err)
  418. }
  419. l.ap[k] = ap
  420. }
  421. return nil
  422. }
// Next returns the next posting on the postings list, or nil at the end.
// It is equivalent to advancing to docNum 0 or later.
func (i *PostingsIterator) Next() (segment.Posting, error) {
	return i.nextAtOrAfter(0)
}
// Advance returns the posting at the specified docNum or, if it is not
// present, the next posting after it; nil is returned if the end is reached.
func (i *PostingsIterator) Advance(docNum uint64) (segment.Posting, error) {
	return i.nextAtOrAfter(docNum)
}
  432. // Next returns the next posting on the postings list, or nil at the end
  433. func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, error) {
  434. docNum, exists, err := i.nextDocNumAtOrAfter(atOrAfter)
  435. if err != nil || !exists {
  436. return nil, err
  437. }
  438. i.next = Posting{} // clear the struct
  439. rv := &i.next
  440. rv.docNum = docNum
  441. if !i.includeFreqNorm {
  442. return rv, nil
  443. }
  444. var normBits uint64
  445. var hasLocs bool
  446. rv.freq, normBits, hasLocs, err = i.readFreqNormHasLocs()
  447. if err != nil {
  448. return nil, err
  449. }
  450. rv.norm = math.Float32frombits(uint32(normBits))
  451. if i.includeLocs && hasLocs {
  452. // prepare locations into reused slices, where we assume
  453. // rv.freq >= "number of locs", since in a composite field,
  454. // some component fields might have their IncludeTermVector
  455. // flags disabled while other component fields are enabled
  456. if cap(i.nextLocs) >= int(rv.freq) {
  457. i.nextLocs = i.nextLocs[0:rv.freq]
  458. } else {
  459. i.nextLocs = make([]Location, rv.freq, rv.freq*2)
  460. }
  461. if cap(i.nextSegmentLocs) < int(rv.freq) {
  462. i.nextSegmentLocs = make([]segment.Location, rv.freq, rv.freq*2)
  463. }
  464. rv.locs = i.nextSegmentLocs[:0]
  465. numLocsBytes, err := i.locReader.ReadUvarint()
  466. if err != nil {
  467. return nil, fmt.Errorf("error reading location numLocsBytes: %v", err)
  468. }
  469. j := 0
  470. startBytesRemaining := i.locReader.Len() // # bytes remaining in the locReader
  471. for startBytesRemaining-i.locReader.Len() < int(numLocsBytes) {
  472. err := i.readLocation(&i.nextLocs[j])
  473. if err != nil {
  474. return nil, err
  475. }
  476. rv.locs = append(rv.locs, &i.nextLocs[j])
  477. j++
  478. }
  479. }
  480. return rv, nil
  481. }
  482. var freqHasLocs1Hit = encodeFreqHasLocs(1, false)
  483. // nextBytes returns the docNum and the encoded freq & loc bytes for
  484. // the next posting
  485. func (i *PostingsIterator) nextBytes() (
  486. docNumOut uint64, freq uint64, normBits uint64,
  487. bytesFreqNorm []byte, bytesLoc []byte, err error) {
  488. docNum, exists, err := i.nextDocNumAtOrAfter(0)
  489. if err != nil || !exists {
  490. return 0, 0, 0, nil, nil, err
  491. }
  492. if i.normBits1Hit != 0 {
  493. if i.buf == nil {
  494. i.buf = make([]byte, binary.MaxVarintLen64*2)
  495. }
  496. n := binary.PutUvarint(i.buf, freqHasLocs1Hit)
  497. n += binary.PutUvarint(i.buf[n:], i.normBits1Hit)
  498. return docNum, uint64(1), i.normBits1Hit, i.buf[:n], nil, nil
  499. }
  500. startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
  501. var hasLocs bool
  502. freq, normBits, hasLocs, err = i.readFreqNormHasLocs()
  503. if err != nil {
  504. return 0, 0, 0, nil, nil, err
  505. }
  506. endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
  507. bytesFreqNorm = i.currChunkFreqNorm[startFreqNorm:endFreqNorm]
  508. if hasLocs {
  509. startLoc := len(i.currChunkLoc) - i.locReader.Len()
  510. numLocsBytes, err := i.locReader.ReadUvarint()
  511. if err != nil {
  512. return 0, 0, 0, nil, nil,
  513. fmt.Errorf("error reading location nextBytes numLocs: %v", err)
  514. }
  515. // skip over all the location bytes
  516. i.locReader.SkipBytes(int(numLocsBytes))
  517. endLoc := len(i.currChunkLoc) - i.locReader.Len()
  518. bytesLoc = i.currChunkLoc[startLoc:endLoc]
  519. }
  520. return docNum, freq, normBits, bytesFreqNorm, bytesLoc, nil
  521. }
  522. // nextDocNum returns the next docNum on the postings list, and also
  523. // sets up the currChunk / loc related fields of the iterator.
  524. func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, error) {
  525. if i.normBits1Hit != 0 {
  526. if i.docNum1Hit == DocNum1HitFinished {
  527. return 0, false, nil
  528. }
  529. if i.docNum1Hit < atOrAfter {
  530. // advanced past our 1-hit
  531. i.docNum1Hit = DocNum1HitFinished // consume our 1-hit docNum
  532. return 0, false, nil
  533. }
  534. docNum := i.docNum1Hit
  535. i.docNum1Hit = DocNum1HitFinished // consume our 1-hit docNum
  536. return docNum, true, nil
  537. }
  538. if i.Actual == nil || !i.Actual.HasNext() {
  539. return 0, false, nil
  540. }
  541. if i.postings == nil || i.postings.postings == i.ActualBM {
  542. return i.nextDocNumAtOrAfterClean(atOrAfter)
  543. }
  544. i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
  545. if !i.Actual.HasNext() {
  546. // couldn't find anything
  547. return 0, false, nil
  548. }
  549. n := i.Actual.Next()
  550. allN := i.all.Next()
  551. nChunk := n / i.postings.sb.chunkFactor
  552. // when allN becomes >= to here, then allN is in the same chunk as nChunk.
  553. allNReachesNChunk := nChunk * i.postings.sb.chunkFactor
  554. // n is the next actual hit (excluding some postings), and
  555. // allN is the next hit in the full postings, and
  556. // if they don't match, move 'all' forwards until they do
  557. for allN != n {
  558. // we've reached same chunk, so move the freq/norm/loc decoders forward
  559. if i.includeFreqNorm && allN >= allNReachesNChunk {
  560. err := i.currChunkNext(nChunk)
  561. if err != nil {
  562. return 0, false, err
  563. }
  564. }
  565. allN = i.all.Next()
  566. }
  567. if i.includeFreqNorm && (i.currChunk != nChunk || i.currChunkFreqNorm == nil) {
  568. err := i.loadChunk(int(nChunk))
  569. if err != nil {
  570. return 0, false, fmt.Errorf("error loading chunk: %v", err)
  571. }
  572. }
  573. return uint64(n), true, nil
  574. }
  575. // optimization when the postings list is "clean" (e.g., no updates &
  576. // no deletions) where the all bitmap is the same as the actual bitmap
  577. func (i *PostingsIterator) nextDocNumAtOrAfterClean(
  578. atOrAfter uint64) (uint64, bool, error) {
  579. if !i.includeFreqNorm {
  580. i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
  581. if !i.Actual.HasNext() {
  582. return 0, false, nil // couldn't find anything
  583. }
  584. return uint64(i.Actual.Next()), true, nil
  585. }
  586. // freq-norm's needed, so maintain freq-norm chunk reader
  587. sameChunkNexts := 0 // # of times we called Next() in the same chunk
  588. n := i.Actual.Next()
  589. nChunk := n / i.postings.sb.chunkFactor
  590. for uint64(n) < atOrAfter && i.Actual.HasNext() {
  591. n = i.Actual.Next()
  592. nChunkPrev := nChunk
  593. nChunk = n / i.postings.sb.chunkFactor
  594. if nChunk != nChunkPrev {
  595. sameChunkNexts = 0
  596. } else {
  597. sameChunkNexts += 1
  598. }
  599. }
  600. if uint64(n) < atOrAfter {
  601. // couldn't find anything
  602. return 0, false, nil
  603. }
  604. for j := 0; j < sameChunkNexts; j++ {
  605. err := i.currChunkNext(nChunk)
  606. if err != nil {
  607. return 0, false, fmt.Errorf("error optimized currChunkNext: %v", err)
  608. }
  609. }
  610. if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
  611. err := i.loadChunk(int(nChunk))
  612. if err != nil {
  613. return 0, false, fmt.Errorf("error loading chunk: %v", err)
  614. }
  615. }
  616. return uint64(n), true, nil
  617. }
  618. func (i *PostingsIterator) currChunkNext(nChunk uint32) error {
  619. if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
  620. err := i.loadChunk(int(nChunk))
  621. if err != nil {
  622. return fmt.Errorf("error loading chunk: %v", err)
  623. }
  624. }
  625. // read off freq/offsets even though we don't care about them
  626. hasLocs, err := i.skipFreqNormReadHasLocs()
  627. if err != nil {
  628. return err
  629. }
  630. if i.includeLocs && hasLocs {
  631. numLocsBytes, err := i.locReader.ReadUvarint()
  632. if err != nil {
  633. return fmt.Errorf("error reading location numLocsBytes: %v", err)
  634. }
  635. // skip over all the location bytes
  636. i.locReader.SkipBytes(int(numLocsBytes))
  637. }
  638. return nil
  639. }
  640. // DocNum1Hit returns the docNum and true if this is "1-hit" optimized
  641. // and the docNum is available.
  642. func (p *PostingsIterator) DocNum1Hit() (uint64, bool) {
  643. if p.normBits1Hit != 0 && p.docNum1Hit != DocNum1HitFinished {
  644. return p.docNum1Hit, true
  645. }
  646. return 0, false
  647. }
// ActualBitmap returns the underlying actual bitmap
// which can be used up the stack for optimizations.
// The bitmap is returned as-is, not copied.
func (p *PostingsIterator) ActualBitmap() *roaring.Bitmap {
	return p.ActualBM
}
// ReplaceActual replaces the ActualBM with the provided
// bitmap, and points Actual at a fresh iterator over that bitmap.
func (p *PostingsIterator) ReplaceActual(abm *roaring.Bitmap) {
	p.ActualBM = abm
	p.Actual = abm.Iterator()
}
  659. // PostingsIteratorFromBitmap constructs a PostingsIterator given an
  660. // "actual" bitmap.
  661. func PostingsIteratorFromBitmap(bm *roaring.Bitmap,
  662. includeFreqNorm, includeLocs bool) (segment.PostingsIterator, error) {
  663. return &PostingsIterator{
  664. ActualBM: bm,
  665. Actual: bm.Iterator(),
  666. includeFreqNorm: includeFreqNorm,
  667. includeLocs: includeLocs,
  668. }, nil
  669. }
  670. // PostingsIteratorFrom1Hit constructs a PostingsIterator given a
  671. // 1-hit docNum.
  672. func PostingsIteratorFrom1Hit(docNum1Hit uint64,
  673. includeFreqNorm, includeLocs bool) (segment.PostingsIterator, error) {
  674. return &PostingsIterator{
  675. docNum1Hit: docNum1Hit,
  676. normBits1Hit: NormBits1Hit,
  677. includeFreqNorm: includeFreqNorm,
  678. includeLocs: includeLocs,
  679. }, nil
  680. }
  681. // Posting is a single entry in a postings list
  682. type Posting struct {
  683. docNum uint64
  684. freq uint64
  685. norm float32
  686. locs []segment.Location
  687. }
  688. func (p *Posting) Size() int {
  689. sizeInBytes := reflectStaticSizePosting
  690. for _, entry := range p.locs {
  691. sizeInBytes += entry.Size()
  692. }
  693. return sizeInBytes
  694. }
  695. // Number returns the document number of this posting in this segment
  696. func (p *Posting) Number() uint64 {
  697. return p.docNum
  698. }
  699. // Frequency returns the frequencies of occurrence of this term in this doc/field
  700. func (p *Posting) Frequency() uint64 {
  701. return p.freq
  702. }
  703. // Norm returns the normalization factor for this posting
  704. func (p *Posting) Norm() float64 {
  705. return float64(p.norm)
  706. }
  707. // Locations returns the location information for each occurrence
  708. func (p *Posting) Locations() []segment.Location {
  709. return p.locs
  710. }
  711. // Location represents the location of a single occurrence
  712. type Location struct {
  713. field string
  714. pos uint64
  715. start uint64
  716. end uint64
  717. ap []uint64
  718. }
  719. func (l *Location) Size() int {
  720. return reflectStaticSizeLocation +
  721. len(l.field) +
  722. len(l.ap)*SizeOfUint64
  723. }
  724. // Field returns the name of the field (useful in composite fields to know
  725. // which original field the value came from)
  726. func (l *Location) Field() string {
  727. return l.field
  728. }
  729. // Start returns the start byte offset of this occurrence
  730. func (l *Location) Start() uint64 {
  731. return l.start
  732. }
  733. // End returns the end byte offset of this occurrence
  734. func (l *Location) End() uint64 {
  735. return l.end
  736. }
  737. // Pos returns the 1-based phrase position of this occurrence
  738. func (l *Location) Pos() uint64 {
  739. return l.pos
  740. }
  741. // ArrayPositions returns the array position vector associated with this occurrence
  742. func (l *Location) ArrayPositions() []uint64 {
  743. return l.ap
  744. }