You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

posting.go 20KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798
  1. // Copyright (c) 2017 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package zap
  15. import (
  16. "encoding/binary"
  17. "fmt"
  18. "math"
  19. "reflect"
  20. "github.com/RoaringBitmap/roaring"
  21. "github.com/blevesearch/bleve/index/scorch/segment"
  22. "github.com/blevesearch/bleve/size"
  23. )
  // Static (shallow) sizes in bytes of the core types declared in this
  // file, as reported by reflect. They are assigned once in init and read
  // by the corresponding Size methods.
  var (
  	reflectStaticSizePostingsList     int
  	reflectStaticSizePostingsIterator int
  	reflectStaticSizePosting          int
  	reflectStaticSizeLocation         int
  )
  28. func init() {
  29. var pl PostingsList
  30. reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size())
  31. var pi PostingsIterator
  32. reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size())
  33. var p Posting
  34. reflectStaticSizePosting = int(reflect.TypeOf(p).Size())
  35. var l Location
  36. reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
  37. }
  // FST or vellum value (uint64) encoding is determined by the top two
  // highest-order or most significant bits...
  //
  //  encoding  : MSB
  //  name      : 63  62  61...to...bit #0 (LSB)
  //  ----------+---+---+---------------------------------------------------
  //   general  : 0 | 0 | 62-bits of postingsOffset.
  //   ~        : 0 | 1 | reserved for future.
  //   1-hit    : 1 | 0 | 31-bits of positive float31 norm | 31-bits docNum.
  //   ~        : 1 | 1 | reserved for future.
  //
  // Encoding "general" is able to handle all cases, where the
  // postingsOffset points to more information about the postings for
  // the term.
  //
  // Encoding "1-hit" is used to optimize a commonly seen case when a
  // term has only a single hit.  For example, a term in the _id field
  // will have only 1 hit.  The "1-hit" encoding is used for a term
  // in a field when...
  //
  // - term vector info is disabled for that field;
  // - and, the term appears in only a single doc for that field;
  // - and, the term's freq is exactly 1 in that single doc for that field;
  // - and, the docNum must fit into 31-bits;
  //
  // Otherwise, the "general" encoding is used instead.
  //
  // In the "1-hit" encoding, the field in that single doc may have
  // other terms, which is supported in the "1-hit" encoding by the
  // positive float31 norm.
  const (
  	// FSTValEncodingMask selects the two most significant bits, which
  	// carry the encoding tag of an FST value.
  	FSTValEncodingMask = uint64(0xc000000000000000)

  	// FSTValEncodingGeneral is the tag (00) of the "general" encoding.
  	FSTValEncodingGeneral = uint64(0x0000000000000000)

  	// FSTValEncoding1Hit is the tag (10) of the "1-hit" encoding.
  	FSTValEncoding1Hit = uint64(0x8000000000000000)
  )
  71. func FSTValEncode1Hit(docNum uint64, normBits uint64) uint64 {
  72. return FSTValEncoding1Hit | ((mask31Bits & normBits) << 31) | (mask31Bits & docNum)
  73. }
  74. func FSTValDecode1Hit(v uint64) (docNum uint64, normBits uint64) {
  75. return (mask31Bits & v), (mask31Bits & (v >> 31))
  76. }
  // mask31Bits has the low 31 bits set (0x7fffffff).
  const mask31Bits = uint64(1<<31 - 1)
  78. func under32Bits(x uint64) bool {
  79. return x <= mask31Bits
  80. }
  // DocNum1HitFinished is the sentinel stored in a 1-hit iterator's
  // docNum1Hit once its single docNum has been consumed or excluded.
  const DocNum1HitFinished = math.MaxUint64
  // NormBits1Hit is the IEEE-754 bit pattern of float32(1), widened to
  // uint64. PostingsIteratorFrom1Hit uses it as the norm of its posting.
  var NormBits1Hit = uint64(math.Float32bits(1.0))
  83. // PostingsList is an in-memory representation of a postings list
  84. type PostingsList struct {
  85. sb *SegmentBase
  86. postingsOffset uint64
  87. freqOffset uint64
  88. locOffset uint64
  89. postings *roaring.Bitmap
  90. except *roaring.Bitmap
  91. // when normBits1Hit != 0, then this postings list came from a
  92. // 1-hit encoding, and only the docNum1Hit & normBits1Hit apply
  93. docNum1Hit uint64
  94. normBits1Hit uint64
  95. }
  96. // represents an immutable, empty postings list
  97. var emptyPostingsList = &PostingsList{}
  98. func (p *PostingsList) Size() int {
  99. sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr
  100. if p.except != nil {
  101. sizeInBytes += int(p.except.GetSizeInBytes())
  102. }
  103. return sizeInBytes
  104. }
  105. func (p *PostingsList) OrInto(receiver *roaring.Bitmap) {
  106. if p.normBits1Hit != 0 {
  107. receiver.Add(uint32(p.docNum1Hit))
  108. return
  109. }
  110. if p.postings != nil {
  111. receiver.Or(p.postings)
  112. }
  113. }
  114. // Iterator returns an iterator for this postings list
  115. func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool,
  116. prealloc segment.PostingsIterator) segment.PostingsIterator {
  117. if p.normBits1Hit == 0 && p.postings == nil {
  118. return emptyPostingsIterator
  119. }
  120. var preallocPI *PostingsIterator
  121. pi, ok := prealloc.(*PostingsIterator)
  122. if ok && pi != nil {
  123. preallocPI = pi
  124. }
  125. if preallocPI == emptyPostingsIterator {
  126. preallocPI = nil
  127. }
  128. return p.iterator(includeFreq, includeNorm, includeLocs, preallocPI)
  129. }
  130. func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
  131. rv *PostingsIterator) *PostingsIterator {
  132. if rv == nil {
  133. rv = &PostingsIterator{}
  134. } else {
  135. freqNormReader := rv.freqNormReader
  136. if freqNormReader != nil {
  137. freqNormReader.reset()
  138. }
  139. locReader := rv.locReader
  140. if locReader != nil {
  141. locReader.reset()
  142. }
  143. nextLocs := rv.nextLocs[:0]
  144. nextSegmentLocs := rv.nextSegmentLocs[:0]
  145. buf := rv.buf
  146. *rv = PostingsIterator{} // clear the struct
  147. rv.freqNormReader = freqNormReader
  148. rv.locReader = locReader
  149. rv.nextLocs = nextLocs
  150. rv.nextSegmentLocs = nextSegmentLocs
  151. rv.buf = buf
  152. }
  153. rv.postings = p
  154. rv.includeFreqNorm = includeFreq || includeNorm || includeLocs
  155. rv.includeLocs = includeLocs
  156. if p.normBits1Hit != 0 {
  157. // "1-hit" encoding
  158. rv.docNum1Hit = p.docNum1Hit
  159. rv.normBits1Hit = p.normBits1Hit
  160. if p.except != nil && p.except.Contains(uint32(rv.docNum1Hit)) {
  161. rv.docNum1Hit = DocNum1HitFinished
  162. }
  163. return rv
  164. }
  165. // "general" encoding, check if empty
  166. if p.postings == nil {
  167. return rv
  168. }
  169. // initialize freq chunk reader
  170. if rv.includeFreqNorm {
  171. rv.freqNormReader = newChunkedIntDecoder(p.sb.mem, p.freqOffset)
  172. }
  173. // initialize the loc chunk reader
  174. if rv.includeLocs {
  175. rv.locReader = newChunkedIntDecoder(p.sb.mem, p.locOffset)
  176. }
  177. rv.all = p.postings.Iterator()
  178. if p.except != nil {
  179. rv.ActualBM = roaring.AndNot(p.postings, p.except)
  180. rv.Actual = rv.ActualBM.Iterator()
  181. } else {
  182. rv.ActualBM = p.postings
  183. rv.Actual = rv.all // Optimize to use same iterator for all & Actual.
  184. }
  185. return rv
  186. }
  187. // Count returns the number of items on this postings list
  188. func (p *PostingsList) Count() uint64 {
  189. var n, e uint64
  190. if p.normBits1Hit != 0 {
  191. n = 1
  192. if p.except != nil && p.except.Contains(uint32(p.docNum1Hit)) {
  193. e = 1
  194. }
  195. } else if p.postings != nil {
  196. n = p.postings.GetCardinality()
  197. if p.except != nil {
  198. e = p.postings.AndCardinality(p.except)
  199. }
  200. }
  201. return n - e
  202. }
  203. func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
  204. rv.postingsOffset = postingsOffset
  205. // handle "1-hit" encoding special case
  206. if rv.postingsOffset&FSTValEncodingMask == FSTValEncoding1Hit {
  207. return rv.init1Hit(postingsOffset)
  208. }
  209. // read the location of the freq/norm details
  210. var n uint64
  211. var read int
  212. rv.freqOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+binary.MaxVarintLen64])
  213. n += uint64(read)
  214. rv.locOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
  215. n += uint64(read)
  216. var postingsLen uint64
  217. postingsLen, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
  218. n += uint64(read)
  219. roaringBytes := d.sb.mem[postingsOffset+n : postingsOffset+n+postingsLen]
  220. if rv.postings == nil {
  221. rv.postings = roaring.NewBitmap()
  222. }
  223. _, err := rv.postings.FromBuffer(roaringBytes)
  224. if err != nil {
  225. return fmt.Errorf("error loading roaring bitmap: %v", err)
  226. }
  227. return nil
  228. }
  229. func (rv *PostingsList) init1Hit(fstVal uint64) error {
  230. docNum, normBits := FSTValDecode1Hit(fstVal)
  231. rv.docNum1Hit = docNum
  232. rv.normBits1Hit = normBits
  233. return nil
  234. }
  235. // PostingsIterator provides a way to iterate through the postings list
  236. type PostingsIterator struct {
  237. postings *PostingsList
  238. all roaring.IntPeekable
  239. Actual roaring.IntPeekable
  240. ActualBM *roaring.Bitmap
  241. currChunk uint32
  242. freqNormReader *chunkedIntDecoder
  243. locReader *chunkedIntDecoder
  244. next Posting // reused across Next() calls
  245. nextLocs []Location // reused across Next() calls
  246. nextSegmentLocs []segment.Location // reused across Next() calls
  247. docNum1Hit uint64
  248. normBits1Hit uint64
  249. buf []byte
  250. includeFreqNorm bool
  251. includeLocs bool
  252. }
  253. var emptyPostingsIterator = &PostingsIterator{}
  254. func (i *PostingsIterator) Size() int {
  255. sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr +
  256. i.next.Size()
  257. // account for freqNormReader, locReader if we start using this.
  258. for _, entry := range i.nextLocs {
  259. sizeInBytes += entry.Size()
  260. }
  261. return sizeInBytes
  262. }
  263. func (i *PostingsIterator) loadChunk(chunk int) error {
  264. if i.includeFreqNorm {
  265. err := i.freqNormReader.loadChunk(chunk)
  266. if err != nil {
  267. return err
  268. }
  269. }
  270. if i.includeLocs {
  271. err := i.locReader.loadChunk(chunk)
  272. if err != nil {
  273. return err
  274. }
  275. }
  276. i.currChunk = uint32(chunk)
  277. return nil
  278. }
  279. func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) {
  280. if i.normBits1Hit != 0 {
  281. return 1, i.normBits1Hit, false, nil
  282. }
  283. freqHasLocs, err := i.freqNormReader.readUvarint()
  284. if err != nil {
  285. return 0, 0, false, fmt.Errorf("error reading frequency: %v", err)
  286. }
  287. freq, hasLocs := decodeFreqHasLocs(freqHasLocs)
  288. normBits, err := i.freqNormReader.readUvarint()
  289. if err != nil {
  290. return 0, 0, false, fmt.Errorf("error reading norm: %v", err)
  291. }
  292. return freq, normBits, hasLocs, nil
  293. }
  294. func (i *PostingsIterator) skipFreqNormReadHasLocs() (bool, error) {
  295. if i.normBits1Hit != 0 {
  296. return false, nil
  297. }
  298. freqHasLocs, err := i.freqNormReader.readUvarint()
  299. if err != nil {
  300. return false, fmt.Errorf("error reading freqHasLocs: %v", err)
  301. }
  302. i.freqNormReader.SkipUvarint() // Skip normBits.
  303. return freqHasLocs&0x01 != 0, nil // See decodeFreqHasLocs() / hasLocs.
  304. }
  // encodeFreqHasLocs packs freq and the hasLocs flag into one integer:
  // freq is shifted left by one bit and the flag goes in bit 0. The top
  // bit of freq is lost by the shift.
  func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 {
  	var locBit uint64
  	if hasLocs {
  		locBit = 0x01 // 0'th LSB encodes whether there are locations
  	}
  	return freq<<1 | locBit
  }
  // decodeFreqHasLocs undoes encodeFreqHasLocs: bit 0 is the hasLocs flag
  // and the remaining bits, shifted right by one, are the frequency.
  func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) {
  	return freqHasLocs >> 1, freqHasLocs&0x01 == 0x01
  }
  317. // readLocation processes all the integers on the stream representing a single
  318. // location.
  319. func (i *PostingsIterator) readLocation(l *Location) error {
  320. // read off field
  321. fieldID, err := i.locReader.readUvarint()
  322. if err != nil {
  323. return fmt.Errorf("error reading location field: %v", err)
  324. }
  325. // read off pos
  326. pos, err := i.locReader.readUvarint()
  327. if err != nil {
  328. return fmt.Errorf("error reading location pos: %v", err)
  329. }
  330. // read off start
  331. start, err := i.locReader.readUvarint()
  332. if err != nil {
  333. return fmt.Errorf("error reading location start: %v", err)
  334. }
  335. // read off end
  336. end, err := i.locReader.readUvarint()
  337. if err != nil {
  338. return fmt.Errorf("error reading location end: %v", err)
  339. }
  340. // read off num array pos
  341. numArrayPos, err := i.locReader.readUvarint()
  342. if err != nil {
  343. return fmt.Errorf("error reading location num array pos: %v", err)
  344. }
  345. l.field = i.postings.sb.fieldsInv[fieldID]
  346. l.pos = pos
  347. l.start = start
  348. l.end = end
  349. if cap(l.ap) < int(numArrayPos) {
  350. l.ap = make([]uint64, int(numArrayPos))
  351. } else {
  352. l.ap = l.ap[:int(numArrayPos)]
  353. }
  354. // read off array positions
  355. for k := 0; k < int(numArrayPos); k++ {
  356. ap, err := i.locReader.readUvarint()
  357. if err != nil {
  358. return fmt.Errorf("error reading array position: %v", err)
  359. }
  360. l.ap[k] = ap
  361. }
  362. return nil
  363. }
  364. // Next returns the next posting on the postings list, or nil at the end
  365. func (i *PostingsIterator) Next() (segment.Posting, error) {
  366. return i.nextAtOrAfter(0)
  367. }
  368. // Advance returns the posting at the specified docNum or it is not present
  369. // the next posting, or if the end is reached, nil
  370. func (i *PostingsIterator) Advance(docNum uint64) (segment.Posting, error) {
  371. return i.nextAtOrAfter(docNum)
  372. }
  373. // Next returns the next posting on the postings list, or nil at the end
  374. func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, error) {
  375. docNum, exists, err := i.nextDocNumAtOrAfter(atOrAfter)
  376. if err != nil || !exists {
  377. return nil, err
  378. }
  379. i.next = Posting{} // clear the struct
  380. rv := &i.next
  381. rv.docNum = docNum
  382. if !i.includeFreqNorm {
  383. return rv, nil
  384. }
  385. var normBits uint64
  386. var hasLocs bool
  387. rv.freq, normBits, hasLocs, err = i.readFreqNormHasLocs()
  388. if err != nil {
  389. return nil, err
  390. }
  391. rv.norm = math.Float32frombits(uint32(normBits))
  392. if i.includeLocs && hasLocs {
  393. // prepare locations into reused slices, where we assume
  394. // rv.freq >= "number of locs", since in a composite field,
  395. // some component fields might have their IncludeTermVector
  396. // flags disabled while other component fields are enabled
  397. if cap(i.nextLocs) >= int(rv.freq) {
  398. i.nextLocs = i.nextLocs[0:rv.freq]
  399. } else {
  400. i.nextLocs = make([]Location, rv.freq, rv.freq*2)
  401. }
  402. if cap(i.nextSegmentLocs) < int(rv.freq) {
  403. i.nextSegmentLocs = make([]segment.Location, rv.freq, rv.freq*2)
  404. }
  405. rv.locs = i.nextSegmentLocs[:0]
  406. numLocsBytes, err := i.locReader.readUvarint()
  407. if err != nil {
  408. return nil, fmt.Errorf("error reading location numLocsBytes: %v", err)
  409. }
  410. j := 0
  411. startBytesRemaining := i.locReader.Len() // # bytes remaining in the locReader
  412. for startBytesRemaining-i.locReader.Len() < int(numLocsBytes) {
  413. err := i.readLocation(&i.nextLocs[j])
  414. if err != nil {
  415. return nil, err
  416. }
  417. rv.locs = append(rv.locs, &i.nextLocs[j])
  418. j++
  419. }
  420. }
  421. return rv, nil
  422. }
  423. // nextDocNum returns the next docNum on the postings list, and also
  424. // sets up the currChunk / loc related fields of the iterator.
  425. func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, error) {
  426. if i.normBits1Hit != 0 {
  427. if i.docNum1Hit == DocNum1HitFinished {
  428. return 0, false, nil
  429. }
  430. if i.docNum1Hit < atOrAfter {
  431. // advanced past our 1-hit
  432. i.docNum1Hit = DocNum1HitFinished // consume our 1-hit docNum
  433. return 0, false, nil
  434. }
  435. docNum := i.docNum1Hit
  436. i.docNum1Hit = DocNum1HitFinished // consume our 1-hit docNum
  437. return docNum, true, nil
  438. }
  439. if i.Actual == nil || !i.Actual.HasNext() {
  440. return 0, false, nil
  441. }
  442. if i.postings == nil || i.postings.postings == i.ActualBM {
  443. return i.nextDocNumAtOrAfterClean(atOrAfter)
  444. }
  445. i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
  446. if !i.Actual.HasNext() {
  447. // couldn't find anything
  448. return 0, false, nil
  449. }
  450. n := i.Actual.Next()
  451. allN := i.all.Next()
  452. chunkSize, err := getChunkSize(i.postings.sb.chunkMode, i.postings.postings.GetCardinality(), i.postings.sb.numDocs)
  453. if err != nil {
  454. return 0, false, err
  455. }
  456. nChunk := n / uint32(chunkSize)
  457. // when allN becomes >= to here, then allN is in the same chunk as nChunk.
  458. allNReachesNChunk := nChunk * uint32(chunkSize)
  459. // n is the next actual hit (excluding some postings), and
  460. // allN is the next hit in the full postings, and
  461. // if they don't match, move 'all' forwards until they do
  462. for allN != n {
  463. // we've reached same chunk, so move the freq/norm/loc decoders forward
  464. if i.includeFreqNorm && allN >= allNReachesNChunk {
  465. err := i.currChunkNext(nChunk)
  466. if err != nil {
  467. return 0, false, err
  468. }
  469. }
  470. allN = i.all.Next()
  471. }
  472. if i.includeFreqNorm && (i.currChunk != nChunk || i.freqNormReader.isNil()) {
  473. err := i.loadChunk(int(nChunk))
  474. if err != nil {
  475. return 0, false, fmt.Errorf("error loading chunk: %v", err)
  476. }
  477. }
  478. return uint64(n), true, nil
  479. }
  480. // optimization when the postings list is "clean" (e.g., no updates &
  481. // no deletions) where the all bitmap is the same as the actual bitmap
  482. func (i *PostingsIterator) nextDocNumAtOrAfterClean(
  483. atOrAfter uint64) (uint64, bool, error) {
  484. if !i.includeFreqNorm {
  485. i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
  486. if !i.Actual.HasNext() {
  487. return 0, false, nil // couldn't find anything
  488. }
  489. return uint64(i.Actual.Next()), true, nil
  490. }
  491. chunkSize, err := getChunkSize(i.postings.sb.chunkMode, i.postings.postings.GetCardinality(), i.postings.sb.numDocs)
  492. if err != nil {
  493. return 0, false, err
  494. }
  495. // freq-norm's needed, so maintain freq-norm chunk reader
  496. sameChunkNexts := 0 // # of times we called Next() in the same chunk
  497. n := i.Actual.Next()
  498. nChunk := n / uint32(chunkSize)
  499. for uint64(n) < atOrAfter && i.Actual.HasNext() {
  500. n = i.Actual.Next()
  501. nChunkPrev := nChunk
  502. nChunk = n / uint32(chunkSize)
  503. if nChunk != nChunkPrev {
  504. sameChunkNexts = 0
  505. } else {
  506. sameChunkNexts += 1
  507. }
  508. }
  509. if uint64(n) < atOrAfter {
  510. // couldn't find anything
  511. return 0, false, nil
  512. }
  513. for j := 0; j < sameChunkNexts; j++ {
  514. err := i.currChunkNext(nChunk)
  515. if err != nil {
  516. return 0, false, fmt.Errorf("error optimized currChunkNext: %v", err)
  517. }
  518. }
  519. if i.currChunk != nChunk || i.freqNormReader.isNil() {
  520. err := i.loadChunk(int(nChunk))
  521. if err != nil {
  522. return 0, false, fmt.Errorf("error loading chunk: %v", err)
  523. }
  524. }
  525. return uint64(n), true, nil
  526. }
  527. func (i *PostingsIterator) currChunkNext(nChunk uint32) error {
  528. if i.currChunk != nChunk || i.freqNormReader.isNil() {
  529. err := i.loadChunk(int(nChunk))
  530. if err != nil {
  531. return fmt.Errorf("error loading chunk: %v", err)
  532. }
  533. }
  534. // read off freq/offsets even though we don't care about them
  535. hasLocs, err := i.skipFreqNormReadHasLocs()
  536. if err != nil {
  537. return err
  538. }
  539. if i.includeLocs && hasLocs {
  540. numLocsBytes, err := i.locReader.readUvarint()
  541. if err != nil {
  542. return fmt.Errorf("error reading location numLocsBytes: %v", err)
  543. }
  544. // skip over all the location bytes
  545. i.locReader.SkipBytes(int(numLocsBytes))
  546. }
  547. return nil
  548. }
  549. // DocNum1Hit returns the docNum and true if this is "1-hit" optimized
  550. // and the docNum is available.
  551. func (p *PostingsIterator) DocNum1Hit() (uint64, bool) {
  552. if p.normBits1Hit != 0 && p.docNum1Hit != DocNum1HitFinished {
  553. return p.docNum1Hit, true
  554. }
  555. return 0, false
  556. }
  557. // ActualBitmap returns the underlying actual bitmap
  558. // which can be used up the stack for optimizations
  559. func (p *PostingsIterator) ActualBitmap() *roaring.Bitmap {
  560. return p.ActualBM
  561. }
  562. // ReplaceActual replaces the ActualBM with the provided
  563. // bitmap
  564. func (p *PostingsIterator) ReplaceActual(abm *roaring.Bitmap) {
  565. p.ActualBM = abm
  566. p.Actual = abm.Iterator()
  567. }
  568. // PostingsIteratorFromBitmap constructs a PostingsIterator given an
  569. // "actual" bitmap.
  570. func PostingsIteratorFromBitmap(bm *roaring.Bitmap,
  571. includeFreqNorm, includeLocs bool) (segment.PostingsIterator, error) {
  572. return &PostingsIterator{
  573. ActualBM: bm,
  574. Actual: bm.Iterator(),
  575. includeFreqNorm: includeFreqNorm,
  576. includeLocs: includeLocs,
  577. }, nil
  578. }
  579. // PostingsIteratorFrom1Hit constructs a PostingsIterator given a
  580. // 1-hit docNum.
  581. func PostingsIteratorFrom1Hit(docNum1Hit uint64,
  582. includeFreqNorm, includeLocs bool) (segment.PostingsIterator, error) {
  583. return &PostingsIterator{
  584. docNum1Hit: docNum1Hit,
  585. normBits1Hit: NormBits1Hit,
  586. includeFreqNorm: includeFreqNorm,
  587. includeLocs: includeLocs,
  588. }, nil
  589. }
  590. // Posting is a single entry in a postings list
  591. type Posting struct {
  592. docNum uint64
  593. freq uint64
  594. norm float32
  595. locs []segment.Location
  596. }
  597. func (p *Posting) Size() int {
  598. sizeInBytes := reflectStaticSizePosting
  599. for _, entry := range p.locs {
  600. sizeInBytes += entry.Size()
  601. }
  602. return sizeInBytes
  603. }
  604. // Number returns the document number of this posting in this segment
  605. func (p *Posting) Number() uint64 {
  606. return p.docNum
  607. }
  608. // Frequency returns the frequencies of occurrence of this term in this doc/field
  609. func (p *Posting) Frequency() uint64 {
  610. return p.freq
  611. }
  612. // Norm returns the normalization factor for this posting
  613. func (p *Posting) Norm() float64 {
  614. return float64(p.norm)
  615. }
  616. // Locations returns the location information for each occurrence
  617. func (p *Posting) Locations() []segment.Location {
  618. return p.locs
  619. }
  // Location represents the location of a single occurrence
  type Location struct {
  	// field is the name of the field the occurrence was found in
  	field string

  	// pos is the phrase position; start and end are byte offsets
  	pos, start, end uint64

  	// ap is the array position vector of the occurrence
  	ap []uint64
  }
  628. func (l *Location) Size() int {
  629. return reflectStaticSizeLocation +
  630. len(l.field) +
  631. len(l.ap)*size.SizeOfUint64
  632. }
  633. // Field returns the name of the field (useful in composite fields to know
  634. // which original field the value came from)
  635. func (l *Location) Field() string {
  636. return l.field
  637. }
  638. // Start returns the start byte offset of this occurrence
  639. func (l *Location) Start() uint64 {
  640. return l.start
  641. }
  642. // End returns the end byte offset of this occurrence
  643. func (l *Location) End() uint64 {
  644. return l.end
  645. }
  646. // Pos returns the 1-based phrase position of this occurrence
  647. func (l *Location) Pos() uint64 {
  648. return l.pos
  649. }
  650. // ArrayPositions returns the array position vector associated with this occurrence
  651. func (l *Location) ArrayPositions() []uint64 {
  652. return l.ap
  653. }