12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908 |
- // Copyright (c) 2017 Couchbase, Inc.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
-
- package zap
-
- import (
- "encoding/binary"
- "fmt"
- "math"
- "reflect"
-
- "github.com/RoaringBitmap/roaring"
- segment "github.com/blevesearch/scorch_segment_api/v2"
- )
-
- var reflectStaticSizePostingsList int
- var reflectStaticSizePostingsIterator int
- var reflectStaticSizePosting int
- var reflectStaticSizeLocation int
-
- func init() {
- var pl PostingsList
- reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size())
- var pi PostingsIterator
- reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size())
- var p Posting
- reflectStaticSizePosting = int(reflect.TypeOf(p).Size())
- var l Location
- reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
- }
-
- // FST or vellum value (uint64) encoding is determined by the top two
- // highest-order or most significant bits...
- //
- // encoding : MSB
- // name : 63 62 61...to...bit #0 (LSB)
- // ----------+---+---+---------------------------------------------------
- // general : 0 | 0 | 62-bits of postingsOffset.
- // ~ : 0 | 1 | reserved for future.
- // 1-hit : 1 | 0 | 31-bits of positive float31 norm | 31-bits docNum.
- // ~ : 1 | 1 | reserved for future.
- //
- // Encoding "general" is able to handle all cases, where the
- // postingsOffset points to more information about the postings for
- // the term.
- //
- // Encoding "1-hit" is used to optimize a commonly seen case when a
- // term has only a single hit. For example, a term in the _id field
- // will have only 1 hit. The "1-hit" encoding is used for a term
- // in a field when...
- //
- // - term vector info is disabled for that field;
- // - and, the term appears in only a single doc for that field;
- // - and, the term's freq is exactly 1 in that single doc for that field;
- // - and, the docNum must fit into 31-bits;
- //
- // Otherwise, the "general" encoding is used instead.
- //
- // In the "1-hit" encoding, the field in that single doc may have
- // other terms, which is supported in the "1-hit" encoding by the
- // positive float31 norm.
-
// Bit patterns found in the two most significant bits of an FST value.
const (
	// FSTValEncodingMask selects the two encoding bits (63 and 62).
	FSTValEncodingMask = uint64(0xc000000000000000)
	// FSTValEncodingGeneral marks a value holding a postingsOffset.
	FSTValEncodingGeneral = uint64(0x0000000000000000)
	// FSTValEncoding1Hit marks a value holding a packed norm and docNum.
	FSTValEncoding1Hit = uint64(0x8000000000000000)
)

// FSTValEncode1Hit packs docNum and normBits into a "1-hit" FST value.
// Only the low 31 bits of each argument are kept: normBits lands in
// bits 31..61 and docNum in bits 0..30.
func FSTValEncode1Hit(docNum uint64, normBits uint64) uint64 {
	docField := docNum & mask31Bits
	normField := (normBits & mask31Bits) << 31
	return FSTValEncoding1Hit | normField | docField
}

// FSTValDecode1Hit unpacks the 31-bit docNum and 31-bit normBits from a
// "1-hit" FST value. The encoding bits of v are ignored.
func FSTValDecode1Hit(v uint64) (docNum uint64, normBits uint64) {
	docNum = v & mask31Bits
	normBits = (v >> 31) & mask31Bits
	return docNum, normBits
}

// mask31Bits has the low 31 bits set.
const mask31Bits = uint64(0x000000007fffffff)

// under32Bits reports whether x has no bits set above the low 31,
// i.e. x <= mask31Bits (so, despite the name, x must fit in 31 bits).
func under32Bits(x uint64) bool {
	return x&^mask31Bits == 0
}

// DocNum1HitFinished is the sentinel stored in docNum1Hit once the single
// hit of a "1-hit" iterator is consumed or excluded.
const DocNum1HitFinished = math.MaxUint64

// NormBits1Hit holds the float32 bit pattern of 1.0, widened to uint64.
var NormBits1Hit = uint64(math.Float32bits(float32(1)))
-
- // PostingsList is an in-memory representation of a postings list
- type PostingsList struct {
- sb *SegmentBase
- postingsOffset uint64
- freqOffset uint64
- locOffset uint64
- postings *roaring.Bitmap
- except *roaring.Bitmap
-
- // when normBits1Hit != 0, then this postings list came from a
- // 1-hit encoding, and only the docNum1Hit & normBits1Hit apply
- docNum1Hit uint64
- normBits1Hit uint64
- }
-
- // represents an immutable, empty postings list
- var emptyPostingsList = &PostingsList{}
-
- func (p *PostingsList) Size() int {
- sizeInBytes := reflectStaticSizePostingsList + SizeOfPtr
-
- if p.except != nil {
- sizeInBytes += int(p.except.GetSizeInBytes())
- }
-
- return sizeInBytes
- }
-
- func (p *PostingsList) OrInto(receiver *roaring.Bitmap) {
- if p.normBits1Hit != 0 {
- receiver.Add(uint32(p.docNum1Hit))
- return
- }
-
- if p.postings != nil {
- receiver.Or(p.postings)
- }
- }
-
- // Iterator returns an iterator for this postings list
- func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool,
- prealloc segment.PostingsIterator) segment.PostingsIterator {
- if p.normBits1Hit == 0 && p.postings == nil {
- return emptyPostingsIterator
- }
-
- var preallocPI *PostingsIterator
- pi, ok := prealloc.(*PostingsIterator)
- if ok && pi != nil {
- preallocPI = pi
- }
- if preallocPI == emptyPostingsIterator {
- preallocPI = nil
- }
-
- return p.iterator(includeFreq, includeNorm, includeLocs, preallocPI)
- }
-
- func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
- rv *PostingsIterator) *PostingsIterator {
- if rv == nil {
- rv = &PostingsIterator{}
- } else {
- freqNormReader := rv.freqNormReader
- if freqNormReader != nil {
- freqNormReader.Reset([]byte(nil))
- }
-
- locReader := rv.locReader
- if locReader != nil {
- locReader.Reset([]byte(nil))
- }
-
- freqChunkOffsets := rv.freqChunkOffsets[:0]
- locChunkOffsets := rv.locChunkOffsets[:0]
-
- nextLocs := rv.nextLocs[:0]
- nextSegmentLocs := rv.nextSegmentLocs[:0]
-
- buf := rv.buf
-
- *rv = PostingsIterator{} // clear the struct
-
- rv.freqNormReader = freqNormReader
- rv.locReader = locReader
-
- rv.freqChunkOffsets = freqChunkOffsets
- rv.locChunkOffsets = locChunkOffsets
-
- rv.nextLocs = nextLocs
- rv.nextSegmentLocs = nextSegmentLocs
-
- rv.buf = buf
- }
-
- rv.postings = p
- rv.includeFreqNorm = includeFreq || includeNorm || includeLocs
- rv.includeLocs = includeLocs
-
- if p.normBits1Hit != 0 {
- // "1-hit" encoding
- rv.docNum1Hit = p.docNum1Hit
- rv.normBits1Hit = p.normBits1Hit
-
- if p.except != nil && p.except.Contains(uint32(rv.docNum1Hit)) {
- rv.docNum1Hit = DocNum1HitFinished
- }
-
- return rv
- }
-
- // "general" encoding, check if empty
- if p.postings == nil {
- return rv
- }
-
- var n uint64
- var read int
-
- // prepare the freq chunk details
- if rv.includeFreqNorm {
- var numFreqChunks uint64
- numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
- n += uint64(read)
- if cap(rv.freqChunkOffsets) >= int(numFreqChunks) {
- rv.freqChunkOffsets = rv.freqChunkOffsets[:int(numFreqChunks)]
- } else {
- rv.freqChunkOffsets = make([]uint64, int(numFreqChunks))
- }
- for i := 0; i < int(numFreqChunks); i++ {
- rv.freqChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
- n += uint64(read)
- }
- rv.freqChunkStart = p.freqOffset + n
- }
-
- // prepare the loc chunk details
- if rv.includeLocs {
- n = 0
- var numLocChunks uint64
- numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
- n += uint64(read)
- if cap(rv.locChunkOffsets) >= int(numLocChunks) {
- rv.locChunkOffsets = rv.locChunkOffsets[:int(numLocChunks)]
- } else {
- rv.locChunkOffsets = make([]uint64, int(numLocChunks))
- }
- for i := 0; i < int(numLocChunks); i++ {
- rv.locChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
- n += uint64(read)
- }
- rv.locChunkStart = p.locOffset + n
- }
-
- rv.all = p.postings.Iterator()
- if p.except != nil {
- rv.ActualBM = roaring.AndNot(p.postings, p.except)
- rv.Actual = rv.ActualBM.Iterator()
- } else {
- rv.ActualBM = p.postings
- rv.Actual = rv.all // Optimize to use same iterator for all & Actual.
- }
-
- return rv
- }
-
- // Count returns the number of items on this postings list
- func (p *PostingsList) Count() uint64 {
- var n, e uint64
- if p.normBits1Hit != 0 {
- n = 1
- if p.except != nil && p.except.Contains(uint32(p.docNum1Hit)) {
- e = 1
- }
- } else if p.postings != nil {
- n = p.postings.GetCardinality()
- if p.except != nil {
- e = p.postings.AndCardinality(p.except)
- }
- }
- return n - e
- }
-
- func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
- rv.postingsOffset = postingsOffset
-
- // handle "1-hit" encoding special case
- if rv.postingsOffset&FSTValEncodingMask == FSTValEncoding1Hit {
- return rv.init1Hit(postingsOffset)
- }
-
- // read the location of the freq/norm details
- var n uint64
- var read int
-
- rv.freqOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+binary.MaxVarintLen64])
- n += uint64(read)
-
- rv.locOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
- n += uint64(read)
-
- var postingsLen uint64
- postingsLen, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
- n += uint64(read)
-
- roaringBytes := d.sb.mem[postingsOffset+n : postingsOffset+n+postingsLen]
-
- if rv.postings == nil {
- rv.postings = roaring.NewBitmap()
- }
- _, err := rv.postings.FromBuffer(roaringBytes)
- if err != nil {
- return fmt.Errorf("error loading roaring bitmap: %v", err)
- }
-
- return nil
- }
-
- func (rv *PostingsList) init1Hit(fstVal uint64) error {
- docNum, normBits := FSTValDecode1Hit(fstVal)
-
- rv.docNum1Hit = docNum
- rv.normBits1Hit = normBits
-
- return nil
- }
-
- // PostingsIterator provides a way to iterate through the postings list
- type PostingsIterator struct {
- postings *PostingsList
- all roaring.IntPeekable
- Actual roaring.IntPeekable
- ActualBM *roaring.Bitmap
-
- currChunk uint32
- currChunkFreqNorm []byte
- currChunkLoc []byte
-
- freqNormReader *memUvarintReader
- locReader *memUvarintReader
-
- freqChunkOffsets []uint64
- freqChunkStart uint64
-
- locChunkOffsets []uint64
- locChunkStart uint64
-
- next Posting // reused across Next() calls
- nextLocs []Location // reused across Next() calls
- nextSegmentLocs []segment.Location // reused across Next() calls
-
- docNum1Hit uint64
- normBits1Hit uint64
-
- buf []byte
-
- includeFreqNorm bool
- includeLocs bool
- }
-
- var emptyPostingsIterator = &PostingsIterator{}
-
- func (i *PostingsIterator) Size() int {
- sizeInBytes := reflectStaticSizePostingsIterator + SizeOfPtr +
- len(i.currChunkFreqNorm) +
- len(i.currChunkLoc) +
- len(i.freqChunkOffsets)*SizeOfUint64 +
- len(i.locChunkOffsets)*SizeOfUint64 +
- i.next.Size()
-
- for _, entry := range i.nextLocs {
- sizeInBytes += entry.Size()
- }
-
- return sizeInBytes
- }
-
- func (i *PostingsIterator) loadChunk(chunk int) error {
- if i.includeFreqNorm {
- if chunk >= len(i.freqChunkOffsets) {
- return fmt.Errorf("tried to load freq chunk that doesn't exist %d/(%d)",
- chunk, len(i.freqChunkOffsets))
- }
-
- end, start := i.freqChunkStart, i.freqChunkStart
- s, e := readChunkBoundary(chunk, i.freqChunkOffsets)
- start += s
- end += e
- i.currChunkFreqNorm = i.postings.sb.mem[start:end]
- if i.freqNormReader == nil {
- i.freqNormReader = newMemUvarintReader(i.currChunkFreqNorm)
- } else {
- i.freqNormReader.Reset(i.currChunkFreqNorm)
- }
- }
-
- if i.includeLocs {
- if chunk >= len(i.locChunkOffsets) {
- return fmt.Errorf("tried to load loc chunk that doesn't exist %d/(%d)",
- chunk, len(i.locChunkOffsets))
- }
-
- end, start := i.locChunkStart, i.locChunkStart
- s, e := readChunkBoundary(chunk, i.locChunkOffsets)
- start += s
- end += e
- i.currChunkLoc = i.postings.sb.mem[start:end]
- if i.locReader == nil {
- i.locReader = newMemUvarintReader(i.currChunkLoc)
- } else {
- i.locReader.Reset(i.currChunkLoc)
- }
- }
-
- i.currChunk = uint32(chunk)
- return nil
- }
-
- func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) {
- if i.normBits1Hit != 0 {
- return 1, i.normBits1Hit, false, nil
- }
-
- freqHasLocs, err := i.freqNormReader.ReadUvarint()
- if err != nil {
- return 0, 0, false, fmt.Errorf("error reading frequency: %v", err)
- }
-
- freq, hasLocs := decodeFreqHasLocs(freqHasLocs)
-
- normBits, err := i.freqNormReader.ReadUvarint()
- if err != nil {
- return 0, 0, false, fmt.Errorf("error reading norm: %v", err)
- }
-
- return freq, normBits, hasLocs, nil
- }
-
- func (i *PostingsIterator) skipFreqNormReadHasLocs() (bool, error) {
- if i.normBits1Hit != 0 {
- return false, nil
- }
-
- freqHasLocs, err := i.freqNormReader.ReadUvarint()
- if err != nil {
- return false, fmt.Errorf("error reading freqHasLocs: %v", err)
- }
-
- i.freqNormReader.SkipUvarint() // Skip normBits.
-
- return freqHasLocs&0x01 != 0, nil // See decodeFreqHasLocs() / hasLocs.
- }
-
// encodeFreqHasLocs packs freq and hasLocs into a single value: freq is
// shifted left by one bit and the lowest bit carries the hasLocs flag.
func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 {
	packed := freq << 1
	if !hasLocs {
		return packed
	}
	return packed | 0x01
}
-
// decodeFreqHasLocs splits a packed value into its frequency (all bits
// above the lowest) and its hasLocs flag (the lowest bit).
func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) {
	return freqHasLocs >> 1, freqHasLocs&0x01 == 0x01
}
-
- // readLocation processes all the integers on the stream representing a single
- // location.
- func (i *PostingsIterator) readLocation(l *Location) error {
- // read off field
- fieldID, err := i.locReader.ReadUvarint()
- if err != nil {
- return fmt.Errorf("error reading location field: %v", err)
- }
- // read off pos
- pos, err := i.locReader.ReadUvarint()
- if err != nil {
- return fmt.Errorf("error reading location pos: %v", err)
- }
- // read off start
- start, err := i.locReader.ReadUvarint()
- if err != nil {
- return fmt.Errorf("error reading location start: %v", err)
- }
- // read off end
- end, err := i.locReader.ReadUvarint()
- if err != nil {
- return fmt.Errorf("error reading location end: %v", err)
- }
- // read off num array pos
- numArrayPos, err := i.locReader.ReadUvarint()
- if err != nil {
- return fmt.Errorf("error reading location num array pos: %v", err)
- }
-
- l.field = i.postings.sb.fieldsInv[fieldID]
- l.pos = pos
- l.start = start
- l.end = end
-
- if cap(l.ap) < int(numArrayPos) {
- l.ap = make([]uint64, int(numArrayPos))
- } else {
- l.ap = l.ap[:int(numArrayPos)]
- }
-
- // read off array positions
- for k := 0; k < int(numArrayPos); k++ {
- ap, err := i.locReader.ReadUvarint()
- if err != nil {
- return fmt.Errorf("error reading array position: %v", err)
- }
-
- l.ap[k] = ap
- }
-
- return nil
- }
-
- // Next returns the next posting on the postings list, or nil at the end
- func (i *PostingsIterator) Next() (segment.Posting, error) {
- return i.nextAtOrAfter(0)
- }
-
- // Advance returns the posting at the specified docNum or it is not present
- // the next posting, or if the end is reached, nil
- func (i *PostingsIterator) Advance(docNum uint64) (segment.Posting, error) {
- return i.nextAtOrAfter(docNum)
- }
-
- // Next returns the next posting on the postings list, or nil at the end
- func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, error) {
- docNum, exists, err := i.nextDocNumAtOrAfter(atOrAfter)
- if err != nil || !exists {
- return nil, err
- }
-
- i.next = Posting{} // clear the struct
- rv := &i.next
- rv.docNum = docNum
-
- if !i.includeFreqNorm {
- return rv, nil
- }
-
- var normBits uint64
- var hasLocs bool
-
- rv.freq, normBits, hasLocs, err = i.readFreqNormHasLocs()
- if err != nil {
- return nil, err
- }
-
- rv.norm = math.Float32frombits(uint32(normBits))
-
- if i.includeLocs && hasLocs {
- // prepare locations into reused slices, where we assume
- // rv.freq >= "number of locs", since in a composite field,
- // some component fields might have their IncludeTermVector
- // flags disabled while other component fields are enabled
- if cap(i.nextLocs) >= int(rv.freq) {
- i.nextLocs = i.nextLocs[0:rv.freq]
- } else {
- i.nextLocs = make([]Location, rv.freq, rv.freq*2)
- }
- if cap(i.nextSegmentLocs) < int(rv.freq) {
- i.nextSegmentLocs = make([]segment.Location, rv.freq, rv.freq*2)
- }
- rv.locs = i.nextSegmentLocs[:0]
-
- numLocsBytes, err := i.locReader.ReadUvarint()
- if err != nil {
- return nil, fmt.Errorf("error reading location numLocsBytes: %v", err)
- }
-
- j := 0
- startBytesRemaining := i.locReader.Len() // # bytes remaining in the locReader
- for startBytesRemaining-i.locReader.Len() < int(numLocsBytes) {
- err := i.readLocation(&i.nextLocs[j])
- if err != nil {
- return nil, err
- }
- rv.locs = append(rv.locs, &i.nextLocs[j])
- j++
- }
- }
-
- return rv, nil
- }
-
- var freqHasLocs1Hit = encodeFreqHasLocs(1, false)
-
- // nextBytes returns the docNum and the encoded freq & loc bytes for
- // the next posting
- func (i *PostingsIterator) nextBytes() (
- docNumOut uint64, freq uint64, normBits uint64,
- bytesFreqNorm []byte, bytesLoc []byte, err error) {
- docNum, exists, err := i.nextDocNumAtOrAfter(0)
- if err != nil || !exists {
- return 0, 0, 0, nil, nil, err
- }
-
- if i.normBits1Hit != 0 {
- if i.buf == nil {
- i.buf = make([]byte, binary.MaxVarintLen64*2)
- }
- n := binary.PutUvarint(i.buf, freqHasLocs1Hit)
- n += binary.PutUvarint(i.buf[n:], i.normBits1Hit)
- return docNum, uint64(1), i.normBits1Hit, i.buf[:n], nil, nil
- }
-
- startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
-
- var hasLocs bool
-
- freq, normBits, hasLocs, err = i.readFreqNormHasLocs()
- if err != nil {
- return 0, 0, 0, nil, nil, err
- }
-
- endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
- bytesFreqNorm = i.currChunkFreqNorm[startFreqNorm:endFreqNorm]
-
- if hasLocs {
- startLoc := len(i.currChunkLoc) - i.locReader.Len()
-
- numLocsBytes, err := i.locReader.ReadUvarint()
- if err != nil {
- return 0, 0, 0, nil, nil,
- fmt.Errorf("error reading location nextBytes numLocs: %v", err)
- }
-
- // skip over all the location bytes
- i.locReader.SkipBytes(int(numLocsBytes))
-
- endLoc := len(i.currChunkLoc) - i.locReader.Len()
- bytesLoc = i.currChunkLoc[startLoc:endLoc]
- }
-
- return docNum, freq, normBits, bytesFreqNorm, bytesLoc, nil
- }
-
- // nextDocNum returns the next docNum on the postings list, and also
- // sets up the currChunk / loc related fields of the iterator.
- func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, error) {
- if i.normBits1Hit != 0 {
- if i.docNum1Hit == DocNum1HitFinished {
- return 0, false, nil
- }
- if i.docNum1Hit < atOrAfter {
- // advanced past our 1-hit
- i.docNum1Hit = DocNum1HitFinished // consume our 1-hit docNum
- return 0, false, nil
- }
- docNum := i.docNum1Hit
- i.docNum1Hit = DocNum1HitFinished // consume our 1-hit docNum
- return docNum, true, nil
- }
-
- if i.Actual == nil || !i.Actual.HasNext() {
- return 0, false, nil
- }
-
- if i.postings == nil || i.postings.postings == i.ActualBM {
- return i.nextDocNumAtOrAfterClean(atOrAfter)
- }
-
- i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
-
- if !i.Actual.HasNext() {
- // couldn't find anything
- return 0, false, nil
- }
-
- n := i.Actual.Next()
- allN := i.all.Next()
-
- nChunk := n / i.postings.sb.chunkFactor
-
- // when allN becomes >= to here, then allN is in the same chunk as nChunk.
- allNReachesNChunk := nChunk * i.postings.sb.chunkFactor
-
- // n is the next actual hit (excluding some postings), and
- // allN is the next hit in the full postings, and
- // if they don't match, move 'all' forwards until they do
- for allN != n {
- // we've reached same chunk, so move the freq/norm/loc decoders forward
- if i.includeFreqNorm && allN >= allNReachesNChunk {
- err := i.currChunkNext(nChunk)
- if err != nil {
- return 0, false, err
- }
- }
-
- allN = i.all.Next()
- }
-
- if i.includeFreqNorm && (i.currChunk != nChunk || i.currChunkFreqNorm == nil) {
- err := i.loadChunk(int(nChunk))
- if err != nil {
- return 0, false, fmt.Errorf("error loading chunk: %v", err)
- }
- }
-
- return uint64(n), true, nil
- }
-
- // optimization when the postings list is "clean" (e.g., no updates &
- // no deletions) where the all bitmap is the same as the actual bitmap
- func (i *PostingsIterator) nextDocNumAtOrAfterClean(
- atOrAfter uint64) (uint64, bool, error) {
- if !i.includeFreqNorm {
- i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
-
- if !i.Actual.HasNext() {
- return 0, false, nil // couldn't find anything
- }
-
- return uint64(i.Actual.Next()), true, nil
- }
-
- // freq-norm's needed, so maintain freq-norm chunk reader
- sameChunkNexts := 0 // # of times we called Next() in the same chunk
- n := i.Actual.Next()
- nChunk := n / i.postings.sb.chunkFactor
-
- for uint64(n) < atOrAfter && i.Actual.HasNext() {
- n = i.Actual.Next()
-
- nChunkPrev := nChunk
- nChunk = n / i.postings.sb.chunkFactor
-
- if nChunk != nChunkPrev {
- sameChunkNexts = 0
- } else {
- sameChunkNexts += 1
- }
- }
-
- if uint64(n) < atOrAfter {
- // couldn't find anything
- return 0, false, nil
- }
-
- for j := 0; j < sameChunkNexts; j++ {
- err := i.currChunkNext(nChunk)
- if err != nil {
- return 0, false, fmt.Errorf("error optimized currChunkNext: %v", err)
- }
- }
-
- if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
- err := i.loadChunk(int(nChunk))
- if err != nil {
- return 0, false, fmt.Errorf("error loading chunk: %v", err)
- }
- }
-
- return uint64(n), true, nil
- }
-
- func (i *PostingsIterator) currChunkNext(nChunk uint32) error {
- if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
- err := i.loadChunk(int(nChunk))
- if err != nil {
- return fmt.Errorf("error loading chunk: %v", err)
- }
- }
-
- // read off freq/offsets even though we don't care about them
- hasLocs, err := i.skipFreqNormReadHasLocs()
- if err != nil {
- return err
- }
-
- if i.includeLocs && hasLocs {
- numLocsBytes, err := i.locReader.ReadUvarint()
- if err != nil {
- return fmt.Errorf("error reading location numLocsBytes: %v", err)
- }
-
- // skip over all the location bytes
- i.locReader.SkipBytes(int(numLocsBytes))
- }
-
- return nil
- }
-
- // DocNum1Hit returns the docNum and true if this is "1-hit" optimized
- // and the docNum is available.
- func (p *PostingsIterator) DocNum1Hit() (uint64, bool) {
- if p.normBits1Hit != 0 && p.docNum1Hit != DocNum1HitFinished {
- return p.docNum1Hit, true
- }
- return 0, false
- }
-
- // ActualBitmap returns the underlying actual bitmap
- // which can be used up the stack for optimizations
- func (p *PostingsIterator) ActualBitmap() *roaring.Bitmap {
- return p.ActualBM
- }
-
- // ReplaceActual replaces the ActualBM with the provided
- // bitmap
- func (p *PostingsIterator) ReplaceActual(abm *roaring.Bitmap) {
- p.ActualBM = abm
- p.Actual = abm.Iterator()
- }
-
- // PostingsIteratorFromBitmap constructs a PostingsIterator given an
- // "actual" bitmap.
- func PostingsIteratorFromBitmap(bm *roaring.Bitmap,
- includeFreqNorm, includeLocs bool) (segment.PostingsIterator, error) {
- return &PostingsIterator{
- ActualBM: bm,
- Actual: bm.Iterator(),
- includeFreqNorm: includeFreqNorm,
- includeLocs: includeLocs,
- }, nil
- }
-
- // PostingsIteratorFrom1Hit constructs a PostingsIterator given a
- // 1-hit docNum.
- func PostingsIteratorFrom1Hit(docNum1Hit uint64,
- includeFreqNorm, includeLocs bool) (segment.PostingsIterator, error) {
- return &PostingsIterator{
- docNum1Hit: docNum1Hit,
- normBits1Hit: NormBits1Hit,
- includeFreqNorm: includeFreqNorm,
- includeLocs: includeLocs,
- }, nil
- }
-
- // Posting is a single entry in a postings list
- type Posting struct {
- docNum uint64
- freq uint64
- norm float32
- locs []segment.Location
- }
-
- func (p *Posting) Size() int {
- sizeInBytes := reflectStaticSizePosting
-
- for _, entry := range p.locs {
- sizeInBytes += entry.Size()
- }
-
- return sizeInBytes
- }
-
- // Number returns the document number of this posting in this segment
- func (p *Posting) Number() uint64 {
- return p.docNum
- }
-
- // Frequency returns the frequencies of occurrence of this term in this doc/field
- func (p *Posting) Frequency() uint64 {
- return p.freq
- }
-
- // Norm returns the normalization factor for this posting
- func (p *Posting) Norm() float64 {
- return float64(p.norm)
- }
-
- // Locations returns the location information for each occurrence
- func (p *Posting) Locations() []segment.Location {
- return p.locs
- }
-
- // Location represents the location of a single occurrence
- type Location struct {
- field string
- pos uint64
- start uint64
- end uint64
- ap []uint64
- }
-
- func (l *Location) Size() int {
- return reflectStaticSizeLocation +
- len(l.field) +
- len(l.ap)*SizeOfUint64
- }
-
- // Field returns the name of the field (useful in composite fields to know
- // which original field the value came from)
- func (l *Location) Field() string {
- return l.field
- }
-
- // Start returns the start byte offset of this occurrence
- func (l *Location) Start() uint64 {
- return l.start
- }
-
- // End returns the end byte offset of this occurrence
- func (l *Location) End() uint64 {
- return l.end
- }
-
- // Pos returns the 1-based phrase position of this occurrence
- func (l *Location) Pos() uint64 {
- return l.pos
- }
-
- // ArrayPositions returns the array position vector associated with this occurrence
- func (l *Location) ArrayPositions() []uint64 {
- return l.ap
- }
|