123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188 |
- // Copyright (c) 2017 Couchbase, Inc.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
-
- package scorch
-
- import (
- "bytes"
- "fmt"
- "reflect"
- "sync/atomic"
-
- "github.com/blevesearch/bleve/v2/size"
- index "github.com/blevesearch/bleve_index_api"
- segment "github.com/blevesearch/scorch_segment_api/v2"
- )
-
- var reflectStaticSizeIndexSnapshotTermFieldReader int
-
- func init() {
- var istfr IndexSnapshotTermFieldReader
- reflectStaticSizeIndexSnapshotTermFieldReader = int(reflect.TypeOf(istfr).Size())
- }
-
- type IndexSnapshotTermFieldReader struct {
- term []byte
- field string
- snapshot *IndexSnapshot
- dicts []segment.TermDictionary
- postings []segment.PostingsList
- iterators []segment.PostingsIterator
- segmentOffset int
- includeFreq bool
- includeNorm bool
- includeTermVectors bool
- currPosting segment.Posting
- currID index.IndexInternalID
- recycle bool
- }
-
- func (i *IndexSnapshotTermFieldReader) Size() int {
- sizeInBytes := reflectStaticSizeIndexSnapshotTermFieldReader + size.SizeOfPtr +
- len(i.term) +
- len(i.field) +
- len(i.currID)
-
- for _, entry := range i.postings {
- sizeInBytes += entry.Size()
- }
-
- for _, entry := range i.iterators {
- sizeInBytes += entry.Size()
- }
-
- if i.currPosting != nil {
- sizeInBytes += i.currPosting.Size()
- }
-
- return sizeInBytes
- }
-
- func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
- rv := preAlloced
- if rv == nil {
- rv = &index.TermFieldDoc{}
- }
- // find the next hit
- for i.segmentOffset < len(i.iterators) {
- next, err := i.iterators[i.segmentOffset].Next()
- if err != nil {
- return nil, err
- }
- if next != nil {
- // make segment number into global number by adding offset
- globalOffset := i.snapshot.offsets[i.segmentOffset]
- nnum := next.Number()
- rv.ID = docNumberToBytes(rv.ID, nnum+globalOffset)
- i.postingToTermFieldDoc(next, rv)
-
- i.currID = rv.ID
- i.currPosting = next
- return rv, nil
- }
- i.segmentOffset++
- }
- return nil, nil
- }
-
- func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Posting, rv *index.TermFieldDoc) {
- if i.includeFreq {
- rv.Freq = next.Frequency()
- }
- if i.includeNorm {
- rv.Norm = next.Norm()
- }
- if i.includeTermVectors {
- locs := next.Locations()
- if cap(rv.Vectors) < len(locs) {
- rv.Vectors = make([]*index.TermFieldVector, len(locs))
- backing := make([]index.TermFieldVector, len(locs))
- for i := range backing {
- rv.Vectors[i] = &backing[i]
- }
- }
- rv.Vectors = rv.Vectors[:len(locs)]
- for i, loc := range locs {
- *rv.Vectors[i] = index.TermFieldVector{
- Start: loc.Start(),
- End: loc.End(),
- Pos: loc.Pos(),
- ArrayPositions: loc.ArrayPositions(),
- Field: loc.Field(),
- }
- }
- }
- }
-
- func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
- // FIXME do something better
- // for now, if we need to seek backwards, then restart from the beginning
- if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 {
- i2, err := i.snapshot.TermFieldReader(i.term, i.field,
- i.includeFreq, i.includeNorm, i.includeTermVectors)
- if err != nil {
- return nil, err
- }
- // close the current term field reader before replacing it with a new one
- _ = i.Close()
- *i = *(i2.(*IndexSnapshotTermFieldReader))
- }
- num, err := docInternalToNumber(ID)
- if err != nil {
- return nil, fmt.Errorf("error converting to doc number % x - %v", ID, err)
- }
- segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num)
- if segIndex >= len(i.snapshot.segment) {
- return nil, fmt.Errorf("computed segment index %d out of bounds %d",
- segIndex, len(i.snapshot.segment))
- }
- // skip directly to the target segment
- i.segmentOffset = segIndex
- next, err := i.iterators[i.segmentOffset].Advance(ldocNum)
- if err != nil {
- return nil, err
- }
- if next == nil {
- // we jumped directly to the segment that should have contained it
- // but it wasn't there, so reuse Next() which should correctly
- // get the next hit after it (we moved i.segmentOffset)
- return i.Next(preAlloced)
- }
-
- if preAlloced == nil {
- preAlloced = &index.TermFieldDoc{}
- }
- preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+
- i.snapshot.offsets[segIndex])
- i.postingToTermFieldDoc(next, preAlloced)
- i.currID = preAlloced.ID
- i.currPosting = next
- return preAlloced, nil
- }
-
- func (i *IndexSnapshotTermFieldReader) Count() uint64 {
- var rv uint64
- for _, posting := range i.postings {
- rv += posting.Count()
- }
- return rv
- }
-
- func (i *IndexSnapshotTermFieldReader) Close() error {
- if i.snapshot != nil {
- atomic.AddUint64(&i.snapshot.parent.stats.TotTermSearchersFinished, uint64(1))
- i.snapshot.recycleTermFieldReader(i)
- }
- return nil
- }
|