// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package zap

import (
	"bytes"
	"encoding/binary"
	"math"
	"sort"
	"sync"

	"github.com/RoaringBitmap/roaring"
	index "github.com/blevesearch/bleve_index_api"
	segment "github.com/blevesearch/scorch_segment_api/v2"
	"github.com/blevesearch/vellum"
	"github.com/golang/snappy"
)

var NewSegmentBufferNumResultsBump int = 100
var NewSegmentBufferNumResultsFactor float64 = 1.0
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0
// ValidateDocFields can be set by applications to perform additional
// checks on fields in a document being added to a new segment; by
// default it does nothing.
// This API is experimental and may be removed at any time.
var ValidateDocFields = func(field index.Field) error {
	return nil
}

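// defaultChunkFactor is the default chunk size, in docNums, used when
// chunk-encoding the freq/norm, location, and doc value data below.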
var defaultChunkFactor uint32 = 1024

// New creates an in-memory zap-encoded SegmentBase from a set of Documents
func (z *ZapPlugin) New(results []index.Document) (
	segment.Segment, uint64, error) {
	return z.newWithChunkFactor(results, defaultChunkFactor)
}

func (*ZapPlugin) newWithChunkFactor(results []index.Document,
	chunkFactor uint32) (segment.Segment, uint64, error) {
	s := interimPool.Get().(*interim)

	var br bytes.Buffer
	if s.lastNumDocs > 0 {
		// use previous results to initialize the buf with an estimated
		// size, but note that the interim instance comes from a
		// global interimPool, so multiple scorch instances indexing
		// different docs can lead to low quality estimates
		estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) *
			NewSegmentBufferAvgBytesPerDocFactor)
		estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) *
			NewSegmentBufferNumResultsFactor)
		br.Grow(estimateAvgBytesPerDoc * estimateNumResults)
	}

	s.results = results
	s.chunkFactor = chunkFactor
	s.w = NewCountHashWriter(&br)

	storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets,
		err := s.convert()
	if err != nil {
		return nil, uint64(0), err
	}

	sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkFactor,
		s.FieldsMap, s.FieldsInv, uint64(len(results)),
		storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets)

	if err == nil && s.reset() == nil {
		s.lastNumDocs = len(results)
		s.lastOutSize = len(br.Bytes())
		interimPool.Put(s)
	}

	return sb, uint64(len(br.Bytes())), err
}

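// interimPool recycles interim structs, along with their backing
// slices and buffers, across segment builds to reduce allocations.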
var interimPool = sync.Pool{New: func() interface{} { return &interim{} }}

// interim holds temporary working data used while converting from
// analysis results to a zap-encoded segment
type interim struct {
	results []index.Document

	chunkFactor uint32

	w *CountHashWriter

	// FieldsMap adds 1 to field id to avoid zero value issues
	//  name -> field id + 1
	FieldsMap map[string]uint16

	// FieldsInv is the inverse of FieldsMap
	//  field id -> name
	FieldsInv []string

	// Term dictionaries for each field
	//  field id -> term -> postings list id + 1
	Dicts []map[string]uint64

	// Terms for each field, where terms are sorted ascending
	//  field id -> []term
	DictKeys [][]string

	// Fields whose IncludeDocValues is true
	//  field id -> bool
	IncludeDocValues []bool

	// postings id -> bitmap of docNums
	Postings []*roaring.Bitmap

	// postings id -> freq/norm's, one for each docNum in postings
	FreqNorms        [][]interimFreqNorm
	freqNormsBacking []interimFreqNorm

	// postings id -> locs, one for each freq
	Locs        [][]interimLoc
	locsBacking []interimLoc

	numTermsPerPostingsList []int // key is postings list id
	numLocsPerPostingsList  []int // key is postings list id

	builder    *vellum.Builder
	builderBuf bytes.Buffer

	metaBuf bytes.Buffer

	tmp0 []byte
	tmp1 []byte

	lastNumDocs int
	lastOutSize int
}

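// reset clears the interim for reuse, nilling out references while
// keeping backing slices and buffers so they can be recycled; it
// returns any error from resetting the vellum builder.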
func (s *interim) reset() (err error) {
	s.results = nil
	s.chunkFactor = 0
	s.w = nil
	s.FieldsMap = nil
	s.FieldsInv = nil
	for i := range s.Dicts {
		s.Dicts[i] = nil
	}
	s.Dicts = s.Dicts[:0]
	for i := range s.DictKeys {
		s.DictKeys[i] = s.DictKeys[i][:0]
	}
	s.DictKeys = s.DictKeys[:0]
	for i := range s.IncludeDocValues {
		s.IncludeDocValues[i] = false
	}
	s.IncludeDocValues = s.IncludeDocValues[:0]
	for _, idn := range s.Postings {
		idn.Clear()
	}
	s.Postings = s.Postings[:0]
	s.FreqNorms = s.FreqNorms[:0]
	for i := range s.freqNormsBacking {
		s.freqNormsBacking[i] = interimFreqNorm{}
	}
	s.freqNormsBacking = s.freqNormsBacking[:0]
	s.Locs = s.Locs[:0]
	for i := range s.locsBacking {
		s.locsBacking[i] = interimLoc{}
	}
	s.locsBacking = s.locsBacking[:0]
	s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0]
	s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0]
	s.builderBuf.Reset()
	if s.builder != nil {
		err = s.builder.Reset(&s.builderBuf)
	}
	s.metaBuf.Reset()
	s.tmp0 = s.tmp0[:0]
	s.tmp1 = s.tmp1[:0]
	s.lastNumDocs = 0
	s.lastOutSize = 0

	return err
}

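// grabBuf returns a scratch buffer of at least size bytes, growing
// s.tmp0 when its capacity is insufficient.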
func (s *interim) grabBuf(size int) []byte {
	buf := s.tmp0
	if cap(buf) < size {
		buf = make([]byte, size)
		s.tmp0 = buf
	}
	return buf[0:size]
}

type interimStoredField struct {
	vals      [][]byte
	typs      []byte
	arrayposs [][]uint64 // array positions
}

type interimFreqNorm struct {
	freq    uint64
	norm    float32
	numLocs int
}

type interimLoc struct {
	fieldID   uint16
	pos       uint64
	start     uint64
	end       uint64
	arrayposs []uint64
}

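// convert encodes the analyzed documents into the zap format,
// returning the stored field index offset, the fields index offset,
// the doc value index offset, and the per-field dictionary offsets.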
func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) {
	s.FieldsMap = map[string]uint16{}

	s.getOrDefineField("_id") // _id field is fieldID 0

	for _, result := range s.results {
		result.VisitComposite(func(field index.CompositeField) {
			s.getOrDefineField(field.Name())
		})
		result.VisitFields(func(field index.Field) {
			s.getOrDefineField(field.Name())
		})
	}

	sort.Strings(s.FieldsInv[1:]) // keep _id as first field

	for fieldID, fieldName := range s.FieldsInv {
		s.FieldsMap[fieldName] = uint16(fieldID + 1)
	}

	if cap(s.IncludeDocValues) >= len(s.FieldsInv) {
		s.IncludeDocValues = s.IncludeDocValues[:len(s.FieldsInv)]
	} else {
		s.IncludeDocValues = make([]bool, len(s.FieldsInv))
	}

	s.prepareDicts()

	for _, dict := range s.DictKeys {
		sort.Strings(dict)
	}

	s.processDocuments()

	storedIndexOffset, err := s.writeStoredFields()
	if err != nil {
		return 0, 0, 0, nil, err
	}

	var fdvIndexOffset uint64
	var dictOffsets []uint64

	if len(s.results) > 0 {
		fdvIndexOffset, dictOffsets, err = s.writeDicts()
		if err != nil {
			return 0, 0, 0, nil, err
		}
	} else {
		dictOffsets = make([]uint64, len(s.FieldsInv))
	}

	fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets)
	if err != nil {
		return 0, 0, 0, nil, err
	}

	return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil
}

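// getOrDefineField returns the 0-based field id for fieldName,
// registering the field and its per-field dictionary structures on
// first sight.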
func (s *interim) getOrDefineField(fieldName string) int {
	fieldIDPlus1, exists := s.FieldsMap[fieldName]
	if !exists {
		fieldIDPlus1 = uint16(len(s.FieldsInv) + 1)
		s.FieldsMap[fieldName] = fieldIDPlus1
		s.FieldsInv = append(s.FieldsInv, fieldName)

		s.Dicts = append(s.Dicts, make(map[string]uint64))

		n := len(s.DictKeys)
		if n < cap(s.DictKeys) {
			s.DictKeys = s.DictKeys[:n+1]
			s.DictKeys[n] = s.DictKeys[n][:0]
		} else {
			s.DictKeys = append(s.DictKeys, []string(nil))
		}
	}

	return int(fieldIDPlus1 - 1)
}

// fill Dicts and DictKeys from analysis results
func (s *interim) prepareDicts() {
	var pidNext int

	var totTFs int
	var totLocs int

	visitField := func(field index.Field) {
		fieldID := uint16(s.getOrDefineField(field.Name()))

		dict := s.Dicts[fieldID]
		dictKeys := s.DictKeys[fieldID]

		tfs := field.AnalyzedTokenFrequencies()
		for term, tf := range tfs {
			pidPlus1, exists := dict[term]
			if !exists {
				pidNext++
				pidPlus1 = uint64(pidNext)

				dict[term] = pidPlus1
				dictKeys = append(dictKeys, term)

				s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0)
				s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0)
			}

			pid := pidPlus1 - 1

			s.numTermsPerPostingsList[pid] += 1
			s.numLocsPerPostingsList[pid] += len(tf.Locations)

			totLocs += len(tf.Locations)
		}

		totTFs += len(tfs)

		s.DictKeys[fieldID] = dictKeys
	}

	for _, result := range s.results {
		// walk each composite field
		result.VisitComposite(func(field index.CompositeField) {
			visitField(field)
		})

		// walk each field
		result.VisitFields(visitField)
	}

	numPostingsLists := pidNext

	if cap(s.Postings) >= numPostingsLists {
		s.Postings = s.Postings[:numPostingsLists]
	} else {
		postings := make([]*roaring.Bitmap, numPostingsLists)
		copy(postings, s.Postings[:cap(s.Postings)])
		for i := 0; i < numPostingsLists; i++ {
			if postings[i] == nil {
				postings[i] = roaring.New()
			}
		}
		s.Postings = postings
	}

	if cap(s.FreqNorms) >= numPostingsLists {
		s.FreqNorms = s.FreqNorms[:numPostingsLists]
	} else {
		s.FreqNorms = make([][]interimFreqNorm, numPostingsLists)
	}

	if cap(s.freqNormsBacking) >= totTFs {
		s.freqNormsBacking = s.freqNormsBacking[:totTFs]
	} else {
		s.freqNormsBacking = make([]interimFreqNorm, totTFs)
	}

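	// carve the shared backing array into one sub-slice per postings
	// list; each sub-slice starts at length 0, so later appends fill
	// the pre-sized backing storage without new allocations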
	freqNormsBacking := s.freqNormsBacking
	for pid, numTerms := range s.numTermsPerPostingsList {
		s.FreqNorms[pid] = freqNormsBacking[0:0]
		freqNormsBacking = freqNormsBacking[numTerms:]
	}

	if cap(s.Locs) >= numPostingsLists {
		s.Locs = s.Locs[:numPostingsLists]
	} else {
		s.Locs = make([][]interimLoc, numPostingsLists)
	}

	if cap(s.locsBacking) >= totLocs {
		s.locsBacking = s.locsBacking[:totLocs]
	} else {
		s.locsBacking = make([]interimLoc, totLocs)
	}

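	// carve the locations backing array the same way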
	locsBacking := s.locsBacking
	for pid, numLocs := range s.numLocsPerPostingsList {
		s.Locs[pid] = locsBacking[0:0]
		locsBacking = locsBacking[numLocs:]
	}
}

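// processDocuments walks every document in the batch, accumulating
// postings, freq/norm, and location details for each term.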
func (s *interim) processDocuments() {
	numFields := len(s.FieldsInv)
	reuseFieldLens := make([]int, numFields)
	reuseFieldTFs := make([]index.TokenFrequencies, numFields)

	for docNum, result := range s.results {
		for i := 0; i < numFields; i++ { // clear these for reuse
			reuseFieldLens[i] = 0
			reuseFieldTFs[i] = nil
		}

		s.processDocument(uint64(docNum), result,
			reuseFieldLens, reuseFieldTFs)
	}
}

func (s *interim) processDocument(docNum uint64,
	result index.Document,
	fieldLens []int, fieldTFs []index.TokenFrequencies) {
	visitField := func(field index.Field) {
		fieldID := uint16(s.getOrDefineField(field.Name()))
		fieldLens[fieldID] += field.AnalyzedLength()

		existingFreqs := fieldTFs[fieldID]
		if existingFreqs != nil {
			existingFreqs.MergeAll(field.Name(), field.AnalyzedTokenFrequencies())
		} else {
			fieldTFs[fieldID] = field.AnalyzedTokenFrequencies()
		}
	}

	// walk each composite field
	result.VisitComposite(func(field index.CompositeField) {
		visitField(field)
	})

	// walk each field
	result.VisitFields(visitField)

	// now that it's been rolled up into fieldTFs, walk that
	for fieldID, tfs := range fieldTFs {
		dict := s.Dicts[fieldID]
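		// norm is the classic 1/sqrt(fieldLength) length
		// normalization, stored alongside each freq for scoring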
		norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))

		for term, tf := range tfs {
			pid := dict[term] - 1
			bs := s.Postings[pid]
			bs.Add(uint32(docNum))

			s.FreqNorms[pid] = append(s.FreqNorms[pid],
				interimFreqNorm{
					freq:    uint64(tf.Frequency()),
					norm:    norm,
					numLocs: len(tf.Locations),
				})

			if len(tf.Locations) > 0 {
				locs := s.Locs[pid]

				for _, loc := range tf.Locations {
					var locf = uint16(fieldID)
					if loc.Field != "" {
						locf = uint16(s.getOrDefineField(loc.Field))
					}
					var arrayposs []uint64
					if len(loc.ArrayPositions) > 0 {
						arrayposs = loc.ArrayPositions
					}
					locs = append(locs, interimLoc{
						fieldID:   locf,
						pos:       uint64(loc.Position),
						start:     uint64(loc.Start),
						end:       uint64(loc.End),
						arrayposs: arrayposs,
					})
				}

				s.Locs[pid] = locs
			}
		}
	}
}

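// writeStoredFields writes each document's stored field values,
// snappy-compressed, followed by an index of per-document offsets,
// returning the offset where that index begins.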
func (s *interim) writeStoredFields() (
	storedIndexOffset uint64, err error) {
	varBuf := make([]byte, binary.MaxVarintLen64)
	metaEncode := func(val uint64) (int, error) {
		wb := binary.PutUvarint(varBuf, val)
		return s.metaBuf.Write(varBuf[:wb])
	}

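	// reuse tmp0/tmp1 as the uncompressed and compressed scratch
	// buffers, saving whatever they grew to back onto the interim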
	data, compressed := s.tmp0[:0], s.tmp1[:0]
	defer func() { s.tmp0, s.tmp1 = data, compressed }()

	// keyed by docNum
	docStoredOffsets := make([]uint64, len(s.results))

	// keyed by fieldID, for the current doc in the loop
	docStoredFields := map[uint16]interimStoredField{}

	for docNum, result := range s.results {
		for fieldID := range docStoredFields { // reset for next doc
			delete(docStoredFields, fieldID)
		}

		var validationErr error
		result.VisitFields(func(field index.Field) {
			fieldID := uint16(s.getOrDefineField(field.Name()))

			if field.Options().IsStored() {
				isf := docStoredFields[fieldID]
				isf.vals = append(isf.vals, field.Value())
				isf.typs = append(isf.typs, field.EncodedFieldType())
				isf.arrayposs = append(isf.arrayposs, field.ArrayPositions())
				docStoredFields[fieldID] = isf
			}

			if field.Options().IncludeDocValues() {
				s.IncludeDocValues[fieldID] = true
			}

			err := ValidateDocFields(field)
			if err != nil && validationErr == nil {
				validationErr = err
			}
		})
		if validationErr != nil {
			return 0, validationErr
		}

		var curr int

		s.metaBuf.Reset()
		data = data[:0]

		// _id field special case optimizes ExternalID() lookups
		idFieldVal := docStoredFields[uint16(0)].vals[0]
		_, err = metaEncode(uint64(len(idFieldVal)))
		if err != nil {
			return 0, err
		}

		// handle non-"_id" fields
		for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ {
			isf, exists := docStoredFields[uint16(fieldID)]
			if exists {
				curr, data, err = persistStoredFieldValues(
					fieldID, isf.vals, isf.typs, isf.arrayposs,
					curr, metaEncode, data)
				if err != nil {
					return 0, err
				}
			}
		}

		metaBytes := s.metaBuf.Bytes()

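		// snappy-compress the stored field data; the _id value is
		// written uncompressed between the meta section and the
		// compressed data, so ID lookups can avoid decompression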
		compressed = snappy.Encode(compressed[:cap(compressed)], data)

		docStoredOffsets[docNum] = uint64(s.w.Count())

		_, err := writeUvarints(s.w,
			uint64(len(metaBytes)),
			uint64(len(idFieldVal)+len(compressed)))
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(metaBytes)
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(idFieldVal)
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(compressed)
		if err != nil {
			return 0, err
		}
	}

	storedIndexOffset = uint64(s.w.Count())

	for _, docStoredOffset := range docStoredOffsets {
		err = binary.Write(s.w, binary.BigEndian, docStoredOffset)
		if err != nil {
			return 0, err
		}
	}

	return storedIndexOffset, nil
}

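// writeDicts writes, for each field, the postings data, the term
// dictionary (a vellum FST mapping term -> postings offset), and any
// doc values, returning the doc value index offset and the per-field
// dictionary offsets.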
func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) {
	dictOffsets = make([]uint64, len(s.FieldsInv))

	fdvOffsetsStart := make([]uint64, len(s.FieldsInv))
	fdvOffsetsEnd := make([]uint64, len(s.FieldsInv))

	buf := s.grabBuf(binary.MaxVarintLen64)

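	// the chunked coders are sized by the highest docNum they will
	// see, which for this segment is len(s.results)-1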
	tfEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1))
	locEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1))
	fdvEncoder := newChunkedContentCoder(uint64(s.chunkFactor), uint64(len(s.results)-1), s.w, false)

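	// docTermMap accumulates, per document, the terms seen for the
	// field currently being processed; it feeds the doc value
	// (un-inverted) encoding below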
	var docTermMap [][]byte

	if s.builder == nil {
		s.builder, err = vellum.New(&s.builderBuf, nil)
		if err != nil {
			return 0, nil, err
		}
	}

	for fieldID, terms := range s.DictKeys {
		if cap(docTermMap) < len(s.results) {
			docTermMap = make([][]byte, len(s.results))
		} else {
			docTermMap = docTermMap[0:len(s.results)]
			for docNum := range docTermMap { // reset the docTermMap
				docTermMap[docNum] = docTermMap[docNum][:0]
			}
		}

		dict := s.Dicts[fieldID]

		for _, term := range terms { // terms are already sorted
			pid := dict[term] - 1

			postingsBS := s.Postings[pid]

			freqNorms := s.FreqNorms[pid]
			freqNormOffset := 0

			locs := s.Locs[pid]
			locOffset := 0

			postingsItr := postingsBS.Iterator()
			for postingsItr.HasNext() {
				docNum := uint64(postingsItr.Next())

				freqNorm := freqNorms[freqNormOffset]

				err = tfEncoder.Add(docNum,
					encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0),
					uint64(math.Float32bits(freqNorm.norm)))
				if err != nil {
					return 0, nil, err
				}

				if freqNorm.numLocs > 0 {
					numBytesLocs := 0
					for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
						numBytesLocs += totalUvarintBytes(
							uint64(loc.fieldID), loc.pos, loc.start, loc.end,
							uint64(len(loc.arrayposs)), loc.arrayposs)
					}

					err = locEncoder.Add(docNum, uint64(numBytesLocs))
					if err != nil {
						return 0, nil, err
					}

					for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
						err = locEncoder.Add(docNum,
							uint64(loc.fieldID), loc.pos, loc.start, loc.end,
							uint64(len(loc.arrayposs)))
						if err != nil {
							return 0, nil, err
						}

						err = locEncoder.Add(docNum, loc.arrayposs...)
						if err != nil {
							return 0, nil, err
						}
					}

					locOffset += freqNorm.numLocs
				}

				freqNormOffset++

				docTermMap[docNum] = append(
					append(docTermMap[docNum], term...),
					termSeparator)
			}

			tfEncoder.Close()
			locEncoder.Close()

			postingsOffset, err :=
				writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf)
			if err != nil {
				return 0, nil, err
			}

			if postingsOffset > uint64(0) {
				err = s.builder.Insert([]byte(term), postingsOffset)
				if err != nil {
					return 0, nil, err
				}
			}

			tfEncoder.Reset()
			locEncoder.Reset()
		}

		err = s.builder.Close()
		if err != nil {
			return 0, nil, err
		}

		// record where this dictionary starts
		dictOffsets[fieldID] = uint64(s.w.Count())

		vellumData := s.builderBuf.Bytes()

		// write out the length of the vellum data
		n := binary.PutUvarint(buf, uint64(len(vellumData)))
		_, err = s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}

		// write this vellum to disk
		_, err = s.w.Write(vellumData)
		if err != nil {
			return 0, nil, err
		}

		// reset vellum for reuse
		s.builderBuf.Reset()

		err = s.builder.Reset(&s.builderBuf)
		if err != nil {
			return 0, nil, err
		}

		// write the field doc values
		if s.IncludeDocValues[fieldID] {
			for docNum, docTerms := range docTermMap {
				if len(docTerms) > 0 {
					err = fdvEncoder.Add(uint64(docNum), docTerms)
					if err != nil {
						return 0, nil, err
					}
				}
			}
			err = fdvEncoder.Close()
			if err != nil {
				return 0, nil, err
			}

			fdvOffsetsStart[fieldID] = uint64(s.w.Count())

			_, err = fdvEncoder.Write()
			if err != nil {
				return 0, nil, err
			}

			fdvOffsetsEnd[fieldID] = uint64(s.w.Count())

			fdvEncoder.Reset()
		} else {
			fdvOffsetsStart[fieldID] = fieldNotUninverted
			fdvOffsetsEnd[fieldID] = fieldNotUninverted
		}
	}

	fdvIndexOffset = uint64(s.w.Count())

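	// write pairs of (start, end) doc value offsets, one pair per field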
	for i := 0; i < len(fdvOffsetsStart); i++ {
		n := binary.PutUvarint(buf, fdvOffsetsStart[i])
		_, err := s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}
		n = binary.PutUvarint(buf, fdvOffsetsEnd[i])
		_, err = s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}
	}

	return fdvIndexOffset, dictOffsets, nil
}

// totalUvarintBytes returns the total number of bytes needed to encode
// the given uint64's in binary.PutUvarint() encoding
func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) {
	n = numUvarintBytes(a)
	n += numUvarintBytes(b)
	n += numUvarintBytes(c)
	n += numUvarintBytes(d)
	n += numUvarintBytes(e)
	for _, v := range more {
		n += numUvarintBytes(v)
	}
	return n
}

// numUvarintBytes returns the number of bytes needed to encode x in
// binary.PutUvarint() encoding
func numUvarintBytes(x uint64) (n int) {
	for x >= 0x80 {
		x >>= 7
		n++
	}
	return n + 1
}