diff options
Diffstat (limited to 'vendor/github.com/blevesearch/zap/v13/dict.go')
-rw-r--r-- | vendor/github.com/blevesearch/zap/v13/dict.go | 263 |
1 files changed, 263 insertions, 0 deletions
diff --git a/vendor/github.com/blevesearch/zap/v13/dict.go b/vendor/github.com/blevesearch/zap/v13/dict.go new file mode 100644 index 0000000000..ad4a8f8dc5 --- /dev/null +++ b/vendor/github.com/blevesearch/zap/v13/dict.go @@ -0,0 +1,263 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zap + +import ( + "bytes" + "fmt" + + "github.com/RoaringBitmap/roaring" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/index/scorch/segment" + "github.com/couchbase/vellum" +) + +// Dictionary is the zap representation of the term dictionary +type Dictionary struct { + sb *SegmentBase + field string + fieldID uint16 + fst *vellum.FST + fstReader *vellum.Reader +} + +// PostingsList returns the postings list for the specified term +func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap, + prealloc segment.PostingsList) (segment.PostingsList, error) { + var preallocPL *PostingsList + pl, ok := prealloc.(*PostingsList) + if ok && pl != nil { + preallocPL = pl + } + return d.postingsList(term, except, preallocPL) +} + +func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { + if d.fstReader == nil { + if rv == nil || rv == emptyPostingsList { + return emptyPostingsList, nil + } + return d.postingsListInit(rv, except), nil + } + + postingsOffset, exists, err := d.fstReader.Get(term) + if err != nil { + return nil, fmt.Errorf("vellum err: %v", err) + } + if !exists { + if rv == nil || rv == emptyPostingsList { + return emptyPostingsList, nil + } + return d.postingsListInit(rv, except), nil + } + + return d.postingsListFromOffset(postingsOffset, except, rv) +} + +func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { + rv = d.postingsListInit(rv, except) + + err := rv.read(postingsOffset, d) + if err != nil { + return nil, err + } + + return rv, nil +} + +func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList { + if rv == nil || rv == emptyPostingsList { + rv = &PostingsList{} + } else { + postings := rv.postings + if postings != nil { + postings.Clear() + } + + *rv = PostingsList{} // clear the struct + + rv.postings = postings + } + rv.sb = d.sb + rv.except = except + return rv +} + +func (d *Dictionary) Contains(key []byte) (bool, error) { + return d.fst.Contains(key) +} + +// Iterator returns an iterator for this dictionary +func (d *Dictionary) Iterator() segment.DictionaryIterator { + rv := &DictionaryIterator{ + d: d, + } + + if d.fst != nil { + itr, err := d.fst.Iterator(nil, nil) + if err == nil { + rv.itr = itr + } else if err != vellum.ErrIteratorDone { + rv.err = err + } + } + + return rv +} + +// PrefixIterator returns an iterator which only visits terms having the +// the specified prefix +func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { + rv := &DictionaryIterator{ + d: d, + } + + kBeg := []byte(prefix) + kEnd := segment.IncrementBytes(kBeg) + + if d.fst != nil { + itr, err := d.fst.Iterator(kBeg, kEnd) + if err == nil { + rv.itr = itr + } else if err != vellum.ErrIteratorDone { + rv.err = err + } + } + + return rv +} + +// RangeIterator returns an iterator which only visits terms between the +// start and end terms. NOTE: bleve.index API specifies the end is inclusive. +func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator { + rv := &DictionaryIterator{ + d: d, + } + + // need to increment the end position to be inclusive + var endBytes []byte + if len(end) > 0 { + endBytes = []byte(end) + if endBytes[len(endBytes)-1] < 0xff { + endBytes[len(endBytes)-1]++ + } else { + endBytes = append(endBytes, 0xff) + } + } + + if d.fst != nil { + itr, err := d.fst.Iterator([]byte(start), endBytes) + if err == nil { + rv.itr = itr + } else if err != vellum.ErrIteratorDone { + rv.err = err + } + } + + return rv +} + +// AutomatonIterator returns an iterator which only visits terms +// having the the vellum automaton and start/end key range +func (d *Dictionary) AutomatonIterator(a vellum.Automaton, + startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator { + rv := &DictionaryIterator{ + d: d, + } + + if d.fst != nil { + itr, err := d.fst.Search(a, startKeyInclusive, endKeyExclusive) + if err == nil { + rv.itr = itr + } else if err != vellum.ErrIteratorDone { + rv.err = err + } + } + + return rv +} + +func (d *Dictionary) OnlyIterator(onlyTerms [][]byte, + includeCount bool) segment.DictionaryIterator { + + rv := &DictionaryIterator{ + d: d, + omitCount: !includeCount, + } + + var buf bytes.Buffer + builder, err := vellum.New(&buf, nil) + if err != nil { + rv.err = err + return rv + } + for _, term := range onlyTerms { + err = builder.Insert(term, 0) + if err != nil { + rv.err = err + return rv + } + } + err = builder.Close() + if err != nil { + rv.err = err + return rv + } + + onlyFST, err := vellum.Load(buf.Bytes()) + if err != nil { + rv.err = err + return rv + } + + itr, err := d.fst.Search(onlyFST, nil, nil) + if err == nil { + rv.itr = itr + } else if err != vellum.ErrIteratorDone { + rv.err = err + } + + return rv +} + +// DictionaryIterator is an iterator for term dictionary +type DictionaryIterator struct { + d *Dictionary + itr vellum.Iterator + err error + tmp PostingsList + entry index.DictEntry + omitCount bool +} + +// Next returns the next entry in the dictionary +func (i *DictionaryIterator) Next() (*index.DictEntry, error) { + if i.err != nil && i.err != vellum.ErrIteratorDone { + return nil, i.err + } else if i.itr == nil || i.err == vellum.ErrIteratorDone { + return nil, nil + } + term, postingsOffset := i.itr.Current() + i.entry.Term = string(term) + if !i.omitCount { + i.err = i.tmp.read(postingsOffset, i.d) + if i.err != nil { + return nil, i.err + } + i.entry.Count = i.tmp.Count() + } + i.err = i.itr.Next() + return &i.entry, nil +} |