123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263 |
- // Copyright (c) 2017 Couchbase, Inc.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
-
- package zap
-
- import (
- "bytes"
- "fmt"
-
- "github.com/RoaringBitmap/roaring"
- "github.com/blevesearch/bleve/index"
- "github.com/blevesearch/bleve/index/scorch/segment"
- "github.com/couchbase/vellum"
- )
-
// Dictionary is the zap representation of the term dictionary
type Dictionary struct {
	// sb is the segment base stored on every PostingsList this dictionary
	// initializes.
	sb *SegmentBase
	// field is presumably the name of the field this dictionary covers —
	// TODO confirm against the code that constructs Dictionary.
	field string
	// fieldID is presumably the numeric id of that field — TODO confirm.
	fieldID uint16
	// fst backs Contains and all iterator constructors; the iterator
	// constructors that check it treat a nil fst as an empty dictionary.
	fst *vellum.FST
	// fstReader performs single-term lookups in postingsList; when nil,
	// every lookup is treated as a miss.
	fstReader *vellum.Reader
}
-
- // PostingsList returns the postings list for the specified term
- func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap,
- prealloc segment.PostingsList) (segment.PostingsList, error) {
- var preallocPL *PostingsList
- pl, ok := prealloc.(*PostingsList)
- if ok && pl != nil {
- preallocPL = pl
- }
- return d.postingsList(term, except, preallocPL)
- }
-
- func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
- if d.fstReader == nil {
- if rv == nil || rv == emptyPostingsList {
- return emptyPostingsList, nil
- }
- return d.postingsListInit(rv, except), nil
- }
-
- postingsOffset, exists, err := d.fstReader.Get(term)
- if err != nil {
- return nil, fmt.Errorf("vellum err: %v", err)
- }
- if !exists {
- if rv == nil || rv == emptyPostingsList {
- return emptyPostingsList, nil
- }
- return d.postingsListInit(rv, except), nil
- }
-
- return d.postingsListFromOffset(postingsOffset, except, rv)
- }
-
- func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
- rv = d.postingsListInit(rv, except)
-
- err := rv.read(postingsOffset, d)
- if err != nil {
- return nil, err
- }
-
- return rv, nil
- }
-
- func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList {
- if rv == nil || rv == emptyPostingsList {
- rv = &PostingsList{}
- } else {
- postings := rv.postings
- if postings != nil {
- postings.Clear()
- }
-
- *rv = PostingsList{} // clear the struct
-
- rv.postings = postings
- }
- rv.sb = d.sb
- rv.except = except
- return rv
- }
-
- func (d *Dictionary) Contains(key []byte) (bool, error) {
- return d.fst.Contains(key)
- }
-
- // Iterator returns an iterator for this dictionary
- func (d *Dictionary) Iterator() segment.DictionaryIterator {
- rv := &DictionaryIterator{
- d: d,
- }
-
- if d.fst != nil {
- itr, err := d.fst.Iterator(nil, nil)
- if err == nil {
- rv.itr = itr
- } else if err != vellum.ErrIteratorDone {
- rv.err = err
- }
- }
-
- return rv
- }
-
- // PrefixIterator returns an iterator which only visits terms having the
- // the specified prefix
- func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator {
- rv := &DictionaryIterator{
- d: d,
- }
-
- kBeg := []byte(prefix)
- kEnd := segment.IncrementBytes(kBeg)
-
- if d.fst != nil {
- itr, err := d.fst.Iterator(kBeg, kEnd)
- if err == nil {
- rv.itr = itr
- } else if err != vellum.ErrIteratorDone {
- rv.err = err
- }
- }
-
- return rv
- }
-
- // RangeIterator returns an iterator which only visits terms between the
- // start and end terms. NOTE: bleve.index API specifies the end is inclusive.
- func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator {
- rv := &DictionaryIterator{
- d: d,
- }
-
- // need to increment the end position to be inclusive
- var endBytes []byte
- if len(end) > 0 {
- endBytes = []byte(end)
- if endBytes[len(endBytes)-1] < 0xff {
- endBytes[len(endBytes)-1]++
- } else {
- endBytes = append(endBytes, 0xff)
- }
- }
-
- if d.fst != nil {
- itr, err := d.fst.Iterator([]byte(start), endBytes)
- if err == nil {
- rv.itr = itr
- } else if err != vellum.ErrIteratorDone {
- rv.err = err
- }
- }
-
- return rv
- }
-
- // AutomatonIterator returns an iterator which only visits terms
- // having the the vellum automaton and start/end key range
- func (d *Dictionary) AutomatonIterator(a vellum.Automaton,
- startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator {
- rv := &DictionaryIterator{
- d: d,
- }
-
- if d.fst != nil {
- itr, err := d.fst.Search(a, startKeyInclusive, endKeyExclusive)
- if err == nil {
- rv.itr = itr
- } else if err != vellum.ErrIteratorDone {
- rv.err = err
- }
- }
-
- return rv
- }
-
- func (d *Dictionary) OnlyIterator(onlyTerms [][]byte,
- includeCount bool) segment.DictionaryIterator {
-
- rv := &DictionaryIterator{
- d: d,
- omitCount: !includeCount,
- }
-
- var buf bytes.Buffer
- builder, err := vellum.New(&buf, nil)
- if err != nil {
- rv.err = err
- return rv
- }
- for _, term := range onlyTerms {
- err = builder.Insert(term, 0)
- if err != nil {
- rv.err = err
- return rv
- }
- }
- err = builder.Close()
- if err != nil {
- rv.err = err
- return rv
- }
-
- onlyFST, err := vellum.Load(buf.Bytes())
- if err != nil {
- rv.err = err
- return rv
- }
-
- itr, err := d.fst.Search(onlyFST, nil, nil)
- if err == nil {
- rv.itr = itr
- } else if err != vellum.ErrIteratorDone {
- rv.err = err
- }
-
- return rv
- }
-
// DictionaryIterator is an iterator for term dictionary
type DictionaryIterator struct {
	// d is the dictionary being iterated; it is passed to tmp.read when
	// counts are loaded.
	d *Dictionary
	// itr is the underlying vellum iterator; Next treats a nil itr as
	// "nothing to iterate".
	itr vellum.Iterator
	// err holds the iterator's sticky state: a construction or read error,
	// or vellum.ErrIteratorDone once the underlying iterator is exhausted.
	err error
	// tmp is a scratch postings list reused by Next to obtain the count.
	tmp PostingsList
	// entry is the reused result value; Next returns a pointer to it.
	entry index.DictEntry
	// omitCount, when true, makes Next skip reading postings and leave
	// entry.Count untouched.
	omitCount bool
}
-
- // Next returns the next entry in the dictionary
- func (i *DictionaryIterator) Next() (*index.DictEntry, error) {
- if i.err != nil && i.err != vellum.ErrIteratorDone {
- return nil, i.err
- } else if i.itr == nil || i.err == vellum.ErrIteratorDone {
- return nil, nil
- }
- term, postingsOffset := i.itr.Current()
- i.entry.Term = string(term)
- if !i.omitCount {
- i.err = i.tmp.read(postingsOffset, i.d)
- if i.err != nil {
- return nil, i.err
- }
- i.entry.Count = i.tmp.Count()
- }
- i.err = i.itr.Next()
- return &i.entry, nil
- }
|