summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/blevesearch/bleve
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/blevesearch/bleve')
-rw-r--r--vendor/github.com/blevesearch/bleve/.travis.yml10
-rw-r--r--vendor/github.com/blevesearch/bleve/analysis/lang/en/stemmer_en_snowball.go49
-rw-r--r--vendor/github.com/blevesearch/bleve/geo/geo.go8
-rw-r--r--vendor/github.com/blevesearch/bleve/geo/parse.go8
-rw-r--r--vendor/github.com/blevesearch/bleve/go.mod25
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/introducer.go95
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/merge.go85
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/optimize.go74
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/persister.go69
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/scorch.go78
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go8
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/plugin.go58
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go16
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/unadorned.go148
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/README.md158
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go151
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/contentcoder.go230
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/count.go61
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go263
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go311
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/enumerator.go126
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/intcoder.go172
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go862
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go839
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go897
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/read.go43
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go572
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/write.go145
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/zap.md177
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment_plugin.go77
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go138
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/stats.go10
-rw-r--r--vendor/github.com/blevesearch/bleve/index/store/boltdb/iterator.go2
-rw-r--r--vendor/github.com/blevesearch/bleve/index/store/boltdb/reader.go2
-rw-r--r--vendor/github.com/blevesearch/bleve/index/store/boltdb/store.go2
-rw-r--r--vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go3
-rw-r--r--vendor/github.com/blevesearch/bleve/search/collector/topn.go13
-rw-r--r--vendor/github.com/blevesearch/bleve/search/highlight/fragmenter/simple/simple.go5
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go15
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go17
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go50
41 files changed, 737 insertions, 5335 deletions
diff --git a/vendor/github.com/blevesearch/bleve/.travis.yml b/vendor/github.com/blevesearch/bleve/.travis.yml
index e00e7b9948..7b7297afe3 100644
--- a/vendor/github.com/blevesearch/bleve/.travis.yml
+++ b/vendor/github.com/blevesearch/bleve/.travis.yml
@@ -3,9 +3,9 @@ sudo: false
language: go
go:
- - "1.10.x"
- - "1.11.x"
- "1.12.x"
+ - "1.13.x"
+ - "1.14.x"
script:
- go get golang.org/x/tools/cmd/cover
@@ -16,11 +16,7 @@ script:
- go test -race -v $(go list ./... | grep -v vendor/)
- go vet $(go list ./... | grep -v vendor/)
- go test ./test -v -indexType scorch
- - if [[ ${TRAVIS_GO_VERSION} =~ ^1\.10 ]]; then
- echo "errcheck skipped for go version" $TRAVIS_GO_VERSION;
- else
- errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/);
- fi
+ - errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/);
- docs/project-code-coverage.sh
- docs/build_children.sh
diff --git a/vendor/github.com/blevesearch/bleve/analysis/lang/en/stemmer_en_snowball.go b/vendor/github.com/blevesearch/bleve/analysis/lang/en/stemmer_en_snowball.go
new file mode 100644
index 0000000000..225bb0664d
--- /dev/null
+++ b/vendor/github.com/blevesearch/bleve/analysis/lang/en/stemmer_en_snowball.go
@@ -0,0 +1,49 @@
+// Copyright (c) 2020 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package en
+
+import (
+ "github.com/blevesearch/bleve/analysis"
+ "github.com/blevesearch/bleve/registry"
+
+ "github.com/blevesearch/snowballstem"
+ "github.com/blevesearch/snowballstem/english"
+)
+
+const SnowballStemmerName = "stemmer_en_snowball"
+
+type EnglishStemmerFilter struct {
+}
+
+func NewEnglishStemmerFilter() *EnglishStemmerFilter {
+ return &EnglishStemmerFilter{}
+}
+
+func (s *EnglishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
+ for _, token := range input {
+ env := snowballstem.NewEnv(string(token.Term))
+ english.Stem(env)
+ token.Term = []byte(env.Current())
+ }
+ return input
+}
+
+func EnglishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
+ return NewEnglishStemmerFilter(), nil
+}
+
+func init() {
+ registry.RegisterTokenFilter(SnowballStemmerName, EnglishStemmerFilterConstructor)
+}
diff --git a/vendor/github.com/blevesearch/bleve/geo/geo.go b/vendor/github.com/blevesearch/bleve/geo/geo.go
index 583451e308..b18ace4337 100644
--- a/vendor/github.com/blevesearch/bleve/geo/geo.go
+++ b/vendor/github.com/blevesearch/bleve/geo/geo.go
@@ -33,14 +33,16 @@ var minLonRad = minLon * degreesToRadian
var minLatRad = minLat * degreesToRadian
var maxLonRad = maxLon * degreesToRadian
var maxLatRad = maxLat * degreesToRadian
-var geoTolerance = 1E-6
+var geoTolerance = 1e-6
var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0
var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0
+var geoHashMaxLength = 12
+
// Point represents a geo point.
type Point struct {
- Lon float64
- Lat float64
+ Lon float64 `json:"lon"`
+ Lat float64 `json:"lat"`
}
// MortonHash computes the morton hash value for the provided geo point
diff --git a/vendor/github.com/blevesearch/bleve/geo/parse.go b/vendor/github.com/blevesearch/bleve/geo/parse.go
index 5d833d9110..8286805ff7 100644
--- a/vendor/github.com/blevesearch/bleve/geo/parse.go
+++ b/vendor/github.com/blevesearch/bleve/geo/parse.go
@@ -85,9 +85,11 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
}
} else {
// geohash
- lat, lon = DecodeGeoHash(geoStr)
- foundLat = true
- foundLon = true
+ if len(geoStr) <= geoHashMaxLength {
+ lat, lon = DecodeGeoHash(geoStr)
+ foundLat = true
+ foundLon = true
+ }
}
}
diff --git a/vendor/github.com/blevesearch/bleve/go.mod b/vendor/github.com/blevesearch/bleve/go.mod
new file mode 100644
index 0000000000..d38cf8f921
--- /dev/null
+++ b/vendor/github.com/blevesearch/bleve/go.mod
@@ -0,0 +1,25 @@
+module github.com/blevesearch/bleve
+
+go 1.13
+
+require (
+ github.com/RoaringBitmap/roaring v0.4.21
+ github.com/blevesearch/blevex v0.0.0-20190916190636-152f0fe5c040
+ github.com/blevesearch/go-porterstemmer v1.0.3
+ github.com/blevesearch/segment v0.9.0
+ github.com/blevesearch/snowballstem v0.9.0
+ github.com/blevesearch/zap/v11 v11.0.7
+ github.com/blevesearch/zap/v12 v12.0.7
+ github.com/couchbase/ghistogram v0.1.0 // indirect
+ github.com/couchbase/moss v0.1.0
+ github.com/couchbase/vellum v1.0.1
+ github.com/golang/protobuf v1.3.2
+ github.com/kljensen/snowball v0.6.0
+ github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563
+ github.com/spf13/cobra v0.0.5
+ github.com/steveyen/gtreap v0.1.0
+ github.com/syndtr/goleveldb v1.0.0
+ github.com/willf/bitset v1.1.10
+ go.etcd.io/bbolt v1.3.4
+ golang.org/x/text v0.3.0
+)
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go b/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go
index ac627796f5..e5f00f80e1 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go
@@ -21,7 +21,6 @@ import (
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
- "github.com/blevesearch/bleve/index/scorch/segment/zap"
)
type segmentIntroduction struct {
@@ -77,11 +76,6 @@ OUTER:
case persist := <-s.persists:
s.introducePersist(persist)
- case revertTo := <-s.revertToSnapshots:
- err := s.revertToSnapshot(revertTo)
- if err != nil {
- continue OUTER
- }
}
var epochCurr uint64
@@ -312,6 +306,8 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
close(persist.applied)
}
+// The introducer should definitely handle the segmentMerge.notify
+// channel before exiting the introduceMerge.
func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
atomic.AddUint64(&s.stats.TotIntroduceMergeBeg, 1)
defer atomic.AddUint64(&s.stats.TotIntroduceMergeEnd, 1)
@@ -409,11 +405,11 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
atomic.AddUint64(&s.stats.TotIntroducedSegmentsMerge, 1)
switch nextMerge.new.(type) {
- case *zap.SegmentBase:
+ case segment.PersistedSegment:
+ fileSegments++
+ default:
docsToPersistCount += nextMerge.new.Count() - newSegmentDeleted.GetCardinality()
memSegments++
- case *zap.Segment:
- fileSegments++
}
}
@@ -443,86 +439,11 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
close(nextMerge.notify)
}
-func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
- atomic.AddUint64(&s.stats.TotIntroduceRevertBeg, 1)
- defer atomic.AddUint64(&s.stats.TotIntroduceRevertEnd, 1)
-
- if revertTo.snapshot == nil {
- err := fmt.Errorf("Cannot revert to a nil snapshot")
- revertTo.applied <- err
- return err
- }
-
- // acquire lock
- s.rootLock.Lock()
-
- // prepare a new index snapshot, based on next snapshot
- newSnapshot := &IndexSnapshot{
- parent: s,
- segment: make([]*SegmentSnapshot, len(revertTo.snapshot.segment)),
- offsets: revertTo.snapshot.offsets,
- internal: revertTo.snapshot.internal,
- epoch: s.nextSnapshotEpoch,
- refs: 1,
- creator: "revertToSnapshot",
- }
- s.nextSnapshotEpoch++
-
- var docsToPersistCount, memSegments, fileSegments uint64
- // iterate through segments
- for i, segmentSnapshot := range revertTo.snapshot.segment {
- newSnapshot.segment[i] = &SegmentSnapshot{
- id: segmentSnapshot.id,
- segment: segmentSnapshot.segment,
- deleted: segmentSnapshot.deleted,
- cachedDocs: segmentSnapshot.cachedDocs,
- creator: segmentSnapshot.creator,
- }
- newSnapshot.segment[i].segment.AddRef()
-
- // remove segment from ineligibleForRemoval map
- filename := zapFileName(segmentSnapshot.id)
- delete(s.ineligibleForRemoval, filename)
-
- if isMemorySegment(segmentSnapshot) {
- docsToPersistCount += segmentSnapshot.Count()
- memSegments++
- } else {
- fileSegments++
- }
- }
-
- atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
- atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
- atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
-
- if revertTo.persisted != nil {
- s.rootPersisted = append(s.rootPersisted, revertTo.persisted)
- }
-
- newSnapshot.updateSize()
- // swap in new snapshot
- rootPrev := s.root
- s.root = newSnapshot
-
- atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
- // release lock
- s.rootLock.Unlock()
-
- if rootPrev != nil {
- _ = rootPrev.DecRef()
- }
-
- close(revertTo.applied)
-
- return nil
-}
-
func isMemorySegment(s *SegmentSnapshot) bool {
switch s.segment.(type) {
- case *zap.SegmentBase:
- return true
- default:
+ case segment.PersistedSegment:
return false
+ default:
+ return true
}
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/merge.go b/vendor/github.com/blevesearch/bleve/index/scorch/merge.go
index d7144772fd..37dca529a6 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/merge.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/merge.go
@@ -25,7 +25,6 @@ import (
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/mergeplan"
"github.com/blevesearch/bleve/index/scorch/segment"
- "github.com/blevesearch/bleve/index/scorch/segment/zap"
)
func (s *Scorch) mergerLoop() {
@@ -131,18 +130,18 @@ func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions,
func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
options *mergeplan.MergePlanOptions) error {
- // build list of zap segments in this snapshot
- var onlyZapSnapshots []mergeplan.Segment
+ // build list of persisted segments in this snapshot
+ var onlyPersistedSnapshots []mergeplan.Segment
for _, segmentSnapshot := range ourSnapshot.segment {
- if _, ok := segmentSnapshot.segment.(*zap.Segment); ok {
- onlyZapSnapshots = append(onlyZapSnapshots, segmentSnapshot)
+ if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok {
+ onlyPersistedSnapshots = append(onlyPersistedSnapshots, segmentSnapshot)
}
}
atomic.AddUint64(&s.stats.TotFileMergePlan, 1)
// give this list to the planner
- resultMergePlan, err := mergeplan.Plan(onlyZapSnapshots, options)
+ resultMergePlan, err := mergeplan.Plan(onlyPersistedSnapshots, options)
if err != nil {
atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1)
return fmt.Errorf("merge planning err: %v", err)
@@ -157,8 +156,8 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))
// process tasks in serial for now
- var notifications []chan *IndexSnapshot
var filenames []string
+
for _, task := range resultMergePlan.Tasks {
if len(task.Segments) == 0 {
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
@@ -169,24 +168,24 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
oldMap := make(map[uint64]*SegmentSnapshot)
newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
- segmentsToMerge := make([]*zap.Segment, 0, len(task.Segments))
+ segmentsToMerge := make([]segment.Segment, 0, len(task.Segments))
docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments))
for _, planSegment := range task.Segments {
if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok {
oldMap[segSnapshot.id] = segSnapshot
- if zapSeg, ok := segSnapshot.segment.(*zap.Segment); ok {
+ if persistedSeg, ok := segSnapshot.segment.(segment.PersistedSegment); ok {
if segSnapshot.LiveSize() == 0 {
atomic.AddUint64(&s.stats.TotFileMergeSegmentsEmpty, 1)
oldMap[segSnapshot.id] = nil
} else {
- segmentsToMerge = append(segmentsToMerge, zapSeg)
+ segmentsToMerge = append(segmentsToMerge, segSnapshot.segment)
docsToDrop = append(docsToDrop, segSnapshot.deleted)
}
// track the files getting merged for unsetting the
// removal ineligibility. This helps to unflip files
// even with fast merger, slow persister work flows.
- path := zapSeg.Path()
+ path := persistedSeg.Path()
filenames = append(filenames,
strings.TrimPrefix(path, s.path+string(os.PathSeparator)))
}
@@ -203,8 +202,8 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
fileMergeZapStartTime := time.Now()
atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
- newDocNums, _, err := zap.Merge(segmentsToMerge, docsToDrop, path,
- DefaultChunkFactor, s.closeCh, s)
+ newDocNums, _, err := s.segPlugin.Merge(segmentsToMerge, docsToDrop, path,
+ s.closeCh, s)
atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)
fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime))
@@ -222,17 +221,12 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
return fmt.Errorf("merging failed: %v", err)
}
- seg, err = zap.Open(path)
+ seg, err = s.segPlugin.Open(path)
if err != nil {
s.unmarkIneligibleForRemoval(filename)
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
return err
}
- err = zap.ValidateMerge(segmentsToMerge, nil, docsToDrop, seg.(*zap.Segment))
- if err != nil {
- s.unmarkIneligibleForRemoval(filename)
- return fmt.Errorf("merge validation failed: %v", err)
- }
oldNewDocNums = make(map[uint64][]uint64)
for i, segNewDocNums := range newDocNums {
oldNewDocNums[task.Segments[i].Id()] = segNewDocNums
@@ -246,9 +240,8 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
old: oldMap,
oldNewDocNums: oldNewDocNums,
new: seg,
- notify: make(chan *IndexSnapshot, 1),
+ notify: make(chan *IndexSnapshot),
}
- notifications = append(notifications, sm.notify)
// give it to the introducer
select {
@@ -259,20 +252,21 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
atomic.AddUint64(&s.stats.TotFileMergeIntroductions, 1)
}
- atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1)
- }
-
- for _, notification := range notifications {
- select {
- case <-s.closeCh:
- atomic.AddUint64(&s.stats.TotFileMergeIntroductionsSkipped, 1)
- return segment.ErrClosed
- case newSnapshot := <-notification:
- atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1)
- if newSnapshot != nil {
- _ = newSnapshot.DecRef()
- }
+ introStartTime := time.Now()
+ // it is safe to blockingly wait for the merge introduction
+ // here as the introducer is bound to handle the notify channel.
+ newSnapshot := <-sm.notify
+ introTime := uint64(time.Since(introStartTime))
+ atomic.AddUint64(&s.stats.TotFileMergeZapIntroductionTime, introTime)
+ if atomic.LoadUint64(&s.stats.MaxFileMergeZapIntroductionTime) < introTime {
+ atomic.StoreUint64(&s.stats.MaxFileMergeZapIntroductionTime, introTime)
+ }
+ atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1)
+ if newSnapshot != nil {
+ _ = newSnapshot.DecRef()
}
+
+ atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1)
}
// once all the newly merged segment introductions are done,
@@ -297,8 +291,8 @@ type segmentMerge struct {
// persisted segment, and synchronously introduce that new segment
// into the root
func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
- sbs []*zap.SegmentBase, sbsDrops []*roaring.Bitmap, sbsIndexes []int,
- chunkFactor uint32) (*IndexSnapshot, uint64, error) {
+ sbs []segment.Segment, sbsDrops []*roaring.Bitmap,
+ sbsIndexes []int) (*IndexSnapshot, uint64, error) {
atomic.AddUint64(&s.stats.TotMemMergeBeg, 1)
memMergeZapStartTime := time.Now()
@@ -310,7 +304,7 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
path := s.path + string(os.PathSeparator) + filename
newDocNums, _, err :=
- zap.MergeSegmentBases(sbs, sbsDrops, path, chunkFactor, s.closeCh, s)
+ s.segPlugin.Merge(sbs, sbsDrops, path, s.closeCh, s)
atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1)
@@ -325,15 +319,11 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
return nil, 0, err
}
- seg, err := zap.Open(path)
+ seg, err := s.segPlugin.Open(path)
if err != nil {
atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
return nil, 0, err
}
- err = zap.ValidateMerge(nil, sbs, sbsDrops, seg.(*zap.Segment))
- if err != nil {
- return nil, 0, fmt.Errorf("in-memory merge validation failed: %v", err)
- }
// update persisted stats
atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count())
@@ -344,7 +334,7 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
old: make(map[uint64]*SegmentSnapshot),
oldNewDocNums: make(map[uint64][]uint64),
new: seg,
- notify: make(chan *IndexSnapshot, 1),
+ notify: make(chan *IndexSnapshot),
}
for i, idx := range sbsIndexes {
@@ -360,14 +350,13 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
case s.merges <- sm:
}
- select { // wait for introduction to complete
- case <-s.closeCh:
- return nil, 0, segment.ErrClosed
- case newSnapshot := <-sm.notify:
+ // blockingly wait for the introduction to complete
+ newSnapshot := <-sm.notify
+ if newSnapshot != nil {
atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs)))
atomic.AddUint64(&s.stats.TotMemMergeDone, 1)
- return newSnapshot, newSegmentID, nil
}
+ return newSnapshot, newSegmentID, nil
}
func (s *Scorch) ReportBytesWritten(bytesWritten uint64) {
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/optimize.go b/vendor/github.com/blevesearch/bleve/index/scorch/optimize.go
index b33e3be3d4..b9cb9228af 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/optimize.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/optimize.go
@@ -18,10 +18,8 @@ import (
"fmt"
"github.com/RoaringBitmap/roaring"
-
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
- "github.com/blevesearch/bleve/index/scorch/segment/zap"
)
var OptimizeConjunction = true
@@ -81,25 +79,25 @@ func (o *OptimizeTFRConjunction) Finish() (index.Optimized, error) {
}
for i := range o.snapshot.segment {
- itr0, ok := o.tfrs[0].iterators[i].(*zap.PostingsIterator)
- if !ok || itr0.ActualBM == nil {
+ itr0, ok := o.tfrs[0].iterators[i].(segment.OptimizablePostingsIterator)
+ if !ok || itr0.ActualBitmap() == nil {
continue
}
- itr1, ok := o.tfrs[1].iterators[i].(*zap.PostingsIterator)
- if !ok || itr1.ActualBM == nil {
+ itr1, ok := o.tfrs[1].iterators[i].(segment.OptimizablePostingsIterator)
+ if !ok || itr1.ActualBitmap() == nil {
continue
}
- bm := roaring.And(itr0.ActualBM, itr1.ActualBM)
+ bm := roaring.And(itr0.ActualBitmap(), itr1.ActualBitmap())
for _, tfr := range o.tfrs[2:] {
- itr, ok := tfr.iterators[i].(*zap.PostingsIterator)
- if !ok || itr.ActualBM == nil {
+ itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
+ if !ok || itr.ActualBitmap() == nil {
continue
}
- bm.And(itr.ActualBM)
+ bm.And(itr.ActualBitmap())
}
// in this conjunction optimization, the postings iterators
@@ -107,10 +105,9 @@ func (o *OptimizeTFRConjunction) Finish() (index.Optimized, error) {
// regular conjunction searcher machinery will still be used,
// but the underlying bitmap will be smaller.
for _, tfr := range o.tfrs {
- itr, ok := tfr.iterators[i].(*zap.PostingsIterator)
- if ok && itr.ActualBM != nil {
- itr.ActualBM = bm
- itr.Actual = bm.Iterator()
+ itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
+ if ok && itr.ActualBitmap() != nil {
+ itr.ReplaceActual(bm)
}
}
}
@@ -191,9 +188,9 @@ OUTER:
continue OUTER
}
- itr, ok := tfr.iterators[i].(*zap.PostingsIterator)
+ itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
if !ok {
- // We optimize zap postings iterators only.
+ // We only optimize postings iterators that support this operation.
return nil, nil
}
@@ -201,12 +198,6 @@ OUTER:
// can perform several optimizations up-front here.
docNum1Hit, ok := itr.DocNum1Hit()
if ok {
- if docNum1Hit == zap.DocNum1HitFinished {
- // An empty docNum here means the entire AND is empty.
- oTFR.iterators[i] = segment.AnEmptyPostingsIterator
- continue OUTER
- }
-
if docNum1HitLastOk && docNum1HitLast != docNum1Hit {
// The docNum1Hit doesn't match the previous
// docNum1HitLast, so the entire AND is empty.
@@ -220,14 +211,14 @@ OUTER:
continue
}
- if itr.ActualBM == nil {
+ if itr.ActualBitmap() == nil {
// An empty actual bitmap means the entire AND is empty.
oTFR.iterators[i] = segment.AnEmptyPostingsIterator
continue OUTER
}
// Collect the actual bitmap for more processing later.
- actualBMs = append(actualBMs, itr.ActualBM)
+ actualBMs = append(actualBMs, itr.ActualBitmap())
}
if docNum1HitLastOk {
@@ -245,11 +236,7 @@ OUTER:
// The actual bitmaps and docNum1Hits all contain or have
// the same 1-hit docNum, so that's our AND'ed result.
- oTFR.iterators[i], err = zap.PostingsIteratorFrom1Hit(
- docNum1HitLast, zap.NormBits1Hit, false, false)
- if err != nil {
- return nil, nil
- }
+ oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFrom1Hit(docNum1HitLast)
continue OUTER
}
@@ -263,11 +250,7 @@ OUTER:
if len(actualBMs) == 1 {
// If we've only 1 actual bitmap, then that's our result.
- oTFR.iterators[i], err = zap.PostingsIteratorFromBitmap(
- actualBMs[0], false, false)
- if err != nil {
- return nil, nil
- }
+ oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(actualBMs[0])
continue OUTER
}
@@ -279,11 +262,7 @@ OUTER:
bm.And(actualBM)
}
- oTFR.iterators[i], err = zap.PostingsIteratorFromBitmap(
- bm, false, false)
- if err != nil {
- return nil, nil
- }
+ oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(bm)
}
return oTFR, nil
@@ -337,13 +316,13 @@ func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err erro
var cMax uint64
for _, tfr := range o.tfrs {
- itr, ok := tfr.iterators[i].(*zap.PostingsIterator)
+ itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
if !ok {
return nil, nil
}
- if itr.ActualBM != nil {
- c := itr.ActualBM.GetCardinality()
+ if itr.ActualBitmap() != nil {
+ c := itr.ActualBitmap().GetCardinality()
if cMax < c {
cMax = c
}
@@ -379,7 +358,7 @@ func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err erro
actualBMs = actualBMs[:0]
for _, tfr := range o.tfrs {
- itr, ok := tfr.iterators[i].(*zap.PostingsIterator)
+ itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
if !ok {
return nil, nil
}
@@ -390,8 +369,8 @@ func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err erro
continue
}
- if itr.ActualBM != nil {
- actualBMs = append(actualBMs, itr.ActualBM)
+ if itr.ActualBitmap() != nil {
+ actualBMs = append(actualBMs, itr.ActualBitmap())
}
}
@@ -410,10 +389,7 @@ func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err erro
bm.AddMany(docNums)
- oTFR.iterators[i], err = zap.PostingsIteratorFromBitmap(bm, false, false)
- if err != nil {
- return nil, nil
- }
+ oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(bm)
}
return oTFR, nil
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/persister.go b/vendor/github.com/blevesearch/bleve/index/scorch/persister.go
index 064e9e6a85..30e75df77f 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/persister.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/persister.go
@@ -32,12 +32,9 @@ import (
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
- "github.com/blevesearch/bleve/index/scorch/segment/zap"
- bolt "github.com/etcd-io/bbolt"
+ bolt "go.etcd.io/bbolt"
)
-var DefaultChunkFactor uint32 = 1024
-
// DefaultPersisterNapTimeMSec is kept to zero as this helps in direct
// persistence of segments with the default safe batch option.
// If the default safe batch option results in high number of
@@ -253,7 +250,7 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64,
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
// Check the merger lag by counting the segment files on disk,
- numFilesOnDisk, _ := s.diskFileStats()
+ numFilesOnDisk, _, _ := s.diskFileStats(nil)
// On finding fewer files on disk, persister takes a short pause
// for sufficient in-memory segments to pile up for the next
@@ -280,7 +277,7 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64,
// 2. The merger could be lagging behind on merging the disk files.
if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) {
s.removeOldData()
- numFilesOnDisk, _ = s.diskFileStats()
+ numFilesOnDisk, _, _ = s.diskFileStats(nil)
}
// Persister pause until the merger catches up to reduce the segment
@@ -305,7 +302,7 @@ OUTER:
// let the watchers proceed if they lag behind
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
- numFilesOnDisk, _ = s.diskFileStats()
+ numFilesOnDisk, _, _ = s.diskFileStats(nil)
}
return lastMergedEpoch, persistWatchers
@@ -360,13 +357,13 @@ var DefaultMinSegmentsForInMemoryMerge = 2
func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
bool, error) {
// collect the in-memory zap segments (SegmentBase instances)
- var sbs []*zap.SegmentBase
+ var sbs []segment.Segment
var sbsDrops []*roaring.Bitmap
var sbsIndexes []int
for i, segmentSnapshot := range snapshot.segment {
- if sb, ok := segmentSnapshot.segment.(*zap.SegmentBase); ok {
- sbs = append(sbs, sb)
+ if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); !ok {
+ sbs = append(sbs, segmentSnapshot.segment)
sbsDrops = append(sbsDrops, segmentSnapshot.deleted)
sbsIndexes = append(sbsIndexes, i)
}
@@ -377,7 +374,7 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
}
newSnapshot, newSegmentID, err := s.mergeSegmentBases(
- snapshot, sbs, sbsDrops, sbsIndexes, DefaultChunkFactor)
+ snapshot, sbs, sbsDrops, sbsIndexes)
if err != nil {
return false, err
}
@@ -459,13 +456,13 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
if err != nil {
return err
}
- err = metaBucket.Put([]byte("type"), []byte(zap.Type))
+ err = metaBucket.Put(boltMetaDataSegmentTypeKey, []byte(s.segPlugin.Type()))
if err != nil {
return err
}
buf := make([]byte, binary.MaxVarintLen32)
- binary.BigEndian.PutUint32(buf, zap.Version)
- err = metaBucket.Put([]byte("version"), buf)
+ binary.BigEndian.PutUint32(buf, s.segPlugin.Version())
+ err = metaBucket.Put(boltMetaDataSegmentVersionKey, buf)
if err != nil {
return err
}
@@ -494,11 +491,19 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
return err
}
switch seg := segmentSnapshot.segment.(type) {
- case *zap.SegmentBase:
+ case segment.PersistedSegment:
+ path := seg.Path()
+ filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator))
+ err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
+ if err != nil {
+ return err
+ }
+ filenames = append(filenames, filename)
+ case segment.UnpersistedSegment:
// need to persist this to disk
filename := zapFileName(segmentSnapshot.id)
path := s.path + string(os.PathSeparator) + filename
- err = zap.PersistSegmentBase(seg, path)
+ err = seg.Persist(path)
if err != nil {
return fmt.Errorf("error persisting segment: %v", err)
}
@@ -508,14 +513,7 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
return err
}
filenames = append(filenames, filename)
- case *zap.Segment:
- path := seg.Path()
- filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator))
- err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
- if err != nil {
- return err
- }
- filenames = append(filenames, filename)
+
default:
return fmt.Errorf("unknown segment type: %T", seg)
}
@@ -553,7 +551,7 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
}
}()
for segmentID, path := range newSegmentPaths {
- newSegments[segmentID], err = zap.Open(path)
+ newSegments[segmentID], err = s.segPlugin.Open(path)
if err != nil {
return fmt.Errorf("error opening new segment at %s, %v", path, err)
}
@@ -609,6 +607,8 @@ var boltPathKey = []byte{'p'}
var boltDeletedKey = []byte{'d'}
var boltInternalKey = []byte{'i'}
var boltMetaDataKey = []byte{'m'}
+var boltMetaDataSegmentTypeKey = []byte("type")
+var boltMetaDataSegmentVersionKey = []byte("version")
func (s *Scorch) loadFromBolt() error {
return s.rootBolt.View(func(tx *bolt.Tx) error {
@@ -693,6 +693,23 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
refs: 1,
creator: "loadSnapshot",
}
+ // first we look for the meta-data bucket, this will tell us
+ // which segment type/version was used for this snapshot
+ // all operations for this scorch will use this type/version
+ metaBucket := snapshot.Bucket(boltMetaDataKey)
+ if metaBucket == nil {
+ _ = rv.DecRef()
+ return nil, fmt.Errorf("meta-data bucket missing")
+ }
+ segmentType := string(metaBucket.Get(boltMetaDataSegmentTypeKey))
+ segmentVersion := binary.BigEndian.Uint32(
+ metaBucket.Get(boltMetaDataSegmentVersionKey))
+ err := s.loadSegmentPlugin(segmentType, segmentVersion)
+ if err != nil {
+ _ = rv.DecRef()
+ return nil, fmt.Errorf(
+ "unable to load correct segment wrapper: %v", err)
+ }
var running uint64
c := snapshot.Cursor()
for k, _ := c.First(); k != nil; k, _ = c.Next() {
@@ -737,7 +754,7 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro
return nil, fmt.Errorf("segment path missing")
}
segmentPath := s.path + string(os.PathSeparator) + string(pathBytes)
- segment, err := zap.Open(segmentPath)
+ segment, err := s.segPlugin.Open(segmentPath)
if err != nil {
return nil, fmt.Errorf("error opening bolt segment: %v", err)
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go b/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go
index 44a97d1ea6..80f9e3a797 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go
@@ -28,10 +28,9 @@ import (
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
- "github.com/blevesearch/bleve/index/scorch/segment/zap"
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/registry"
- bolt "github.com/etcd-io/bbolt"
+ bolt "go.etcd.io/bbolt"
)
const Name = "scorch"
@@ -67,7 +66,6 @@ type Scorch struct {
persists chan *persistIntroduction
merges chan *segmentMerge
introducerNotifier chan *epochWatcher
- revertToSnapshots chan *snapshotReversion
persisterNotifier chan *epochWatcher
rootBolt *bolt.DB
asyncTasks sync.WaitGroup
@@ -78,6 +76,8 @@ type Scorch struct {
pauseLock sync.RWMutex
pauseCount uint64
+
+ segPlugin segment.Plugin
}
type internalStats struct {
@@ -101,7 +101,25 @@ func NewScorch(storeName string,
nextSnapshotEpoch: 1,
closeCh: make(chan struct{}),
ineligibleForRemoval: map[string]bool{},
+ segPlugin: defaultSegmentPlugin,
+ }
+
+ // check if the caller has requested a specific segment type/version
+ forcedSegmentVersion, ok := config["forceSegmentVersion"].(int)
+ if ok {
+ forcedSegmentType, ok2 := config["forceSegmentType"].(string)
+ if !ok2 {
+ return nil, fmt.Errorf(
+ "forceSegmentVersion set to %d, must also specify forceSegmentType", forcedSegmentVersion)
+ }
+
+ err := rv.loadSegmentPlugin(forcedSegmentType,
+ uint32(forcedSegmentVersion))
+ if err != nil {
+ return nil, err
+ }
}
+
rv.root = &IndexSnapshot{parent: rv, refs: 1, creator: "NewScorch"}
ro, ok := config["read_only"].(bool)
if ok {
@@ -221,8 +239,8 @@ func (s *Scorch) openBolt() error {
s.persists = make(chan *persistIntroduction)
s.merges = make(chan *segmentMerge)
s.introducerNotifier = make(chan *epochWatcher, 1)
- s.revertToSnapshots = make(chan *snapshotReversion)
s.persisterNotifier = make(chan *epochWatcher, 1)
+ s.closeCh = make(chan struct{})
if !s.readOnly && s.path != "" {
err := s.removeOldZapFiles() // Before persister or merger create any new files.
@@ -263,7 +281,10 @@ func (s *Scorch) Close() (err error) {
err = s.rootBolt.Close()
s.rootLock.Lock()
if s.root != nil {
- _ = s.root.DecRef()
+ err2 := s.root.DecRef()
+ if err == nil {
+ err = err2
+ }
}
s.root = nil
s.rootLock.Unlock()
@@ -349,7 +370,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
var newSegment segment.Segment
var bufBytes uint64
if len(analysisResults) > 0 {
- newSegment, bufBytes, err = zap.AnalysisResultsToSegmentBase(analysisResults, DefaultChunkFactor)
+ newSegment, bufBytes, err = s.segPlugin.New(analysisResults)
if err != nil {
return err
}
@@ -466,8 +487,9 @@ func (s *Scorch) Stats() json.Marshaler {
return &s.stats
}
-func (s *Scorch) diskFileStats() (uint64, uint64) {
- var numFilesOnDisk, numBytesUsedDisk uint64
+func (s *Scorch) diskFileStats(rootSegmentPaths map[string]struct{}) (uint64,
+ uint64, uint64) {
+ var numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot uint64
if s.path != "" {
finfos, err := ioutil.ReadDir(s.path)
if err == nil {
@@ -475,24 +497,47 @@ func (s *Scorch) diskFileStats() (uint64, uint64) {
if !finfo.IsDir() {
numBytesUsedDisk += uint64(finfo.Size())
numFilesOnDisk++
+ if rootSegmentPaths != nil {
+ fname := s.path + string(os.PathSeparator) + finfo.Name()
+ if _, fileAtRoot := rootSegmentPaths[fname]; fileAtRoot {
+ numBytesOnDiskByRoot += uint64(finfo.Size())
+ }
+ }
}
}
}
}
- return numFilesOnDisk, numBytesUsedDisk
+ // if no root files path given, then consider all disk files.
+ if rootSegmentPaths == nil {
+ return numFilesOnDisk, numBytesUsedDisk, numBytesUsedDisk
+ }
+
+ return numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot
+}
+
+func (s *Scorch) rootDiskSegmentsPaths() map[string]struct{} {
+ rv := make(map[string]struct{}, len(s.root.segment))
+ for _, segmentSnapshot := range s.root.segment {
+ if seg, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok {
+ rv[seg.Path()] = struct{}{}
+ }
+ }
+ return rv
}
func (s *Scorch) StatsMap() map[string]interface{} {
m := s.stats.ToMap()
- numFilesOnDisk, numBytesUsedDisk := s.diskFileStats()
+ s.rootLock.RLock()
+ rootSegPaths := s.rootDiskSegmentsPaths()
+ m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval))
+ s.rootLock.RUnlock()
+
+ numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot := s.diskFileStats(rootSegPaths)
m["CurOnDiskBytes"] = numBytesUsedDisk
m["CurOnDiskFiles"] = numFilesOnDisk
- s.rootLock.RLock()
- m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval))
- s.rootLock.RUnlock()
// TODO: consider one day removing these backwards compatible
// names for apps using the old names
m["updates"] = m["TotUpdates"]
@@ -507,8 +552,11 @@ func (s *Scorch) StatsMap() map[string]interface{} {
m["num_items_introduced"] = m["TotIntroducedItems"]
m["num_items_persisted"] = m["TotPersistedItems"]
m["num_recs_to_persist"] = m["TotItemsToPersist"]
- m["num_bytes_used_disk"] = m["CurOnDiskBytes"]
- m["num_files_on_disk"] = m["CurOnDiskFiles"]
+ // total disk bytes found in index directory inclusive of older snapshots
+ m["num_bytes_used_disk"] = numBytesUsedDisk
+ // total disk bytes by the latest root index, exclusive of older snapshots
+ m["num_bytes_used_disk_by_root"] = numBytesOnDiskByRoot
+ m["num_files_on_disk"] = numFilesOnDisk
m["num_root_memorysegments"] = m["TotMemorySegmentsAtRoot"]
m["num_root_filesegments"] = m["TotFileSegmentsAtRoot"]
m["num_persister_nap_pause_completed"] = m["TotPersisterNapPauseCompleted"]
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go
index fdc407a747..340db73a67 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go
@@ -105,10 +105,6 @@ func (e *EmptyDictionaryIterator) Contains(key []byte) (bool, error) {
return false, nil
}
-func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) {
- return nil, nil
-}
-
type EmptyPostingsList struct{}
func (e *EmptyPostingsList) Iterator(includeFreq, includeNorm, includeLocations bool,
@@ -130,6 +126,10 @@ func (e *EmptyPostingsIterator) Next() (Posting, error) {
return nil, nil
}
+func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) {
+ return nil, nil
+}
+
func (e *EmptyPostingsIterator) Size() int {
return 0
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/plugin.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/plugin.go
new file mode 100644
index 0000000000..d8aaa0b6d1
--- /dev/null
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/plugin.go
@@ -0,0 +1,58 @@
+// Copyright (c) 2020 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package segment
+
+import (
+ "github.com/RoaringBitmap/roaring"
+ "github.com/blevesearch/bleve/index"
+)
+
+// Plugin represents the essential functions required by a package to plug in
+// its segment implementation
+type Plugin interface {
+
+ // Type is the name for this segment plugin
+ Type() string
+
+ // Version is a numeric value identifying a specific version of this type.
+ // When incompatible changes are made to a particular type of plugin, the
+ // version must be incremented.
+ Version() uint32
+
+ // New takes a set of AnalysisResults and turns them into a new Segment
+ New(results []*index.AnalysisResult) (Segment, uint64, error)
+
+ // Open attempts to open the file at the specified path and
+ // return the corresponding Segment
+ Open(path string) (Segment, error)
+
+ // Merge takes a set of Segments, and creates a new segment on disk at
+ // the specified path.
+ // Drops is a set of bitmaps (one for each segment) indicating which
+ // documents can be dropped from the segments during the merge.
+ // If the closeCh channel is closed, Merge will cease doing work at
+ // the next opportunity, and return an error (closed).
+ // StatsReporter can optionally be provided, in which case progress
+ // made during the merge is reported while operation continues.
+ // Returns:
+ // A slice of new document numbers (one for each input segment),
+ // this allows the caller to know a particular document's new
+ // document number in the newly merged segment.
+ // The number of bytes written to the new segment file.
+ // An error, if any occurred.
+ Merge(segments []Segment, drops []*roaring.Bitmap, path string,
+ closeCh chan struct{}, s StatsReporter) (
+ [][]uint64, uint64, error)
+}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go
index 34c2bc2048..ddd0d09102 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go
@@ -50,6 +50,16 @@ type Segment interface {
DecRef() error
}
+type UnpersistedSegment interface {
+ Segment
+ Persist(path string) error
+}
+
+type PersistedSegment interface {
+ Segment
+ Path() string
+}
+
type TermDictionary interface {
PostingsList(term []byte, except *roaring.Bitmap, prealloc PostingsList) (PostingsList, error)
@@ -96,6 +106,12 @@ type PostingsIterator interface {
Size() int
}
+type OptimizablePostingsIterator interface {
+ ActualBitmap() *roaring.Bitmap
+ DocNum1Hit() (uint64, bool)
+ ReplaceActual(*roaring.Bitmap)
+}
+
type Posting interface {
Number() uint64
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/unadorned.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/unadorned.go
new file mode 100644
index 0000000000..9a4d6c76c9
--- /dev/null
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/unadorned.go
@@ -0,0 +1,148 @@
+// Copyright (c) 2020 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package segment
+
+import (
+ "github.com/RoaringBitmap/roaring"
+ "math"
+ "reflect"
+)
+
+var reflectStaticSizeUnadornedPostingsIteratorBitmap int
+var reflectStaticSizeUnadornedPostingsIterator1Hit int
+var reflectStaticSizeUnadornedPosting int
+
+
+func init() {
+ var pib UnadornedPostingsIteratorBitmap
+ reflectStaticSizeUnadornedPostingsIteratorBitmap = int(reflect.TypeOf(pib).Size())
+ var pi1h UnadornedPostingsIterator1Hit
+ reflectStaticSizeUnadornedPostingsIterator1Hit = int(reflect.TypeOf(pi1h).Size())
+ var up UnadornedPosting
+ reflectStaticSizeUnadornedPosting = int(reflect.TypeOf(up).Size())
+}
+
+type UnadornedPostingsIteratorBitmap struct{
+ actual roaring.IntPeekable
+ actualBM *roaring.Bitmap
+}
+
+func (i *UnadornedPostingsIteratorBitmap) Next() (Posting, error) {
+ return i.nextAtOrAfter(0)
+}
+
+func (i *UnadornedPostingsIteratorBitmap) Advance(docNum uint64) (Posting, error) {
+ return i.nextAtOrAfter(docNum)
+}
+
+func (i *UnadornedPostingsIteratorBitmap) nextAtOrAfter(atOrAfter uint64) (Posting, error) {
+ docNum, exists := i.nextDocNumAtOrAfter(atOrAfter)
+ if !exists {
+ return nil, nil
+ }
+ return UnadornedPosting(docNum), nil
+}
+
+func (i *UnadornedPostingsIteratorBitmap) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) {
+ if i.actual == nil || !i.actual.HasNext() {
+ return 0, false
+ }
+ i.actual.AdvanceIfNeeded(uint32(atOrAfter))
+
+ if !i.actual.HasNext() {
+ return 0, false // couldn't find anything
+ }
+
+ return uint64(i.actual.Next()), true
+}
+
+func (i *UnadornedPostingsIteratorBitmap) Size() int {
+ return reflectStaticSizeUnadornedPostingsIteratorBitmap
+}
+
+func NewUnadornedPostingsIteratorFromBitmap(bm *roaring.Bitmap) PostingsIterator {
+ return &UnadornedPostingsIteratorBitmap{
+ actualBM: bm,
+ actual: bm.Iterator(),
+ }
+}
+
+const docNum1HitFinished = math.MaxUint64
+
+type UnadornedPostingsIterator1Hit struct{
+ docNum uint64
+}
+
+func (i *UnadornedPostingsIterator1Hit) Next() (Posting, error) {
+ return i.nextAtOrAfter(0)
+}
+
+func (i *UnadornedPostingsIterator1Hit) Advance(docNum uint64) (Posting, error) {
+ return i.nextAtOrAfter(docNum)
+}
+
+func (i *UnadornedPostingsIterator1Hit) nextAtOrAfter(atOrAfter uint64) (Posting, error) {
+ docNum, exists := i.nextDocNumAtOrAfter(atOrAfter)
+ if !exists {
+ return nil, nil
+ }
+ return UnadornedPosting(docNum), nil
+}
+
+func (i *UnadornedPostingsIterator1Hit) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) {
+ if i.docNum == docNum1HitFinished {
+ return 0, false
+ }
+ if i.docNum < atOrAfter {
+ // advanced past our 1-hit
+ i.docNum = docNum1HitFinished // consume our 1-hit docNum
+ return 0, false
+ }
+ docNum := i.docNum
+ i.docNum = docNum1HitFinished // consume our 1-hit docNum
+ return docNum, true
+}
+
+func (i *UnadornedPostingsIterator1Hit) Size() int {
+ return reflectStaticSizeUnadornedPostingsIterator1Hit
+}
+
+func NewUnadornedPostingsIteratorFrom1Hit(docNum1Hit uint64) PostingsIterator {
+ return &UnadornedPostingsIterator1Hit{
+ docNum1Hit,
+ }
+}
+
+type UnadornedPosting uint64
+
+func (p UnadornedPosting) Number() uint64 {
+ return uint64(p)
+}
+
+func (p UnadornedPosting) Frequency() uint64 {
+ return 0
+}
+
+func (p UnadornedPosting) Norm() float64 {
+ return 0
+}
+
+func (p UnadornedPosting) Locations() []Location {
+ return nil
+}
+
+func (p UnadornedPosting) Size() int {
+ return reflectStaticSizeUnadornedPosting
+}
\ No newline at end of file
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/README.md b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/README.md
deleted file mode 100644
index 0facb669fd..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/README.md
+++ /dev/null
@@ -1,158 +0,0 @@
-# zap file format
-
-Advanced ZAP File Format Documentation is [here](zap.md).
-
-The file is written in the reverse order that we typically access data. This helps us write in one pass since later sections of the file require file offsets of things we've already written.
-
-Current usage:
-
-- mmap the entire file
-- crc-32 bytes and version are in fixed position at end of the file
-- reading remainder of footer could be version specific
-- remainder of footer gives us:
- - 3 important offsets (docValue , fields index and stored data index)
- - 2 important values (number of docs and chunk factor)
-- field data is processed once and memoized onto the heap so that we never have to go back to disk for it
-- access to stored data by doc number means first navigating to the stored data index, then accessing a fixed position offset into that slice, which gives us the actual address of the data. the first bytes of that section tell us the size of data so that we know where it ends.
-- access to all other indexed data follows the following pattern:
- - first know the field name -> convert to id
- - next navigate to term dictionary for that field
- - some operations stop here and do dictionary ops
- - next use dictionary to navigate to posting list for a specific term
- - walk posting list
- - if necessary, walk posting details as we go
- - if location info is desired, consult location bitmap to see if it is there
-
-## stored fields section
-
-- for each document
- - preparation phase:
- - produce a slice of metadata bytes and data bytes
- - produce these slices in field id order
- - field value is appended to the data slice
- - metadata slice is varint encoded with the following values for each field value
- - field id (uint16)
- - field type (byte)
- - field value start offset in uncompressed data slice (uint64)
- - field value length (uint64)
- - field number of array positions (uint64)
- - one additional value for each array position (uint64)
- - compress the data slice using snappy
- - file writing phase:
- - remember the start offset for this document
- - write out meta data length (varint uint64)
- - write out compressed data length (varint uint64)
- - write out the metadata bytes
- - write out the compressed data bytes
-
-## stored fields idx
-
-- for each document
- - write start offset (remembered from previous section) of stored data (big endian uint64)
-
-With this index and a known document number, we have direct access to all the stored field data.
-
-## posting details (freq/norm) section
-
-- for each posting list
- - produce a slice containing multiple consecutive chunks (each chunk is varint stream)
- - produce a slice remembering offsets of where each chunk starts
- - preparation phase:
- - for each hit in the posting list
- - if this hit is in next chunk close out encoding of last chunk and record offset start of next
- - encode term frequency (uint64)
- - encode norm factor (float32)
- - file writing phase:
- - remember start position for this posting list details
- - write out number of chunks that follow (varint uint64)
- - write out length of each chunk (each a varint uint64)
- - write out the byte slice containing all the chunk data
-
-If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it.
-
-## posting details (location) section
-
-- for each posting list
- - produce a slice containing multiple consecutive chunks (each chunk is varint stream)
- - produce a slice remembering offsets of where each chunk starts
- - preparation phase:
- - for each hit in the posting list
- - if this hit is in next chunk close out encoding of last chunk and record offset start of next
- - encode field (uint16)
- - encode field pos (uint64)
- - encode field start (uint64)
- - encode field end (uint64)
- - encode number of array positions to follow (uint64)
- - encode each array position (each uint64)
- - file writing phase:
- - remember start position for this posting list details
- - write out number of chunks that follow (varint uint64)
- - write out length of each chunk (each a varint uint64)
- - write out the byte slice containing all the chunk data
-
-If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it.
-
-## postings list section
-
-- for each posting list
- - preparation phase:
- - encode roaring bitmap posting list to bytes (so we know the length)
- - file writing phase:
- - remember the start position for this posting list
- - write freq/norm details offset (remembered from previous, as varint uint64)
- - write location details offset (remembered from previous, as varint uint64)
- - write length of encoded roaring bitmap
- - write the serialized roaring bitmap data
-
-## dictionary
-
-- for each field
- - preparation phase:
- - encode vellum FST with dictionary data pointing to file offset of posting list (remembered from previous)
- - file writing phase:
- - remember the start position of this persistDictionary
- - write length of vellum data (varint uint64)
- - write out vellum data
-
-## fields section
-
-- for each field
- - file writing phase:
- - remember start offset for each field
- - write dictionary address (remembered from previous) (varint uint64)
- - write length of field name (varint uint64)
- - write field name bytes
-
-## fields idx
-
-- for each field
- - file writing phase:
- - write big endian uint64 of start offset for each field
-
-NOTE: currently we don't know or record the length of this fields index. Instead we rely on the fact that we know it immediately precedes a footer of known size.
-
-## fields DocValue
-
-- for each field
- - preparation phase:
- - produce a slice containing multiple consecutive chunks, where each chunk is composed of a meta section followed by compressed columnar field data
- - produce a slice remembering the length of each chunk
- - file writing phase:
- - remember the start position of this first field DocValue offset in the footer
- - write out number of chunks that follow (varint uint64)
- - write out length of each chunk (each a varint uint64)
- - write out the byte slice containing all the chunk data
-
-NOTE: currently the meta header inside each chunk gives clue to the location offsets and size of the data pertaining to a given docID and any
-read operation leverage that meta information to extract the document specific data from the file.
-
-## footer
-
-- file writing phase
- - write number of docs (big endian uint64)
- - write stored field index location (big endian uint64)
- - write field index location (big endian uint64)
- - write field docValue location (big endian uint64)
- - write out chunk factor (big endian uint32)
- - write out version (big endian uint32)
- - write out file CRC of everything preceding this (big endian uint32)
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go
deleted file mode 100644
index c02333cee0..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go
+++ /dev/null
@@ -1,151 +0,0 @@
-// Copyright (c) 2017 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zap
-
-import (
- "bufio"
- "github.com/couchbase/vellum"
- "math"
- "os"
-)
-
-const Version uint32 = 11
-
-const Type string = "zap"
-
-const fieldNotUninverted = math.MaxUint64
-
-// PersistSegmentBase persists SegmentBase in the zap file format.
-func PersistSegmentBase(sb *SegmentBase, path string) error {
- flag := os.O_RDWR | os.O_CREATE
-
- f, err := os.OpenFile(path, flag, 0600)
- if err != nil {
- return err
- }
-
- cleanup := func() {
- _ = f.Close()
- _ = os.Remove(path)
- }
-
- br := bufio.NewWriter(f)
-
- _, err = br.Write(sb.mem)
- if err != nil {
- cleanup()
- return err
- }
-
- err = persistFooter(sb.numDocs, sb.storedIndexOffset, sb.fieldsIndexOffset, sb.docValueOffset,
- sb.chunkFactor, sb.memCRC, br)
- if err != nil {
- cleanup()
- return err
- }
-
- err = br.Flush()
- if err != nil {
- cleanup()
- return err
- }
-
- err = f.Sync()
- if err != nil {
- cleanup()
- return err
- }
-
- err = f.Close()
- if err != nil {
- cleanup()
- return err
- }
-
- return nil
-}
-
-func persistStoredFieldValues(fieldID int,
- storedFieldValues [][]byte, stf []byte, spf [][]uint64,
- curr int, metaEncode varintEncoder, data []byte) (
- int, []byte, error) {
- for i := 0; i < len(storedFieldValues); i++ {
- // encode field
- _, err := metaEncode(uint64(fieldID))
- if err != nil {
- return 0, nil, err
- }
- // encode type
- _, err = metaEncode(uint64(stf[i]))
- if err != nil {
- return 0, nil, err
- }
- // encode start offset
- _, err = metaEncode(uint64(curr))
- if err != nil {
- return 0, nil, err
- }
- // end len
- _, err = metaEncode(uint64(len(storedFieldValues[i])))
- if err != nil {
- return 0, nil, err
- }
- // encode number of array pos
- _, err = metaEncode(uint64(len(spf[i])))
- if err != nil {
- return 0, nil, err
- }
- // encode all array positions
- for _, pos := range spf[i] {
- _, err = metaEncode(pos)
- if err != nil {
- return 0, nil, err
- }
- }
-
- data = append(data, storedFieldValues[i]...)
- curr += len(storedFieldValues[i])
- }
-
- return curr, data, nil
-}
-
-func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32,
- fieldsMap map[string]uint16, fieldsInv []string, numDocs uint64,
- storedIndexOffset uint64, fieldsIndexOffset uint64, docValueOffset uint64,
- dictLocs []uint64) (*SegmentBase, error) {
- sb := &SegmentBase{
- mem: mem,
- memCRC: memCRC,
- chunkFactor: chunkFactor,
- fieldsMap: fieldsMap,
- fieldsInv: fieldsInv,
- numDocs: numDocs,
- storedIndexOffset: storedIndexOffset,
- fieldsIndexOffset: fieldsIndexOffset,
- docValueOffset: docValueOffset,
- dictLocs: dictLocs,
- fieldDvReaders: make(map[uint16]*docValueReader),
- fieldFSTs: make(map[uint16]*vellum.FST),
- }
- sb.updateSize()
-
- err := sb.loadDvReaders()
- if err != nil {
- return nil, err
- }
-
- return sb, nil
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/contentcoder.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/contentcoder.go
deleted file mode 100644
index b9ff8179b3..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/contentcoder.go
+++ /dev/null
@@ -1,230 +0,0 @@
-// Copyright (c) 2017 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zap
-
-import (
- "bytes"
- "encoding/binary"
- "io"
- "reflect"
-
- "github.com/golang/snappy"
-)
-
-var reflectStaticSizeMetaData int
-
-func init() {
- var md MetaData
- reflectStaticSizeMetaData = int(reflect.TypeOf(md).Size())
-}
-
-var termSeparator byte = 0xff
-var termSeparatorSplitSlice = []byte{termSeparator}
-
-type chunkedContentCoder struct {
- final []byte
- chunkSize uint64
- currChunk uint64
- chunkLens []uint64
-
- w io.Writer
- progressiveWrite bool
-
- chunkMetaBuf bytes.Buffer
- chunkBuf bytes.Buffer
-
- chunkMeta []MetaData
-
- compressed []byte // temp buf for snappy compression
-}
-
-// MetaData represents the data information inside a
-// chunk.
-type MetaData struct {
- DocNum uint64 // docNum of the data inside the chunk
- DocDvOffset uint64 // offset of data inside the chunk for the given docid
-}
-
-// newChunkedContentCoder returns a new chunk content coder which
-// packs data into chunks based on the provided chunkSize
-func newChunkedContentCoder(chunkSize uint64, maxDocNum uint64,
- w io.Writer, progressiveWrite bool) *chunkedContentCoder {
- total := maxDocNum/chunkSize + 1
- rv := &chunkedContentCoder{
- chunkSize: chunkSize,
- chunkLens: make([]uint64, total),
- chunkMeta: make([]MetaData, 0, total),
- w: w,
- progressiveWrite: progressiveWrite,
- }
-
- return rv
-}
-
-// Reset lets you reuse this chunked content coder. Buffers are reset
-// and re used. You cannot change the chunk size.
-func (c *chunkedContentCoder) Reset() {
- c.currChunk = 0
- c.final = c.final[:0]
- c.chunkBuf.Reset()
- c.chunkMetaBuf.Reset()
- for i := range c.chunkLens {
- c.chunkLens[i] = 0
- }
- c.chunkMeta = c.chunkMeta[:0]
-}
-
-// Close indicates you are done calling Add() this allows
-// the final chunk to be encoded.
-func (c *chunkedContentCoder) Close() error {
- return c.flushContents()
-}
-
-func (c *chunkedContentCoder) flushContents() error {
- // flush the contents, with meta information at first
- buf := make([]byte, binary.MaxVarintLen64)
- n := binary.PutUvarint(buf, uint64(len(c.chunkMeta)))
- _, err := c.chunkMetaBuf.Write(buf[:n])
- if err != nil {
- return err
- }
-
- // write out the metaData slice
- for _, meta := range c.chunkMeta {
- _, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvOffset)
- if err != nil {
- return err
- }
- }
-
- // write the metadata to final data
- metaData := c.chunkMetaBuf.Bytes()
- c.final = append(c.final, c.chunkMetaBuf.Bytes()...)
- // write the compressed data to the final data
- c.compressed = snappy.Encode(c.compressed[:cap(c.compressed)], c.chunkBuf.Bytes())
- c.final = append(c.final, c.compressed...)
-
- c.chunkLens[c.currChunk] = uint64(len(c.compressed) + len(metaData))
-
- if c.progressiveWrite {
- _, err := c.w.Write(c.final)
- if err != nil {
- return err
- }
- c.final = c.final[:0]
- }
-
- return nil
-}
-
-// Add encodes the provided byte slice into the correct chunk for the provided
-// doc num. You MUST call Add() with increasing docNums.
-func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
- chunk := docNum / c.chunkSize
- if chunk != c.currChunk {
- // flush out the previous chunk details
- err := c.flushContents()
- if err != nil {
- return err
- }
- // clearing the chunk specific meta for next chunk
- c.chunkBuf.Reset()
- c.chunkMetaBuf.Reset()
- c.chunkMeta = c.chunkMeta[:0]
- c.currChunk = chunk
- }
-
- // get the starting offset for this doc
- dvOffset := c.chunkBuf.Len()
- dvSize, err := c.chunkBuf.Write(vals)
- if err != nil {
- return err
- }
-
- c.chunkMeta = append(c.chunkMeta, MetaData{
- DocNum: docNum,
- DocDvOffset: uint64(dvOffset + dvSize),
- })
- return nil
-}
-
-// Write commits all the encoded chunked contents to the provided writer.
-//
-// | ..... data ..... | chunk offsets (varints)
-// | position of chunk offsets (uint64) | number of offsets (uint64) |
-//
-func (c *chunkedContentCoder) Write() (int, error) {
- var tw int
-
- if c.final != nil {
- // write out the data section first
- nw, err := c.w.Write(c.final)
- tw += nw
- if err != nil {
- return tw, err
- }
- }
-
- chunkOffsetsStart := uint64(tw)
-
- if cap(c.final) < binary.MaxVarintLen64 {
- c.final = make([]byte, binary.MaxVarintLen64)
- } else {
- c.final = c.final[0:binary.MaxVarintLen64]
- }
- chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens)
- // write out the chunk offsets
- for _, chunkOffset := range chunkOffsets {
- n := binary.PutUvarint(c.final, chunkOffset)
- nw, err := c.w.Write(c.final[:n])
- tw += nw
- if err != nil {
- return tw, err
- }
- }
-
- chunkOffsetsLen := uint64(tw) - chunkOffsetsStart
-
- c.final = c.final[0:8]
- // write out the length of chunk offsets
- binary.BigEndian.PutUint64(c.final, chunkOffsetsLen)
- nw, err := c.w.Write(c.final)
- tw += nw
- if err != nil {
- return tw, err
- }
-
- // write out the number of chunks
- binary.BigEndian.PutUint64(c.final, uint64(len(c.chunkLens)))
- nw, err = c.w.Write(c.final)
- tw += nw
- if err != nil {
- return tw, err
- }
-
- c.final = c.final[:0]
-
- return tw, nil
-}
-
-// ReadDocValueBoundary elicits the start, end offsets from a
-// metaData header slice
-func ReadDocValueBoundary(chunk int, metaHeaders []MetaData) (uint64, uint64) {
- var start uint64
- if chunk > 0 {
- start = metaHeaders[chunk-1].DocDvOffset
- }
- return start, metaHeaders[chunk].DocDvOffset
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/count.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/count.go
deleted file mode 100644
index 50290f8882..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/count.go
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright (c) 2017 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zap
-
-import (
- "hash/crc32"
- "io"
-
- "github.com/blevesearch/bleve/index/scorch/segment"
-)
-
-// CountHashWriter is a wrapper around a Writer which counts the number of
-// bytes which have been written and computes a crc32 hash
-type CountHashWriter struct {
- w io.Writer
- crc uint32
- n int
- s segment.StatsReporter
-}
-
-// NewCountHashWriter returns a CountHashWriter which wraps the provided Writer
-func NewCountHashWriter(w io.Writer) *CountHashWriter {
- return &CountHashWriter{w: w}
-}
-
-func NewCountHashWriterWithStatsReporter(w io.Writer, s segment.StatsReporter) *CountHashWriter {
- return &CountHashWriter{w: w, s: s}
-}
-
-// Write writes the provided bytes to the wrapped writer and counts the bytes
-func (c *CountHashWriter) Write(b []byte) (int, error) {
- n, err := c.w.Write(b)
- c.crc = crc32.Update(c.crc, crc32.IEEETable, b[:n])
- c.n += n
- if c.s != nil {
- c.s.ReportBytesWritten(uint64(n))
- }
- return n, err
-}
-
-// Count returns the number of bytes written
-func (c *CountHashWriter) Count() int {
- return c.n
-}
-
-// Sum32 returns the CRC-32 hash of the content written to this writer
-func (c *CountHashWriter) Sum32() uint32 {
- return c.crc
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go
deleted file mode 100644
index ad4a8f8dc5..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go
+++ /dev/null
@@ -1,263 +0,0 @@
-// Copyright (c) 2017 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zap
-
-import (
- "bytes"
- "fmt"
-
- "github.com/RoaringBitmap/roaring"
- "github.com/blevesearch/bleve/index"
- "github.com/blevesearch/bleve/index/scorch/segment"
- "github.com/couchbase/vellum"
-)
-
-// Dictionary is the zap representation of the term dictionary
-type Dictionary struct {
- sb *SegmentBase
- field string
- fieldID uint16
- fst *vellum.FST
- fstReader *vellum.Reader
-}
-
-// PostingsList returns the postings list for the specified term
-func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap,
- prealloc segment.PostingsList) (segment.PostingsList, error) {
- var preallocPL *PostingsList
- pl, ok := prealloc.(*PostingsList)
- if ok && pl != nil {
- preallocPL = pl
- }
- return d.postingsList(term, except, preallocPL)
-}
-
-func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
- if d.fstReader == nil {
- if rv == nil || rv == emptyPostingsList {
- return emptyPostingsList, nil
- }
- return d.postingsListInit(rv, except), nil
- }
-
- postingsOffset, exists, err := d.fstReader.Get(term)
- if err != nil {
- return nil, fmt.Errorf("vellum err: %v", err)
- }
- if !exists {
- if rv == nil || rv == emptyPostingsList {
- return emptyPostingsList, nil
- }
- return d.postingsListInit(rv, except), nil
- }
-
- return d.postingsListFromOffset(postingsOffset, except, rv)
-}
-
-func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
- rv = d.postingsListInit(rv, except)
-
- err := rv.read(postingsOffset, d)
- if err != nil {
- return nil, err
- }
-
- return rv, nil
-}
-
-func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList {
- if rv == nil || rv == emptyPostingsList {
- rv = &PostingsList{}
- } else {
- postings := rv.postings
- if postings != nil {
- postings.Clear()
- }
-
- *rv = PostingsList{} // clear the struct
-
- rv.postings = postings
- }
- rv.sb = d.sb
- rv.except = except
- return rv
-}
-
-func (d *Dictionary) Contains(key []byte) (bool, error) {
- return d.fst.Contains(key)
-}
-
-// Iterator returns an iterator for this dictionary
-func (d *Dictionary) Iterator() segment.DictionaryIterator {
- rv := &DictionaryIterator{
- d: d,
- }
-
- if d.fst != nil {
- itr, err := d.fst.Iterator(nil, nil)
- if err == nil {
- rv.itr = itr
- } else if err != vellum.ErrIteratorDone {
- rv.err = err
- }
- }
-
- return rv
-}
-
-// PrefixIterator returns an iterator which only visits terms having the
-// the specified prefix
-func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator {
- rv := &DictionaryIterator{
- d: d,
- }
-
- kBeg := []byte(prefix)
- kEnd := segment.IncrementBytes(kBeg)
-
- if d.fst != nil {
- itr, err := d.fst.Iterator(kBeg, kEnd)
- if err == nil {
- rv.itr = itr
- } else if err != vellum.ErrIteratorDone {
- rv.err = err
- }
- }
-
- return rv
-}
-
-// RangeIterator returns an iterator which only visits terms between the
-// start and end terms. NOTE: bleve.index API specifies the end is inclusive.
-func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator {
- rv := &DictionaryIterator{
- d: d,
- }
-
- // need to increment the end position to be inclusive
- var endBytes []byte
- if len(end) > 0 {
- endBytes = []byte(end)
- if endBytes[len(endBytes)-1] < 0xff {
- endBytes[len(endBytes)-1]++
- } else {
- endBytes = append(endBytes, 0xff)
- }
- }
-
- if d.fst != nil {
- itr, err := d.fst.Iterator([]byte(start), endBytes)
- if err == nil {
- rv.itr = itr
- } else if err != vellum.ErrIteratorDone {
- rv.err = err
- }
- }
-
- return rv
-}
-
-// AutomatonIterator returns an iterator which only visits terms
-// having the the vellum automaton and start/end key range
-func (d *Dictionary) AutomatonIterator(a vellum.Automaton,
- startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator {
- rv := &DictionaryIterator{
- d: d,
- }
-
- if d.fst != nil {
- itr, err := d.fst.Search(a, startKeyInclusive, endKeyExclusive)
- if err == nil {
- rv.itr = itr
- } else if err != vellum.ErrIteratorDone {
- rv.err = err
- }
- }
-
- return rv
-}
-
-func (d *Dictionary) OnlyIterator(onlyTerms [][]byte,
- includeCount bool) segment.DictionaryIterator {
-
- rv := &DictionaryIterator{
- d: d,
- omitCount: !includeCount,
- }
-
- var buf bytes.Buffer
- builder, err := vellum.New(&buf, nil)
- if err != nil {
- rv.err = err
- return rv
- }
- for _, term := range onlyTerms {
- err = builder.Insert(term, 0)
- if err != nil {
- rv.err = err
- return rv
- }
- }
- err = builder.Close()
- if err != nil {
- rv.err = err
- return rv
- }
-
- onlyFST, err := vellum.Load(buf.Bytes())
- if err != nil {
- rv.err = err
- return rv
- }
-
- itr, err := d.fst.Search(onlyFST, nil, nil)
- if err == nil {
- rv.itr = itr
- } else if err != vellum.ErrIteratorDone {
- rv.err = err
- }
-
- return rv
-}
-
-// DictionaryIterator is an iterator for term dictionary
-type DictionaryIterator struct {
- d *Dictionary
- itr vellum.Iterator
- err error
- tmp PostingsList
- entry index.DictEntry
- omitCount bool
-}
-
-// Next returns the next entry in the dictionary
-func (i *DictionaryIterator) Next() (*index.DictEntry, error) {
- if i.err != nil && i.err != vellum.ErrIteratorDone {
- return nil, i.err
- } else if i.itr == nil || i.err == vellum.ErrIteratorDone {
- return nil, nil
- }
- term, postingsOffset := i.itr.Current()
- i.entry.Term = string(term)
- if !i.omitCount {
- i.err = i.tmp.read(postingsOffset, i.d)
- if i.err != nil {
- return nil, i.err
- }
- i.entry.Count = i.tmp.Count()
- }
- i.err = i.itr.Next()
- return &i.entry, nil
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go
deleted file mode 100644
index a819ca239f..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go
+++ /dev/null
@@ -1,311 +0,0 @@
-// Copyright (c) 2017 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zap
-
-import (
- "bytes"
- "encoding/binary"
- "fmt"
- "math"
- "reflect"
- "sort"
-
- "github.com/blevesearch/bleve/index"
- "github.com/blevesearch/bleve/index/scorch/segment"
- "github.com/blevesearch/bleve/size"
- "github.com/golang/snappy"
-)
-
-var reflectStaticSizedocValueReader int
-
-func init() {
- var dvi docValueReader
- reflectStaticSizedocValueReader = int(reflect.TypeOf(dvi).Size())
-}
-
-type docNumTermsVisitor func(docNum uint64, terms []byte) error
-
-type docVisitState struct {
- dvrs map[uint16]*docValueReader
- segment *SegmentBase
-}
-
-type docValueReader struct {
- field string
- curChunkNum uint64
- chunkOffsets []uint64
- dvDataLoc uint64
- curChunkHeader []MetaData
- curChunkData []byte // compressed data cache
- uncompressed []byte // temp buf for snappy decompression
-}
-
-func (di *docValueReader) size() int {
- return reflectStaticSizedocValueReader + size.SizeOfPtr +
- len(di.field) +
- len(di.chunkOffsets)*size.SizeOfUint64 +
- len(di.curChunkHeader)*reflectStaticSizeMetaData +
- len(di.curChunkData)
-}
-
-func (di *docValueReader) cloneInto(rv *docValueReader) *docValueReader {
- if rv == nil {
- rv = &docValueReader{}
- }
-
- rv.field = di.field
- rv.curChunkNum = math.MaxUint64
- rv.chunkOffsets = di.chunkOffsets // immutable, so it's sharable
- rv.dvDataLoc = di.dvDataLoc
- rv.curChunkHeader = rv.curChunkHeader[:0]
- rv.curChunkData = nil
- rv.uncompressed = rv.uncompressed[:0]
-
- return rv
-}
-
-func (di *docValueReader) fieldName() string {
- return di.field
-}
-
-func (di *docValueReader) curChunkNumber() uint64 {
- return di.curChunkNum
-}
-
-func (s *SegmentBase) loadFieldDocValueReader(field string,
- fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) {
- // get the docValue offset for the given fields
- if fieldDvLocStart == fieldNotUninverted {
- // no docValues found, nothing to do
- return nil, nil
- }
-
- // read the number of chunks, and chunk offsets position
- var numChunks, chunkOffsetsPosition uint64
-
- if fieldDvLocEnd-fieldDvLocStart > 16 {
- numChunks = binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-8 : fieldDvLocEnd])
- // read the length of chunk offsets
- chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8])
- // acquire position of chunk offsets
- chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen
- } else {
- return nil, fmt.Errorf("loadFieldDocValueReader: fieldDvLoc too small: %d-%d", fieldDvLocEnd, fieldDvLocStart)
- }
-
- fdvIter := &docValueReader{
- curChunkNum: math.MaxUint64,
- field: field,
- chunkOffsets: make([]uint64, int(numChunks)),
- }
-
- // read the chunk offsets
- var offset uint64
- for i := 0; i < int(numChunks); i++ {
- loc, read := binary.Uvarint(s.mem[chunkOffsetsPosition+offset : chunkOffsetsPosition+offset+binary.MaxVarintLen64])
- if read <= 0 {
- return nil, fmt.Errorf("corrupted chunk offset during segment load")
- }
- fdvIter.chunkOffsets[i] = loc
- offset += uint64(read)
- }
-
- // set the data offset
- fdvIter.dvDataLoc = fieldDvLocStart
-
- return fdvIter, nil
-}
-
-func (di *docValueReader) loadDvChunk(chunkNumber uint64, s *SegmentBase) error {
- // advance to the chunk where the docValues
- // reside for the given docNum
- destChunkDataLoc, curChunkEnd := di.dvDataLoc, di.dvDataLoc
- start, end := readChunkBoundary(int(chunkNumber), di.chunkOffsets)
- if start >= end {
- di.curChunkHeader = di.curChunkHeader[:0]
- di.curChunkData = nil
- di.curChunkNum = chunkNumber
- di.uncompressed = di.uncompressed[:0]
- return nil
- }
-
- destChunkDataLoc += start
- curChunkEnd += end
-
- // read the number of docs reside in the chunk
- numDocs, read := binary.Uvarint(s.mem[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64])
- if read <= 0 {
- return fmt.Errorf("failed to read the chunk")
- }
- chunkMetaLoc := destChunkDataLoc + uint64(read)
-
- offset := uint64(0)
- if cap(di.curChunkHeader) < int(numDocs) {
- di.curChunkHeader = make([]MetaData, int(numDocs))
- } else {
- di.curChunkHeader = di.curChunkHeader[:int(numDocs)]
- }
- for i := 0; i < int(numDocs); i++ {
- di.curChunkHeader[i].DocNum, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
- offset += uint64(read)
- di.curChunkHeader[i].DocDvOffset, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
- offset += uint64(read)
- }
-
- compressedDataLoc := chunkMetaLoc + offset
- dataLength := curChunkEnd - compressedDataLoc
- di.curChunkData = s.mem[compressedDataLoc : compressedDataLoc+dataLength]
- di.curChunkNum = chunkNumber
- di.uncompressed = di.uncompressed[:0]
- return nil
-}
-
-func (di *docValueReader) iterateAllDocValues(s *SegmentBase, visitor docNumTermsVisitor) error {
- for i := 0; i < len(di.chunkOffsets); i++ {
- err := di.loadDvChunk(uint64(i), s)
- if err != nil {
- return err
- }
- if di.curChunkData == nil || len(di.curChunkHeader) == 0 {
- continue
- }
-
- // uncompress the already loaded data
- uncompressed, err := snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData)
- if err != nil {
- return err
- }
- di.uncompressed = uncompressed
-
- start := uint64(0)
- for _, entry := range di.curChunkHeader {
- err = visitor(entry.DocNum, uncompressed[start:entry.DocDvOffset])
- if err != nil {
- return err
- }
-
- start = entry.DocDvOffset
- }
- }
-
- return nil
-}
-
-func (di *docValueReader) visitDocValues(docNum uint64,
- visitor index.DocumentFieldTermVisitor) error {
- // binary search the term locations for the docNum
- start, end := di.getDocValueLocs(docNum)
- if start == math.MaxUint64 || end == math.MaxUint64 || start == end {
- return nil
- }
-
- var uncompressed []byte
- var err error
- // use the uncompressed copy if available
- if len(di.uncompressed) > 0 {
- uncompressed = di.uncompressed
- } else {
- // uncompress the already loaded data
- uncompressed, err = snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData)
- if err != nil {
- return err
- }
- di.uncompressed = uncompressed
- }
-
- // pick the terms for the given docNum
- uncompressed = uncompressed[start:end]
- for {
- i := bytes.Index(uncompressed, termSeparatorSplitSlice)
- if i < 0 {
- break
- }
-
- visitor(di.field, uncompressed[0:i])
- uncompressed = uncompressed[i+1:]
- }
-
- return nil
-}
-
-func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) {
- i := sort.Search(len(di.curChunkHeader), func(i int) bool {
- return di.curChunkHeader[i].DocNum >= docNum
- })
- if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocNum == docNum {
- return ReadDocValueBoundary(i, di.curChunkHeader)
- }
- return math.MaxUint64, math.MaxUint64
-}
-
-// VisitDocumentFieldTerms is an implementation of the
-// DocumentFieldTermVisitable interface
-func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
- visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) (
- segment.DocVisitState, error) {
- dvs, ok := dvsIn.(*docVisitState)
- if !ok || dvs == nil {
- dvs = &docVisitState{}
- } else {
- if dvs.segment != s {
- dvs.segment = s
- dvs.dvrs = nil
- }
- }
-
- var fieldIDPlus1 uint16
- if dvs.dvrs == nil {
- dvs.dvrs = make(map[uint16]*docValueReader, len(fields))
- for _, field := range fields {
- if fieldIDPlus1, ok = s.fieldsMap[field]; !ok {
- continue
- }
- fieldID := fieldIDPlus1 - 1
- if dvIter, exists := s.fieldDvReaders[fieldID]; exists &&
- dvIter != nil {
- dvs.dvrs[fieldID] = dvIter.cloneInto(dvs.dvrs[fieldID])
- }
- }
- }
-
- // find the chunkNumber where the docValues are stored
- docInChunk := localDocNum / uint64(s.chunkFactor)
- var dvr *docValueReader
- for _, field := range fields {
- if fieldIDPlus1, ok = s.fieldsMap[field]; !ok {
- continue
- }
- fieldID := fieldIDPlus1 - 1
- if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil {
- // check if the chunk is already loaded
- if docInChunk != dvr.curChunkNumber() {
- err := dvr.loadDvChunk(docInChunk, s)
- if err != nil {
- return dvs, err
- }
- }
-
- _ = dvr.visitDocValues(localDocNum, visitor)
- }
- }
- return dvs, nil
-}
-
-// VisitableDocValueFields returns the list of fields with
-// persisted doc value terms ready to be visitable using the
-// VisitDocumentFieldTerms method.
-func (s *SegmentBase) VisitableDocValueFields() ([]string, error) {
- return s.fieldDvNames, nil
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/enumerator.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/enumerator.go
deleted file mode 100644
index cd6ff73c79..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/enumerator.go
+++ /dev/null
@@ -1,126 +0,0 @@
-// Copyright (c) 2018 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zap
-
-import (
- "bytes"
-
- "github.com/couchbase/vellum"
-)
-
-// enumerator provides an ordered traversal of multiple vellum
-// iterators. Like JOIN of iterators, the enumerator produces a
-// sequence of (key, iteratorIndex, value) tuples, sorted by key ASC,
-// then iteratorIndex ASC, where the same key might be seen or
-// repeated across multiple child iterators.
-type enumerator struct {
- itrs []vellum.Iterator
- currKs [][]byte
- currVs []uint64
-
- lowK []byte
- lowIdxs []int
- lowCurr int
-}
-
-// newEnumerator returns a new enumerator over the vellum Iterators
-func newEnumerator(itrs []vellum.Iterator) (*enumerator, error) {
- rv := &enumerator{
- itrs: itrs,
- currKs: make([][]byte, len(itrs)),
- currVs: make([]uint64, len(itrs)),
- lowIdxs: make([]int, 0, len(itrs)),
- }
- for i, itr := range rv.itrs {
- rv.currKs[i], rv.currVs[i] = itr.Current()
- }
- rv.updateMatches(false)
- if rv.lowK == nil && len(rv.lowIdxs) == 0 {
- return rv, vellum.ErrIteratorDone
- }
- return rv, nil
-}
-
-// updateMatches maintains the low key matches based on the currKs
-func (m *enumerator) updateMatches(skipEmptyKey bool) {
- m.lowK = nil
- m.lowIdxs = m.lowIdxs[:0]
- m.lowCurr = 0
-
- for i, key := range m.currKs {
- if (key == nil && m.currVs[i] == 0) || // in case of empty iterator
- (len(key) == 0 && skipEmptyKey) { // skip empty keys
- continue
- }
-
- cmp := bytes.Compare(key, m.lowK)
- if cmp < 0 || len(m.lowIdxs) == 0 {
- // reached a new low
- m.lowK = key
- m.lowIdxs = m.lowIdxs[:0]
- m.lowIdxs = append(m.lowIdxs, i)
- } else if cmp == 0 {
- m.lowIdxs = append(m.lowIdxs, i)
- }
- }
-}
-
-// Current returns the enumerator's current key, iterator-index, and
-// value. If the enumerator is not pointing at a valid value (because
-// Next returned an error previously), Current will return nil,0,0.
-func (m *enumerator) Current() ([]byte, int, uint64) {
- var i int
- var v uint64
- if m.lowCurr < len(m.lowIdxs) {
- i = m.lowIdxs[m.lowCurr]
- v = m.currVs[i]
- }
- return m.lowK, i, v
-}
-
-// Next advances the enumerator to the next key/iterator/value result,
-// else vellum.ErrIteratorDone is returned.
-func (m *enumerator) Next() error {
- m.lowCurr += 1
- if m.lowCurr >= len(m.lowIdxs) {
- // move all the current low iterators forwards
- for _, vi := range m.lowIdxs {
- err := m.itrs[vi].Next()
- if err != nil && err != vellum.ErrIteratorDone {
- return err
- }
- m.currKs[vi], m.currVs[vi] = m.itrs[vi].Current()
- }
- // can skip any empty keys encountered at this point
- m.updateMatches(true)
- }
- if m.lowK == nil && len(m.lowIdxs) == 0 {
- return vellum.ErrIteratorDone
- }
- return nil
-}
-
-// Close all the underlying Iterators. The first error, if any, will
-// be returned.
-func (m *enumerator) Close() error {
- var rv error
- for _, itr := range m.itrs {
- err := itr.Close()
- if rv == nil {
- rv = err
- }
- }
- return rv
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/intcoder.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/intcoder.go
deleted file mode 100644
index 571d06edb6..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/intcoder.go
+++ /dev/null
@@ -1,172 +0,0 @@
-// Copyright (c) 2017 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zap
-
-import (
- "bytes"
- "encoding/binary"
- "io"
-)
-
-type chunkedIntCoder struct {
- final []byte
- chunkSize uint64
- chunkBuf bytes.Buffer
- chunkLens []uint64
- currChunk uint64
-
- buf []byte
-}
-
-// newChunkedIntCoder returns a new chunk int coder which packs data into
-// chunks based on the provided chunkSize and supports up to the specified
-// maxDocNum
-func newChunkedIntCoder(chunkSize uint64, maxDocNum uint64) *chunkedIntCoder {
- total := maxDocNum/chunkSize + 1
- rv := &chunkedIntCoder{
- chunkSize: chunkSize,
- chunkLens: make([]uint64, total),
- final: make([]byte, 0, 64),
- }
-
- return rv
-}
-
-// Reset lets you reuse this chunked int coder. buffers are reset and reused
-// from previous use. you cannot change the chunk size or max doc num.
-func (c *chunkedIntCoder) Reset() {
- c.final = c.final[:0]
- c.chunkBuf.Reset()
- c.currChunk = 0
- for i := range c.chunkLens {
- c.chunkLens[i] = 0
- }
-}
-
-// Add encodes the provided integers into the correct chunk for the provided
-// doc num. You MUST call Add() with increasing docNums.
-func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error {
- chunk := docNum / c.chunkSize
- if chunk != c.currChunk {
- // starting a new chunk
- c.Close()
- c.chunkBuf.Reset()
- c.currChunk = chunk
- }
-
- if len(c.buf) < binary.MaxVarintLen64 {
- c.buf = make([]byte, binary.MaxVarintLen64)
- }
-
- for _, val := range vals {
- wb := binary.PutUvarint(c.buf, val)
- _, err := c.chunkBuf.Write(c.buf[:wb])
- if err != nil {
- return err
- }
- }
-
- return nil
-}
-
-func (c *chunkedIntCoder) AddBytes(docNum uint64, buf []byte) error {
- chunk := docNum / c.chunkSize
- if chunk != c.currChunk {
- // starting a new chunk
- c.Close()
- c.chunkBuf.Reset()
- c.currChunk = chunk
- }
-
- _, err := c.chunkBuf.Write(buf)
- return err
-}
-
-// Close indicates you are done calling Add() this allows the final chunk
-// to be encoded.
-func (c *chunkedIntCoder) Close() {
- encodingBytes := c.chunkBuf.Bytes()
- c.chunkLens[c.currChunk] = uint64(len(encodingBytes))
- c.final = append(c.final, encodingBytes...)
- c.currChunk = uint64(cap(c.chunkLens)) // sentinel to detect double close
-}
-
-// Write commits all the encoded chunked integers to the provided writer.
-func (c *chunkedIntCoder) Write(w io.Writer) (int, error) {
- bufNeeded := binary.MaxVarintLen64 * (1 + len(c.chunkLens))
- if len(c.buf) < bufNeeded {
- c.buf = make([]byte, bufNeeded)
- }
- buf := c.buf
-
- // convert the chunk lengths into chunk offsets
- chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens)
-
- // write out the number of chunks & each chunk offsets
- n := binary.PutUvarint(buf, uint64(len(chunkOffsets)))
- for _, chunkOffset := range chunkOffsets {
- n += binary.PutUvarint(buf[n:], chunkOffset)
- }
-
- tw, err := w.Write(buf[:n])
- if err != nil {
- return tw, err
- }
-
- // write out the data
- nw, err := w.Write(c.final)
- tw += nw
- if err != nil {
- return tw, err
- }
- return tw, nil
-}
-
-func (c *chunkedIntCoder) FinalSize() int {
- return len(c.final)
-}
-
-// modifyLengthsToEndOffsets converts the chunk length array
-// to a chunk offset array. The readChunkBoundary
-// will figure out the start and end of every chunk from
-// these offsets. Starting offset of i'th index is stored
-// in i-1'th position except for 0'th index and ending offset
-// is stored at i'th index position.
-// For 0'th element, starting position is always zero.
-// eg:
-// Lens -> 5 5 5 5 => 5 10 15 20
-// Lens -> 0 5 0 5 => 0 5 5 10
-// Lens -> 0 0 0 5 => 0 0 0 5
-// Lens -> 5 0 0 0 => 5 5 5 5
-// Lens -> 0 5 0 0 => 0 5 5 5
-// Lens -> 0 0 5 0 => 0 0 5 5
-func modifyLengthsToEndOffsets(lengths []uint64) []uint64 {
- var runningOffset uint64
- var index, i int
- for i = 1; i <= len(lengths); i++ {
- runningOffset += lengths[i-1]
- lengths[index] = runningOffset
- index++
- }
- return lengths
-}
-
-func readChunkBoundary(chunk int, offsets []uint64) (uint64, uint64) {
- var start uint64
- if chunk > 0 {
- start = offsets[chunk-1]
- }
- return start, offsets[chunk]
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go
deleted file mode 100644
index 50bd7207a5..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go
+++ /dev/null
@@ -1,862 +0,0 @@
-// Copyright (c) 2017 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zap
-
-import (
- "bufio"
- "bytes"
- "encoding/binary"
- "fmt"
- "math"
- "os"
- "sort"
-
- "github.com/RoaringBitmap/roaring"
- seg "github.com/blevesearch/bleve/index/scorch/segment"
- "github.com/couchbase/vellum"
- "github.com/golang/snappy"
-)
-
-var DefaultFileMergerBufferSize = 1024 * 1024
-
-// ValidateMerge can be set by applications to perform additional checks
-// on a new segment produced by a merge, by default this does nothing.
-// Caller should provide EITHER segments or memSegments, but not both.
-// This API is experimental and may be removed at any time.
-var ValidateMerge = func(segments []*Segment, memSegments []*SegmentBase, drops []*roaring.Bitmap, newSegment *Segment) error {
- return nil
-}
-
-const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc
-
-// Merge takes a slice of zap segments and bit masks describing which
-// documents may be dropped, and creates a new segment containing the
-// remaining data. This new segment is built at the specified path,
-// with the provided chunkFactor.
-func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
- chunkFactor uint32, closeCh chan struct{}, s seg.StatsReporter) (
- [][]uint64, uint64, error) {
- segmentBases := make([]*SegmentBase, len(segments))
- for segmenti, segment := range segments {
- segmentBases[segmenti] = &segment.SegmentBase
- }
-
- return MergeSegmentBases(segmentBases, drops, path, chunkFactor, closeCh, s)
-}
-
-func MergeSegmentBases(segmentBases []*SegmentBase, drops []*roaring.Bitmap, path string,
- chunkFactor uint32, closeCh chan struct{}, s seg.StatsReporter) (
- [][]uint64, uint64, error) {
- flag := os.O_RDWR | os.O_CREATE
-
- f, err := os.OpenFile(path, flag, 0600)
- if err != nil {
- return nil, 0, err
- }
-
- cleanup := func() {
- _ = f.Close()
- _ = os.Remove(path)
- }
-
- // buffer the output
- br := bufio.NewWriterSize(f, DefaultFileMergerBufferSize)
-
- // wrap it for counting (tracking offsets)
- cr := NewCountHashWriterWithStatsReporter(br, s)
-
- newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, _, _, _, err :=
- MergeToWriter(segmentBases, drops, chunkFactor, cr, closeCh)
- if err != nil {
- cleanup()
- return nil, 0, err
- }
-
- err = persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset,
- docValueOffset, chunkFactor, cr.Sum32(), cr)
- if err != nil {
- cleanup()
- return nil, 0, err
- }
-
- err = br.Flush()
- if err != nil {
- cleanup()
- return nil, 0, err
- }
-
- err = f.Sync()
- if err != nil {
- cleanup()
- return nil, 0, err
- }
-
- err = f.Close()
- if err != nil {
- cleanup()
- return nil, 0, err
- }
-
- return newDocNums, uint64(cr.Count()), nil
-}
-
-func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap,
- chunkFactor uint32, cr *CountHashWriter, closeCh chan struct{}) (
- newDocNums [][]uint64,
- numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64,
- dictLocs []uint64, fieldsInv []string, fieldsMap map[string]uint16,
- err error) {
- docValueOffset = uint64(fieldNotUninverted)
-
- var fieldsSame bool
- fieldsSame, fieldsInv = mergeFields(segments)
- fieldsMap = mapFields(fieldsInv)
-
- numDocs = computeNewDocCount(segments, drops)
-
- if isClosed(closeCh) {
- return nil, 0, 0, 0, 0, nil, nil, nil, seg.ErrClosed
- }
-
- if numDocs > 0 {
- storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops,
- fieldsMap, fieldsInv, fieldsSame, numDocs, cr, closeCh)
- if err != nil {
- return nil, 0, 0, 0, 0, nil, nil, nil, err
- }
-
- dictLocs, docValueOffset, err = persistMergedRest(segments, drops,
- fieldsInv, fieldsMap, fieldsSame,
- newDocNums, numDocs, chunkFactor, cr, closeCh)
- if err != nil {
- return nil, 0, 0, 0, 0, nil, nil, nil, err
- }
- } else {
- dictLocs = make([]uint64, len(fieldsInv))
- }
-
- fieldsIndexOffset, err = persistFields(fieldsInv, cr, dictLocs)
- if err != nil {
- return nil, 0, 0, 0, 0, nil, nil, nil, err
- }
-
- return newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs, fieldsInv, fieldsMap, nil
-}
-
-// mapFields takes the fieldsInv list and returns a map of fieldName
-// to fieldID+1
-func mapFields(fields []string) map[string]uint16 {
- rv := make(map[string]uint16, len(fields))
- for i, fieldName := range fields {
- rv[fieldName] = uint16(i) + 1
- }
- return rv
-}
-
-// computeNewDocCount determines how many documents will be in the newly
-// merged segment when obsoleted docs are dropped
-func computeNewDocCount(segments []*SegmentBase, drops []*roaring.Bitmap) uint64 {
- var newDocCount uint64
- for segI, segment := range segments {
- newDocCount += segment.numDocs
- if drops[segI] != nil {
- newDocCount -= drops[segI].GetCardinality()
- }
- }
- return newDocCount
-}
-
-func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
- fieldsInv []string, fieldsMap map[string]uint16, fieldsSame bool,
- newDocNumsIn [][]uint64, newSegDocCount uint64, chunkFactor uint32,
- w *CountHashWriter, closeCh chan struct{}) ([]uint64, uint64, error) {
-
- var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64)
- var bufLoc []uint64
-
- var postings *PostingsList
- var postItr *PostingsIterator
-
- rv := make([]uint64, len(fieldsInv))
- fieldDvLocsStart := make([]uint64, len(fieldsInv))
- fieldDvLocsEnd := make([]uint64, len(fieldsInv))
-
- tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1)
- locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1)
-
- var vellumBuf bytes.Buffer
- newVellum, err := vellum.New(&vellumBuf, nil)
- if err != nil {
- return nil, 0, err
- }
-
- newRoaring := roaring.NewBitmap()
-
- // for each field
- for fieldID, fieldName := range fieldsInv {
-
- // collect FST iterators from all active segments for this field
- var newDocNums [][]uint64
- var drops []*roaring.Bitmap
- var dicts []*Dictionary
- var itrs []vellum.Iterator
-
- var segmentsInFocus []*SegmentBase
-
- for segmentI, segment := range segments {
-
- // check for the closure in meantime
- if isClosed(closeCh) {
- return nil, 0, seg.ErrClosed
- }
-
- dict, err2 := segment.dictionary(fieldName)
- if err2 != nil {
- return nil, 0, err2
- }
- if dict != nil && dict.fst != nil {
- itr, err2 := dict.fst.Iterator(nil, nil)
- if err2 != nil && err2 != vellum.ErrIteratorDone {
- return nil, 0, err2
- }
- if itr != nil {
- newDocNums = append(newDocNums, newDocNumsIn[segmentI])
- if dropsIn[segmentI] != nil && !dropsIn[segmentI].IsEmpty() {
- drops = append(drops, dropsIn[segmentI])
- } else {
- drops = append(drops, nil)
- }
- dicts = append(dicts, dict)
- itrs = append(itrs, itr)
- segmentsInFocus = append(segmentsInFocus, segment)
- }
- }
- }
-
- var prevTerm []byte
-
- newRoaring.Clear()
-
- var lastDocNum, lastFreq, lastNorm uint64
-
- // determines whether to use "1-hit" encoding optimization
- // when a term appears in only 1 doc, with no loc info,
- // has freq of 1, and the docNum fits into 31-bits
- use1HitEncoding := func(termCardinality uint64) (bool, uint64, uint64) {
- if termCardinality == uint64(1) && locEncoder.FinalSize() <= 0 {
- docNum := uint64(newRoaring.Minimum())
- if under32Bits(docNum) && docNum == lastDocNum && lastFreq == 1 {
- return true, docNum, lastNorm
- }
- }
- return false, 0, 0
- }
-
- finishTerm := func(term []byte) error {
- tfEncoder.Close()
- locEncoder.Close()
-
- postingsOffset, err := writePostings(newRoaring,
- tfEncoder, locEncoder, use1HitEncoding, w, bufMaxVarintLen64)
- if err != nil {
- return err
- }
-
- if postingsOffset > 0 {
- err = newVellum.Insert(term, postingsOffset)
- if err != nil {
- return err
- }
- }
-
- newRoaring.Clear()
-
- tfEncoder.Reset()
- locEncoder.Reset()
-
- lastDocNum = 0
- lastFreq = 0
- lastNorm = 0
-
- return nil
- }
-
- enumerator, err := newEnumerator(itrs)
-
- for err == nil {
- term, itrI, postingsOffset := enumerator.Current()
-
- if !bytes.Equal(prevTerm, term) {
- // check for the closure in meantime
- if isClosed(closeCh) {
- return nil, 0, seg.ErrClosed
- }
-
- // if the term changed, write out the info collected
- // for the previous term
- err = finishTerm(prevTerm)
- if err != nil {
- return nil, 0, err
- }
- }
-
- postings, err = dicts[itrI].postingsListFromOffset(
- postingsOffset, drops[itrI], postings)
- if err != nil {
- return nil, 0, err
- }
-
- postItr = postings.iterator(true, true, true, postItr)
-
- if fieldsSame {
- // can optimize by copying freq/norm/loc bytes directly
- lastDocNum, lastFreq, lastNorm, err = mergeTermFreqNormLocsByCopying(
- term, postItr, newDocNums[itrI], newRoaring,
- tfEncoder, locEncoder)
- } else {
- lastDocNum, lastFreq, lastNorm, bufLoc, err = mergeTermFreqNormLocs(
- fieldsMap, term, postItr, newDocNums[itrI], newRoaring,
- tfEncoder, locEncoder, bufLoc)
- }
- if err != nil {
- return nil, 0, err
- }
-
- prevTerm = prevTerm[:0] // copy to prevTerm in case Next() reuses term mem
- prevTerm = append(prevTerm, term...)
-
- err = enumerator.Next()
- }
- if err != vellum.ErrIteratorDone {
- return nil, 0, err
- }
-
- err = finishTerm(prevTerm)
- if err != nil {
- return nil, 0, err
- }
-
- dictOffset := uint64(w.Count())
-
- err = newVellum.Close()
- if err != nil {
- return nil, 0, err
- }
- vellumData := vellumBuf.Bytes()
-
- // write out the length of the vellum data
- n := binary.PutUvarint(bufMaxVarintLen64, uint64(len(vellumData)))
- _, err = w.Write(bufMaxVarintLen64[:n])
- if err != nil {
- return nil, 0, err
- }
-
- // write this vellum to disk
- _, err = w.Write(vellumData)
- if err != nil {
- return nil, 0, err
- }
-
- rv[fieldID] = dictOffset
-
- // get the field doc value offset (start)
- fieldDvLocsStart[fieldID] = uint64(w.Count())
-
- // update the field doc values
- fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1, w, true)
-
- fdvReadersAvailable := false
- var dvIterClone *docValueReader
- for segmentI, segment := range segmentsInFocus {
- // check for the closure in meantime
- if isClosed(closeCh) {
- return nil, 0, seg.ErrClosed
- }
-
- fieldIDPlus1 := uint16(segment.fieldsMap[fieldName])
- if dvIter, exists := segment.fieldDvReaders[fieldIDPlus1-1]; exists &&
- dvIter != nil {
- fdvReadersAvailable = true
- dvIterClone = dvIter.cloneInto(dvIterClone)
- err = dvIterClone.iterateAllDocValues(segment, func(docNum uint64, terms []byte) error {
- if newDocNums[segmentI][docNum] == docDropped {
- return nil
- }
- err := fdvEncoder.Add(newDocNums[segmentI][docNum], terms)
- if err != nil {
- return err
- }
- return nil
- })
- if err != nil {
- return nil, 0, err
- }
- }
- }
-
- if fdvReadersAvailable {
- err = fdvEncoder.Close()
- if err != nil {
- return nil, 0, err
- }
-
- // persist the doc value details for this field
- _, err = fdvEncoder.Write()
- if err != nil {
- return nil, 0, err
- }
-
- // get the field doc value offset (end)
- fieldDvLocsEnd[fieldID] = uint64(w.Count())
- } else {
- fieldDvLocsStart[fieldID] = fieldNotUninverted
- fieldDvLocsEnd[fieldID] = fieldNotUninverted
- }
-
- // reset vellum buffer and vellum builder
- vellumBuf.Reset()
- err = newVellum.Reset(&vellumBuf)
- if err != nil {
- return nil, 0, err
- }
- }
-
- fieldDvLocsOffset := uint64(w.Count())
-
- buf := bufMaxVarintLen64
- for i := 0; i < len(fieldDvLocsStart); i++ {
- n := binary.PutUvarint(buf, fieldDvLocsStart[i])
- _, err := w.Write(buf[:n])
- if err != nil {
- return nil, 0, err
- }
- n = binary.PutUvarint(buf, fieldDvLocsEnd[i])
- _, err = w.Write(buf[:n])
- if err != nil {
- return nil, 0, err
- }
- }
-
- return rv, fieldDvLocsOffset, nil
-}
-
-func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *PostingsIterator,
- newDocNums []uint64, newRoaring *roaring.Bitmap,
- tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, bufLoc []uint64) (
- lastDocNum uint64, lastFreq uint64, lastNorm uint64, bufLocOut []uint64, err error) {
- next, err := postItr.Next()
- for next != nil && err == nil {
- hitNewDocNum := newDocNums[next.Number()]
- if hitNewDocNum == docDropped {
- return 0, 0, 0, nil, fmt.Errorf("see hit with dropped docNum")
- }
-
- newRoaring.Add(uint32(hitNewDocNum))
-
- nextFreq := next.Frequency()
- nextNorm := uint64(math.Float32bits(float32(next.Norm())))
-
- locs := next.Locations()
-
- err = tfEncoder.Add(hitNewDocNum,
- encodeFreqHasLocs(nextFreq, len(locs) > 0), nextNorm)
- if err != nil {
- return 0, 0, 0, nil, err
- }
-
- if len(locs) > 0 {
- numBytesLocs := 0
- for _, loc := range locs {
- ap := loc.ArrayPositions()
- numBytesLocs += totalUvarintBytes(uint64(fieldsMap[loc.Field()]-1),
- loc.Pos(), loc.Start(), loc.End(), uint64(len(ap)), ap)
- }
-
- err = locEncoder.Add(hitNewDocNum, uint64(numBytesLocs))
- if err != nil {
- return 0, 0, 0, nil, err
- }
-
- for _, loc := range locs {
- ap := loc.ArrayPositions()
- if cap(bufLoc) < 5+len(ap) {
- bufLoc = make([]uint64, 0, 5+len(ap))
- }
- args := bufLoc[0:5]
- args[0] = uint64(fieldsMap[loc.Field()] - 1)
- args[1] = loc.Pos()
- args[2] = loc.Start()
- args[3] = loc.End()
- args[4] = uint64(len(ap))
- args = append(args, ap...)
- err = locEncoder.Add(hitNewDocNum, args...)
- if err != nil {
- return 0, 0, 0, nil, err
- }
- }
- }
-
- lastDocNum = hitNewDocNum
- lastFreq = nextFreq
- lastNorm = nextNorm
-
- next, err = postItr.Next()
- }
-
- return lastDocNum, lastFreq, lastNorm, bufLoc, err
-}
-
-func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator,
- newDocNums []uint64, newRoaring *roaring.Bitmap,
- tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder) (
- lastDocNum uint64, lastFreq uint64, lastNorm uint64, err error) {
- nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err :=
- postItr.nextBytes()
- for err == nil && len(nextFreqNormBytes) > 0 {
- hitNewDocNum := newDocNums[nextDocNum]
- if hitNewDocNum == docDropped {
- return 0, 0, 0, fmt.Errorf("see hit with dropped doc num")
- }
-
- newRoaring.Add(uint32(hitNewDocNum))
- err = tfEncoder.AddBytes(hitNewDocNum, nextFreqNormBytes)
- if err != nil {
- return 0, 0, 0, err
- }
-
- if len(nextLocBytes) > 0 {
- err = locEncoder.AddBytes(hitNewDocNum, nextLocBytes)
- if err != nil {
- return 0, 0, 0, err
- }
- }
-
- lastDocNum = hitNewDocNum
- lastFreq = nextFreq
- lastNorm = nextNorm
-
- nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err =
- postItr.nextBytes()
- }
-
- return lastDocNum, lastFreq, lastNorm, err
-}
-
-func writePostings(postings *roaring.Bitmap, tfEncoder, locEncoder *chunkedIntCoder,
- use1HitEncoding func(uint64) (bool, uint64, uint64),
- w *CountHashWriter, bufMaxVarintLen64 []byte) (
- offset uint64, err error) {
- termCardinality := postings.GetCardinality()
- if termCardinality <= 0 {
- return 0, nil
- }
-
- if use1HitEncoding != nil {
- encodeAs1Hit, docNum1Hit, normBits1Hit := use1HitEncoding(termCardinality)
- if encodeAs1Hit {
- return FSTValEncode1Hit(docNum1Hit, normBits1Hit), nil
- }
- }
-
- tfOffset := uint64(w.Count())
- _, err = tfEncoder.Write(w)
- if err != nil {
- return 0, err
- }
-
- locOffset := uint64(w.Count())
- _, err = locEncoder.Write(w)
- if err != nil {
- return 0, err
- }
-
- postingsOffset := uint64(w.Count())
-
- n := binary.PutUvarint(bufMaxVarintLen64, tfOffset)
- _, err = w.Write(bufMaxVarintLen64[:n])
- if err != nil {
- return 0, err
- }
-
- n = binary.PutUvarint(bufMaxVarintLen64, locOffset)
- _, err = w.Write(bufMaxVarintLen64[:n])
- if err != nil {
- return 0, err
- }
-
- _, err = writeRoaringWithLen(postings, w, bufMaxVarintLen64)
- if err != nil {
- return 0, err
- }
-
- return postingsOffset, nil
-}
-
-type varintEncoder func(uint64) (int, error)
-
-func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
- fieldsMap map[string]uint16, fieldsInv []string, fieldsSame bool, newSegDocCount uint64,
- w *CountHashWriter, closeCh chan struct{}) (uint64, [][]uint64, error) {
- var rv [][]uint64 // The remapped or newDocNums for each segment.
-
- var newDocNum uint64
-
- var curr int
- var data, compressed []byte
- var metaBuf bytes.Buffer
- varBuf := make([]byte, binary.MaxVarintLen64)
- metaEncode := func(val uint64) (int, error) {
- wb := binary.PutUvarint(varBuf, val)
- return metaBuf.Write(varBuf[:wb])
- }
-
- vals := make([][][]byte, len(fieldsInv))
- typs := make([][]byte, len(fieldsInv))
- poss := make([][][]uint64, len(fieldsInv))
-
- var posBuf []uint64
-
- docNumOffsets := make([]uint64, newSegDocCount)
-
- vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx)
- defer visitDocumentCtxPool.Put(vdc)
-
- // for each segment
- for segI, segment := range segments {
- // check for the closure in meantime
- if isClosed(closeCh) {
- return 0, nil, seg.ErrClosed
- }
-
- segNewDocNums := make([]uint64, segment.numDocs)
-
- dropsI := drops[segI]
-
- // optimize when the field mapping is the same across all
- // segments and there are no deletions, via byte-copying
- // of stored docs bytes directly to the writer
- if fieldsSame && (dropsI == nil || dropsI.GetCardinality() == 0) {
- err := segment.copyStoredDocs(newDocNum, docNumOffsets, w)
- if err != nil {
- return 0, nil, err
- }
-
- for i := uint64(0); i < segment.numDocs; i++ {
- segNewDocNums[i] = newDocNum
- newDocNum++
- }
- rv = append(rv, segNewDocNums)
-
- continue
- }
-
- // for each doc num
- for docNum := uint64(0); docNum < segment.numDocs; docNum++ {
- // TODO: roaring's API limits docNums to 32-bits?
- if dropsI != nil && dropsI.Contains(uint32(docNum)) {
- segNewDocNums[docNum] = docDropped
- continue
- }
-
- segNewDocNums[docNum] = newDocNum
-
- curr = 0
- metaBuf.Reset()
- data = data[:0]
-
- posTemp := posBuf
-
- // collect all the data
- for i := 0; i < len(fieldsInv); i++ {
- vals[i] = vals[i][:0]
- typs[i] = typs[i][:0]
- poss[i] = poss[i][:0]
- }
- err := segment.visitDocument(vdc, docNum, func(field string, typ byte, value []byte, pos []uint64) bool {
- fieldID := int(fieldsMap[field]) - 1
- vals[fieldID] = append(vals[fieldID], value)
- typs[fieldID] = append(typs[fieldID], typ)
-
- // copy array positions to preserve them beyond the scope of this callback
- var curPos []uint64
- if len(pos) > 0 {
- if cap(posTemp) < len(pos) {
- posBuf = make([]uint64, len(pos)*len(fieldsInv))
- posTemp = posBuf
- }
- curPos = posTemp[0:len(pos)]
- copy(curPos, pos)
- posTemp = posTemp[len(pos):]
- }
- poss[fieldID] = append(poss[fieldID], curPos)
-
- return true
- })
- if err != nil {
- return 0, nil, err
- }
-
- // _id field special case optimizes ExternalID() lookups
- idFieldVal := vals[uint16(0)][0]
- _, err = metaEncode(uint64(len(idFieldVal)))
- if err != nil {
- return 0, nil, err
- }
-
- // now walk the non-"_id" fields in order
- for fieldID := 1; fieldID < len(fieldsInv); fieldID++ {
- storedFieldValues := vals[fieldID]
-
- stf := typs[fieldID]
- spf := poss[fieldID]
-
- var err2 error
- curr, data, err2 = persistStoredFieldValues(fieldID,
- storedFieldValues, stf, spf, curr, metaEncode, data)
- if err2 != nil {
- return 0, nil, err2
- }
- }
-
- metaBytes := metaBuf.Bytes()
-
- compressed = snappy.Encode(compressed[:cap(compressed)], data)
-
- // record where we're about to start writing
- docNumOffsets[newDocNum] = uint64(w.Count())
-
- // write out the meta len and compressed data len
- _, err = writeUvarints(w,
- uint64(len(metaBytes)),
- uint64(len(idFieldVal)+len(compressed)))
- if err != nil {
- return 0, nil, err
- }
- // now write the meta
- _, err = w.Write(metaBytes)
- if err != nil {
- return 0, nil, err
- }
- // now write the _id field val (counted as part of the 'compressed' data)
- _, err = w.Write(idFieldVal)
- if err != nil {
- return 0, nil, err
- }
- // now write the compressed data
- _, err = w.Write(compressed)
- if err != nil {
- return 0, nil, err
- }
-
- newDocNum++
- }
-
- rv = append(rv, segNewDocNums)
- }
-
- // return value is the start of the stored index
- storedIndexOffset := uint64(w.Count())
-
- // now write out the stored doc index
- for _, docNumOffset := range docNumOffsets {
- err := binary.Write(w, binary.BigEndian, docNumOffset)
- if err != nil {
- return 0, nil, err
- }
- }
-
- return storedIndexOffset, rv, nil
-}
-
-// copyStoredDocs writes out a segment's stored doc info, optimized by
-// using a single Write() call for the entire set of bytes. The
-// newDocNumOffsets is filled with the new offsets for each doc.
-func (s *SegmentBase) copyStoredDocs(newDocNum uint64, newDocNumOffsets []uint64,
- w *CountHashWriter) error {
- if s.numDocs <= 0 {
- return nil
- }
-
- indexOffset0, storedOffset0, _, _, _ :=
- s.getDocStoredOffsets(0) // the segment's first doc
-
- indexOffsetN, storedOffsetN, readN, metaLenN, dataLenN :=
- s.getDocStoredOffsets(s.numDocs - 1) // the segment's last doc
-
- storedOffset0New := uint64(w.Count())
-
- storedBytes := s.mem[storedOffset0 : storedOffsetN+readN+metaLenN+dataLenN]
- _, err := w.Write(storedBytes)
- if err != nil {
- return err
- }
-
- // remap the storedOffset's for the docs into new offsets relative
- // to storedOffset0New, filling the given docNumOffsetsOut array
- for indexOffset := indexOffset0; indexOffset <= indexOffsetN; indexOffset += 8 {
- storedOffset := binary.BigEndian.Uint64(s.mem[indexOffset : indexOffset+8])
- storedOffsetNew := storedOffset - storedOffset0 + storedOffset0New
- newDocNumOffsets[newDocNum] = storedOffsetNew
- newDocNum += 1
- }
-
- return nil
-}
-
-// mergeFields builds a unified list of fields used across all the
-// input segments, and computes whether the fields are the same across
-// segments (which depends on fields to be sorted in the same way
-// across segments)
-func mergeFields(segments []*SegmentBase) (bool, []string) {
- fieldsSame := true
-
- var segment0Fields []string
- if len(segments) > 0 {
- segment0Fields = segments[0].Fields()
- }
-
- fieldsExist := map[string]struct{}{}
- for _, segment := range segments {
- fields := segment.Fields()
- for fieldi, field := range fields {
- fieldsExist[field] = struct{}{}
- if len(segment0Fields) != len(fields) || segment0Fields[fieldi] != field {
- fieldsSame = false
- }
- }
- }
-
- rv := make([]string, 0, len(fieldsExist))
- // ensure _id stays first
- rv = append(rv, "_id")
- for k := range fieldsExist {
- if k != "_id" {
- rv = append(rv, k)
- }
- }
-
- sort.Strings(rv[1:]) // leave _id as first
-
- return fieldsSame, rv
-}
-
-func isClosed(closeCh chan struct{}) bool {
- select {
- case <-closeCh:
- return true
- default:
- return false
- }
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go
deleted file mode 100644
index c108ec16dd..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go
+++ /dev/null
@@ -1,839 +0,0 @@
-// Copyright (c) 2018 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zap
-
-import (
- "bytes"
- "encoding/binary"
- "math"
- "sort"
- "sync"
-
- "github.com/RoaringBitmap/roaring"
- "github.com/blevesearch/bleve/analysis"
- "github.com/blevesearch/bleve/document"
- "github.com/blevesearch/bleve/index"
- "github.com/couchbase/vellum"
- "github.com/golang/snappy"
-)
-
-var NewSegmentBufferNumResultsBump int = 100
-var NewSegmentBufferNumResultsFactor float64 = 1.0
-var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0
-
-// ValidateDocFields can be set by applications to perform additional checks
-// on fields in a document being added to a new segment, by default it does
-// nothing.
-// This API is experimental and may be removed at any time.
-var ValidateDocFields = func(field document.Field) error {
- return nil
-}
-
-// AnalysisResultsToSegmentBase produces an in-memory zap-encoded
-// SegmentBase from analysis results
-func AnalysisResultsToSegmentBase(results []*index.AnalysisResult,
- chunkFactor uint32) (*SegmentBase, uint64, error) {
- s := interimPool.Get().(*interim)
-
- var br bytes.Buffer
- if s.lastNumDocs > 0 {
- // use previous results to initialize the buf with an estimate
- // size, but note that the interim instance comes from a
- // global interimPool, so multiple scorch instances indexing
- // different docs can lead to low quality estimates
- estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) *
- NewSegmentBufferNumResultsFactor)
- estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) *
- NewSegmentBufferAvgBytesPerDocFactor)
- br.Grow(estimateAvgBytesPerDoc * estimateNumResults)
- }
-
- s.results = results
- s.chunkFactor = chunkFactor
- s.w = NewCountHashWriter(&br)
-
- storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets,
- err := s.convert()
- if err != nil {
- return nil, uint64(0), err
- }
-
- sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkFactor,
- s.FieldsMap, s.FieldsInv, uint64(len(results)),
- storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets)
-
- if err == nil && s.reset() == nil {
- s.lastNumDocs = len(results)
- s.lastOutSize = len(br.Bytes())
- interimPool.Put(s)
- }
-
- return sb, uint64(len(br.Bytes())), err
-}
-
-var interimPool = sync.Pool{New: func() interface{} { return &interim{} }}
-
-// interim holds temporary working data used while converting from
-// analysis results to a zap-encoded segment
-type interim struct {
- results []*index.AnalysisResult
-
- chunkFactor uint32
-
- w *CountHashWriter
-
- // FieldsMap adds 1 to field id to avoid zero value issues
- // name -> field id + 1
- FieldsMap map[string]uint16
-
- // FieldsInv is the inverse of FieldsMap
- // field id -> name
- FieldsInv []string
-
- // Term dictionaries for each field
- // field id -> term -> postings list id + 1
- Dicts []map[string]uint64
-
- // Terms for each field, where terms are sorted ascending
- // field id -> []term
- DictKeys [][]string
-
- // Fields whose IncludeDocValues is true
- // field id -> bool
- IncludeDocValues []bool
-
- // postings id -> bitmap of docNums
- Postings []*roaring.Bitmap
-
- // postings id -> freq/norm's, one for each docNum in postings
- FreqNorms [][]interimFreqNorm
- freqNormsBacking []interimFreqNorm
-
- // postings id -> locs, one for each freq
- Locs [][]interimLoc
- locsBacking []interimLoc
-
- numTermsPerPostingsList []int // key is postings list id
- numLocsPerPostingsList []int // key is postings list id
-
- builder *vellum.Builder
- builderBuf bytes.Buffer
-
- metaBuf bytes.Buffer
-
- tmp0 []byte
- tmp1 []byte
-
- lastNumDocs int
- lastOutSize int
-}
-
-func (s *interim) reset() (err error) {
- s.results = nil
- s.chunkFactor = 0
- s.w = nil
- s.FieldsMap = nil
- s.FieldsInv = nil
- for i := range s.Dicts {
- s.Dicts[i] = nil
- }
- s.Dicts = s.Dicts[:0]
- for i := range s.DictKeys {
- s.DictKeys[i] = s.DictKeys[i][:0]
- }
- s.DictKeys = s.DictKeys[:0]
- for i := range s.IncludeDocValues {
- s.IncludeDocValues[i] = false
- }
- s.IncludeDocValues = s.IncludeDocValues[:0]
- for _, idn := range s.Postings {
- idn.Clear()
- }
- s.Postings = s.Postings[:0]
- s.FreqNorms = s.FreqNorms[:0]
- for i := range s.freqNormsBacking {
- s.freqNormsBacking[i] = interimFreqNorm{}
- }
- s.freqNormsBacking = s.freqNormsBacking[:0]
- s.Locs = s.Locs[:0]
- for i := range s.locsBacking {
- s.locsBacking[i] = interimLoc{}
- }
- s.locsBacking = s.locsBacking[:0]
- s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0]
- s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0]
- s.builderBuf.Reset()
- if s.builder != nil {
- err = s.builder.Reset(&s.builderBuf)
- }
- s.metaBuf.Reset()
- s.tmp0 = s.tmp0[:0]
- s.tmp1 = s.tmp1[:0]
- s.lastNumDocs = 0
- s.lastOutSize = 0
-
- return err
-}
-
-func (s *interim) grabBuf(size int) []byte {
- buf := s.tmp0
- if cap(buf) < size {
- buf = make([]byte, size)
- s.tmp0 = buf
- }
- return buf[0:size]
-}
-
-type interimStoredField struct {
- vals [][]byte
- typs []byte
- arrayposs [][]uint64 // array positions
-}
-
-type interimFreqNorm struct {
- freq uint64
- norm float32
- numLocs int
-}
-
-type interimLoc struct {
- fieldID uint16
- pos uint64
- start uint64
- end uint64
- arrayposs []uint64
-}
-
-func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) {
- s.FieldsMap = map[string]uint16{}
-
- s.getOrDefineField("_id") // _id field is fieldID 0
-
- for _, result := range s.results {
- for _, field := range result.Document.CompositeFields {
- s.getOrDefineField(field.Name())
- }
- for _, field := range result.Document.Fields {
- s.getOrDefineField(field.Name())
- }
- }
-
- sort.Strings(s.FieldsInv[1:]) // keep _id as first field
-
- for fieldID, fieldName := range s.FieldsInv {
- s.FieldsMap[fieldName] = uint16(fieldID + 1)
- }
-
- if cap(s.IncludeDocValues) >= len(s.FieldsInv) {
- s.IncludeDocValues = s.IncludeDocValues[:len(s.FieldsInv)]
- } else {
- s.IncludeDocValues = make([]bool, len(s.FieldsInv))
- }
-
- s.prepareDicts()
-
- for _, dict := range s.DictKeys {
- sort.Strings(dict)
- }
-
- s.processDocuments()
-
- storedIndexOffset, err := s.writeStoredFields()
- if err != nil {
- return 0, 0, 0, nil, err
- }
-
- var fdvIndexOffset uint64
- var dictOffsets []uint64
-
- if len(s.results) > 0 {
- fdvIndexOffset, dictOffsets, err = s.writeDicts()
- if err != nil {
- return 0, 0, 0, nil, err
- }
- } else {
- dictOffsets = make([]uint64, len(s.FieldsInv))
- }
-
- fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets)
- if err != nil {
- return 0, 0, 0, nil, err
- }
-
- return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil
-}
-
-func (s *interim) getOrDefineField(fieldName string) int {
- fieldIDPlus1, exists := s.FieldsMap[fieldName]
- if !exists {
- fieldIDPlus1 = uint16(len(s.FieldsInv) + 1)
- s.FieldsMap[fieldName] = fieldIDPlus1
- s.FieldsInv = append(s.FieldsInv, fieldName)
-
- s.Dicts = append(s.Dicts, make(map[string]uint64))
-
- n := len(s.DictKeys)
- if n < cap(s.DictKeys) {
- s.DictKeys = s.DictKeys[:n+1]
- s.DictKeys[n] = s.DictKeys[n][:0]
- } else {
- s.DictKeys = append(s.DictKeys, []string(nil))
- }
- }
-
- return int(fieldIDPlus1 - 1)
-}
-
-// fill Dicts and DictKeys from analysis results
-func (s *interim) prepareDicts() {
- var pidNext int
-
- var totTFs int
- var totLocs int
-
- visitField := func(fieldID uint16, tfs analysis.TokenFrequencies) {
- dict := s.Dicts[fieldID]
- dictKeys := s.DictKeys[fieldID]
-
- for term, tf := range tfs {
- pidPlus1, exists := dict[term]
- if !exists {
- pidNext++
- pidPlus1 = uint64(pidNext)
-
- dict[term] = pidPlus1
- dictKeys = append(dictKeys, term)
-
- s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0)
- s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0)
- }
-
- pid := pidPlus1 - 1
-
- s.numTermsPerPostingsList[pid] += 1
- s.numLocsPerPostingsList[pid] += len(tf.Locations)
-
- totLocs += len(tf.Locations)
- }
-
- totTFs += len(tfs)
-
- s.DictKeys[fieldID] = dictKeys
- }
-
- for _, result := range s.results {
- // walk each composite field
- for _, field := range result.Document.CompositeFields {
- fieldID := uint16(s.getOrDefineField(field.Name()))
- _, tf := field.Analyze()
- visitField(fieldID, tf)
- }
-
- // walk each field
- for i, field := range result.Document.Fields {
- fieldID := uint16(s.getOrDefineField(field.Name()))
- tf := result.Analyzed[i]
- visitField(fieldID, tf)
- }
- }
-
- numPostingsLists := pidNext
-
- if cap(s.Postings) >= numPostingsLists {
- s.Postings = s.Postings[:numPostingsLists]
- } else {
- postings := make([]*roaring.Bitmap, numPostingsLists)
- copy(postings, s.Postings[:cap(s.Postings)])
- for i := 0; i < numPostingsLists; i++ {
- if postings[i] == nil {
- postings[i] = roaring.New()
- }
- }
- s.Postings = postings
- }
-
- if cap(s.FreqNorms) >= numPostingsLists {
- s.FreqNorms = s.FreqNorms[:numPostingsLists]
- } else {
- s.FreqNorms = make([][]interimFreqNorm, numPostingsLists)
- }
-
- if cap(s.freqNormsBacking) >= totTFs {
- s.freqNormsBacking = s.freqNormsBacking[:totTFs]
- } else {
- s.freqNormsBacking = make([]interimFreqNorm, totTFs)
- }
-
- freqNormsBacking := s.freqNormsBacking
- for pid, numTerms := range s.numTermsPerPostingsList {
- s.FreqNorms[pid] = freqNormsBacking[0:0]
- freqNormsBacking = freqNormsBacking[numTerms:]
- }
-
- if cap(s.Locs) >= numPostingsLists {
- s.Locs = s.Locs[:numPostingsLists]
- } else {
- s.Locs = make([][]interimLoc, numPostingsLists)
- }
-
- if cap(s.locsBacking) >= totLocs {
- s.locsBacking = s.locsBacking[:totLocs]
- } else {
- s.locsBacking = make([]interimLoc, totLocs)
- }
-
- locsBacking := s.locsBacking
- for pid, numLocs := range s.numLocsPerPostingsList {
- s.Locs[pid] = locsBacking[0:0]
- locsBacking = locsBacking[numLocs:]
- }
-}
-
-func (s *interim) processDocuments() {
- numFields := len(s.FieldsInv)
- reuseFieldLens := make([]int, numFields)
- reuseFieldTFs := make([]analysis.TokenFrequencies, numFields)
-
- for docNum, result := range s.results {
- for i := 0; i < numFields; i++ { // clear these for reuse
- reuseFieldLens[i] = 0
- reuseFieldTFs[i] = nil
- }
-
- s.processDocument(uint64(docNum), result,
- reuseFieldLens, reuseFieldTFs)
- }
-}
-
-func (s *interim) processDocument(docNum uint64,
- result *index.AnalysisResult,
- fieldLens []int, fieldTFs []analysis.TokenFrequencies) {
- visitField := func(fieldID uint16, fieldName string,
- ln int, tf analysis.TokenFrequencies) {
- fieldLens[fieldID] += ln
-
- existingFreqs := fieldTFs[fieldID]
- if existingFreqs != nil {
- existingFreqs.MergeAll(fieldName, tf)
- } else {
- fieldTFs[fieldID] = tf
- }
- }
-
- // walk each composite field
- for _, field := range result.Document.CompositeFields {
- fieldID := uint16(s.getOrDefineField(field.Name()))
- ln, tf := field.Analyze()
- visitField(fieldID, field.Name(), ln, tf)
- }
-
- // walk each field
- for i, field := range result.Document.Fields {
- fieldID := uint16(s.getOrDefineField(field.Name()))
- ln := result.Length[i]
- tf := result.Analyzed[i]
- visitField(fieldID, field.Name(), ln, tf)
- }
-
- // now that it's been rolled up into fieldTFs, walk that
- for fieldID, tfs := range fieldTFs {
- dict := s.Dicts[fieldID]
- norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))
-
- for term, tf := range tfs {
- pid := dict[term] - 1
- bs := s.Postings[pid]
- bs.Add(uint32(docNum))
-
- s.FreqNorms[pid] = append(s.FreqNorms[pid],
- interimFreqNorm{
- freq: uint64(tf.Frequency()),
- norm: norm,
- numLocs: len(tf.Locations),
- })
-
- if len(tf.Locations) > 0 {
- locs := s.Locs[pid]
-
- for _, loc := range tf.Locations {
- var locf = uint16(fieldID)
- if loc.Field != "" {
- locf = uint16(s.getOrDefineField(loc.Field))
- }
- var arrayposs []uint64
- if len(loc.ArrayPositions) > 0 {
- arrayposs = loc.ArrayPositions
- }
- locs = append(locs, interimLoc{
- fieldID: locf,
- pos: uint64(loc.Position),
- start: uint64(loc.Start),
- end: uint64(loc.End),
- arrayposs: arrayposs,
- })
- }
-
- s.Locs[pid] = locs
- }
- }
- }
-}
-
-func (s *interim) writeStoredFields() (
- storedIndexOffset uint64, err error) {
- varBuf := make([]byte, binary.MaxVarintLen64)
- metaEncode := func(val uint64) (int, error) {
- wb := binary.PutUvarint(varBuf, val)
- return s.metaBuf.Write(varBuf[:wb])
- }
-
- data, compressed := s.tmp0[:0], s.tmp1[:0]
- defer func() { s.tmp0, s.tmp1 = data, compressed }()
-
- // keyed by docNum
- docStoredOffsets := make([]uint64, len(s.results))
-
- // keyed by fieldID, for the current doc in the loop
- docStoredFields := map[uint16]interimStoredField{}
-
- for docNum, result := range s.results {
- for fieldID := range docStoredFields { // reset for next doc
- delete(docStoredFields, fieldID)
- }
-
- for _, field := range result.Document.Fields {
- fieldID := uint16(s.getOrDefineField(field.Name()))
-
- opts := field.Options()
-
- if opts.IsStored() {
- isf := docStoredFields[fieldID]
- isf.vals = append(isf.vals, field.Value())
- isf.typs = append(isf.typs, encodeFieldType(field))
- isf.arrayposs = append(isf.arrayposs, field.ArrayPositions())
- docStoredFields[fieldID] = isf
- }
-
- if opts.IncludeDocValues() {
- s.IncludeDocValues[fieldID] = true
- }
-
- err := ValidateDocFields(field)
- if err != nil {
- return 0, err
- }
- }
-
- var curr int
-
- s.metaBuf.Reset()
- data = data[:0]
-
- // _id field special case optimizes ExternalID() lookups
- idFieldVal := docStoredFields[uint16(0)].vals[0]
- _, err = metaEncode(uint64(len(idFieldVal)))
- if err != nil {
- return 0, err
- }
-
- // handle non-"_id" fields
- for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ {
- isf, exists := docStoredFields[uint16(fieldID)]
- if exists {
- curr, data, err = persistStoredFieldValues(
- fieldID, isf.vals, isf.typs, isf.arrayposs,
- curr, metaEncode, data)
- if err != nil {
- return 0, err
- }
- }
- }
-
- metaBytes := s.metaBuf.Bytes()
-
- compressed = snappy.Encode(compressed[:cap(compressed)], data)
-
- docStoredOffsets[docNum] = uint64(s.w.Count())
-
- _, err := writeUvarints(s.w,
- uint64(len(metaBytes)),
- uint64(len(idFieldVal)+len(compressed)))
- if err != nil {
- return 0, err
- }
-
- _, err = s.w.Write(metaBytes)
- if err != nil {
- return 0, err
- }
-
- _, err = s.w.Write(idFieldVal)
- if err != nil {
- return 0, err
- }
-
- _, err = s.w.Write(compressed)
- if err != nil {
- return 0, err
- }
- }
-
- storedIndexOffset = uint64(s.w.Count())
-
- for _, docStoredOffset := range docStoredOffsets {
- err = binary.Write(s.w, binary.BigEndian, docStoredOffset)
- if err != nil {
- return 0, err
- }
- }
-
- return storedIndexOffset, nil
-}
-
-func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) {
- dictOffsets = make([]uint64, len(s.FieldsInv))
-
- fdvOffsetsStart := make([]uint64, len(s.FieldsInv))
- fdvOffsetsEnd := make([]uint64, len(s.FieldsInv))
-
- buf := s.grabBuf(binary.MaxVarintLen64)
-
- tfEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1))
- locEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1))
- fdvEncoder := newChunkedContentCoder(uint64(s.chunkFactor), uint64(len(s.results)-1), s.w, false)
-
- var docTermMap [][]byte
-
- if s.builder == nil {
- s.builder, err = vellum.New(&s.builderBuf, nil)
- if err != nil {
- return 0, nil, err
- }
- }
-
- for fieldID, terms := range s.DictKeys {
- if cap(docTermMap) < len(s.results) {
- docTermMap = make([][]byte, len(s.results))
- } else {
- docTermMap = docTermMap[0:len(s.results)]
- for docNum := range docTermMap { // reset the docTermMap
- docTermMap[docNum] = docTermMap[docNum][:0]
- }
- }
-
- dict := s.Dicts[fieldID]
-
- for _, term := range terms { // terms are already sorted
- pid := dict[term] - 1
-
- postingsBS := s.Postings[pid]
-
- freqNorms := s.FreqNorms[pid]
- freqNormOffset := 0
-
- locs := s.Locs[pid]
- locOffset := 0
-
- postingsItr := postingsBS.Iterator()
- for postingsItr.HasNext() {
- docNum := uint64(postingsItr.Next())
-
- freqNorm := freqNorms[freqNormOffset]
-
- err = tfEncoder.Add(docNum,
- encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0),
- uint64(math.Float32bits(freqNorm.norm)))
- if err != nil {
- return 0, nil, err
- }
-
- if freqNorm.numLocs > 0 {
- numBytesLocs := 0
- for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
- numBytesLocs += totalUvarintBytes(
- uint64(loc.fieldID), loc.pos, loc.start, loc.end,
- uint64(len(loc.arrayposs)), loc.arrayposs)
- }
-
- err = locEncoder.Add(docNum, uint64(numBytesLocs))
- if err != nil {
- return 0, nil, err
- }
-
- for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
- err = locEncoder.Add(docNum,
- uint64(loc.fieldID), loc.pos, loc.start, loc.end,
- uint64(len(loc.arrayposs)))
- if err != nil {
- return 0, nil, err
- }
-
- err = locEncoder.Add(docNum, loc.arrayposs...)
- if err != nil {
- return 0, nil, err
- }
- }
-
- locOffset += freqNorm.numLocs
- }
-
- freqNormOffset++
-
- docTermMap[docNum] = append(
- append(docTermMap[docNum], term...),
- termSeparator)
- }
-
- tfEncoder.Close()
- locEncoder.Close()
-
- postingsOffset, err :=
- writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf)
- if err != nil {
- return 0, nil, err
- }
-
- if postingsOffset > uint64(0) {
- err = s.builder.Insert([]byte(term), postingsOffset)
- if err != nil {
- return 0, nil, err
- }
- }
-
- tfEncoder.Reset()
- locEncoder.Reset()
- }
-
- err = s.builder.Close()
- if err != nil {
- return 0, nil, err
- }
-
- // record where this dictionary starts
- dictOffsets[fieldID] = uint64(s.w.Count())
-
- vellumData := s.builderBuf.Bytes()
-
- // write out the length of the vellum data
- n := binary.PutUvarint(buf, uint64(len(vellumData)))
- _, err = s.w.Write(buf[:n])
- if err != nil {
- return 0, nil, err
- }
-
- // write this vellum to disk
- _, err = s.w.Write(vellumData)
- if err != nil {
- return 0, nil, err
- }
-
- // reset vellum for reuse
- s.builderBuf.Reset()
-
- err = s.builder.Reset(&s.builderBuf)
- if err != nil {
- return 0, nil, err
- }
-
- // write the field doc values
- if s.IncludeDocValues[fieldID] {
- for docNum, docTerms := range docTermMap {
- if len(docTerms) > 0 {
- err = fdvEncoder.Add(uint64(docNum), docTerms)
- if err != nil {
- return 0, nil, err
- }
- }
- }
- err = fdvEncoder.Close()
- if err != nil {
- return 0, nil, err
- }
-
- fdvOffsetsStart[fieldID] = uint64(s.w.Count())
-
- _, err = fdvEncoder.Write()
- if err != nil {
- return 0, nil, err
- }
-
- fdvOffsetsEnd[fieldID] = uint64(s.w.Count())
-
- fdvEncoder.Reset()
- } else {
- fdvOffsetsStart[fieldID] = fieldNotUninverted
- fdvOffsetsEnd[fieldID] = fieldNotUninverted
- }
- }
-
- fdvIndexOffset = uint64(s.w.Count())
-
- for i := 0; i < len(fdvOffsetsStart); i++ {
- n := binary.PutUvarint(buf, fdvOffsetsStart[i])
- _, err := s.w.Write(buf[:n])
- if err != nil {
- return 0, nil, err
- }
- n = binary.PutUvarint(buf, fdvOffsetsEnd[i])
- _, err = s.w.Write(buf[:n])
- if err != nil {
- return 0, nil, err
- }
- }
-
- return fdvIndexOffset, dictOffsets, nil
-}
-
-func encodeFieldType(f document.Field) byte {
- fieldType := byte('x')
- switch f.(type) {
- case *document.TextField:
- fieldType = 't'
- case *document.NumericField:
- fieldType = 'n'
- case *document.DateTimeField:
- fieldType = 'd'
- case *document.BooleanField:
- fieldType = 'b'
- case *document.GeoPointField:
- fieldType = 'g'
- case *document.CompositeField:
- fieldType = 'c'
- }
- return fieldType
-}
-
-// returns the total # of bytes needed to encode the given uint64's
-// into binary.PutUVarint() encoding
-func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) {
- n = numUvarintBytes(a)
- n += numUvarintBytes(b)
- n += numUvarintBytes(c)
- n += numUvarintBytes(d)
- n += numUvarintBytes(e)
- for _, v := range more {
- n += numUvarintBytes(v)
- }
- return n
-}
-
-// returns # of bytes needed to encode x in binary.PutUvarint() encoding
-func numUvarintBytes(x uint64) (n int) {
- for x >= 0x80 {
- x >>= 7
- n++
- }
- return n + 1
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go
deleted file mode 100644
index 4c43fdb9b9..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go
+++ /dev/null
@@ -1,897 +0,0 @@
-// Copyright (c) 2017 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zap
-
-import (
- "encoding/binary"
- "fmt"
- "math"
- "reflect"
-
- "github.com/RoaringBitmap/roaring"
- "github.com/blevesearch/bleve/index/scorch/segment"
- "github.com/blevesearch/bleve/size"
-)
-
-var reflectStaticSizePostingsList int
-var reflectStaticSizePostingsIterator int
-var reflectStaticSizePosting int
-var reflectStaticSizeLocation int
-
-func init() {
- var pl PostingsList
- reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size())
- var pi PostingsIterator
- reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size())
- var p Posting
- reflectStaticSizePosting = int(reflect.TypeOf(p).Size())
- var l Location
- reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
-}
-
-// FST or vellum value (uint64) encoding is determined by the top two
-// highest-order or most significant bits...
-//
-// encoding : MSB
-// name : 63 62 61...to...bit #0 (LSB)
-// ----------+---+---+---------------------------------------------------
-// general : 0 | 0 | 62-bits of postingsOffset.
-// ~ : 0 | 1 | reserved for future.
-// 1-hit : 1 | 0 | 31-bits of positive float31 norm | 31-bits docNum.
-// ~ : 1 | 1 | reserved for future.
-//
-// Encoding "general" is able to handle all cases, where the
-// postingsOffset points to more information about the postings for
-// the term.
-//
-// Encoding "1-hit" is used to optimize a commonly seen case when a
-// term has only a single hit. For example, a term in the _id field
-// will have only 1 hit. The "1-hit" encoding is used for a term
-// in a field when...
-//
-// - term vector info is disabled for that field;
-// - and, the term appears in only a single doc for that field;
-// - and, the term's freq is exactly 1 in that single doc for that field;
-// - and, the docNum must fit into 31-bits;
-//
-// Otherwise, the "general" encoding is used instead.
-//
-// In the "1-hit" encoding, the field in that single doc may have
-// other terms, which is supported in the "1-hit" encoding by the
-// positive float31 norm.
-
-const FSTValEncodingMask = uint64(0xc000000000000000)
-const FSTValEncodingGeneral = uint64(0x0000000000000000)
-const FSTValEncoding1Hit = uint64(0x8000000000000000)
-
-func FSTValEncode1Hit(docNum uint64, normBits uint64) uint64 {
- return FSTValEncoding1Hit | ((mask31Bits & normBits) << 31) | (mask31Bits & docNum)
-}
-
-func FSTValDecode1Hit(v uint64) (docNum uint64, normBits uint64) {
- return (mask31Bits & v), (mask31Bits & (v >> 31))
-}
-
-const mask31Bits = uint64(0x000000007fffffff)
-
-func under32Bits(x uint64) bool {
- return x <= mask31Bits
-}
-
-const DocNum1HitFinished = math.MaxUint64
-
-var NormBits1Hit = uint64(math.Float32bits(float32(1)))
-
-// PostingsList is an in-memory representation of a postings list
-type PostingsList struct {
- sb *SegmentBase
- postingsOffset uint64
- freqOffset uint64
- locOffset uint64
- postings *roaring.Bitmap
- except *roaring.Bitmap
-
- // when normBits1Hit != 0, then this postings list came from a
- // 1-hit encoding, and only the docNum1Hit & normBits1Hit apply
- docNum1Hit uint64
- normBits1Hit uint64
-}
-
-// represents an immutable, empty postings list
-var emptyPostingsList = &PostingsList{}
-
-func (p *PostingsList) Size() int {
- sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr
-
- if p.except != nil {
- sizeInBytes += int(p.except.GetSizeInBytes())
- }
-
- return sizeInBytes
-}
-
-func (p *PostingsList) OrInto(receiver *roaring.Bitmap) {
- if p.normBits1Hit != 0 {
- receiver.Add(uint32(p.docNum1Hit))
- return
- }
-
- if p.postings != nil {
- receiver.Or(p.postings)
- }
-}
-
-// Iterator returns an iterator for this postings list
-func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool,
- prealloc segment.PostingsIterator) segment.PostingsIterator {
- if p.normBits1Hit == 0 && p.postings == nil {
- return emptyPostingsIterator
- }
-
- var preallocPI *PostingsIterator
- pi, ok := prealloc.(*PostingsIterator)
- if ok && pi != nil {
- preallocPI = pi
- }
- if preallocPI == emptyPostingsIterator {
- preallocPI = nil
- }
-
- return p.iterator(includeFreq, includeNorm, includeLocs, preallocPI)
-}
-
-func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
- rv *PostingsIterator) *PostingsIterator {
- if rv == nil {
- rv = &PostingsIterator{}
- } else {
- freqNormReader := rv.freqNormReader
- if freqNormReader != nil {
- freqNormReader.Reset([]byte(nil))
- }
-
- locReader := rv.locReader
- if locReader != nil {
- locReader.Reset([]byte(nil))
- }
-
- freqChunkOffsets := rv.freqChunkOffsets[:0]
- locChunkOffsets := rv.locChunkOffsets[:0]
-
- nextLocs := rv.nextLocs[:0]
- nextSegmentLocs := rv.nextSegmentLocs[:0]
-
- buf := rv.buf
-
- *rv = PostingsIterator{} // clear the struct
-
- rv.freqNormReader = freqNormReader
- rv.locReader = locReader
-
- rv.freqChunkOffsets = freqChunkOffsets
- rv.locChunkOffsets = locChunkOffsets
-
- rv.nextLocs = nextLocs
- rv.nextSegmentLocs = nextSegmentLocs
-
- rv.buf = buf
- }
-
- rv.postings = p
- rv.includeFreqNorm = includeFreq || includeNorm || includeLocs
- rv.includeLocs = includeLocs
-
- if p.normBits1Hit != 0 {
- // "1-hit" encoding
- rv.docNum1Hit = p.docNum1Hit
- rv.normBits1Hit = p.normBits1Hit
-
- if p.except != nil && p.except.Contains(uint32(rv.docNum1Hit)) {
- rv.docNum1Hit = DocNum1HitFinished
- }
-
- return rv
- }
-
- // "general" encoding, check if empty
- if p.postings == nil {
- return rv
- }
-
- var n uint64
- var read int
-
- // prepare the freq chunk details
- if rv.includeFreqNorm {
- var numFreqChunks uint64
- numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
- n += uint64(read)
- if cap(rv.freqChunkOffsets) >= int(numFreqChunks) {
- rv.freqChunkOffsets = rv.freqChunkOffsets[:int(numFreqChunks)]
- } else {
- rv.freqChunkOffsets = make([]uint64, int(numFreqChunks))
- }
- for i := 0; i < int(numFreqChunks); i++ {
- rv.freqChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
- n += uint64(read)
- }
- rv.freqChunkStart = p.freqOffset + n
- }
-
- // prepare the loc chunk details
- if rv.includeLocs {
- n = 0
- var numLocChunks uint64
- numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
- n += uint64(read)
- if cap(rv.locChunkOffsets) >= int(numLocChunks) {
- rv.locChunkOffsets = rv.locChunkOffsets[:int(numLocChunks)]
- } else {
- rv.locChunkOffsets = make([]uint64, int(numLocChunks))
- }
- for i := 0; i < int(numLocChunks); i++ {
- rv.locChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
- n += uint64(read)
- }
- rv.locChunkStart = p.locOffset + n
- }
-
- rv.all = p.postings.Iterator()
- if p.except != nil {
- rv.ActualBM = roaring.AndNot(p.postings, p.except)
- rv.Actual = rv.ActualBM.Iterator()
- } else {
- rv.ActualBM = p.postings
- rv.Actual = rv.all // Optimize to use same iterator for all & Actual.
- }
-
- return rv
-}
-
-// Count returns the number of items on this postings list
-func (p *PostingsList) Count() uint64 {
- var n, e uint64
- if p.normBits1Hit != 0 {
- n = 1
- if p.except != nil && p.except.Contains(uint32(p.docNum1Hit)) {
- e = 1
- }
- } else if p.postings != nil {
- n = p.postings.GetCardinality()
- if p.except != nil {
- e = p.postings.AndCardinality(p.except)
- }
- }
- return n - e
-}
-
-func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
- rv.postingsOffset = postingsOffset
-
- // handle "1-hit" encoding special case
- if rv.postingsOffset&FSTValEncodingMask == FSTValEncoding1Hit {
- return rv.init1Hit(postingsOffset)
- }
-
- // read the location of the freq/norm details
- var n uint64
- var read int
-
- rv.freqOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+binary.MaxVarintLen64])
- n += uint64(read)
-
- rv.locOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
- n += uint64(read)
-
- var postingsLen uint64
- postingsLen, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
- n += uint64(read)
-
- roaringBytes := d.sb.mem[postingsOffset+n : postingsOffset+n+postingsLen]
-
- if rv.postings == nil {
- rv.postings = roaring.NewBitmap()
- }
- _, err := rv.postings.FromBuffer(roaringBytes)
- if err != nil {
- return fmt.Errorf("error loading roaring bitmap: %v", err)
- }
-
- return nil
-}
-
-func (rv *PostingsList) init1Hit(fstVal uint64) error {
- docNum, normBits := FSTValDecode1Hit(fstVal)
-
- rv.docNum1Hit = docNum
- rv.normBits1Hit = normBits
-
- return nil
-}
-
-// PostingsIterator provides a way to iterate through the postings list
-type PostingsIterator struct {
- postings *PostingsList
- all roaring.IntPeekable
- Actual roaring.IntPeekable
- ActualBM *roaring.Bitmap
-
- currChunk uint32
- currChunkFreqNorm []byte
- currChunkLoc []byte
-
- freqNormReader *segment.MemUvarintReader
- locReader *segment.MemUvarintReader
-
- freqChunkOffsets []uint64
- freqChunkStart uint64
-
- locChunkOffsets []uint64
- locChunkStart uint64
-
- next Posting // reused across Next() calls
- nextLocs []Location // reused across Next() calls
- nextSegmentLocs []segment.Location // reused across Next() calls
-
- docNum1Hit uint64
- normBits1Hit uint64
-
- buf []byte
-
- includeFreqNorm bool
- includeLocs bool
-}
-
-var emptyPostingsIterator = &PostingsIterator{}
-
-func (i *PostingsIterator) Size() int {
- sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr +
- len(i.currChunkFreqNorm) +
- len(i.currChunkLoc) +
- len(i.freqChunkOffsets)*size.SizeOfUint64 +
- len(i.locChunkOffsets)*size.SizeOfUint64 +
- i.next.Size()
-
- for _, entry := range i.nextLocs {
- sizeInBytes += entry.Size()
- }
-
- return sizeInBytes
-}
-
-func (i *PostingsIterator) loadChunk(chunk int) error {
- if i.includeFreqNorm {
- if chunk >= len(i.freqChunkOffsets) {
- return fmt.Errorf("tried to load freq chunk that doesn't exist %d/(%d)",
- chunk, len(i.freqChunkOffsets))
- }
-
- end, start := i.freqChunkStart, i.freqChunkStart
- s, e := readChunkBoundary(chunk, i.freqChunkOffsets)
- start += s
- end += e
- i.currChunkFreqNorm = i.postings.sb.mem[start:end]
- if i.freqNormReader == nil {
- i.freqNormReader = segment.NewMemUvarintReader(i.currChunkFreqNorm)
- } else {
- i.freqNormReader.Reset(i.currChunkFreqNorm)
- }
- }
-
- if i.includeLocs {
- if chunk >= len(i.locChunkOffsets) {
- return fmt.Errorf("tried to load loc chunk that doesn't exist %d/(%d)",
- chunk, len(i.locChunkOffsets))
- }
-
- end, start := i.locChunkStart, i.locChunkStart
- s, e := readChunkBoundary(chunk, i.locChunkOffsets)
- start += s
- end += e
- i.currChunkLoc = i.postings.sb.mem[start:end]
- if i.locReader == nil {
- i.locReader = segment.NewMemUvarintReader(i.currChunkLoc)
- } else {
- i.locReader.Reset(i.currChunkLoc)
- }
- }
-
- i.currChunk = uint32(chunk)
- return nil
-}
-
-func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) {
- if i.normBits1Hit != 0 {
- return 1, i.normBits1Hit, false, nil
- }
-
- freqHasLocs, err := i.freqNormReader.ReadUvarint()
- if err != nil {
- return 0, 0, false, fmt.Errorf("error reading frequency: %v", err)
- }
-
- freq, hasLocs := decodeFreqHasLocs(freqHasLocs)
-
- normBits, err := i.freqNormReader.ReadUvarint()
- if err != nil {
- return 0, 0, false, fmt.Errorf("error reading norm: %v", err)
- }
-
- return freq, normBits, hasLocs, nil
-}
-
-func (i *PostingsIterator) skipFreqNormReadHasLocs() (bool, error) {
- if i.normBits1Hit != 0 {
- return false, nil
- }
-
- freqHasLocs, err := i.freqNormReader.ReadUvarint()
- if err != nil {
- return false, fmt.Errorf("error reading freqHasLocs: %v", err)
- }
-
- i.freqNormReader.SkipUvarint() // Skip normBits.
-
- return freqHasLocs&0x01 != 0, nil // See decodeFreqHasLocs() / hasLocs.
-}
-
-func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 {
- rv := freq << 1
- if hasLocs {
- rv = rv | 0x01 // 0'th LSB encodes whether there are locations
- }
- return rv
-}
-
-func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) {
- freq := freqHasLocs >> 1
- hasLocs := freqHasLocs&0x01 != 0
- return freq, hasLocs
-}
-
-// readLocation processes all the integers on the stream representing a single
-// location.
-func (i *PostingsIterator) readLocation(l *Location) error {
- // read off field
- fieldID, err := i.locReader.ReadUvarint()
- if err != nil {
- return fmt.Errorf("error reading location field: %v", err)
- }
- // read off pos
- pos, err := i.locReader.ReadUvarint()
- if err != nil {
- return fmt.Errorf("error reading location pos: %v", err)
- }
- // read off start
- start, err := i.locReader.ReadUvarint()
- if err != nil {
- return fmt.Errorf("error reading location start: %v", err)
- }
- // read off end
- end, err := i.locReader.ReadUvarint()
- if err != nil {
- return fmt.Errorf("error reading location end: %v", err)
- }
- // read off num array pos
- numArrayPos, err := i.locReader.ReadUvarint()
- if err != nil {
- return fmt.Errorf("error reading location num array pos: %v", err)
- }
-
- l.field = i.postings.sb.fieldsInv[fieldID]
- l.pos = pos
- l.start = start
- l.end = end
-
- if cap(l.ap) < int(numArrayPos) {
- l.ap = make([]uint64, int(numArrayPos))
- } else {
- l.ap = l.ap[:int(numArrayPos)]
- }
-
- // read off array positions
- for k := 0; k < int(numArrayPos); k++ {
- ap, err := i.locReader.ReadUvarint()
- if err != nil {
- return fmt.Errorf("error reading array position: %v", err)
- }
-
- l.ap[k] = ap
- }
-
- return nil
-}
-
-// Next returns the next posting on the postings list, or nil at the end
-func (i *PostingsIterator) Next() (segment.Posting, error) {
- return i.nextAtOrAfter(0)
-}
-
-// Advance returns the posting at the specified docNum or it is not present
-// the next posting, or if the end is reached, nil
-func (i *PostingsIterator) Advance(docNum uint64) (segment.Posting, error) {
- return i.nextAtOrAfter(docNum)
-}
-
-// Next returns the next posting on the postings list, or nil at the end
-func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, error) {
- docNum, exists, err := i.nextDocNumAtOrAfter(atOrAfter)
- if err != nil || !exists {
- return nil, err
- }
-
- i.next = Posting{} // clear the struct
- rv := &i.next
- rv.docNum = docNum
-
- if !i.includeFreqNorm {
- return rv, nil
- }
-
- var normBits uint64
- var hasLocs bool
-
- rv.freq, normBits, hasLocs, err = i.readFreqNormHasLocs()
- if err != nil {
- return nil, err
- }
-
- rv.norm = math.Float32frombits(uint32(normBits))
-
- if i.includeLocs && hasLocs {
- // prepare locations into reused slices, where we assume
- // rv.freq >= "number of locs", since in a composite field,
- // some component fields might have their IncludeTermVector
- // flags disabled while other component fields are enabled
- if cap(i.nextLocs) >= int(rv.freq) {
- i.nextLocs = i.nextLocs[0:rv.freq]
- } else {
- i.nextLocs = make([]Location, rv.freq, rv.freq*2)
- }
- if cap(i.nextSegmentLocs) < int(rv.freq) {
- i.nextSegmentLocs = make([]segment.Location, rv.freq, rv.freq*2)
- }
- rv.locs = i.nextSegmentLocs[:0]
-
- numLocsBytes, err := i.locReader.ReadUvarint()
- if err != nil {
- return nil, fmt.Errorf("error reading location numLocsBytes: %v", err)
- }
-
- j := 0
- startBytesRemaining := i.locReader.Len() // # bytes remaining in the locReader
- for startBytesRemaining-i.locReader.Len() < int(numLocsBytes) {
- err := i.readLocation(&i.nextLocs[j])
- if err != nil {
- return nil, err
- }
- rv.locs = append(rv.locs, &i.nextLocs[j])
- j++
- }
- }
-
- return rv, nil
-}
-
-var freqHasLocs1Hit = encodeFreqHasLocs(1, false)
-
-// nextBytes returns the docNum and the encoded freq & loc bytes for
-// the next posting
-func (i *PostingsIterator) nextBytes() (
- docNumOut uint64, freq uint64, normBits uint64,
- bytesFreqNorm []byte, bytesLoc []byte, err error) {
- docNum, exists, err := i.nextDocNumAtOrAfter(0)
- if err != nil || !exists {
- return 0, 0, 0, nil, nil, err
- }
-
- if i.normBits1Hit != 0 {
- if i.buf == nil {
- i.buf = make([]byte, binary.MaxVarintLen64*2)
- }
- n := binary.PutUvarint(i.buf, freqHasLocs1Hit)
- n += binary.PutUvarint(i.buf[n:], i.normBits1Hit)
- return docNum, uint64(1), i.normBits1Hit, i.buf[:n], nil, nil
- }
-
- startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
-
- var hasLocs bool
-
- freq, normBits, hasLocs, err = i.readFreqNormHasLocs()
- if err != nil {
- return 0, 0, 0, nil, nil, err
- }
-
- endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
- bytesFreqNorm = i.currChunkFreqNorm[startFreqNorm:endFreqNorm]
-
- if hasLocs {
- startLoc := len(i.currChunkLoc) - i.locReader.Len()
-
- numLocsBytes, err := i.locReader.ReadUvarint()
- if err != nil {
- return 0, 0, 0, nil, nil,
- fmt.Errorf("error reading location nextBytes numLocs: %v", err)
- }
-
- // skip over all the location bytes
- i.locReader.SkipBytes(int(numLocsBytes))
-
- endLoc := len(i.currChunkLoc) - i.locReader.Len()
- bytesLoc = i.currChunkLoc[startLoc:endLoc]
- }
-
- return docNum, freq, normBits, bytesFreqNorm, bytesLoc, nil
-}
-
-// nextDocNum returns the next docNum on the postings list, and also
-// sets up the currChunk / loc related fields of the iterator.
-func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, error) {
- if i.normBits1Hit != 0 {
- if i.docNum1Hit == DocNum1HitFinished {
- return 0, false, nil
- }
- if i.docNum1Hit < atOrAfter {
- // advanced past our 1-hit
- i.docNum1Hit = DocNum1HitFinished // consume our 1-hit docNum
- return 0, false, nil
- }
- docNum := i.docNum1Hit
- i.docNum1Hit = DocNum1HitFinished // consume our 1-hit docNum
- return docNum, true, nil
- }
-
- if i.Actual == nil || !i.Actual.HasNext() {
- return 0, false, nil
- }
-
- if i.postings == nil || i.postings.postings == i.ActualBM {
- return i.nextDocNumAtOrAfterClean(atOrAfter)
- }
-
- i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
-
- if !i.Actual.HasNext() {
- // couldn't find anything
- return 0, false, nil
- }
-
- n := i.Actual.Next()
- allN := i.all.Next()
-
- nChunk := n / i.postings.sb.chunkFactor
-
- // when allN becomes >= to here, then allN is in the same chunk as nChunk.
- allNReachesNChunk := nChunk * i.postings.sb.chunkFactor
-
- // n is the next actual hit (excluding some postings), and
- // allN is the next hit in the full postings, and
- // if they don't match, move 'all' forwards until they do
- for allN != n {
- // we've reached same chunk, so move the freq/norm/loc decoders forward
- if i.includeFreqNorm && allN >= allNReachesNChunk {
- err := i.currChunkNext(nChunk)
- if err != nil {
- return 0, false, err
- }
- }
-
- allN = i.all.Next()
- }
-
- if i.includeFreqNorm && (i.currChunk != nChunk || i.currChunkFreqNorm == nil) {
- err := i.loadChunk(int(nChunk))
- if err != nil {
- return 0, false, fmt.Errorf("error loading chunk: %v", err)
- }
- }
-
- return uint64(n), true, nil
-}
-
-// optimization when the postings list is "clean" (e.g., no updates &
-// no deletions) where the all bitmap is the same as the actual bitmap
-func (i *PostingsIterator) nextDocNumAtOrAfterClean(
- atOrAfter uint64) (uint64, bool, error) {
-
- if !i.includeFreqNorm {
- i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
-
- if !i.Actual.HasNext() {
- return 0, false, nil // couldn't find anything
- }
-
- return uint64(i.Actual.Next()), true, nil
- }
-
- // freq-norm's needed, so maintain freq-norm chunk reader
- sameChunkNexts := 0 // # of times we called Next() in the same chunk
- n := i.Actual.Next()
- nChunk := n / i.postings.sb.chunkFactor
-
- for uint64(n) < atOrAfter && i.Actual.HasNext() {
- n = i.Actual.Next()
-
- nChunkPrev := nChunk
- nChunk = n / i.postings.sb.chunkFactor
-
- if nChunk != nChunkPrev {
- sameChunkNexts = 0
- } else {
- sameChunkNexts += 1
- }
- }
-
- if uint64(n) < atOrAfter {
- // couldn't find anything
- return 0, false, nil
- }
-
- for j := 0; j < sameChunkNexts; j++ {
- err := i.currChunkNext(nChunk)
- if err != nil {
- return 0, false, fmt.Errorf("error optimized currChunkNext: %v", err)
- }
- }
-
- if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
- err := i.loadChunk(int(nChunk))
- if err != nil {
- return 0, false, fmt.Errorf("error loading chunk: %v", err)
- }
- }
-
- return uint64(n), true, nil
-}
-
-func (i *PostingsIterator) currChunkNext(nChunk uint32) error {
- if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
- err := i.loadChunk(int(nChunk))
- if err != nil {
- return fmt.Errorf("error loading chunk: %v", err)
- }
- }
-
- // read off freq/offsets even though we don't care about them
- hasLocs, err := i.skipFreqNormReadHasLocs()
- if err != nil {
- return err
- }
-
- if i.includeLocs && hasLocs {
- numLocsBytes, err := i.locReader.ReadUvarint()
- if err != nil {
- return fmt.Errorf("error reading location numLocsBytes: %v", err)
- }
-
- // skip over all the location bytes
- i.locReader.SkipBytes(int(numLocsBytes))
- }
-
- return nil
-}
-
-// DocNum1Hit returns the docNum and true if this is "1-hit" optimized
-// and the docNum is available.
-func (p *PostingsIterator) DocNum1Hit() (uint64, bool) {
- if p.normBits1Hit != 0 && p.docNum1Hit != DocNum1HitFinished {
- return p.docNum1Hit, true
- }
- return 0, false
-}
-
-// PostingsIteratorFromBitmap constructs a PostingsIterator given an
-// "actual" bitmap.
-func PostingsIteratorFromBitmap(bm *roaring.Bitmap,
- includeFreqNorm, includeLocs bool) (*PostingsIterator, error) {
- return &PostingsIterator{
- ActualBM: bm,
- Actual: bm.Iterator(),
- includeFreqNorm: includeFreqNorm,
- includeLocs: includeLocs,
- }, nil
-}
-
-// PostingsIteratorFrom1Hit constructs a PostingsIterator given a
-// 1-hit docNum.
-func PostingsIteratorFrom1Hit(docNum1Hit, normBits1Hit uint64,
- includeFreqNorm, includeLocs bool) (*PostingsIterator, error) {
- return &PostingsIterator{
- docNum1Hit: docNum1Hit,
- normBits1Hit: normBits1Hit,
- includeFreqNorm: includeFreqNorm,
- includeLocs: includeLocs,
- }, nil
-}
-
-// Posting is a single entry in a postings list
-type Posting struct {
- docNum uint64
- freq uint64
- norm float32
- locs []segment.Location
-}
-
-func (p *Posting) Size() int {
- sizeInBytes := reflectStaticSizePosting
-
- for _, entry := range p.locs {
- sizeInBytes += entry.Size()
- }
-
- return sizeInBytes
-}
-
-// Number returns the document number of this posting in this segment
-func (p *Posting) Number() uint64 {
- return p.docNum
-}
-
-// Frequency returns the frequencies of occurrence of this term in this doc/field
-func (p *Posting) Frequency() uint64 {
- return p.freq
-}
-
-// Norm returns the normalization factor for this posting
-func (p *Posting) Norm() float64 {
- return float64(p.norm)
-}
-
-// Locations returns the location information for each occurrence
-func (p *Posting) Locations() []segment.Location {
- return p.locs
-}
-
-// Location represents the location of a single occurrence
-type Location struct {
- field string
- pos uint64
- start uint64
- end uint64
- ap []uint64
-}
-
-func (l *Location) Size() int {
- return reflectStaticSizeLocation +
- len(l.field) +
- len(l.ap)*size.SizeOfUint64
-}
-
-// Field returns the name of the field (useful in composite fields to know
-// which original field the value came from)
-func (l *Location) Field() string {
- return l.field
-}
-
-// Start returns the start byte offset of this occurrence
-func (l *Location) Start() uint64 {
- return l.start
-}
-
-// End returns the end byte offset of this occurrence
-func (l *Location) End() uint64 {
- return l.end
-}
-
-// Pos returns the 1-based phrase position of this occurrence
-func (l *Location) Pos() uint64 {
- return l.pos
-}
-
-// ArrayPositions returns the array position vector associated with this occurrence
-func (l *Location) ArrayPositions() []uint64 {
- return l.ap
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/read.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/read.go
deleted file mode 100644
index e47d4c6abd..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/read.go
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright (c) 2017 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zap
-
-import "encoding/binary"
-
-func (s *SegmentBase) getDocStoredMetaAndCompressed(docNum uint64) ([]byte, []byte) {
- _, storedOffset, n, metaLen, dataLen := s.getDocStoredOffsets(docNum)
-
- meta := s.mem[storedOffset+n : storedOffset+n+metaLen]
- data := s.mem[storedOffset+n+metaLen : storedOffset+n+metaLen+dataLen]
-
- return meta, data
-}
-
-func (s *SegmentBase) getDocStoredOffsets(docNum uint64) (
- uint64, uint64, uint64, uint64, uint64) {
- indexOffset := s.storedIndexOffset + (8 * docNum)
-
- storedOffset := binary.BigEndian.Uint64(s.mem[indexOffset : indexOffset+8])
-
- var n uint64
-
- metaLen, read := binary.Uvarint(s.mem[storedOffset : storedOffset+binary.MaxVarintLen64])
- n += uint64(read)
-
- dataLen, read := binary.Uvarint(s.mem[storedOffset+n : storedOffset+n+binary.MaxVarintLen64])
- n += uint64(read)
-
- return indexOffset, storedOffset, n, metaLen, dataLen
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go
deleted file mode 100644
index 5aa33a26c9..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go
+++ /dev/null
@@ -1,572 +0,0 @@
-// Copyright (c) 2017 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zap
-
-import (
- "bytes"
- "encoding/binary"
- "fmt"
- "io"
- "os"
- "sync"
- "unsafe"
-
- "github.com/RoaringBitmap/roaring"
- "github.com/blevesearch/bleve/index/scorch/segment"
- "github.com/blevesearch/bleve/size"
- "github.com/couchbase/vellum"
- mmap "github.com/edsrzf/mmap-go"
- "github.com/golang/snappy"
-)
-
-var reflectStaticSizeSegmentBase int
-
-func init() {
- var sb SegmentBase
- reflectStaticSizeSegmentBase = int(unsafe.Sizeof(sb))
-}
-
-// Open returns a zap impl of a segment
-func Open(path string) (segment.Segment, error) {
- f, err := os.Open(path)
- if err != nil {
- return nil, err
- }
- mm, err := mmap.Map(f, mmap.RDONLY, 0)
- if err != nil {
- // mmap failed, try to close the file
- _ = f.Close()
- return nil, err
- }
-
- rv := &Segment{
- SegmentBase: SegmentBase{
- mem: mm[0 : len(mm)-FooterSize],
- fieldsMap: make(map[string]uint16),
- fieldDvReaders: make(map[uint16]*docValueReader),
- fieldFSTs: make(map[uint16]*vellum.FST),
- },
- f: f,
- mm: mm,
- path: path,
- refs: 1,
- }
- rv.SegmentBase.updateSize()
-
- err = rv.loadConfig()
- if err != nil {
- _ = rv.Close()
- return nil, err
- }
-
- err = rv.loadFields()
- if err != nil {
- _ = rv.Close()
- return nil, err
- }
-
- err = rv.loadDvReaders()
- if err != nil {
- _ = rv.Close()
- return nil, err
- }
-
- return rv, nil
-}
-
-// SegmentBase is a memory only, read-only implementation of the
-// segment.Segment interface, using zap's data representation.
-type SegmentBase struct {
- mem []byte
- memCRC uint32
- chunkFactor uint32
- fieldsMap map[string]uint16 // fieldName -> fieldID+1
- fieldsInv []string // fieldID -> fieldName
- numDocs uint64
- storedIndexOffset uint64
- fieldsIndexOffset uint64
- docValueOffset uint64
- dictLocs []uint64
- fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field
- fieldDvNames []string // field names cached in fieldDvReaders
- size uint64
-
- m sync.Mutex
- fieldFSTs map[uint16]*vellum.FST
-}
-
-func (sb *SegmentBase) Size() int {
- return int(sb.size)
-}
-
-func (sb *SegmentBase) updateSize() {
- sizeInBytes := reflectStaticSizeSegmentBase +
- cap(sb.mem)
-
- // fieldsMap
- for k, _ := range sb.fieldsMap {
- sizeInBytes += (len(k) + size.SizeOfString) + size.SizeOfUint16
- }
-
- // fieldsInv, dictLocs
- for _, entry := range sb.fieldsInv {
- sizeInBytes += len(entry) + size.SizeOfString
- }
- sizeInBytes += len(sb.dictLocs) * size.SizeOfUint64
-
- // fieldDvReaders
- for _, v := range sb.fieldDvReaders {
- sizeInBytes += size.SizeOfUint16 + size.SizeOfPtr
- if v != nil {
- sizeInBytes += v.size()
- }
- }
-
- sb.size = uint64(sizeInBytes)
-}
-
-func (sb *SegmentBase) AddRef() {}
-func (sb *SegmentBase) DecRef() (err error) { return nil }
-func (sb *SegmentBase) Close() (err error) { return nil }
-
-// Segment implements a persisted segment.Segment interface, by
-// embedding an mmap()'ed SegmentBase.
-type Segment struct {
- SegmentBase
-
- f *os.File
- mm mmap.MMap
- path string
- version uint32
- crc uint32
-
- m sync.Mutex // Protects the fields that follow.
- refs int64
-}
-
-func (s *Segment) Size() int {
- // 8 /* size of file pointer */
- // 4 /* size of version -> uint32 */
- // 4 /* size of crc -> uint32 */
- sizeOfUints := 16
-
- sizeInBytes := (len(s.path) + size.SizeOfString) + sizeOfUints
-
- // mutex, refs -> int64
- sizeInBytes += 16
-
- // do not include the mmap'ed part
- return sizeInBytes + s.SegmentBase.Size() - cap(s.mem)
-}
-
-func (s *Segment) AddRef() {
- s.m.Lock()
- s.refs++
- s.m.Unlock()
-}
-
-func (s *Segment) DecRef() (err error) {
- s.m.Lock()
- s.refs--
- if s.refs == 0 {
- err = s.closeActual()
- }
- s.m.Unlock()
- return err
-}
-
-func (s *Segment) loadConfig() error {
- crcOffset := len(s.mm) - 4
- s.crc = binary.BigEndian.Uint32(s.mm[crcOffset : crcOffset+4])
-
- verOffset := crcOffset - 4
- s.version = binary.BigEndian.Uint32(s.mm[verOffset : verOffset+4])
- if s.version != Version {
- return fmt.Errorf("unsupported version %d", s.version)
- }
-
- chunkOffset := verOffset - 4
- s.chunkFactor = binary.BigEndian.Uint32(s.mm[chunkOffset : chunkOffset+4])
-
- docValueOffset := chunkOffset - 8
- s.docValueOffset = binary.BigEndian.Uint64(s.mm[docValueOffset : docValueOffset+8])
-
- fieldsIndexOffset := docValueOffset - 8
- s.fieldsIndexOffset = binary.BigEndian.Uint64(s.mm[fieldsIndexOffset : fieldsIndexOffset+8])
-
- storedIndexOffset := fieldsIndexOffset - 8
- s.storedIndexOffset = binary.BigEndian.Uint64(s.mm[storedIndexOffset : storedIndexOffset+8])
-
- numDocsOffset := storedIndexOffset - 8
- s.numDocs = binary.BigEndian.Uint64(s.mm[numDocsOffset : numDocsOffset+8])
- return nil
-}
-
-func (s *SegmentBase) loadFields() error {
- // NOTE for now we assume the fields index immediately precedes
- // the footer, and if this changes, need to adjust accordingly (or
- // store explicit length), where s.mem was sliced from s.mm in Open().
- fieldsIndexEnd := uint64(len(s.mem))
-
- // iterate through fields index
- var fieldID uint64
- for s.fieldsIndexOffset+(8*fieldID) < fieldsIndexEnd {
- addr := binary.BigEndian.Uint64(s.mem[s.fieldsIndexOffset+(8*fieldID) : s.fieldsIndexOffset+(8*fieldID)+8])
-
- dictLoc, read := binary.Uvarint(s.mem[addr:fieldsIndexEnd])
- n := uint64(read)
- s.dictLocs = append(s.dictLocs, dictLoc)
-
- var nameLen uint64
- nameLen, read = binary.Uvarint(s.mem[addr+n : fieldsIndexEnd])
- n += uint64(read)
-
- name := string(s.mem[addr+n : addr+n+nameLen])
- s.fieldsInv = append(s.fieldsInv, name)
- s.fieldsMap[name] = uint16(fieldID + 1)
-
- fieldID++
- }
- return nil
-}
-
-// Dictionary returns the term dictionary for the specified field
-func (s *SegmentBase) Dictionary(field string) (segment.TermDictionary, error) {
- dict, err := s.dictionary(field)
- if err == nil && dict == nil {
- return &segment.EmptyDictionary{}, nil
- }
- return dict, err
-}
-
-func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) {
- fieldIDPlus1 := sb.fieldsMap[field]
- if fieldIDPlus1 > 0 {
- rv = &Dictionary{
- sb: sb,
- field: field,
- fieldID: fieldIDPlus1 - 1,
- }
-
- dictStart := sb.dictLocs[rv.fieldID]
- if dictStart > 0 {
- var ok bool
- sb.m.Lock()
- if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok {
- // read the length of the vellum data
- vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
- fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
- rv.fst, err = vellum.Load(fstBytes)
- if err != nil {
- sb.m.Unlock()
- return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err)
- }
-
- sb.fieldFSTs[rv.fieldID] = rv.fst
- }
-
- sb.m.Unlock()
- rv.fstReader, err = rv.fst.Reader()
- if err != nil {
- return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err)
- }
-
- }
- }
-
- return rv, nil
-}
-
-// visitDocumentCtx holds data structures that are reusable across
-// multiple VisitDocument() calls to avoid memory allocations
-type visitDocumentCtx struct {
- buf []byte
- reader bytes.Reader
- arrayPos []uint64
-}
-
-var visitDocumentCtxPool = sync.Pool{
- New: func() interface{} {
- reuse := &visitDocumentCtx{}
- return reuse
- },
-}
-
-// VisitDocument invokes the DocFieldValueVistor for each stored field
-// for the specified doc number
-func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error {
- vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx)
- defer visitDocumentCtxPool.Put(vdc)
- return s.visitDocument(vdc, num, visitor)
-}
-
-func (s *SegmentBase) visitDocument(vdc *visitDocumentCtx, num uint64,
- visitor segment.DocumentFieldValueVisitor) error {
- // first make sure this is a valid number in this segment
- if num < s.numDocs {
- meta, compressed := s.getDocStoredMetaAndCompressed(num)
-
- vdc.reader.Reset(meta)
-
- // handle _id field special case
- idFieldValLen, err := binary.ReadUvarint(&vdc.reader)
- if err != nil {
- return err
- }
- idFieldVal := compressed[:idFieldValLen]
-
- keepGoing := visitor("_id", byte('t'), idFieldVal, nil)
- if !keepGoing {
- visitDocumentCtxPool.Put(vdc)
- return nil
- }
-
- // handle non-"_id" fields
- compressed = compressed[idFieldValLen:]
-
- uncompressed, err := snappy.Decode(vdc.buf[:cap(vdc.buf)], compressed)
- if err != nil {
- return err
- }
-
- for keepGoing {
- field, err := binary.ReadUvarint(&vdc.reader)
- if err == io.EOF {
- break
- }
- if err != nil {
- return err
- }
- typ, err := binary.ReadUvarint(&vdc.reader)
- if err != nil {
- return err
- }
- offset, err := binary.ReadUvarint(&vdc.reader)
- if err != nil {
- return err
- }
- l, err := binary.ReadUvarint(&vdc.reader)
- if err != nil {
- return err
- }
- numap, err := binary.ReadUvarint(&vdc.reader)
- if err != nil {
- return err
- }
- var arrayPos []uint64
- if numap > 0 {
- if cap(vdc.arrayPos) < int(numap) {
- vdc.arrayPos = make([]uint64, numap)
- }
- arrayPos = vdc.arrayPos[:numap]
- for i := 0; i < int(numap); i++ {
- ap, err := binary.ReadUvarint(&vdc.reader)
- if err != nil {
- return err
- }
- arrayPos[i] = ap
- }
- }
-
- value := uncompressed[offset : offset+l]
- keepGoing = visitor(s.fieldsInv[field], byte(typ), value, arrayPos)
- }
-
- vdc.buf = uncompressed
- }
- return nil
-}
-
-// DocID returns the value of the _id field for the given docNum
-func (s *SegmentBase) DocID(num uint64) ([]byte, error) {
- if num >= s.numDocs {
- return nil, nil
- }
-
- vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx)
-
- meta, compressed := s.getDocStoredMetaAndCompressed(num)
-
- vdc.reader.Reset(meta)
-
- // handle _id field special case
- idFieldValLen, err := binary.ReadUvarint(&vdc.reader)
- if err != nil {
- return nil, err
- }
- idFieldVal := compressed[:idFieldValLen]
-
- visitDocumentCtxPool.Put(vdc)
-
- return idFieldVal, nil
-}
-
-// Count returns the number of documents in this segment.
-func (s *SegmentBase) Count() uint64 {
- return s.numDocs
-}
-
-// DocNumbers returns a bitset corresponding to the doc numbers of all the
-// provided _id strings
-func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) {
- rv := roaring.New()
-
- if len(s.fieldsMap) > 0 {
- idDict, err := s.dictionary("_id")
- if err != nil {
- return nil, err
- }
-
- postingsList := emptyPostingsList
-
- sMax, err := idDict.fst.GetMaxKey()
- if err != nil {
- return nil, err
- }
- sMaxStr := string(sMax)
- filteredIds := make([]string, 0, len(ids))
- for _, id := range ids {
- if id <= sMaxStr {
- filteredIds = append(filteredIds, id)
- }
- }
-
- for _, id := range filteredIds {
- postingsList, err = idDict.postingsList([]byte(id), nil, postingsList)
- if err != nil {
- return nil, err
- }
- postingsList.OrInto(rv)
- }
- }
-
- return rv, nil
-}
-
-// Fields returns the field names used in this segment
-func (s *SegmentBase) Fields() []string {
- return s.fieldsInv
-}
-
-// Path returns the path of this segment on disk
-func (s *Segment) Path() string {
- return s.path
-}
-
-// Close releases all resources associated with this segment
-func (s *Segment) Close() (err error) {
- return s.DecRef()
-}
-
-func (s *Segment) closeActual() (err error) {
- if s.mm != nil {
- err = s.mm.Unmap()
- }
- // try to close file even if unmap failed
- if s.f != nil {
- err2 := s.f.Close()
- if err == nil {
- // try to return first error
- err = err2
- }
- }
- return
-}
-
-// some helpers i started adding for the command-line utility
-
-// Data returns the underlying mmaped data slice
-func (s *Segment) Data() []byte {
- return s.mm
-}
-
-// CRC returns the CRC value stored in the file footer
-func (s *Segment) CRC() uint32 {
- return s.crc
-}
-
-// Version returns the file version in the file footer
-func (s *Segment) Version() uint32 {
- return s.version
-}
-
-// ChunkFactor returns the chunk factor in the file footer
-func (s *Segment) ChunkFactor() uint32 {
- return s.chunkFactor
-}
-
-// FieldsIndexOffset returns the fields index offset in the file footer
-func (s *Segment) FieldsIndexOffset() uint64 {
- return s.fieldsIndexOffset
-}
-
-// StoredIndexOffset returns the stored value index offset in the file footer
-func (s *Segment) StoredIndexOffset() uint64 {
- return s.storedIndexOffset
-}
-
-// DocValueOffset returns the docValue offset in the file footer
-func (s *Segment) DocValueOffset() uint64 {
- return s.docValueOffset
-}
-
-// NumDocs returns the number of documents in the file footer
-func (s *Segment) NumDocs() uint64 {
- return s.numDocs
-}
-
-// DictAddr is a helper function to compute the file offset where the
-// dictionary is stored for the specified field.
-func (s *Segment) DictAddr(field string) (uint64, error) {
- fieldIDPlus1, ok := s.fieldsMap[field]
- if !ok {
- return 0, fmt.Errorf("no such field '%s'", field)
- }
-
- return s.dictLocs[fieldIDPlus1-1], nil
-}
-
-func (s *SegmentBase) loadDvReaders() error {
- if s.docValueOffset == fieldNotUninverted || s.numDocs == 0 {
- return nil
- }
-
- var read uint64
- for fieldID, field := range s.fieldsInv {
- var fieldLocStart, fieldLocEnd uint64
- var n int
- fieldLocStart, n = binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64])
- if n <= 0 {
- return fmt.Errorf("loadDvReaders: failed to read the docvalue offset start for field %d", fieldID)
- }
- read += uint64(n)
- fieldLocEnd, n = binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64])
- if n <= 0 {
- return fmt.Errorf("loadDvReaders: failed to read the docvalue offset end for field %d", fieldID)
- }
- read += uint64(n)
-
- fieldDvReader, err := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd)
- if err != nil {
- return err
- }
- if fieldDvReader != nil {
- s.fieldDvReaders[uint16(fieldID)] = fieldDvReader
- s.fieldDvNames = append(s.fieldDvNames, field)
- }
- }
-
- return nil
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/write.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/write.go
deleted file mode 100644
index cddaedd007..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/write.go
+++ /dev/null
@@ -1,145 +0,0 @@
-// Copyright (c) 2017 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package zap
-
-import (
- "encoding/binary"
- "io"
-
- "github.com/RoaringBitmap/roaring"
-)
-
-// writes out the length of the roaring bitmap in bytes as varint
-// then writes out the roaring bitmap itself
-func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer,
- reuseBufVarint []byte) (int, error) {
- buf, err := r.ToBytes()
- if err != nil {
- return 0, err
- }
-
- var tw int
-
- // write out the length
- n := binary.PutUvarint(reuseBufVarint, uint64(len(buf)))
- nw, err := w.Write(reuseBufVarint[:n])
- tw += nw
- if err != nil {
- return tw, err
- }
-
- // write out the roaring bytes
- nw, err = w.Write(buf)
- tw += nw
- if err != nil {
- return tw, err
- }
-
- return tw, nil
-}
-
-func persistFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (uint64, error) {
- var rv uint64
- var fieldsOffsets []uint64
-
- for fieldID, fieldName := range fieldsInv {
- // record start of this field
- fieldsOffsets = append(fieldsOffsets, uint64(w.Count()))
-
- // write out the dict location and field name length
- _, err := writeUvarints(w, dictLocs[fieldID], uint64(len(fieldName)))
- if err != nil {
- return 0, err
- }
-
- // write out the field name
- _, err = w.Write([]byte(fieldName))
- if err != nil {
- return 0, err
- }
- }
-
- // now write out the fields index
- rv = uint64(w.Count())
- for fieldID := range fieldsInv {
- err := binary.Write(w, binary.BigEndian, fieldsOffsets[fieldID])
- if err != nil {
- return 0, err
- }
- }
-
- return rv, nil
-}
-
-// FooterSize is the size of the footer record in bytes
-// crc + ver + chunk + field offset + stored offset + num docs + docValueOffset
-const FooterSize = 4 + 4 + 4 + 8 + 8 + 8 + 8
-
-func persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64,
- chunkFactor uint32, crcBeforeFooter uint32, writerIn io.Writer) error {
- w := NewCountHashWriter(writerIn)
- w.crc = crcBeforeFooter
-
- // write out the number of docs
- err := binary.Write(w, binary.BigEndian, numDocs)
- if err != nil {
- return err
- }
- // write out the stored field index location:
- err = binary.Write(w, binary.BigEndian, storedIndexOffset)
- if err != nil {
- return err
- }
- // write out the field index location
- err = binary.Write(w, binary.BigEndian, fieldsIndexOffset)
- if err != nil {
- return err
- }
- // write out the fieldDocValue location
- err = binary.Write(w, binary.BigEndian, docValueOffset)
- if err != nil {
- return err
- }
- // write out 32-bit chunk factor
- err = binary.Write(w, binary.BigEndian, chunkFactor)
- if err != nil {
- return err
- }
- // write out 32-bit version
- err = binary.Write(w, binary.BigEndian, Version)
- if err != nil {
- return err
- }
- // write out CRC-32 of everything upto but not including this CRC
- err = binary.Write(w, binary.BigEndian, w.crc)
- if err != nil {
- return err
- }
- return nil
-}
-
-func writeUvarints(w io.Writer, vals ...uint64) (tw int, err error) {
- buf := make([]byte, binary.MaxVarintLen64)
- for _, val := range vals {
- n := binary.PutUvarint(buf, val)
- var nw int
- nw, err = w.Write(buf[:n])
- tw += nw
- if err != nil {
- return tw, err
- }
- }
- return tw, err
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/zap.md b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/zap.md
deleted file mode 100644
index d74dc548b8..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/zap.md
+++ /dev/null
@@ -1,177 +0,0 @@
-# ZAP File Format
-
-## Legend
-
-### Sections
-
- |========|
- | | section
- |========|
-
-### Fixed-size fields
-
- |--------| |----| |--| |-|
- | | uint64 | | uint32 | | uint16 | | uint8
- |--------| |----| |--| |-|
-
-### Varints
-
- |~~~~~~~~|
- | | varint(up to uint64)
- |~~~~~~~~|
-
-### Arbitrary-length fields
-
- |--------...---|
- | | arbitrary-length field (string, vellum, roaring bitmap)
- |--------...---|
-
-### Chunked data
-
- [--------]
- [ ]
- [--------]
-
-## Overview
-
-Footer section describes the configuration of particular ZAP file. The format of footer is version-dependent, so it is necessary to check `V` field before the parsing.
-
- |==================================================|
- | Stored Fields |
- |==================================================|
- |-----> | Stored Fields Index |
- | |==================================================|
- | | Dictionaries + Postings + DocValues |
- | |==================================================|
- | |---> | DocValues Index |
- | | |==================================================|
- | | | Fields |
- | | |==================================================|
- | | |-> | Fields Index |
- | | | |========|========|========|========|====|====|====|
- | | | | D# | SF | F | FDV | CF | V | CC | (Footer)
- | | | |========|====|===|====|===|====|===|====|====|====|
- | | | | | |
- |-+-+-----------------| | |
- | |--------------------------| |
- |-------------------------------------|
-
- D#. Number of Docs.
- SF. Stored Fields Index Offset.
- F. Field Index Offset.
- FDV. Field DocValue Offset.
- CF. Chunk Factor.
- V. Version.
- CC. CRC32.
-
-## Stored Fields
-
-Stored Fields Index is `D#` consecutive 64-bit unsigned integers - offsets, where relevant Stored Fields Data records are located.
-
- 0 [SF] [SF + D# * 8]
- | Stored Fields | Stored Fields Index |
- |================================|==================================|
- | | |
- | |--------------------| ||--------|--------|. . .|--------||
- | |-> | Stored Fields Data | || 0 | 1 | | D# - 1 ||
- | | |--------------------| ||--------|----|---|. . .|--------||
- | | | | |
- |===|============================|==============|===================|
- | |
- |-------------------------------------------|
-
-Stored Fields Data is an arbitrary size record, which consists of metadata and [Snappy](https://github.com/golang/snappy)-compressed data.
-
- Stored Fields Data
- |~~~~~~~~|~~~~~~~~|~~~~~~~~...~~~~~~~~|~~~~~~~~...~~~~~~~~|
- | MDS | CDS | MD | CD |
- |~~~~~~~~|~~~~~~~~|~~~~~~~~...~~~~~~~~|~~~~~~~~...~~~~~~~~|
-
- MDS. Metadata size.
- CDS. Compressed data size.
- MD. Metadata.
- CD. Snappy-compressed data.
-
-## Fields
-
-Fields Index section located between addresses `F` and `len(file) - len(footer)` and consist of `uint64` values (`F1`, `F2`, ...) which are offsets to records in Fields section. We have `F# = (len(file) - len(footer) - F) / sizeof(uint64)` fields.
-
-
- (...) [F] [F + F#]
- | Fields | Fields Index. |
- |================================|================================|
- | | |
- | |~~~~~~~~|~~~~~~~~|---...---|||--------|--------|...|--------||
- ||->| Dict | Length | Name ||| 0 | 1 | | F# - 1 ||
- || |~~~~~~~~|~~~~~~~~|---...---|||--------|----|---|...|--------||
- || | | |
- ||===============================|==============|=================|
- | |
- |----------------------------------------------|
-
-
-## Dictionaries + Postings
-
-Each of fields has its own dictionary, encoded in [Vellum](https://github.com/couchbase/vellum) format. Dictionary consists of pairs `(term, offset)`, where `offset` indicates the position of postings (list of documents) for this particular term.
-
- |================================================================|- Dictionaries +
- | | Postings +
- | | DocValues
- | Freq/Norm (chunked) |
- | [~~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~] |
- | |->[ Freq | Norm (float32 under varint) ] |
- | | [~~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~] |
- | | |
- | |------------------------------------------------------------| |
- | Location Details (chunked) | |
- | [~~~~~~|~~~~~|~~~~~~~|~~~~~|~~~~~~|~~~~~~~~|~~~~~] | |
- | |->[ Size | Pos | Start | End | Arr# | ArrPos | ... ] | |
- | | [~~~~~~|~~~~~|~~~~~~~|~~~~~|~~~~~~|~~~~~~~~|~~~~~] | |
- | | | |
- | |----------------------| | |
- | Postings List | | |
- | |~~~~~~~~|~~~~~|~~|~~~~~~~~|-----------...--| | |
- | |->| F/N | LD | Length | ROARING BITMAP | | |
- | | |~~~~~|~~|~~~~~~~~|~~~~~~~~|-----------...--| | |
- | | |----------------------------------------------| |
- | |--------------------------------------| |
- | Dictionary | |
- | |~~~~~~~~|--------------------------|-...-| |
- | |->| Length | VELLUM DATA : (TERM -> OFFSET) | |
- | | |~~~~~~~~|----------------------------...-| |
- | | |
- |======|=========================================================|- DocValues Index
- | | |
- |======|=========================================================|- Fields
- | | |
- | |~~~~|~~~|~~~~~~~~|---...---| |
- | | Dict | Length | Name | |
- | |~~~~~~~~|~~~~~~~~|---...---| |
- | |
- |================================================================|
-
-## DocValues
-
-DocValues Index is `F#` pairs of varints, one pair per field. Each pair of varints indicates start and end point of DocValues slice.
-
- |================================================================|
- | |------...--| |
- | |->| DocValues |<-| |
- | | |------...--| | |
- |==|=================|===========================================|- DocValues Index
- ||~|~~~~~~~~~|~~~~~~~|~~| |~~~~~~~~~~~~~~|~~~~~~~~~~~~||
- || DV1 START | DV1 STOP | . . . . . | DV(F#) START | DV(F#) END ||
- ||~~~~~~~~~~~|~~~~~~~~~~| |~~~~~~~~~~~~~~|~~~~~~~~~~~~||
- |================================================================|
-
-DocValues is chunked Snappy-compressed values for each document and field.
-
- [~~~~~~~~~~~~~~~|~~~~~~|~~~~~~~~~|-...-|~~~~~~|~~~~~~~~~|--------------------...-]
- [ Doc# in Chunk | Doc1 | Offset1 | ... | DocN | OffsetN | SNAPPY COMPRESSED DATA ]
- [~~~~~~~~~~~~~~~|~~~~~~|~~~~~~~~~|-...-|~~~~~~|~~~~~~~~~|--------------------...-]
-
-Last 16 bytes are description of chunks.
-
- |~~~~~~~~~~~~...~|----------------|----------------|
- | Chunk Sizes | Chunk Size Arr | Chunk# |
- |~~~~~~~~~~~~...~|----------------|----------------|
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment_plugin.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment_plugin.go
new file mode 100644
index 0000000000..01eda7fbd5
--- /dev/null
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment_plugin.go
@@ -0,0 +1,108 @@
+// Copyright (c) 2019 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package scorch
+
+import (
+	"fmt"
+	"sort"
+
+	"github.com/blevesearch/bleve/index/scorch/segment"
+
+	zapv11 "github.com/blevesearch/zap/v11"
+	zapv12 "github.com/blevesearch/zap/v12"
+)
+
+// supportedSegmentPlugins maps a segment type name to the plugins
+// registered for it, keyed by segment format version.
+var supportedSegmentPlugins map[string]map[uint32]segment.Plugin
+
+// defaultSegmentPlugin is the plugin most recently registered with
+// makeDefault set to true.
+var defaultSegmentPlugin segment.Plugin
+
+// init registers the zap v12 and v11 plugins, making v11 the default.
+func init() {
+	ResetPlugins()
+	RegisterPlugin(zapv12.Plugin(), false)
+	RegisterPlugin(zapv11.Plugin(), true)
+}
+
+// ResetPlugins discards every registered segment plugin. The default
+// plugin is left untouched.
+func ResetPlugins() {
+	supportedSegmentPlugins = map[string]map[uint32]segment.Plugin{}
+}
+
+// RegisterPlugin records plugin under its type and version, replacing
+// any plugin previously registered for the same pair. When makeDefault
+// is true the plugin also becomes the default segment plugin.
+func RegisterPlugin(plugin segment.Plugin, makeDefault bool) {
+	if supportedSegmentPlugins == nil {
+		// writing to a nil map panics, so allocate the registry on
+		// demand in case ResetPlugins has not run yet
+		ResetPlugins()
+	}
+	typ := plugin.Type()
+	versions, ok := supportedSegmentPlugins[typ]
+	if !ok {
+		versions = map[uint32]segment.Plugin{}
+		supportedSegmentPlugins[typ] = versions
+	}
+	versions[plugin.Version()] = plugin
+	if makeDefault {
+		defaultSegmentPlugin = plugin
+	}
+}
+
+// SupportedSegmentTypes returns the registered segment type names in
+// ascending order, so that callers and error messages see a stable list.
+func SupportedSegmentTypes() (rv []string) {
+	for typ := range supportedSegmentPlugins {
+		rv = append(rv, typ)
+	}
+	sort.Strings(rv)
+	return rv
+}
+
+// SupportedSegmentTypeVersions returns the versions registered for the
+// segment type typ in ascending order; it is empty for an unknown type.
+func SupportedSegmentTypeVersions(typ string) (rv []uint32) {
+	for ver := range supportedSegmentPlugins[typ] {
+		rv = append(rv, ver)
+	}
+	sort.Slice(rv, func(i, j int) bool { return rv[i] < rv[j] })
+	return rv
+}
+
+// loadSegmentPlugin selects the plugin registered for the given segment
+// type and version and stores it in s.segPlugin. It returns an error
+// listing the supported alternatives when the type or version is unknown.
+func (s *Scorch) loadSegmentPlugin(forcedSegmentType string,
+	forcedSegmentVersion uint32) error {
+	versions, ok := supportedSegmentPlugins[forcedSegmentType]
+	if !ok {
+		return fmt.Errorf("unsupported segment type: %s, supported: %v",
+			forcedSegmentType, SupportedSegmentTypes())
+	}
+	segPlugin, ok := versions[forcedSegmentVersion]
+	if !ok {
+		return fmt.Errorf(
+			"unsupported version %d for segment type: %s, supported: %v",
+			forcedSegmentVersion, forcedSegmentType,
+			SupportedSegmentTypeVersions(forcedSegmentType))
+	}
+	s.segPlugin = segPlugin
+	return nil
+}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go
index 470868d0eb..7cc87bdea0 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go
@@ -17,9 +17,10 @@ package scorch
import (
"fmt"
"log"
+ "os"
"github.com/blevesearch/bleve/index/scorch/segment"
- bolt "github.com/etcd-io/bbolt"
+ bolt "go.etcd.io/bbolt"
)
type RollbackPoint struct {
@@ -34,13 +35,22 @@ func (r *RollbackPoint) GetInternal(key []byte) []byte {
// RollbackPoints returns an array of rollback points available for
// the application to rollback to, with more recent rollback points
// (higher epochs) coming first.
-func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) {
- if s.rootBolt == nil {
- return nil, fmt.Errorf("RollbackPoints: root is nil")
+func RollbackPoints(path string) ([]*RollbackPoint, error) {
+ if len(path) == 0 {
+ return nil, fmt.Errorf("RollbackPoints: invalid path")
+ }
+
+ rootBoltPath := path + string(os.PathSeparator) + "root.bolt"
+ rootBoltOpt := &bolt.Options{
+ ReadOnly: true,
+ }
+ rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt)
+ if err != nil || rootBolt == nil {
+ return nil, err
}
// start a read-only bolt transaction
- tx, err := s.rootBolt.Begin(false)
+ tx, err := rootBolt.Begin(false)
if err != nil {
return nil, fmt.Errorf("RollbackPoints: failed to start" +
" read-only transaction")
@@ -49,6 +59,7 @@ func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) {
// read-only bolt transactions to be rolled back
defer func() {
_ = tx.Rollback()
+ _ = rootBolt.Close()
}()
snapshots := tx.Bucket(boltSnapshotsBucket)
@@ -105,69 +116,103 @@ func (s *Scorch) RollbackPoints() ([]*RollbackPoint, error) {
return rollbackPoints, nil
}
-// Rollback atomically and durably (if unsafeBatch is unset) brings
-// the store back to the point in time as represented by the
-// RollbackPoint. Rollback() should only be passed a RollbackPoint
-// that came from the same store using the RollbackPoints() API.
-func (s *Scorch) Rollback(to *RollbackPoint) error {
+// Rollback atomically and durably brings the store back to the point
+// in time as represented by the RollbackPoint.
+// Rollback() should only be passed a RollbackPoint that came from the
+// same store using the RollbackPoints() API along with the index path.
+//
+// The result is named so that the deferred commit, sync and close steps
+// can report their failures to the caller.
+func Rollback(path string, to *RollbackPoint) (err error) {
if to == nil {
return fmt.Errorf("Rollback: RollbackPoint is nil")
}
-
- if s.rootBolt == nil {
- return fmt.Errorf("Rollback: root is nil")
+	if len(path) == 0 {
+		return fmt.Errorf("Rollback: index path is empty")
}
- revert := &snapshotReversion{}
-
- s.rootLock.Lock()
+	rootBoltPath := path + string(os.PathSeparator) + "root.bolt"
+	rootBoltOpt := &bolt.Options{
+		ReadOnly: false,
+	}
+	rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt)
+	if err != nil || rootBolt == nil {
+		return err
+	}
+	defer func() {
+		err1 := rootBolt.Close()
+		if err1 != nil && err == nil {
+			err = err1
+		}
+	}()
- err := s.rootBolt.View(func(tx *bolt.Tx) error {
+	// pick all the younger persisted epochs in bolt store
+	// including the target one.
+	var found bool
+	var eligibleEpochs []uint64
+	err = rootBolt.View(func(tx *bolt.Tx) error {
snapshots := tx.Bucket(boltSnapshotsBucket)
if snapshots == nil {
- return fmt.Errorf("Rollback: no snapshots available")
- }
-
- pos := segment.EncodeUvarintAscending(nil, to.epoch)
-
- snapshot := snapshots.Bucket(pos)
- if snapshot == nil {
- return fmt.Errorf("Rollback: snapshot not found")
+			return nil
}
-
- indexSnapshot, err := s.loadSnapshot(snapshot)
- if err != nil {
- return fmt.Errorf("Rollback: unable to load snapshot: %v", err)
- }
-
- // add segments referenced by loaded index snapshot to the
- // ineligibleForRemoval map
- for _, segSnap := range indexSnapshot.segment {
- filename := zapFileName(segSnap.id)
- s.ineligibleForRemoval[filename] = true
+		sc := snapshots.Cursor()
+		for sk, _ := sc.Last(); sk != nil && !found; sk, _ = sc.Prev() {
+			_, snapshotEpoch, err := segment.DecodeUvarintAscending(sk)
+			if err != nil {
+				continue
+			}
+			if snapshotEpoch == to.epoch {
+				found = true
+			}
+			eligibleEpochs = append(eligibleEpochs, snapshotEpoch)
}
-
- revert.snapshot = indexSnapshot
- revert.applied = make(chan error)
- revert.persisted = make(chan error)
-
return nil
})
- s.rootLock.Unlock()
+	if err != nil {
+		return err
+	}
+	if len(eligibleEpochs) == 0 {
+		return fmt.Errorf("Rollback: no persisted epochs found in bolt")
+	}
+	if !found {
+		return fmt.Errorf("Rollback: target epoch %d not found in bolt", to.epoch)
+	}
+	// start a write transaction
+	tx, err := rootBolt.Begin(true)
if err != nil {
return err
}
- // introduce the reversion
- s.revertToSnapshots <- revert
+	defer func() {
+		if err == nil {
+			err = tx.Commit()
+		} else {
+			_ = tx.Rollback()
+		}
+		if err == nil {
+			err = rootBolt.Sync()
+		}
+	}()
- // block until this snapshot is applied
- err = <-revert.applied
- if err != nil {
- return fmt.Errorf("Rollback: failed with err: %v", err)
+	snapshots := tx.Bucket(boltSnapshotsBucket)
+	if snapshots == nil {
+		return nil
+	}
+	for _, epoch := range eligibleEpochs {
+		if epoch == to.epoch {
+			// every younger epoch has been removed; the target itself
+			// is kept, so stop here
+			return nil
+		}
+		k := segment.EncodeUvarintAscending(nil, epoch)
+		delErr := snapshots.DeleteBucket(k)
+		if delErr != nil && delErr != bolt.ErrBucketNotFound {
+			// a non-nil result makes the deferred handler roll back
+			return delErr
+		}
}
- return <-revert.persisted
+	return err
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/stats.go b/vendor/github.com/blevesearch/bleve/index/scorch/stats.go
index 6549fddf51..e638362a71 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/stats.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/stats.go
@@ -98,10 +98,12 @@ type Stats struct {
TotFileSegmentsAtRoot uint64
TotFileMergeWrittenBytes uint64
- TotFileMergeZapBeg uint64
- TotFileMergeZapEnd uint64
- TotFileMergeZapTime uint64
- MaxFileMergeZapTime uint64
+ TotFileMergeZapBeg uint64
+ TotFileMergeZapEnd uint64
+ TotFileMergeZapTime uint64
+ MaxFileMergeZapTime uint64
+ TotFileMergeZapIntroductionTime uint64
+ MaxFileMergeZapIntroductionTime uint64
TotFileMergeIntroductions uint64
TotFileMergeIntroductionsDone uint64
diff --git a/vendor/github.com/blevesearch/bleve/index/store/boltdb/iterator.go b/vendor/github.com/blevesearch/bleve/index/store/boltdb/iterator.go
index 4b5019f1f2..cf4da87c35 100644
--- a/vendor/github.com/blevesearch/bleve/index/store/boltdb/iterator.go
+++ b/vendor/github.com/blevesearch/bleve/index/store/boltdb/iterator.go
@@ -17,7 +17,7 @@ package boltdb
import (
"bytes"
- bolt "github.com/etcd-io/bbolt"
+ bolt "go.etcd.io/bbolt"
)
type Iterator struct {
diff --git a/vendor/github.com/blevesearch/bleve/index/store/boltdb/reader.go b/vendor/github.com/blevesearch/bleve/index/store/boltdb/reader.go
index 4cd94183c6..7977ebbe56 100644
--- a/vendor/github.com/blevesearch/bleve/index/store/boltdb/reader.go
+++ b/vendor/github.com/blevesearch/bleve/index/store/boltdb/reader.go
@@ -16,7 +16,7 @@ package boltdb
import (
"github.com/blevesearch/bleve/index/store"
- bolt "github.com/etcd-io/bbolt"
+ bolt "go.etcd.io/bbolt"
)
type Reader struct {
diff --git a/vendor/github.com/blevesearch/bleve/index/store/boltdb/store.go b/vendor/github.com/blevesearch/bleve/index/store/boltdb/store.go
index 56613d5315..3c749693c0 100644
--- a/vendor/github.com/blevesearch/bleve/index/store/boltdb/store.go
+++ b/vendor/github.com/blevesearch/bleve/index/store/boltdb/store.go
@@ -30,7 +30,7 @@ import (
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/registry"
- bolt "github.com/etcd-io/bbolt"
+ bolt "go.etcd.io/bbolt"
)
const (
diff --git a/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go b/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go
index 24f5aae949..8e915c6ad5 100644
--- a/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go
+++ b/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go
@@ -820,7 +820,8 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
if numUpdates > 0 {
go func() {
- for _, doc := range batch.IndexOps {
+ for k := range batch.IndexOps {
+ doc := batch.IndexOps[k]
if doc != nil {
aw := index.NewAnalysisWork(udc, doc, resultChan)
// put the work on the queue
diff --git a/vendor/github.com/blevesearch/bleve/search/collector/topn.go b/vendor/github.com/blevesearch/bleve/search/collector/topn.go
index a027a12c22..8d4afb63a0 100644
--- a/vendor/github.com/blevesearch/bleve/search/collector/topn.go
+++ b/vendor/github.com/blevesearch/bleve/search/collector/topn.go
@@ -17,6 +17,7 @@ package collector
import (
"context"
"reflect"
+ "strconv"
"time"
"github.com/blevesearch/bleve/index"
@@ -90,6 +91,18 @@ func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *Top
rv.searchAfter = &search.DocumentMatch{
Sort: after,
}
+
+ for pos, ss := range sort {
+ if ss.RequiresDocID() {
+ rv.searchAfter.ID = after[pos]
+ }
+ if ss.RequiresScoring() {
+ if score, err := strconv.ParseFloat(after[pos], 64); err == nil {
+ rv.searchAfter.Score = score
+ }
+ }
+ }
+
return rv
}
diff --git a/vendor/github.com/blevesearch/bleve/search/highlight/fragmenter/simple/simple.go b/vendor/github.com/blevesearch/bleve/search/highlight/fragmenter/simple/simple.go
index 6f6ecedf55..9c63f7fb6c 100644
--- a/vendor/github.com/blevesearch/bleve/search/highlight/fragmenter/simple/simple.go
+++ b/vendor/github.com/blevesearch/bleve/search/highlight/fragmenter/simple/simple.go
@@ -58,6 +58,11 @@ OUTER:
// push back towards beginning
// without cross maxbegin
for start > 0 && used < s.fragmentSize {
+ if start > len(orig) {
+ // bail if out of bounds, possibly due to token replacement
+ // e.g with a regexp replacement
+ continue OUTER
+ }
r, size := utf8.DecodeLastRune(orig[0:start])
if r == utf8.RuneError {
continue OUTER // bail
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go
index 38cb6467fb..c4b8af9270 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go
@@ -224,7 +224,8 @@ func ComputeGeoRange(term uint64, shift uint,
func buildRectFilter(dvReader index.DocValueReader, field string,
minLon, minLat, maxLon, maxLat float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
- var lon, lat float64
+ // check geo matches against all numeric type terms indexed
+ var lons, lats []float64
var found bool
err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
// only consider the values which are shifted 0
@@ -234,15 +235,19 @@ func buildRectFilter(dvReader index.DocValueReader, field string,
var i64 int64
i64, err = prefixCoded.Int64()
if err == nil {
- lon = geo.MortonUnhashLon(uint64(i64))
- lat = geo.MortonUnhashLat(uint64(i64))
+ lons = append(lons, geo.MortonUnhashLon(uint64(i64)))
+ lats = append(lats, geo.MortonUnhashLat(uint64(i64)))
found = true
}
}
})
if err == nil && found {
- return geo.BoundingBoxContains(lon, lat,
- minLon, minLat, maxLon, maxLat)
+ for i := range lons {
+ if geo.BoundingBoxContains(lons[i], lats[i],
+ minLon, minLat, maxLon, maxLat) {
+ return true
+ }
+ }
}
return false
}
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go
index b01ae6a0af..b6f2932445 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go
@@ -83,7 +83,7 @@ func boxSearcher(indexReader index.IndexReader,
return boxSearcher, nil
}
- // build geoboundinggox searcher for that bounding box
+ // build geoboundingbox searcher for that bounding box
boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost,
options, checkBoundaries)
@@ -96,7 +96,8 @@ func boxSearcher(indexReader index.IndexReader,
func buildDistFilter(dvReader index.DocValueReader, field string,
centerLon, centerLat, maxDist float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
- var lon, lat float64
+ // check geo matches against all numeric type terms indexed
+ var lons, lats []float64
var found bool
err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
@@ -106,16 +107,18 @@ func buildDistFilter(dvReader index.DocValueReader, field string,
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
- lon = geo.MortonUnhashLon(uint64(i64))
- lat = geo.MortonUnhashLat(uint64(i64))
+ lons = append(lons, geo.MortonUnhashLon(uint64(i64)))
+ lats = append(lats, geo.MortonUnhashLat(uint64(i64)))
found = true
}
}
})
if err == nil && found {
- dist := geo.Haversin(lon, lat, centerLon, centerLat)
- if dist <= maxDist/1000 {
- return true
+ for i := range lons {
+ dist := geo.Haversin(lons[i], lats[i], centerLon, centerLat)
+ if dist <= maxDist/1000 {
+ return true
+ }
}
}
return false
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go
index 3bb47519d0..5f16aa8d26 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go
@@ -15,6 +15,7 @@
package searcher
import (
+ "fmt"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
@@ -26,6 +27,10 @@ func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader,
polygon []geo.Point, field string, boost float64,
options search.SearcherOptions) (search.Searcher, error) {
+ if len(polygon) < 3 {
+ return nil, fmt.Errorf("Too few points specified for the polygon boundary")
+ }
+
// compute the bounding box enclosing the polygon
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err :=
geo.BoundingRectangleForPolygon(polygon)
@@ -63,7 +68,8 @@ func almostEqual(a, b float64) bool {
func buildPolygonFilter(dvReader index.DocValueReader, field string,
polygon []geo.Point) FilterFunc {
return func(d *search.DocumentMatch) bool {
- var lon, lat float64
+ // check geo matches against all numeric type terms indexed
+ var lons, lats []float64
var found bool
err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
@@ -73,8 +79,8 @@ func buildPolygonFilter(dvReader index.DocValueReader, field string,
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
- lon = geo.MortonUnhashLon(uint64(i64))
- lat = geo.MortonUnhashLat(uint64(i64))
+ lons = append(lons, geo.MortonUnhashLon(uint64(i64)))
+ lats = append(lats, geo.MortonUnhashLat(uint64(i64)))
found = true
}
}
@@ -84,26 +90,36 @@ func buildPolygonFilter(dvReader index.DocValueReader, field string,
// the polygon. ie it might fail for certain points on the polygon boundaries.
if err == nil && found {
nVertices := len(polygon)
- var inside bool
- // check for a direct vertex match
- if almostEqual(polygon[0].Lat, lat) &&
- almostEqual(polygon[0].Lon, lon) {
- return true
+ if len(polygon) < 3 {
+ return false
+ }
+ rayIntersectsSegment := func(point, a, b geo.Point) bool {
+ return (a.Lat > point.Lat) != (b.Lat > point.Lat) &&
+ point.Lon < (b.Lon-a.Lon)*(point.Lat-a.Lat)/(b.Lat-a.Lat)+a.Lon
}
- for i := 1; i < nVertices; i++ {
- if almostEqual(polygon[i].Lat, lat) &&
- almostEqual(polygon[i].Lon, lon) {
+ for i := range lons {
+ pt := geo.Point{Lon: lons[i], Lat: lats[i]}
+ inside := rayIntersectsSegment(pt, polygon[len(polygon)-1], polygon[0])
+ // check for a direct vertex match
+ if almostEqual(polygon[0].Lat, lats[i]) &&
+ almostEqual(polygon[0].Lon, lons[i]) {
return true
}
- if (polygon[i].Lat > lat) != (polygon[i-1].Lat > lat) &&
- lon < (polygon[i-1].Lon-polygon[i].Lon)*(lat-polygon[i].Lat)/
- (polygon[i-1].Lat-polygon[i].Lat)+polygon[i].Lon {
- inside = !inside
+
+ for j := 1; j < nVertices; j++ {
+ if almostEqual(polygon[j].Lat, lats[i]) &&
+ almostEqual(polygon[j].Lon, lons[i]) {
+ return true
+ }
+ if rayIntersectsSegment(pt, polygon[j-1], polygon[j]) {
+ inside = !inside
+ }
+ }
+ if inside {
+ return true
}
}
- return inside
-
}
return false
}