diff options
Diffstat (limited to 'vendor/github.com/blevesearch/bleve')
45 files changed, 1377 insertions, 534 deletions
diff --git a/vendor/github.com/blevesearch/bleve/.travis.yml b/vendor/github.com/blevesearch/bleve/.travis.yml index 35f7b60f23..e00e7b9948 100644 --- a/vendor/github.com/blevesearch/bleve/.travis.yml +++ b/vendor/github.com/blevesearch/bleve/.travis.yml @@ -3,9 +3,9 @@ sudo: false language: go go: - - "1.9.x" - "1.10.x" - "1.11.x" + - "1.12.x" script: - go get golang.org/x/tools/cmd/cover @@ -15,7 +15,12 @@ script: - gvt restore - go test -race -v $(go list ./... | grep -v vendor/) - go vet $(go list ./... | grep -v vendor/) - - errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/) + - go test ./test -v -indexType scorch + - if [[ ${TRAVIS_GO_VERSION} =~ ^1\.10 ]]; then + echo "errcheck skipped for go version" $TRAVIS_GO_VERSION; + else + errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/); + fi - docs/project-code-coverage.sh - docs/build_children.sh diff --git a/vendor/github.com/blevesearch/bleve/document/field_text.go b/vendor/github.com/blevesearch/bleve/document/field_text.go index c8e871c9d5..6bd74c7127 100644 --- a/vendor/github.com/blevesearch/bleve/document/field_text.go +++ b/vendor/github.com/blevesearch/bleve/document/field_text.go @@ -86,6 +86,10 @@ func (t *TextField) Analyze() (int, analysis.TokenFrequencies) { return fieldLength, tokenFreqs } +func (t *TextField) Analyzer() *analysis.Analyzer { + return t.analyzer +} + func (t *TextField) Value() []byte { return t.value } diff --git a/vendor/github.com/blevesearch/bleve/geo/geo.go b/vendor/github.com/blevesearch/bleve/geo/geo.go index 86861b4f3b..583451e308 100644 --- a/vendor/github.com/blevesearch/bleve/geo/geo.go +++ b/vendor/github.com/blevesearch/bleve/geo/geo.go @@ -37,6 +37,12 @@ var geoTolerance = 1E-6 var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0 var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0 +// Point represents a geo point. +type Point struct { + Lon float64 + Lat float64 +} + // MortonHash computes the morton hash value for the provided geo point // This point is ordered as lon, lat. func MortonHash(lon, lat float64) uint64 { @@ -168,3 +174,35 @@ func checkLongitude(longitude float64) error { } return nil } + +func BoundingRectangleForPolygon(polygon []Point) ( + float64, float64, float64, float64, error) { + err := checkLongitude(polygon[0].Lon) + if err != nil { + return 0, 0, 0, 0, err + } + err = checkLatitude(polygon[0].Lat) + if err != nil { + return 0, 0, 0, 0, err + } + maxY, minY := polygon[0].Lat, polygon[0].Lat + maxX, minX := polygon[0].Lon, polygon[0].Lon + for i := 1; i < len(polygon); i++ { + err := checkLongitude(polygon[i].Lon) + if err != nil { + return 0, 0, 0, 0, err + } + err = checkLatitude(polygon[i].Lat) + if err != nil { + return 0, 0, 0, 0, err + } + + maxY = math.Max(maxY, polygon[i].Lat) + minY = math.Min(minY, polygon[i].Lat) + + maxX = math.Max(maxX, polygon[i].Lon) + minX = math.Min(minX, polygon[i].Lon) + } + + return minX, maxY, maxX, minY, nil +} diff --git a/vendor/github.com/blevesearch/bleve/geo/geohash.go b/vendor/github.com/blevesearch/bleve/geo/geohash.go index 35db720c0f..d3d4dfa8b5 100644 --- a/vendor/github.com/blevesearch/bleve/geo/geohash.go +++ b/vendor/github.com/blevesearch/bleve/geo/geohash.go @@ -1,32 +1,21 @@ -// The code here was obtained from: -// https://github.com/mmcloughlin/geohash - -// The MIT License (MIT) -// Copyright (c) 2015 Michael McLoughlin -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: - -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. - -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. +// Copyright (c) 2019 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// This implementation is inspired from the geohash-js +// ref: https://github.com/davetroy/geohash-js package geo -import ( - "math" -) - // encoding encapsulates an encoding defined by a given base32 alphabet. type encoding struct { enc string @@ -47,128 +36,76 @@ func newEncoding(encoder string) *encoding { return e } -// Decode string into bits of a 64-bit word. The string s may be at most 12 -// characters. -func (e *encoding) decode(s string) uint64 { - x := uint64(0) - for i := 0; i < len(s); i++ { - x = (x << 5) | uint64(e.dec[s[i]]) - } - return x -} - -// Encode bits of 64-bit word into a string. -func (e *encoding) encode(x uint64) string { - b := [12]byte{} - for i := 0; i < 12; i++ { - b[11-i] = e.enc[x&0x1f] - x >>= 5 - } - return string(b[:]) -} - -// Base32Encoding with the Geohash alphabet. +// base32encoding with the Geohash alphabet. var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz") -// BoundingBox returns the region encoded by the given string geohash. -func geoBoundingBox(hash string) geoBox { - bits := uint(5 * len(hash)) - inthash := base32encoding.decode(hash) - return geoBoundingBoxIntWithPrecision(inthash, bits) -} - -// Box represents a rectangle in latitude/longitude space. -type geoBox struct { - minLat float64 - maxLat float64 - minLng float64 - maxLng float64 -} - -// Round returns a point inside the box, making an effort to round to minimal -// precision. -func (b geoBox) round() (lat, lng float64) { - x := maxDecimalPower(b.maxLat - b.minLat) - lat = math.Ceil(b.minLat/x) * x - x = maxDecimalPower(b.maxLng - b.minLng) - lng = math.Ceil(b.minLng/x) * x - return -} - -// precalculated for performance -var exp232 = math.Exp2(32) - -// errorWithPrecision returns the error range in latitude and longitude for in -// integer geohash with bits of precision. -func errorWithPrecision(bits uint) (latErr, lngErr float64) { - b := int(bits) - latBits := b / 2 - lngBits := b - latBits - latErr = math.Ldexp(180.0, -latBits) - lngErr = math.Ldexp(360.0, -lngBits) - return -} - -// minDecimalPlaces returns the minimum number of decimal places such that -// there must exist an number with that many places within any range of width -// r. This is intended for returning minimal precision coordinates inside a -// box. -func maxDecimalPower(r float64) float64 { - m := int(math.Floor(math.Log10(r))) - return math.Pow10(m) -} - -// Encode the position of x within the range -r to +r as a 32-bit integer. -func encodeRange(x, r float64) uint32 { - p := (x + r) / (2 * r) - return uint32(p * exp232) -} - -// Decode the 32-bit range encoding X back to a value in the range -r to +r. -func decodeRange(X uint32, r float64) float64 { - p := float64(X) / exp232 - x := 2*r*p - r - return x -} - -// Squash the even bitlevels of X into a 32-bit word. Odd bitlevels of X are -// ignored, and may take any value. -func squash(X uint64) uint32 { - X &= 0x5555555555555555 - X = (X | (X >> 1)) & 0x3333333333333333 - X = (X | (X >> 2)) & 0x0f0f0f0f0f0f0f0f - X = (X | (X >> 4)) & 0x00ff00ff00ff00ff - X = (X | (X >> 8)) & 0x0000ffff0000ffff - X = (X | (X >> 16)) & 0x00000000ffffffff - return uint32(X) -} +var masks = []uint64{16, 8, 4, 2, 1} + +// DecodeGeoHash decodes the string geohash faster with +// higher precision. This api is in experimental phase. +func DecodeGeoHash(geoHash string) (float64, float64) { + even := true + lat := []float64{-90.0, 90.0} + lon := []float64{-180.0, 180.0} + + for i := 0; i < len(geoHash); i++ { + cd := uint64(base32encoding.dec[geoHash[i]]) + for j := 0; j < 5; j++ { + if even { + if cd&masks[j] > 0 { + lon[0] = (lon[0] + lon[1]) / 2 + } else { + lon[1] = (lon[0] + lon[1]) / 2 + } + } else { + if cd&masks[j] > 0 { + lat[0] = (lat[0] + lat[1]) / 2 + } else { + lat[1] = (lat[0] + lat[1]) / 2 + } + } + even = !even + } + } -// Deinterleave the bits of X into 32-bit words containing the even and odd -// bitlevels of X, respectively. -func deinterleave(X uint64) (uint32, uint32) { - return squash(X), squash(X >> 1) + return (lat[0] + lat[1]) / 2, (lon[0] + lon[1]) / 2 } -// BoundingBoxIntWithPrecision returns the region encoded by the integer -// geohash with the specified precision. -func geoBoundingBoxIntWithPrecision(hash uint64, bits uint) geoBox { - fullHash := hash << (64 - bits) - latInt, lngInt := deinterleave(fullHash) - lat := decodeRange(latInt, 90) - lng := decodeRange(lngInt, 180) - latErr, lngErr := errorWithPrecision(bits) - return geoBox{ - minLat: lat, - maxLat: lat + latErr, - minLng: lng, - maxLng: lng + lngErr, +func EncodeGeoHash(lat, lon float64) string { + even := true + lats := []float64{-90.0, 90.0} + lons := []float64{-180.0, 180.0} + precision := 12 + var ch, bit uint64 + var geoHash string + + for len(geoHash) < precision { + if even { + mid := (lons[0] + lons[1]) / 2 + if lon > mid { + ch |= masks[bit] + lons[0] = mid + } else { + lons[1] = mid + } + } else { + mid := (lats[0] + lats[1]) / 2 + if lat > mid { + ch |= masks[bit] + lats[0] = mid + } else { + lats[1] = mid + } + } + even = !even + if bit < 4 { + bit++ + } else { + geoHash += string(base32encoding.enc[ch]) + ch = 0 + bit = 0 + } } -} - -// ---------------------------------------------------------------------- -// Decode the string geohash to a (lat, lng) point. -func GeoHashDecode(hash string) (lat, lng float64) { - box := geoBoundingBox(hash) - return box.round() + return geoHash } diff --git a/vendor/github.com/blevesearch/bleve/geo/parse.go b/vendor/github.com/blevesearch/bleve/geo/parse.go index 0511fea7b6..5d833d9110 100644 --- a/vendor/github.com/blevesearch/bleve/geo/parse.go +++ b/vendor/github.com/blevesearch/bleve/geo/parse.go @@ -85,7 +85,7 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) { } } else { // geohash - lat, lon = GeoHashDecode(geoStr) + lat, lon = DecodeGeoHash(geoStr) foundLat = true foundLon = true } diff --git a/vendor/github.com/blevesearch/bleve/index.go b/vendor/github.com/blevesearch/bleve/index.go index 99357eee01..ef6ede9343 100644 --- a/vendor/github.com/blevesearch/bleve/index.go +++ b/vendor/github.com/blevesearch/bleve/index.go @@ -117,6 +117,8 @@ func (b *Batch) String() string { // be re-used in the future. func (b *Batch) Reset() { b.internal.Reset() + b.lastDocSize = 0 + b.totalSize = 0 } func (b *Batch) Merge(o *Batch) { diff --git a/vendor/github.com/blevesearch/bleve/index/index.go b/vendor/github.com/blevesearch/bleve/index/index.go index 6aa444cfd8..3e866f3aab 100644 --- a/vendor/github.com/blevesearch/bleve/index/index.go +++ b/vendor/github.com/blevesearch/bleve/index/index.go @@ -121,6 +121,10 @@ type IndexReaderOnly interface { FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error) } +type IndexReaderContains interface { + FieldDictContains(field string) (FieldDictContains, error) +} + // FieldTerms contains the terms used by a document, keyed by field type FieldTerms map[string][]string @@ -230,6 +234,10 @@ type FieldDict interface { Close() error } +type FieldDictContains interface { + Contains(key []byte) (bool, error) +} + // DocIDReader is the interface exposing enumeration of documents identifiers. // Close the reader to release associated resources. type DocIDReader interface { diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go b/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go index 2d04bd38e5..ac627796f5 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go @@ -376,6 +376,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { fileSegments++ } } + } // before the newMerge introduction, need to clean the newly @@ -392,7 +393,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { } } } - // In case where all the docs in the newly merged segment getting // deleted by the time we reach here, can skip the introduction. if nextMerge.new != nil && @@ -424,7 +424,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { newSnapshot.AddRef() // 1 ref for the nextMerge.notify response newSnapshot.updateSize() - s.rootLock.Lock() // swap in new index snapshot newSnapshot.epoch = s.nextSnapshotEpoch @@ -502,7 +501,6 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { } newSnapshot.updateSize() - // swap in new snapshot rootPrev := s.root s.root = newSnapshot diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/merge.go b/vendor/github.com/blevesearch/bleve/index/scorch/merge.go index bcbf5b7106..d7144772fd 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/merge.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/merge.go @@ -18,6 +18,7 @@ import ( "encoding/json" "fmt" "os" + "strings" "sync/atomic" "time" @@ -151,13 +152,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1) return nil } - atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1) atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks))) // process tasks in serial for now var notifications []chan *IndexSnapshot + var filenames []string for _, task := range resultMergePlan.Tasks { if len(task.Segments) == 0 { atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1) @@ -182,6 +183,12 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, segmentsToMerge = append(segmentsToMerge, zapSeg) docsToDrop = append(docsToDrop, segSnapshot.deleted) } + // track the files getting merged for unsetting the + // removal ineligibility. This helps to unflip files + // even with fast merger, slow persister work flows. + path := zapSeg.Path() + filenames = append(filenames, + strings.TrimPrefix(path, s.path+string(os.PathSeparator))) } } } @@ -221,6 +228,11 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) return err } + err = zap.ValidateMerge(segmentsToMerge, nil, docsToDrop, seg.(*zap.Segment)) + if err != nil { + s.unmarkIneligibleForRemoval(filename) + return fmt.Errorf("merge validation failed: %v", err) + } oldNewDocNums = make(map[uint64][]uint64) for i, segNewDocNums := range newDocNums { oldNewDocNums[task.Segments[i].Id()] = segNewDocNums @@ -263,6 +275,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, } } + // once all the newly merged segment introductions are done, + // its safe to unflip the removal ineligibility for the replaced + // older segments + for _, f := range filenames { + s.unmarkIneligibleForRemoval(f) + } + return nil } @@ -311,6 +330,10 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, atomic.AddUint64(&s.stats.TotMemMergeErr, 1) return nil, 0, err } + err = zap.ValidateMerge(nil, sbs, sbsDrops, seg.(*zap.Segment)) + if err != nil { + return nil, 0, fmt.Errorf("in-memory merge validation failed: %v", err) + } // update persisted stats atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count()) diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/persister.go b/vendor/github.com/blevesearch/bleve/index/scorch/persister.go index 349ccdc0e9..064e9e6a85 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/persister.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/persister.go @@ -90,6 +90,9 @@ func (s *Scorch) persisterLoop() { var persistWatchers []*epochWatcher var lastPersistedEpoch, lastMergedEpoch uint64 var ew *epochWatcher + + var unpersistedCallbacks []index.BatchCallback + po, err := s.parsePersisterOptions() if err != nil { s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err)) @@ -111,7 +114,6 @@ OUTER: if ew != nil && ew.epoch > lastMergedEpoch { lastMergedEpoch = ew.epoch } - lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, lastMergedEpoch, persistWatchers, po) @@ -150,11 +152,25 @@ OUTER: _ = ourSnapshot.DecRef() break OUTER } + + // save this current snapshot's persistedCallbacks, to invoke during + // the retry attempt + unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...) + s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err)) _ = ourSnapshot.DecRef() atomic.AddUint64(&s.stats.TotPersistLoopErr, 1) continue OUTER } + + if unpersistedCallbacks != nil { + // in the event of this being a retry attempt for persisting a snapshot + // that had earlier failed, prepend the persistedCallbacks associated + // with earlier segment(s) to the latest persistedCallbacks + ourPersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...) + unpersistedCallbacks = nil + } + for i := range ourPersistedCallbacks { ourPersistedCallbacks[i](err) } @@ -179,7 +195,6 @@ OUTER: s.fireEvent(EventKindPersisterProgress, time.Since(startTime)) if changed { - s.removeOldData() atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1) continue OUTER } @@ -230,20 +245,19 @@ func notifyMergeWatchers(lastPersistedEpoch uint64, return watchersNext } -func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64, - persistWatchers []*epochWatcher, po *persisterOptions) (uint64, []*epochWatcher) { +func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, + lastMergedEpoch uint64, persistWatchers []*epochWatcher, + po *persisterOptions) (uint64, []*epochWatcher) { - // first, let the watchers proceed if they lag behind + // First, let the watchers proceed if they lag behind persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) - // check the merger lag by counting the segment files on disk, + // Check the merger lag by counting the segment files on disk, + numFilesOnDisk, _ := s.diskFileStats() + // On finding fewer files on disk, persister takes a short pause // for sufficient in-memory segments to pile up for the next // memory merge cum persist loop. - // On finding too many files on disk, persister pause until the merger - // catches up to reduce the segment file count under the threshold. - // But if there is memory pressure, then skip this sleep maneuvers. - numFilesOnDisk, _ := s.diskFileStats() if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) && po.PersisterNapTimeMSec > 0 && s.paused() == 0 { select { @@ -261,6 +275,17 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastM return lastMergedEpoch, persistWatchers } + // Finding too many files on disk could be due to two reasons. + // 1. Too many older snapshots awaiting the clean up. + // 2. The merger could be lagging behind on merging the disk files. + if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) { + s.removeOldData() + numFilesOnDisk, _ = s.diskFileStats() + } + + // Persister pause until the merger catches up to reduce the segment + // file count under the threshold. + // But if there is memory pressure, then skip this sleep maneuvers. OUTER: for po.PersisterNapUnderNumFiles > 0 && numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) && @@ -661,13 +686,13 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) { } func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { + rv := &IndexSnapshot{ parent: s, internal: make(map[string][]byte), refs: 1, creator: "loadSnapshot", } - var running uint64 c := snapshot.Cursor() for k, _ := c.First(); k != nil; k, _ = c.Next() { @@ -703,7 +728,6 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { running += segmentSnapshot.segment.Count() } } - return rv, nil } @@ -750,12 +774,11 @@ func (s *Scorch) removeOldData() { if err != nil { s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err)) } + atomic.AddUint64(&s.stats.TotSnapshotsRemovedFromMetaStore, uint64(removed)) - if removed > 0 { - err = s.removeOldZapFiles() - if err != nil { - s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err)) - } + err = s.removeOldZapFiles() + if err != nil { + s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err)) } } diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go b/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go index 3f3d8bffce..44a97d1ea6 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go @@ -41,12 +41,14 @@ const Version uint8 = 2 var ErrClosed = fmt.Errorf("scorch closed") type Scorch struct { + nextSegmentID uint64 + stats Stats + iStats internalStats + readOnly bool version uint8 config map[string]interface{} analysisQueue *index.AnalysisQueue - stats Stats - nextSegmentID uint64 path string unsafeBatch bool @@ -73,8 +75,6 @@ type Scorch struct { onEvent func(event Event) onAsyncError func(err error) - iStats internalStats - pauseLock sync.RWMutex pauseCount uint64 @@ -312,7 +312,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { // FIXME could sort ids list concurrent with analysis? - if len(batch.IndexOps) > 0 { + if numUpdates > 0 { go func() { for _, doc := range batch.IndexOps { if doc != nil { @@ -490,6 +490,9 @@ func (s *Scorch) StatsMap() map[string]interface{} { m["CurOnDiskBytes"] = numBytesUsedDisk m["CurOnDiskFiles"] = numFilesOnDisk + s.rootLock.RLock() + m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval)) + s.rootLock.RUnlock() // TODO: consider one day removing these backwards compatible // names for apps using the old names m["updates"] = m["TotUpdates"] diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go index 165a01bc16..fdc407a747 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go @@ -91,12 +91,20 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte, return &EmptyDictionaryIterator{} } +func (e *EmptyDictionary) Contains(key []byte) (bool, error) { + return false, nil +} + type EmptyDictionaryIterator struct{} func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { return nil, nil } +func (e *EmptyDictionaryIterator) Contains(key []byte) (bool, error) { + return false, nil +} + func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) { return nil, nil } diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go index a4836ebf8a..55299d8f7a 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go @@ -19,7 +19,10 @@ package segment -import "fmt" +import ( + "errors" + "fmt" +) const ( MaxVarintSize = 9 @@ -92,3 +95,82 @@ func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) { } return b[length:], v, nil } + +// ------------------------------------------------------------ + +type MemUvarintReader struct { + C int // index of next byte to read from S + S []byte +} + +func NewMemUvarintReader(s []byte) *MemUvarintReader { + return &MemUvarintReader{S: s} +} + +// Len returns the number of unread bytes. +func (r *MemUvarintReader) Len() int { + n := len(r.S) - r.C + if n < 0 { + return 0 + } + return n +} + +var ErrMemUvarintReaderOverflow = errors.New("MemUvarintReader overflow") + +// ReadUvarint reads an encoded uint64. The original code this was +// based on is at encoding/binary/ReadUvarint(). +func (r *MemUvarintReader) ReadUvarint() (uint64, error) { + var x uint64 + var s uint + var C = r.C + var S = r.S + + for { + b := S[C] + C++ + + if b < 0x80 { + r.C = C + + // why 63? The original code had an 'i += 1' loop var and + // checked for i > 9 || i == 9 ...; but, we no longer + // check for the i var, but instead check here for s, + // which is incremented by 7. So, 7*9 == 63. + // + // why the "extra" >= check? The normal case is that s < + // 63, so we check this single >= guard first so that we + // hit the normal, nil-error return pathway sooner. + if s >= 63 && (s > 63 || s == 63 && b > 1) { + return 0, ErrMemUvarintReaderOverflow + } + + return x | uint64(b)<<s, nil + } + + x |= uint64(b&0x7f) << s + s += 7 + } +} + +// SkipUvarint skips ahead one encoded uint64. +func (r *MemUvarintReader) SkipUvarint() { + for { + b := r.S[r.C] + r.C++ + + if b < 0x80 { + return + } + } +} + +// SkipBytes skips a count number of bytes. +func (r *MemUvarintReader) SkipBytes(count int) { + r.C = r.C + count +} + +func (r *MemUvarintReader) Reset(s []byte) { + r.C = 0 + r.S = s +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go index 3aa151d64d..3a31f41498 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go @@ -55,7 +55,7 @@ func LiteralPrefix(s *syntax.Regexp) string { s = s.Sub[0] } - if s.Op == syntax.OpLiteral { + if s.Op == syntax.OpLiteral && (s.Flags&syntax.FoldCase == 0) { return string(s.Rune) } diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go index b94d6f979f..34c2bc2048 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go @@ -59,6 +59,8 @@ type TermDictionary interface { AutomatonIterator(a vellum.Automaton, startKeyInclusive, endKeyExclusive []byte) DictionaryIterator OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator + + Contains(key []byte) (bool, error) } type DictionaryIterator interface { diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go index 91bfd4e24e..c02333cee0 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go @@ -16,6 +16,7 @@ package zap import ( "bufio" + "github.com/couchbase/vellum" "math" "os" ) @@ -137,6 +138,7 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32, docValueOffset: docValueOffset, dictLocs: dictLocs, fieldDvReaders: make(map[uint16]*docValueReader), + fieldFSTs: make(map[uint16]*vellum.FST), } sb.updateSize() diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go index 2c0e1bf2ad..ad4a8f8dc5 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go @@ -95,6 +95,10 @@ func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) return rv } +func (d *Dictionary) Contains(key []byte) (bool, error) { + return d.fst.Contains(key) +} + // Iterator returns an iterator for this dictionary func (d *Dictionary) Iterator() segment.DictionaryIterator { rv := &DictionaryIterator{ @@ -143,11 +147,14 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator } // need to increment the end position to be inclusive - endBytes := []byte(end) - if endBytes[len(endBytes)-1] < 0xff { - endBytes[len(endBytes)-1]++ - } else { - endBytes = append(endBytes, 0xff) + var endBytes []byte + if len(end) > 0 { + endBytes = []byte(end) + if endBytes[len(endBytes)-1] < 0xff { + endBytes[len(endBytes)-1]++ + } else { + endBytes = append(endBytes, 0xff) + } } if d.fst != nil { diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go index bcc0f94728..a819ca239f 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go @@ -39,7 +39,7 @@ type docNumTermsVisitor func(docNum uint64, terms []byte) error type docVisitState struct { dvrs map[uint16]*docValueReader - segment *Segment + segment *SegmentBase } type docValueReader struct { @@ -88,8 +88,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string, fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) { // get the docValue offset for the given fields if fieldDvLocStart == fieldNotUninverted { - return nil, fmt.Errorf("loadFieldDocValueReader: "+ - "no docValues found for field: %s", field) + // no docValues found, nothing to do + return nil, nil } // read the number of chunks, and chunk offsets position @@ -101,6 +101,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string, chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8]) // acquire position of chunk offsets chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen + } else { + return nil, fmt.Errorf("loadFieldDocValueReader: fieldDvLoc too small: %d-%d", fieldDvLocEnd, fieldDvLocStart) } fdvIter := &docValueReader{ @@ -250,7 +252,7 @@ func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) { // VisitDocumentFieldTerms is an implementation of the // DocumentFieldTermVisitable interface -func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, +func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string, visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) ( segment.DocVisitState, error) { dvs, ok := dvsIn.(*docVisitState) @@ -289,7 +291,7 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil { // check if the chunk is already loaded if docInChunk != dvr.curChunkNumber() { - err := dvr.loadDvChunk(docInChunk, &s.SegmentBase) + err := dvr.loadDvChunk(docInChunk, s) if err != nil { return dvs, err } @@ -304,6 +306,6 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, // VisitableDocValueFields returns the list of fields with // persisted doc value terms ready to be visitable using the // VisitDocumentFieldTerms method. -func (s *Segment) VisitableDocValueFields() ([]string, error) { +func (s *SegmentBase) VisitableDocValueFields() ([]string, error) { return s.fieldDvNames, nil } diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go index 4ef222c1a2..50bd7207a5 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go @@ -31,6 +31,14 @@ import ( var DefaultFileMergerBufferSize = 1024 * 1024 +// ValidateMerge can be set by applications to perform additional checks +// on a new segment produced by a merge, by default this does nothing. +// Caller should provide EITHER segments or memSegments, but not both. +// This API is experimental and may be removed at any time. +var ValidateMerge = func(segments []*Segment, memSegments []*SegmentBase, drops []*roaring.Bitmap, newSegment *Segment) error { + return nil +} + const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc // Merge takes a slice of zap segments and bit masks describing which diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go index 22b69913e4..c108ec16dd 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go @@ -33,6 +33,14 @@ var NewSegmentBufferNumResultsBump int = 100 var NewSegmentBufferNumResultsFactor float64 = 1.0 var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0 +// ValidateDocFields can be set by applications to perform additional checks +// on fields in a document being added to a new segment, by default it does +// nothing. +// This API is experimental and may be removed at any time. +var ValidateDocFields = func(field document.Field) error { + return nil +} + // AnalysisResultsToSegmentBase produces an in-memory zap-encoded // SegmentBase from analysis results func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, @@ -521,6 +529,11 @@ func (s *interim) writeStoredFields() ( if opts.IncludeDocValues() { s.IncludeDocValues[fieldID] = true } + + err := ValidateDocFields(field) + if err != nil { + return 0, err + } } var curr int diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go index 26378c27e0..4c43fdb9b9 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go @@ -15,10 +15,8 @@ package zap import ( - "bytes" "encoding/binary" "fmt" - "io" "math" "reflect" @@ -192,7 +190,7 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, } rv.postings = p - rv.includeFreqNorm = includeFreq || includeNorm + rv.includeFreqNorm = includeFreq || includeNorm || includeLocs rv.includeLocs = includeLocs if p.normBits1Hit != 0 { @@ -264,18 +262,17 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, // Count returns the number of items on this postings list func (p *PostingsList) Count() uint64 { - var n uint64 + var n, e uint64 if p.normBits1Hit != 0 { n = 1 + if p.except != nil && p.except.Contains(uint32(p.docNum1Hit)) { + e = 1 + } } else if p.postings != nil { n = p.postings.GetCardinality() - } - var e uint64 - if p.except != nil { - e = p.except.GetCardinality() - } - if n <= e { - return 0 + if p.except != nil { + e = p.postings.AndCardinality(p.except) + } } return n - e } @@ -327,16 +324,16 @@ func (rv *PostingsList) init1Hit(fstVal uint64) error { // PostingsIterator provides a way to iterate through the postings list type PostingsIterator struct { postings *PostingsList - all roaring.IntIterable - Actual roaring.IntIterable + all roaring.IntPeekable + Actual roaring.IntPeekable ActualBM *roaring.Bitmap currChunk uint32 currChunkFreqNorm []byte currChunkLoc []byte - freqNormReader *bytes.Reader - locReader *bytes.Reader + freqNormReader *segment.MemUvarintReader + locReader *segment.MemUvarintReader freqChunkOffsets []uint64 freqChunkStart uint64 @@ -387,7 +384,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error { end += e i.currChunkFreqNorm = i.postings.sb.mem[start:end] if i.freqNormReader == nil { - i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm) + i.freqNormReader = segment.NewMemUvarintReader(i.currChunkFreqNorm) } else { i.freqNormReader.Reset(i.currChunkFreqNorm) } @@ -405,7 +402,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error { end += e i.currChunkLoc = i.postings.sb.mem[start:end] if i.locReader == nil { - i.locReader = bytes.NewReader(i.currChunkLoc) + i.locReader = segment.NewMemUvarintReader(i.currChunkLoc) } else { i.locReader.Reset(i.currChunkLoc) } @@ -420,18 +417,34 @@ func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) { return 1, i.normBits1Hit, false, nil } - freqHasLocs, err := binary.ReadUvarint(i.freqNormReader) + freqHasLocs, err := i.freqNormReader.ReadUvarint() if err != nil { return 0, 0, false, fmt.Errorf("error reading frequency: %v", err) } + freq, hasLocs := decodeFreqHasLocs(freqHasLocs) - normBits, err := binary.ReadUvarint(i.freqNormReader) + normBits, err := i.freqNormReader.ReadUvarint() if err != nil { return 0, 0, false, fmt.Errorf("error reading norm: %v", err) } - return freq, normBits, hasLocs, err + return freq, normBits, hasLocs, nil +} + +func (i *PostingsIterator) skipFreqNormReadHasLocs() (bool, error) { + if i.normBits1Hit != 0 { + return false, nil + } + + freqHasLocs, err := i.freqNormReader.ReadUvarint() + if err != nil { + return false, fmt.Errorf("error reading freqHasLocs: %v", err) + } + + i.freqNormReader.SkipUvarint() // Skip normBits. + + return freqHasLocs&0x01 != 0, nil // See decodeFreqHasLocs() / hasLocs. } func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 { @@ -449,58 +462,53 @@ func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) { } // readLocation processes all the integers on the stream representing a single -// location. if you care about it, pass in a non-nil location struct, and we -// will fill it. if you don't care about it, pass in nil and we safely consume -// the contents. +// location. func (i *PostingsIterator) readLocation(l *Location) error { // read off field - fieldID, err := binary.ReadUvarint(i.locReader) + fieldID, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location field: %v", err) } // read off pos - pos, err := binary.ReadUvarint(i.locReader) + pos, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location pos: %v", err) } // read off start - start, err := binary.ReadUvarint(i.locReader) + start, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location start: %v", err) } // read off end - end, err := binary.ReadUvarint(i.locReader) + end, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location end: %v", err) } // read off num array pos - numArrayPos, err := binary.ReadUvarint(i.locReader) + numArrayPos, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location num array pos: %v", err) } - // group these together for less branching - if l != nil { - l.field = i.postings.sb.fieldsInv[fieldID] - l.pos = pos - l.start = start - l.end = end - if cap(l.ap) < int(numArrayPos) { - l.ap = make([]uint64, int(numArrayPos)) - } else { - l.ap = l.ap[:int(numArrayPos)] - } + l.field = i.postings.sb.fieldsInv[fieldID] + l.pos = pos + l.start = start + l.end = end + + if cap(l.ap) < int(numArrayPos) { + l.ap = make([]uint64, int(numArrayPos)) + } else { + l.ap = l.ap[:int(numArrayPos)] } // read off array positions for k := 0; k < int(numArrayPos); k++ { - ap, err := binary.ReadUvarint(i.locReader) + ap, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading array position: %v", err) } - if l != nil { - l.ap[k] = ap - } + + l.ap[k] = ap } return nil @@ -557,7 +565,7 @@ func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, err } rv.locs = i.nextSegmentLocs[:0] - numLocsBytes, err := binary.ReadUvarint(i.locReader) + numLocsBytes, err := i.locReader.ReadUvarint() if err != nil { return nil, fmt.Errorf("error reading location numLocsBytes: %v", err) } @@ -613,17 +621,14 @@ func (i *PostingsIterator) nextBytes() ( if hasLocs { startLoc := len(i.currChunkLoc) - i.locReader.Len() - numLocsBytes, err := binary.ReadUvarint(i.locReader) + numLocsBytes, err := i.locReader.ReadUvarint() if err != nil { return 0, 0, 0, nil, nil, fmt.Errorf("error reading location nextBytes numLocs: %v", err) } // skip over all the location bytes - _, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent) - if err != nil { - return 0, 0, 0, nil, nil, err - } + i.locReader.SkipBytes(int(numLocsBytes)) endLoc := len(i.currChunkLoc) - i.locReader.Len() bytesLoc = i.currChunkLoc[startLoc:endLoc] @@ -657,14 +662,14 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, return i.nextDocNumAtOrAfterClean(atOrAfter) } - n := i.Actual.Next() - for uint64(n) < atOrAfter && i.Actual.HasNext() { - n = i.Actual.Next() - } - if uint64(n) < atOrAfter { + i.Actual.AdvanceIfNeeded(uint32(atOrAfter)) + + if !i.Actual.HasNext() { // couldn't find anything return 0, false, nil } + + n := i.Actual.Next() allN := i.all.Next() nChunk := n / i.postings.sb.chunkFactor @@ -701,23 +706,20 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, // no deletions) where the all bitmap is the same as the actual bitmap func (i *PostingsIterator) nextDocNumAtOrAfterClean( atOrAfter uint64) (uint64, bool, error) { - n := i.Actual.Next() if !i.includeFreqNorm { - for uint64(n) < atOrAfter && i.Actual.HasNext() { - n = i.Actual.Next() - } + i.Actual.AdvanceIfNeeded(uint32(atOrAfter)) - if uint64(n) < atOrAfter { + if !i.Actual.HasNext() { return 0, false, nil // couldn't find anything } - return uint64(n), true, nil + return uint64(i.Actual.Next()), true, nil } // freq-norm's needed, so maintain freq-norm chunk reader sameChunkNexts := 0 // # of times we called Next() in the same chunk - + n := i.Actual.Next() nChunk := n / i.postings.sb.chunkFactor for uint64(n) < atOrAfter && i.Actual.HasNext() { @@ -764,22 +766,19 @@ func (i *PostingsIterator) currChunkNext(nChunk uint32) error { } // read off freq/offsets even though we don't care about them - _, _, hasLocs, err := i.readFreqNormHasLocs() + hasLocs, err := i.skipFreqNormReadHasLocs() if err != nil { return err } if i.includeLocs && hasLocs { - numLocsBytes, err := binary.ReadUvarint(i.locReader) + numLocsBytes, err := i.locReader.ReadUvarint() if err != nil { return fmt.Errorf("error reading location numLocsBytes: %v", err) } // skip over all the location bytes - _, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent) - if err != nil { - return err - } + i.locReader.SkipBytes(int(numLocsBytes)) } return nil diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go index 7ba28c2366..5aa33a26c9 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go @@ -20,8 +20,8 @@ import ( "fmt" "io" "os" - "reflect" "sync" + "unsafe" "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/index/scorch/segment" @@ -35,7 +35,7 @@ var reflectStaticSizeSegmentBase int func init() { var sb SegmentBase - reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size()) + reflectStaticSizeSegmentBase = int(unsafe.Sizeof(sb)) } // Open returns a zap impl of a segment @@ -56,6 +56,7 @@ func Open(path string) (segment.Segment, error) { mem: mm[0 : len(mm)-FooterSize], fieldsMap: make(map[string]uint16), fieldDvReaders: make(map[uint16]*docValueReader), + fieldFSTs: make(map[uint16]*vellum.FST), }, f: f, mm: mm, @@ -101,6 +102,9 @@ type SegmentBase struct { fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field fieldDvNames []string // field names cached in fieldDvReaders size uint64 + + m sync.Mutex + fieldFSTs map[uint16]*vellum.FST } func (sb *SegmentBase) Size() int { @@ -258,19 +262,27 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { dictStart := sb.dictLocs[rv.fieldID] if dictStart > 0 { - // read the length of the vellum data - vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64]) - fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] - if fstBytes != nil { + var ok bool + sb.m.Lock() + if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok { + // read the length of the vellum data + vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64]) + fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] rv.fst, err = vellum.Load(fstBytes) if err != nil { + sb.m.Unlock() return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) } - rv.fstReader, err = rv.fst.Reader() - if err != nil { - return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err) - } + + sb.fieldFSTs[rv.fieldID] = rv.fst } + + sb.m.Unlock() + rv.fstReader, err = rv.fst.Reader() + if err != nil { + return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err) + } + } } @@ -527,7 +539,7 @@ func (s *Segment) DictAddr(field string) (uint64, error) { } func (s *SegmentBase) loadDvReaders() error { - if s.docValueOffset == fieldNotUninverted { + if s.docValueOffset == fieldNotUninverted || s.numDocs == 0 { return nil } @@ -546,7 +558,10 @@ func (s *SegmentBase) loadDvReaders() error { } read += uint64(n) - fieldDvReader, _ := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd) + fieldDvReader, err := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd) + if err != nil { + return err + } if fieldDvReader != nil { s.fieldDvReaders[uint16(fieldID)] = fieldDvReader s.fieldDvNames = append(s.fieldDvNames, field) diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go index 8babb31fa4..47cc809b21 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go @@ -28,13 +28,14 @@ import ( "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/scorch/segment" "github.com/couchbase/vellum" - lev2 "github.com/couchbase/vellum/levenshtein2" + lev "github.com/couchbase/vellum/levenshtein" ) // re usable, threadsafe levenshtein builders -var lb1, lb2 *lev2.LevenshteinAutomatonBuilder +var lb1, lb2 *lev.LevenshteinAutomatonBuilder type asynchSegmentResult struct { + dict segment.TermDictionary dictItr segment.DictionaryIterator index int @@ -51,11 +52,11 @@ func init() { var is interface{} = IndexSnapshot{} reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size()) var err error - lb1, err = lev2.NewLevenshteinAutomatonBuilder(1, true) + lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true) if err != nil { panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err)) } - lb2, err = lev2.NewLevenshteinAutomatonBuilder(2, true) + lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true) if err != nil { panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err)) } @@ -126,7 +127,9 @@ func (i *IndexSnapshot) updateSize() { } } -func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { +func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, + makeItr func(i segment.TermDictionary) segment.DictionaryIterator, + randomLookup bool) (*IndexSnapshotFieldDict, error) { results := make(chan *asynchSegmentResult) for index, segment := range i.segment { @@ -135,7 +138,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s if err != nil { results <- &asynchSegmentResult{err: err} } else { - results <- &asynchSegmentResult{dictItr: makeItr(dict)} + if randomLookup { + results <- &asynchSegmentResult{dict: dict} + } else { + results <- &asynchSegmentResult{dictItr: makeItr(dict)} + } } }(index, segment) } @@ -150,14 +157,20 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s if asr.err != nil && err == nil { err = asr.err } else { - next, err2 := asr.dictItr.Next() - if err2 != nil && err == nil { - err = err2 - } - if next != nil { + if !randomLookup { + next, err2 := asr.dictItr.Next() + if err2 != nil && err == nil { + err = err2 + } + if next != nil { + rv.cursors = append(rv.cursors, &segmentDictCursor{ + itr: asr.dictItr, + curr: *next, + }) + } + } else { rv.cursors = append(rv.cursors, &segmentDictCursor{ - itr: asr.dictItr, - curr: *next, + dict: asr.dict, }) } } @@ -166,8 +179,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s if err != nil { return nil, err } - // prepare heap - heap.Init(rv) + + if !randomLookup { + // prepare heap + heap.Init(rv) + } return rv, nil } @@ -175,21 +191,21 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.Iterator() - }) + }, false) } func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.RangeIterator(string(startTerm), string(endTerm)) - }) + }, false) } func (i *IndexSnapshot) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.PrefixIterator(string(termPrefix)) - }) + }, false) } func (i *IndexSnapshot) FieldDictRegexp(field string, @@ -204,7 +220,7 @@ func (i *IndexSnapshot) FieldDictRegexp(field string, return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.AutomatonIterator(a, prefixBeg, prefixEnd) - }) + }, false) } func (i *IndexSnapshot) getLevAutomaton(term string, @@ -232,14 +248,18 @@ func (i *IndexSnapshot) FieldDictFuzzy(field string, return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.AutomatonIterator(a, prefixBeg, prefixEnd) - }) + }, false) } func (i *IndexSnapshot) FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.OnlyIterator(onlyTerms, includeCount) - }) + }, false) +} + +func (i *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) { + return i.newIndexSnapshotFieldDict(field, nil, true) } func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go index abd3bde8c1..47486c2554 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go @@ -22,6 +22,7 @@ import ( ) type segmentDictCursor struct { + dict segment.TermDictionary itr segment.DictionaryIterator curr index.DictEntry } @@ -91,3 +92,17 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) { func (i *IndexSnapshotFieldDict) Close() error { return nil } + +func (i *IndexSnapshotFieldDict) Contains(key []byte) (bool, error) { + if len(i.cursors) == 0 { + return false, nil + } + + for _, cursor := range i.cursors { + if found, _ := cursor.dict.Contains(key); found { + return true, nil + } + } + + return false, nil +} diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go index f3a2c56a98..96742b4f94 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go @@ -183,9 +183,9 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) { } type cachedDocs struct { + size uint64 m sync.Mutex // As the cache is asynchronously prepared, need a lock cache map[string]*cachedFieldDocs // Keyed by field - size uint64 } func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error { diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/stats.go b/vendor/github.com/blevesearch/bleve/index/scorch/stats.go index 2eb832f2cf..6549fddf51 100644 --- a/vendor/github.com/blevesearch/bleve/index/scorch/stats.go +++ b/vendor/github.com/blevesearch/bleve/index/scorch/stats.go @@ -107,6 +107,9 @@ type Stats struct { TotFileMergeIntroductionsDone uint64 TotFileMergeIntroductionsSkipped uint64 + CurFilesIneligibleForRemoval uint64 + TotSnapshotsRemovedFromMetaStore uint64 + TotMemMergeBeg uint64 TotMemMergeErr uint64 TotMemMergeDone uint64 diff --git a/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go b/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go index e4bc3d8f02..24f5aae949 100644 --- a/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go +++ b/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go @@ -415,7 +415,6 @@ func (udc *UpsideDownCouch) Close() error { func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { // do analysis before acquiring write lock analysisStart := time.Now() - numPlainTextBytes := doc.NumPlainTextBytes() resultChan := make(chan *index.AnalysisResult) aw := index.NewAnalysisWork(udc, doc, resultChan) @@ -452,6 +451,11 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { return } + return udc.UpdateWithAnalysis(doc, result, backIndexRow) +} + +func (udc *UpsideDownCouch) UpdateWithAnalysis(doc *document.Document, + result *index.AnalysisResult, backIndexRow *BackIndexRow) (err error) { // start a writer for this update indexStart := time.Now() var kvwriter store.KVWriter @@ -490,7 +494,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart))) if err == nil { atomic.AddUint64(&udc.stats.updates, 1) - atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes) + atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, doc.NumPlainTextBytes()) } else { atomic.AddUint64(&udc.stats.errors, 1) } @@ -797,6 +801,10 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) [] } func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { + persistedCallback := batch.PersistedCallback() + if persistedCallback != nil { + defer persistedCallback(err) + } analysisStart := time.Now() resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps)) @@ -810,7 +818,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { } } - if len(batch.IndexOps) > 0 { + if numUpdates > 0 { go func() { for _, doc := range batch.IndexOps { if doc != nil { @@ -961,10 +969,6 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { atomic.AddUint64(&udc.stats.errors, 1) } - persistedCallback := batch.PersistedCallback() - if persistedCallback != nil { - persistedCallback(err) - } return } diff --git a/vendor/github.com/blevesearch/bleve/index_alias_impl.go b/vendor/github.com/blevesearch/bleve/index_alias_impl.go index 335fcade2e..4366fc7956 100644 --- a/vendor/github.com/blevesearch/bleve/index_alias_impl.go +++ b/vendor/github.com/blevesearch/bleve/index_alias_impl.go @@ -434,6 +434,8 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest { Sort: req.Sort.Copy(), IncludeLocations: req.IncludeLocations, Score: req.Score, + SearchAfter: req.SearchAfter, + SearchBefore: req.SearchBefore, } return &rv } @@ -451,6 +453,14 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se searchStart := time.Now() asyncResults := make(chan *asyncSearchResult, len(indexes)) + var reverseQueryExecution bool + if req.SearchBefore != nil { + reverseQueryExecution = true + req.Sort.Reverse() + req.SearchAfter = req.SearchBefore + req.SearchBefore = nil + } + // run search on each index in separate go routine var waitGroup sync.WaitGroup @@ -503,7 +513,7 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se // sort all hits with the requested order if len(req.Sort) > 0 { - sorter := newMultiSearchHitSorter(req.Sort, sr.Hits) + sorter := newSearchHitSorter(req.Sort, sr.Hits) sort.Sort(sorter) } @@ -524,6 +534,17 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se sr.Facets.Fixup(name, fr.Size) } + if reverseQueryExecution { + // reverse the sort back to the original + req.Sort.Reverse() + // resort using the original order + mhs := newSearchHitSorter(req.Sort, sr.Hits) + sort.Sort(mhs) + // reset request + req.SearchBefore = req.SearchAfter + req.SearchAfter = nil + } + // fix up original request sr.Request = req searchDuration := time.Since(searchStart) @@ -581,26 +602,3 @@ func (f *indexAliasImplFieldDict) Close() error { defer f.index.mutex.RUnlock() return f.fieldDict.Close() } - -type multiSearchHitSorter struct { - hits search.DocumentMatchCollection - sort search.SortOrder - cachedScoring []bool - cachedDesc []bool -} - -func newMultiSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *multiSearchHitSorter { - return &multiSearchHitSorter{ - sort: sort, - hits: hits, - cachedScoring: sort.CacheIsScore(), - cachedDesc: sort.CacheDescending(), - } -} - -func (m *multiSearchHitSorter) Len() int { return len(m.hits) } -func (m *multiSearchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] } -func (m *multiSearchHitSorter) Less(i, j int) bool { - c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j]) - return c < 0 -} diff --git a/vendor/github.com/blevesearch/bleve/index_impl.go b/vendor/github.com/blevesearch/bleve/index_impl.go index fe61b8064a..6324d960eb 100644 --- a/vendor/github.com/blevesearch/bleve/index_impl.go +++ b/vendor/github.com/blevesearch/bleve/index_impl.go @@ -19,6 +19,7 @@ import ( "encoding/json" "fmt" "os" + "sort" "sync" "sync/atomic" "time" @@ -442,7 +443,20 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr return nil, ErrorIndexClosed } - collector := collector.NewTopNCollector(req.Size, req.From, req.Sort) + var reverseQueryExecution bool + if req.SearchBefore != nil { + reverseQueryExecution = true + req.Sort.Reverse() + req.SearchAfter = req.SearchBefore + req.SearchBefore = nil + } + + var coll *collector.TopNCollector + if req.SearchAfter != nil { + coll = collector.NewTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter) + } else { + coll = collector.NewTopNCollector(req.Size, req.From, req.Sort) + } // open a reader for this search indexReader, err := i.i.Reader() @@ -494,10 +508,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr facetsBuilder.Add(facetName, facetBuilder) } } - collector.SetFacetsBuilder(facetsBuilder) + coll.SetFacetsBuilder(facetsBuilder) } - memNeeded := memNeededForSearch(req, searcher, collector) + memNeeded := memNeededForSearch(req, searcher, coll) if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil { if cbF, ok := cb.(SearchQueryStartCallbackFn); ok { err = cbF(memNeeded) @@ -515,12 +529,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr } } - err = collector.Collect(ctx, searcher, indexReader) + err = coll.Collect(ctx, searcher, indexReader) if err != nil { return nil, err } - hits := collector.Results() + hits := coll.Results() var highlighter highlight.Highlighter @@ -542,71 +556,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr } for _, hit := range hits { - if len(req.Fields) > 0 || highlighter != nil { - doc, err := indexReader.Document(hit.ID) - if err == nil && doc != nil { - if len(req.Fields) > 0 { - fieldsToLoad := deDuplicate(req.Fields) - for _, f := range fieldsToLoad { - for _, docF := range doc.Fields { - if f == "*" || docF.Name() == f { - var value interface{} - switch docF := docF.(type) { - case *document.TextField: - value = string(docF.Value()) - case *document.NumericField: - num, err := docF.Number() - if err == nil { - value = num - } - case *document.DateTimeField: - datetime, err := docF.DateTime() - if err == nil { - value = datetime.Format(time.RFC3339) - } - case *document.BooleanField: - boolean, err := docF.Boolean() - if err == nil { - value = boolean - } - case *document.GeoPointField: - lon, err := docF.Lon() - if err == nil { - lat, err := docF.Lat() - if err == nil { - value = []float64{lon, lat} - } - } - } - if value != nil { - hit.AddFieldValue(docF.Name(), value) - } - } - } - } - } - if highlighter != nil { - highlightFields := req.Highlight.Fields - if highlightFields == nil { - // add all fields with matches - highlightFields = make([]string, 0, len(hit.Locations)) - for k := range hit.Locations { - highlightFields = append(highlightFields, k) - } - } - for _, hf := range highlightFields { - highlighter.BestFragmentsInField(hit, doc, hf, 1) - } - } - } else if doc == nil { - // unexpected case, a doc ID that was found as a search hit - // was unable to be found during document lookup - return nil, ErrorIndexReadInconsistency - } - } if i.name != "" { hit.Index = i.name } + err = LoadAndHighlightFields(hit, req, i.name, indexReader, highlighter) + if err != nil { + return nil, err + } } atomic.AddUint64(&i.stats.searches, 1) @@ -618,6 +574,17 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr logger.Printf("slow search took %s - %v", searchDuration, req) } + if reverseQueryExecution { + // reverse the sort back to the original + req.Sort.Reverse() + // resort using the original order + mhs := newSearchHitSorter(req.Sort, hits) + sort.Sort(mhs) + // reset request + req.SearchBefore = req.SearchAfter + req.SearchAfter = nil + } + return &SearchResult{ Status: &SearchStatus{ Total: 1, @@ -625,13 +592,82 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr }, Request: req, Hits: hits, - Total: collector.Total(), - MaxScore: collector.MaxScore(), + Total: coll.Total(), + MaxScore: coll.MaxScore(), Took: searchDuration, - Facets: collector.FacetResults(), + Facets: coll.FacetResults(), }, nil } +func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest, + indexName string, r index.IndexReader, + highlighter highlight.Highlighter) error { + if len(req.Fields) > 0 || highlighter != nil { + doc, err := r.Document(hit.ID) + if err == nil && doc != nil { + if len(req.Fields) > 0 { + fieldsToLoad := deDuplicate(req.Fields) + for _, f := range fieldsToLoad { + for _, docF := range doc.Fields { + if f == "*" || docF.Name() == f { + var value interface{} + switch docF := docF.(type) { + case *document.TextField: + value = string(docF.Value()) + case *document.NumericField: + num, err := docF.Number() + if err == nil { + value = num + } + case *document.DateTimeField: + datetime, err := docF.DateTime() + if err == nil { + value = datetime.Format(time.RFC3339) + } + case *document.BooleanField: + boolean, err := docF.Boolean() + if err == nil { + value = boolean + } + case *document.GeoPointField: + lon, err := docF.Lon() + if err == nil { + lat, err := docF.Lat() + if err == nil { + value = []float64{lon, lat} + } + } + } + if value != nil { + hit.AddFieldValue(docF.Name(), value) + } + } + } + } + } + if highlighter != nil { + highlightFields := req.Highlight.Fields + if highlightFields == nil { + // add all fields with matches + highlightFields = make([]string, 0, len(hit.Locations)) + for k := range hit.Locations { + highlightFields = append(highlightFields, k) + } + } + for _, hf := range highlightFields { + highlighter.BestFragmentsInField(hit, doc, hf, 1) + } + } + } else if doc == nil { + // unexpected case, a doc ID that was found as a search hit + // was unable to be found during document lookup + return ErrorIndexReadInconsistency + } + } + + return nil +} + // Fields returns the name of all the fields this // Index has operated on. func (i *indexImpl) Fields() (fields []string, err error) { @@ -854,3 +890,26 @@ func deDuplicate(fields []string) []string { } return ret } + +type searchHitSorter struct { + hits search.DocumentMatchCollection + sort search.SortOrder + cachedScoring []bool + cachedDesc []bool +} + +func newSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *searchHitSorter { + return &searchHitSorter{ + sort: sort, + hits: hits, + cachedScoring: sort.CacheIsScore(), + cachedDesc: sort.CacheDescending(), + } +} + +func (m *searchHitSorter) Len() int { return len(m.hits) } +func (m *searchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] } +func (m *searchHitSorter) Less(i, j int) bool { + c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j]) + return c < 0 +} diff --git a/vendor/github.com/blevesearch/bleve/mapping/document.go b/vendor/github.com/blevesearch/bleve/mapping/document.go index f950b59bef..15cb6b5fa1 100644 --- a/vendor/github.com/blevesearch/bleve/mapping/document.go +++ b/vendor/github.com/blevesearch/bleve/mapping/document.go @@ -525,19 +525,27 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, if !propertyValue.IsNil() { switch property := property.(type) { case encoding.TextMarshaler: - - txt, err := property.MarshalText() - if err == nil && subDocMapping != nil { - // index by explicit mapping + // ONLY process TextMarshaler if there is an explicit mapping + // AND all of the fiels are of type text + // OTHERWISE process field without TextMarshaler + if subDocMapping != nil { + allFieldsText := true for _, fieldMapping := range subDocMapping.Fields { - if fieldMapping.Type == "text" { - fieldMapping.processString(string(txt), pathString, path, indexes, context) + if fieldMapping.Type != "text" { + allFieldsText = false + break } } - } else { - dm.walkDocument(property, path, indexes, context) + txt, err := property.MarshalText() + if err == nil && allFieldsText { + txtStr := string(txt) + for _, fieldMapping := range subDocMapping.Fields { + fieldMapping.processString(txtStr, pathString, path, indexes, context) + } + return + } } - + dm.walkDocument(property, path, indexes, context) default: dm.walkDocument(property, path, indexes, context) } diff --git a/vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go b/vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go index 76ea001ba7..29bd0fc5c1 100644 --- a/vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go +++ b/vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go @@ -23,12 +23,26 @@ const ShiftStartInt64 byte = 0x20 type PrefixCoded []byte func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) { + rv, _, err := NewPrefixCodedInt64Prealloc(in, shift, nil) + return rv, err +} + +func NewPrefixCodedInt64Prealloc(in int64, shift uint, prealloc []byte) ( + rv PrefixCoded, preallocRest []byte, err error) { if shift > 63 { - return nil, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift) + return nil, prealloc, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift) } nChars := ((63 - shift) / 7) + 1 - rv := make(PrefixCoded, nChars+1) + + size := int(nChars + 1) + if len(prealloc) >= size { + rv = PrefixCoded(prealloc[0:size]) + preallocRest = prealloc[size:] + } else { + rv = make(PrefixCoded, size) + } + rv[0] = ShiftStartInt64 + byte(shift) sortableBits := int64(uint64(in) ^ 0x8000000000000000) @@ -40,7 +54,8 @@ func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) { nChars-- sortableBits = int64(uint64(sortableBits) >> 7) } - return rv, nil + + return rv, preallocRest, nil } func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded { diff --git a/vendor/github.com/blevesearch/bleve/search.go b/vendor/github.com/blevesearch/bleve/search.go index ebd69971ef..b337edc9e4 100644 --- a/vendor/github.com/blevesearch/bleve/search.go +++ b/vendor/github.com/blevesearch/bleve/search.go @@ -262,6 +262,8 @@ func (h *HighlightRequest) AddField(field string) { // result score explanations. // Sort describes the desired order for the results to be returned. // Score controls the kind of scoring performed +// SearchAfter supports deep paging by providing a minimum sort key +// SearchBefore supports deep paging by providing a maximum sort key // // A special field named "*" can be used to return all fields. type SearchRequest struct { @@ -275,6 +277,8 @@ type SearchRequest struct { Sort search.SortOrder `json:"sort"` IncludeLocations bool `json:"includeLocations"` Score string `json:"score,omitempty"` + SearchAfter []string `json:"search_after"` + SearchBefore []string `json:"search_before"` } func (r *SearchRequest) Validate() error { @@ -285,6 +289,27 @@ func (r *SearchRequest) Validate() error { } } + if r.SearchAfter != nil && r.SearchBefore != nil { + return fmt.Errorf("cannot use search after and search before together") + } + + if r.SearchAfter != nil { + if r.From != 0 { + return fmt.Errorf("cannot use search after with from !=0") + } + if len(r.SearchAfter) != len(r.Sort) { + return fmt.Errorf("search after must have same size as sort order") + } + } + if r.SearchBefore != nil { + if r.From != 0 { + return fmt.Errorf("cannot use search before with from !=0") + } + if len(r.SearchBefore) != len(r.Sort) { + return fmt.Errorf("search before must have same size as sort order") + } + } + return r.Facets.Validate() } @@ -311,6 +336,18 @@ func (r *SearchRequest) SortByCustom(order search.SortOrder) { r.Sort = order } +// SetSearchAfter sets the request to skip over hits with a sort +// value less than the provided sort after key +func (r *SearchRequest) SetSearchAfter(after []string) { + r.SearchAfter = after +} + +// SetSearchBefore sets the request to skip over hits with a sort +// value greater than the provided sort before key +func (r *SearchRequest) SetSearchBefore(before []string) { + r.SearchBefore = before +} + // UnmarshalJSON deserializes a JSON representation of // a SearchRequest func (r *SearchRequest) UnmarshalJSON(input []byte) error { @@ -325,6 +362,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { Sort []json.RawMessage `json:"sort"` IncludeLocations bool `json:"includeLocations"` Score string `json:"score"` + SearchAfter []string `json:"search_after"` + SearchBefore []string `json:"search_before"` } err := json.Unmarshal(input, &temp) @@ -352,6 +391,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { r.Facets = temp.Facets r.IncludeLocations = temp.IncludeLocations r.Score = temp.Score + r.SearchAfter = temp.SearchAfter + r.SearchBefore = temp.SearchBefore r.Query, err = query.ParseQuery(temp.Q) if err != nil { return err diff --git a/vendor/github.com/blevesearch/bleve/search/collector/topn.go b/vendor/github.com/blevesearch/bleve/search/collector/topn.go index 378a7b114a..a027a12c22 100644 --- a/vendor/github.com/blevesearch/bleve/search/collector/topn.go +++ b/vendor/github.com/blevesearch/bleve/search/collector/topn.go @@ -69,6 +69,7 @@ type TopNCollector struct { lowestMatchOutsideResults *search.DocumentMatch updateFieldVisitor index.DocumentFieldTermVisitor dvReader index.DocValueReader + searchAfter *search.DocumentMatch } // CheckDoneEvery controls how frequently we check the context deadline @@ -78,6 +79,21 @@ const CheckDoneEvery = uint64(1024) // skipping over the first 'skip' hits // ordering hits by the provided sort order func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector { + return newTopNCollector(size, skip, sort) +} + +// NewTopNCollector builds a collector to find the top 'size' hits +// skipping over the first 'skip' hits +// ordering hits by the provided sort order +func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector { + rv := newTopNCollector(size, 0, sort) + rv.searchAfter = &search.DocumentMatch{ + Sort: after, + } + return rv +} + +func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector { hc := &TopNCollector{size: size, skip: skip, sort: sort} // pre-allocate space on the store to avoid reslicing @@ -141,6 +157,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, searchContext := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)), Collector: hc, + IndexReader: reader, } hc.dvReader, err = reader.DocValueReader(hc.neededFields) @@ -265,6 +282,19 @@ func MakeTopNDocumentMatchHandler( if d == nil { return nil } + + // support search after based pagination, + // if this hit is <= the search after sort key + // we should skip it + if hc.searchAfter != nil { + // exact sort order matches use hit number to break tie + // but we want to allow for exact match, so we pretend + hc.searchAfter.HitNumber = d.HitNumber + if hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.searchAfter) <= 0 { + return nil + } + } + // optimization, we track lowest sorting hit already removed from heap // with this one comparison, we can avoid all heap operations if // this hit would have been added and then immediately removed diff --git a/vendor/github.com/blevesearch/bleve/search/query/date_range.go b/vendor/github.com/blevesearch/bleve/search/query/date_range.go index ff67a7bb70..3ac0322f55 100644 --- a/vendor/github.com/blevesearch/bleve/search/query/date_range.go +++ b/vendor/github.com/blevesearch/bleve/search/query/date_range.go @@ -41,6 +41,14 @@ type BleveQueryTime struct { time.Time } +var MinRFC3339CompatibleTime time.Time +var MaxRFC3339CompatibleTime time.Time + +func init() { + MinRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "1677-12-01T00:00:00Z") + MaxRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "2262-04-11T11:59:59Z") +} + func queryTimeFromString(t string) (time.Time, error) { dateTimeParser, err := cache.DateTimeParserNamed(QueryDateTimeParser) if err != nil { @@ -143,10 +151,20 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) { min := math.Inf(-1) max := math.Inf(1) if !q.Start.IsZero() { - min = numeric.Int64ToFloat64(q.Start.UnixNano()) + if !isDatetimeCompatible(q.Start) { + // overflow + return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start) + } + startInt64 := q.Start.UnixNano() + min = numeric.Int64ToFloat64(startInt64) } if !q.End.IsZero() { - max = numeric.Int64ToFloat64(q.End.UnixNano()) + if !isDatetimeCompatible(q.End) { + // overflow + return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End) + } + endInt64 := q.End.UnixNano() + max = numeric.Int64ToFloat64(endInt64) } return &min, &max, nil @@ -162,3 +180,12 @@ func (q *DateRangeQuery) Validate() error { } return nil } + +func isDatetimeCompatible(t BleveQueryTime) bool { + if QueryDateTimeFormat == time.RFC3339 && + (t.Before(MinRFC3339CompatibleTime) || t.After(MaxRFC3339CompatibleTime)) { + return false + } + + return true +} diff --git a/vendor/github.com/blevesearch/bleve/search/query/disjunction.go b/vendor/github.com/blevesearch/bleve/search/query/disjunction.go index 2bc1d70441..a1fc1439a6 100644 --- a/vendor/github.com/blevesearch/bleve/search/query/disjunction.go +++ b/vendor/github.com/blevesearch/bleve/search/query/disjunction.go @@ -80,12 +80,6 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, if len(ss) < 1 { return searcher.NewMatchNoneSearcher(i) - } else if len(ss) == 1 && int(q.Min) == ss[0].Min() { - // apply optimization only if both conditions below are satisfied: - // - disjunction searcher has only 1 child searcher - // - parent searcher's min setting is equal to child searcher's min - - return ss[0], nil } return searcher.NewDisjunctionSearcher(i, ss, q.Min, options) diff --git a/vendor/github.com/blevesearch/bleve/search/query/geo_boundingpolygon.go b/vendor/github.com/blevesearch/bleve/search/query/geo_boundingpolygon.go new file mode 100644 index 0000000000..41c7f7f3ab --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/search/query/geo_boundingpolygon.go @@ -0,0 +1,94 @@ +// Copyright (c) 2019 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package query + +import ( + "encoding/json" + "fmt" + + "github.com/blevesearch/bleve/geo" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/mapping" + "github.com/blevesearch/bleve/search" + "github.com/blevesearch/bleve/search/searcher" +) + +type GeoBoundingPolygonQuery struct { + Points []geo.Point `json:"polygon_points"` + FieldVal string `json:"field,omitempty"` + BoostVal *Boost `json:"boost,omitempty"` +} + +func NewGeoBoundingPolygonQuery(points []geo.Point) *GeoBoundingPolygonQuery { + return &GeoBoundingPolygonQuery{ + Points: points} +} + +func (q *GeoBoundingPolygonQuery) SetBoost(b float64) { + boost := Boost(b) + q.BoostVal = &boost +} + +func (q *GeoBoundingPolygonQuery) Boost() float64 { + return q.BoostVal.Value() +} + +func (q *GeoBoundingPolygonQuery) SetField(f string) { + q.FieldVal = f +} + +func (q *GeoBoundingPolygonQuery) Field() string { + return q.FieldVal +} + +func (q *GeoBoundingPolygonQuery) Searcher(i index.IndexReader, + m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { + field := q.FieldVal + if q.FieldVal == "" { + field = m.DefaultSearchField() + } + + return searcher.NewGeoBoundedPolygonSearcher(i, q.Points, field, q.BoostVal.Value(), options) +} + +func (q *GeoBoundingPolygonQuery) Validate() error { + return nil +} + +func (q *GeoBoundingPolygonQuery) UnmarshalJSON(data []byte) error { + tmp := struct { + Points []interface{} `json:"polygon_points"` + FieldVal string `json:"field,omitempty"` + BoostVal *Boost `json:"boost,omitempty"` + }{} + err := json.Unmarshal(data, &tmp) + if err != nil { + return err + } + + q.Points = make([]geo.Point, 0, len(tmp.Points)) + for _, i := range tmp.Points { + // now use our generic point parsing code from the geo package + lon, lat, found := geo.ExtractGeoPoint(i) + if !found { + return fmt.Errorf("geo polygon point: %v is not in a valid format", i) + } + q.Points = append(q.Points, geo.Point{Lon: lon, Lat: lat}) + } + + q.FieldVal = tmp.FieldVal + q.BoostVal = tmp.BoostVal + return nil +} diff --git a/vendor/github.com/blevesearch/bleve/search/query/query.go b/vendor/github.com/blevesearch/bleve/search/query/query.go index c7c1eefb80..18aca228d4 100644 --- a/vendor/github.com/blevesearch/bleve/search/query/query.go +++ b/vendor/github.com/blevesearch/bleve/search/query/query.go @@ -273,6 +273,15 @@ func ParseQuery(input []byte) (Query, error) { } return &rv, nil } + _, hasPoints := tmp["polygon_points"] + if hasPoints { + var rv GeoBoundingPolygonQuery + err := json.Unmarshal(input, &rv) + if err != nil { + return nil, err + } + return &rv, nil + } return nil, fmt.Errorf("unknown query type") } diff --git a/vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go b/vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go index 5544f2d011..718de2ea5e 100644 --- a/vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go +++ b/vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go @@ -40,6 +40,7 @@ type TermQueryScorer struct { idf float64 options search.SearcherOptions idfExplanation *search.Explanation + includeScore bool queryNorm float64 queryWeight float64 queryWeightExplanation *search.Explanation @@ -62,14 +63,15 @@ func (s *TermQueryScorer) Size() int { func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer { rv := TermQueryScorer{ - queryTerm: string(queryTerm), - queryField: queryField, - queryBoost: queryBoost, - docTerm: docTerm, - docTotal: docTotal, - idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)), - options: options, - queryWeight: 1.0, + queryTerm: string(queryTerm), + queryField: queryField, + queryBoost: queryBoost, + docTerm: docTerm, + docTotal: docTotal, + idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)), + options: options, + queryWeight: 1.0, + includeScore: options.Score != "none", } if options.Explain { @@ -113,56 +115,61 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) { } func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch { - var scoreExplanation *search.Explanation - - // need to compute score - var tf float64 - if termMatch.Freq < MaxSqrtCache { - tf = SqrtCache[int(termMatch.Freq)] - } else { - tf = math.Sqrt(float64(termMatch.Freq)) - } - score := tf * termMatch.Norm * s.idf - - if s.options.Explain { - childrenExplanations := make([]*search.Explanation, 3) - childrenExplanations[0] = &search.Explanation{ - Value: tf, - Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq), - } - childrenExplanations[1] = &search.Explanation{ - Value: termMatch.Norm, - Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID), - } - childrenExplanations[2] = s.idfExplanation - scoreExplanation = &search.Explanation{ - Value: score, - Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID), - Children: childrenExplanations, + rv := ctx.DocumentMatchPool.Get() + // perform any score computations only when needed + if s.includeScore || s.options.Explain { + var scoreExplanation *search.Explanation + var tf float64 + if termMatch.Freq < MaxSqrtCache { + tf = SqrtCache[int(termMatch.Freq)] + } else { + tf = math.Sqrt(float64(termMatch.Freq)) } - } + score := tf * termMatch.Norm * s.idf - // if the query weight isn't 1, multiply - if s.queryWeight != 1.0 { - score = score * s.queryWeight if s.options.Explain { - childExplanations := make([]*search.Explanation, 2) - childExplanations[0] = s.queryWeightExplanation - childExplanations[1] = scoreExplanation + childrenExplanations := make([]*search.Explanation, 3) + childrenExplanations[0] = &search.Explanation{ + Value: tf, + Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq), + } + childrenExplanations[1] = &search.Explanation{ + Value: termMatch.Norm, + Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID), + } + childrenExplanations[2] = s.idfExplanation scoreExplanation = &search.Explanation{ Value: score, - Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID), - Children: childExplanations, + Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID), + Children: childrenExplanations, } } + + // if the query weight isn't 1, multiply + if s.queryWeight != 1.0 { + score = score * s.queryWeight + if s.options.Explain { + childExplanations := make([]*search.Explanation, 2) + childExplanations[0] = s.queryWeightExplanation + childExplanations[1] = scoreExplanation + scoreExplanation = &search.Explanation{ + Value: score, + Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID), + Children: childExplanations, + } + } + } + + if s.includeScore { + rv.Score = score + } + + if s.options.Explain { + rv.Expl = scoreExplanation + } } - rv := ctx.DocumentMatchPool.Get() rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...) - rv.Score = score - if s.options.Explain { - rv.Expl = scoreExplanation - } if len(termMatch.Vectors) > 0 { if cap(rv.FieldTermLocations) < len(termMatch.Vectors) { diff --git a/vendor/github.com/blevesearch/bleve/search/search.go b/vendor/github.com/blevesearch/bleve/search/search.go index f8a282d165..8ed23de454 100644 --- a/vendor/github.com/blevesearch/bleve/search/search.go +++ b/vendor/github.com/blevesearch/bleve/search/search.go @@ -17,6 +17,7 @@ package search import ( "fmt" "reflect" + "sort" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/size" @@ -49,6 +50,24 @@ func (ap ArrayPositions) Equals(other ArrayPositions) bool { return true } +func (ap ArrayPositions) Compare(other ArrayPositions) int { + for i, p := range ap { + if i >= len(other) { + return 1 + } + if p < other[i] { + return -1 + } + if p > other[i] { + return 1 + } + } + if len(ap) < len(other) { + return -1 + } + return 0 +} + type Location struct { // Pos is the position of the term within the field, starting at 1 Pos uint64 `json:"pos"` @@ -68,6 +87,46 @@ func (l *Location) Size() int { type Locations []*Location +func (p Locations) Len() int { return len(p) } +func (p Locations) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +func (p Locations) Less(i, j int) bool { + c := p[i].ArrayPositions.Compare(p[j].ArrayPositions) + if c < 0 { + return true + } + if c > 0 { + return false + } + return p[i].Pos < p[j].Pos +} + +func (p Locations) Dedupe() Locations { // destructive! + if len(p) <= 1 { + return p + } + + sort.Sort(p) + + slow := 0 + + for _, pfast := range p { + pslow := p[slow] + if pslow.Pos == pfast.Pos && + pslow.Start == pfast.Start && + pslow.End == pfast.End && + pslow.ArrayPositions.Equals(pfast.ArrayPositions) { + continue // duplicate, so only move fast ahead + } + + slow++ + + p[slow] = pfast + } + + return p[:slow+1] +} + type TermLocationMap map[string]Locations func (t TermLocationMap) AddLocation(term string, location *Location) { @@ -208,6 +267,7 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location { var lastField string var tlm TermLocationMap + var needsDedupe bool for i, ftl := range dm.FieldTermLocations { if lastField != ftl.Field { @@ -231,7 +291,19 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location { loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...) } - tlm[ftl.Term] = append(tlm[ftl.Term], loc) + locs := tlm[ftl.Term] + + // if the loc is before or at the last location, then there + // might be duplicates that need to be deduplicated + if !needsDedupe && len(locs) > 0 { + last := locs[len(locs)-1] + cmp := loc.ArrayPositions.Compare(last.ArrayPositions) + if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) { + needsDedupe = true + } + } + + tlm[ftl.Term] = append(locs, loc) dm.FieldTermLocations[i] = FieldTermLocation{ // recycle Location: Location{ @@ -239,6 +311,14 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location { }, } } + + if needsDedupe { + for _, tlm := range dm.Locations { + for term, locs := range tlm { + tlm[term] = locs.Dedupe() + } + } + } } dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle @@ -279,6 +359,7 @@ type SearcherOptions struct { type SearchContext struct { DocumentMatchPool *DocumentMatchPool Collector Collector + IndexReader index.IndexReader } func (sc *SearchContext) Size() int { diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go index bbbced4795..7f0bfa4246 100644 --- a/vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go +++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go @@ -45,6 +45,7 @@ type BooleanSearcher struct { scorer *scorer.ConjunctionQueryScorer matches []*search.DocumentMatch initialized bool + done bool } func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) { @@ -207,6 +208,10 @@ func (s *BooleanSearcher) SetQueryNorm(qnorm float64) { func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { + if s.done { + return nil, nil + } + if !s.initialized { err := s.initSearchers(ctx) if err != nil { @@ -320,11 +325,19 @@ func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch } } + if rv == nil { + s.done = true + } + return rv, nil } func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) { + if s.done { + return nil, nil + } + if !s.initialized { err := s.initSearchers(ctx) if err != nil { @@ -332,14 +345,8 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter } } - // Advance the searchers only if the currentID cursor is trailing the lookup ID, - // additionally if the mustNotSearcher has been initialized, ensure that the - // cursor used to track the mustNotSearcher (currMustNot, which isn't tracked by - // currentID) is trailing the lookup ID as well - for in the case where currentID - // is nil and currMustNot is already at or ahead of the lookup ID, we MUST NOT - // advance the currentID or the currMustNot cursors. - if (s.currentID == nil || s.currentID.Compare(ID) < 0) && - (s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0) { + // Advance the searcher only if the cursor is trailing the lookup ID + if s.currentID == nil || s.currentID.Compare(ID) < 0 { var err error if s.mustSearcher != nil { if s.currMust != nil { @@ -362,12 +369,17 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter } if s.mustNotSearcher != nil { - if s.currMustNot != nil { - ctx.DocumentMatchPool.Put(s.currMustNot) - } - s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID) - if err != nil { - return nil, err + // Additional check for mustNotSearcher, whose cursor isn't tracked by + // currentID to prevent it from moving when the searcher's tracked + // position is already ahead of or at the requested ID. + if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 { + if s.currMustNot != nil { + ctx.DocumentMatchPool.Put(s.currMustNot) + } + s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID) + if err != nil { + return nil, err + } } } diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go index 289e416782..38cb6467fb 100644 --- a/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go +++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go @@ -22,6 +22,11 @@ import ( "github.com/blevesearch/bleve/search" ) +type filterFunc func(key []byte) bool + +var GeoBitsShift1 = (geo.GeoBits << 1) +var GeoBitsShift1Minus1 = GeoBitsShift1 - 1 + func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, maxLon, maxLat float64, field string, boost float64, options search.SearcherOptions, checkBoundaries bool) ( @@ -36,8 +41,11 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, } // do math to produce list of terms needed for this search - onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, (geo.GeoBits<<1)-1, - minLon, minLat, maxLon, maxLat, checkBoundaries) + onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(0, GeoBitsShift1Minus1, + minLon, minLat, maxLon, maxLat, checkBoundaries, indexReader, field) + if err != nil { + return nil, err + } var onBoundarySearcher search.Searcher dvReader, err := indexReader.DocValueReader([]string{field}) @@ -94,59 +102,123 @@ var geoMaxShift = document.GeoPrecisionStep * 4 var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2 func ComputeGeoRange(term uint64, shift uint, - sminLon, sminLat, smaxLon, smaxLat float64, - checkBoundaries bool) ( - onBoundary [][]byte, notOnBoundary [][]byte) { - split := term | uint64(0x1)<<shift - var upperMax uint64 - if shift < 63 { - upperMax = term | ((uint64(1) << (shift + 1)) - 1) - } else { - upperMax = 0xffffffffffffffff + sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool, + indexReader index.IndexReader, field string) ( + onBoundary [][]byte, notOnBoundary [][]byte, err error) { + preallocBytesLen := 32 + preallocBytes := make([]byte, preallocBytesLen) + + makePrefixCoded := func(in int64, shift uint) (rv numeric.PrefixCoded) { + if len(preallocBytes) <= 0 { + preallocBytesLen = preallocBytesLen * 2 + preallocBytes = make([]byte, preallocBytesLen) + } + + rv, preallocBytes, err = + numeric.NewPrefixCodedInt64Prealloc(in, shift, preallocBytes) + + return rv + } + + var fieldDict index.FieldDictContains + var isIndexed filterFunc + if irr, ok := indexReader.(index.IndexReaderContains); ok { + fieldDict, err = irr.FieldDictContains(field) + if err != nil { + return nil, nil, err + } + + isIndexed = func(term []byte) bool { + found, err := fieldDict.Contains(term) + return err == nil && found + } } - lowerMax := split - 1 - onBoundary, notOnBoundary = relateAndRecurse(term, lowerMax, shift, - sminLon, sminLat, smaxLon, smaxLat, checkBoundaries) - plusOnBoundary, plusNotOnBoundary := relateAndRecurse(split, upperMax, shift, - sminLon, sminLat, smaxLon, smaxLat, checkBoundaries) - onBoundary = append(onBoundary, plusOnBoundary...) - notOnBoundary = append(notOnBoundary, plusNotOnBoundary...) - return -} -func relateAndRecurse(start, end uint64, res uint, - sminLon, sminLat, smaxLon, smaxLat float64, - checkBoundaries bool) ( - onBoundary [][]byte, notOnBoundary [][]byte) { - minLon := geo.MortonUnhashLon(start) - minLat := geo.MortonUnhashLat(start) - maxLon := geo.MortonUnhashLon(end) - maxLat := geo.MortonUnhashLat(end) - - level := ((geo.GeoBits << 1) - res) >> 1 - - within := res%document.GeoPrecisionStep == 0 && - geo.RectWithin(minLon, minLat, maxLon, maxLat, - sminLon, sminLat, smaxLon, smaxLat) - if within || (level == geoDetailLevel && - geo.RectIntersects(minLon, minLat, maxLon, maxLat, - sminLon, sminLat, smaxLon, smaxLat)) { - if !within && checkBoundaries { - return [][]byte{ - numeric.MustNewPrefixCodedInt64(int64(start), res), - }, nil + defer func() { + if fieldDict != nil { + if fd, ok := fieldDict.(index.FieldDict); ok { + cerr := fd.Close() + if cerr != nil { + err = cerr + } + } } - return nil, - [][]byte{ - numeric.MustNewPrefixCodedInt64(int64(start), res), + }() + + if isIndexed == nil { + isIndexed = func(term []byte) bool { + if indexReader != nil { + reader, err := indexReader.TermFieldReader(term, field, false, false, false) + if err != nil || reader == nil { + return false + } + if reader.Count() == 0 { + _ = reader.Close() + return false + } + _ = reader.Close() } - } else if level < geoDetailLevel && - geo.RectIntersects(minLon, minLat, maxLon, maxLat, - sminLon, sminLat, smaxLon, smaxLat) { - return ComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat, - checkBoundaries) + return true + } } - return nil, nil + + var computeGeoRange func(term uint64, shift uint) // declare for recursion + + relateAndRecurse := func(start, end uint64, res, level uint) { + minLon := geo.MortonUnhashLon(start) + minLat := geo.MortonUnhashLat(start) + maxLon := geo.MortonUnhashLon(end) + maxLat := geo.MortonUnhashLat(end) + + within := res%document.GeoPrecisionStep == 0 && + geo.RectWithin(minLon, minLat, maxLon, maxLat, + sminLon, sminLat, smaxLon, smaxLat) + if within || (level == geoDetailLevel && + geo.RectIntersects(minLon, minLat, maxLon, maxLat, + sminLon, sminLat, smaxLon, smaxLat)) { + codedTerm := makePrefixCoded(int64(start), res) + if isIndexed(codedTerm) { + if !within && checkBoundaries { + onBoundary = append(onBoundary, codedTerm) + } else { + notOnBoundary = append(notOnBoundary, codedTerm) + } + } + } else if level < geoDetailLevel && + geo.RectIntersects(minLon, minLat, maxLon, maxLat, + sminLon, sminLat, smaxLon, smaxLat) { + computeGeoRange(start, res-1) + } + } + + computeGeoRange = func(term uint64, shift uint) { + if err != nil { + return + } + + split := term | uint64(0x1)<<shift + var upperMax uint64 + if shift < 63 { + upperMax = term | ((uint64(1) << (shift + 1)) - 1) + } else { + upperMax = 0xffffffffffffffff + } + + lowerMax := split - 1 + + level := (GeoBitsShift1 - shift) >> 1 + + relateAndRecurse(term, lowerMax, shift, level) + relateAndRecurse(split, upperMax, shift, level) + } + + computeGeoRange(term, shift) + + if err != nil { + return nil, nil, err + } + + return onBoundary, notOnBoundary, err } func buildRectFilter(dvReader index.DocValueReader, field string, diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go index a15c194e86..b01ae6a0af 100644 --- a/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go +++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go @@ -34,7 +34,7 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon, // build a searcher for the box boxSearcher, err := boxSearcher(indexReader, topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, - field, boost, options) + field, boost, options, false) if err != nil { return nil, err } @@ -54,19 +54,20 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon, // two boxes joined through a disjunction searcher func boxSearcher(indexReader index.IndexReader, topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64, - field string, boost float64, options search.SearcherOptions) ( + field string, boost float64, options search.SearcherOptions, checkBoundaries bool) ( search.Searcher, error) { if bottomRightLon < topLeftLon { // cross date line, rewrite as two parts leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader, -180, bottomRightLat, bottomRightLon, topLeftLat, - field, boost, options, false) + field, boost, options, checkBoundaries) if err != nil { return nil, err } rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader, - topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, false) + topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, + checkBoundaries) if err != nil { _ = leftSearcher.Close() return nil, err @@ -85,7 +86,7 @@ func boxSearcher(indexReader index.IndexReader, // build geoboundinggox searcher for that bounding box boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader, topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost, - options, false) + options, checkBoundaries) if err != nil { return nil, err } diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go new file mode 100644 index 0000000000..3bb47519d0 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go @@ -0,0 +1,110 @@ +// Copyright (c) 2019 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package searcher + +import ( + "github.com/blevesearch/bleve/geo" + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/numeric" + "github.com/blevesearch/bleve/search" + "math" +) + +func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader, + polygon []geo.Point, field string, boost float64, + options search.SearcherOptions) (search.Searcher, error) { + + // compute the bounding box enclosing the polygon + topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err := + geo.BoundingRectangleForPolygon(polygon) + if err != nil { + return nil, err + } + + // build a searcher for the bounding box on the polygon + boxSearcher, err := boxSearcher(indexReader, + topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, + field, boost, options, true) + if err != nil { + return nil, err + } + + dvReader, err := indexReader.DocValueReader([]string{field}) + if err != nil { + return nil, err + } + + // wrap it in a filtering searcher that checks for the polygon inclusivity + return NewFilteringSearcher(boxSearcher, + buildPolygonFilter(dvReader, field, polygon)), nil +} + +const float64EqualityThreshold = 1e-6 + +func almostEqual(a, b float64) bool { + return math.Abs(a-b) <= float64EqualityThreshold +} + +// buildPolygonFilter returns true if the point lies inside the +// polygon. It is based on the ray-casting technique as referred +// here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html +func buildPolygonFilter(dvReader index.DocValueReader, field string, + polygon []geo.Point) FilterFunc { + return func(d *search.DocumentMatch) bool { + var lon, lat float64 + var found bool + + err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) { + // only consider the values which are shifted 0 + prefixCoded := numeric.PrefixCoded(term) + shift, err := prefixCoded.Shift() + if err == nil && shift == 0 { + i64, err := prefixCoded.Int64() + if err == nil { + lon = geo.MortonUnhashLon(uint64(i64)) + lat = geo.MortonUnhashLat(uint64(i64)) + found = true + } + } + }) + + // Note: this approach works for points which are strictly inside + // the polygon. ie it might fail for certain points on the polygon boundaries. + if err == nil && found { + nVertices := len(polygon) + var inside bool + // check for a direct vertex match + if almostEqual(polygon[0].Lat, lat) && + almostEqual(polygon[0].Lon, lon) { + return true + } + + for i := 1; i < nVertices; i++ { + if almostEqual(polygon[i].Lat, lat) && + almostEqual(polygon[i].Lon, lon) { + return true + } + if (polygon[i].Lat > lat) != (polygon[i-1].Lat > lat) && + lon < (polygon[i-1].Lon-polygon[i].Lon)*(lat-polygon[i].Lat)/ + (polygon[i-1].Lat-polygon[i].Lat)+polygon[i].Lon { + inside = !inside + } + } + return inside + + } + return false + } +} diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go index e52ef9a825..83107f0201 100644 --- a/vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go +++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go @@ -53,20 +53,49 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, if !*inclusiveMax && maxInt64 != math.MinInt64 { maxInt64-- } + + var fieldDict index.FieldDictContains + var isIndexed filterFunc + var err error + if irr, ok := indexReader.(index.IndexReaderContains); ok { + fieldDict, err = irr.FieldDictContains(field) + if err != nil { + return nil, err + } + + isIndexed = func(term []byte) bool { + found, err := fieldDict.Contains(term) + return err == nil && found + } + } + // FIXME hard-coded precision, should match field declaration termRanges := splitInt64Range(minInt64, maxInt64, 4) - terms := termRanges.Enumerate() + terms := termRanges.Enumerate(isIndexed) + if fieldDict != nil { + if fd, ok := fieldDict.(index.FieldDict); ok { + cerr := fd.Close() + if cerr != nil { + err = cerr + } + } + } + if len(terms) < 1 { // cannot return MatchNoneSearcher because of interaction with // commit f391b991c20f02681bacd197afc6d8aed444e132 return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options, true) } - var err error - terms, err = filterCandidateTerms(indexReader, terms, field) - if err != nil { - return nil, err + + // for upside_down + if isIndexed == nil { + terms, err = filterCandidateTerms(indexReader, terms, field) + if err != nil { + return nil, err + } } + if tooManyClauses(len(terms)) { return nil, tooManyClausesErr(len(terms)) } @@ -125,11 +154,17 @@ type termRange struct { endTerm []byte } -func (t *termRange) Enumerate() [][]byte { +func (t *termRange) Enumerate(filter filterFunc) [][]byte { var rv [][]byte next := t.startTerm for bytes.Compare(next, t.endTerm) <= 0 { - rv = append(rv, next) + if filter != nil { + if filter(next) { + rv = append(rv, next) + } + } else { + rv = append(rv, next) + } next = incrementBytes(next) } return rv @@ -150,10 +185,10 @@ func incrementBytes(in []byte) []byte { type termRanges []*termRange -func (tr termRanges) Enumerate() [][]byte { +func (tr termRanges) Enumerate(filter filterFunc) [][]byte { var rv [][]byte for _, tri := range tr { - trie := tri.Enumerate() + trie := tri.Enumerate(filter) rv = append(rv, trie...) } return rv diff --git a/vendor/github.com/blevesearch/bleve/search/sort.go b/vendor/github.com/blevesearch/bleve/search/sort.go index e17f707879..6e4ed80fa2 100644 --- a/vendor/github.com/blevesearch/bleve/search/sort.go +++ b/vendor/github.com/blevesearch/bleve/search/sort.go @@ -38,6 +38,8 @@ type SearchSort interface { RequiresScoring() bool RequiresFields() []string + Reverse() + Copy() SearchSort } @@ -293,6 +295,12 @@ func (so SortOrder) CacheDescending() []bool { return rv } +func (so SortOrder) Reverse() { + for _, soi := range so { + soi.Reverse() + } +} + // SortFieldType lets you control some internal sort behavior // normally leaving this to the zero-value of SortFieldAuto is fine type SortFieldType int @@ -492,6 +500,15 @@ func (s *SortField) Copy() SearchSort { return &rv } +func (s *SortField) Reverse() { + s.Desc = !s.Desc + if s.Missing == SortFieldMissingFirst { + s.Missing = SortFieldMissingLast + } else { + s.Missing = SortFieldMissingFirst + } +} + // SortDocID will sort results by the document identifier type SortDocID struct { Desc bool @@ -533,6 +550,10 @@ func (s *SortDocID) Copy() SearchSort { return &rv } +func (s *SortDocID) Reverse() { + s.Desc = !s.Desc +} + // SortScore will sort results by the document match score type SortScore struct { Desc bool @@ -574,6 +595,10 @@ func (s *SortScore) Copy() SearchSort { return &rv } +func (s *SortScore) Reverse() { + s.Desc = !s.Desc +} + var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0)) // NewSortGeoDistance creates SearchSort instance for sorting documents by @@ -705,6 +730,10 @@ func (s *SortGeoDistance) Copy() SearchSort { return &rv } +func (s *SortGeoDistance) Reverse() { + s.Desc = !s.Desc +} + type BytesSlice [][]byte func (p BytesSlice) Len() int { return len(p) } |