summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/blevesearch
diff options
context:
space:
mode:
authorMura Li <typeless@users.noreply.github.com>2019-11-27 17:23:33 +0800
committerLauris BH <lauris@nix.lv>2019-11-27 11:23:33 +0200
commit9591185c8f45fefb39e84aa465a943b24ad60a26 (patch)
treec46c697093c98eb5404aa7e2519fbe4ba1a2ad47 /vendor/github.com/blevesearch
parentb50dee5a61b3959bb3606973d359741f0ca9af9a (diff)
downloadgitea-9591185c8f45fefb39e84aa465a943b24ad60a26.tar.gz
gitea-9591185c8f45fefb39e84aa465a943b24ad60a26.zip
Upgrade blevesearch to v0.8.1 (#9177)
For #1441 https://github.com/blevesearch/bleve/commit/a91b427b59b893f112021841ba7370d285f8426f
Diffstat (limited to 'vendor/github.com/blevesearch')
-rw-r--r--vendor/github.com/blevesearch/bleve/.travis.yml9
-rw-r--r--vendor/github.com/blevesearch/bleve/document/field_text.go4
-rw-r--r--vendor/github.com/blevesearch/bleve/geo/geo.go38
-rw-r--r--vendor/github.com/blevesearch/bleve/geo/geohash.go223
-rw-r--r--vendor/github.com/blevesearch/bleve/geo/parse.go2
-rw-r--r--vendor/github.com/blevesearch/bleve/index.go2
-rw-r--r--vendor/github.com/blevesearch/bleve/index/index.go8
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/introducer.go4
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/merge.go25
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/persister.go57
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/scorch.go13
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go8
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go84
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go2
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go2
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go2
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go17
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go14
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go8
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go13
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go133
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go39
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go62
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go15
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go2
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/stats.go3
-rw-r--r--vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go18
-rw-r--r--vendor/github.com/blevesearch/bleve/index_alias_impl.go46
-rw-r--r--vendor/github.com/blevesearch/bleve/index_impl.go199
-rw-r--r--vendor/github.com/blevesearch/bleve/mapping/document.go26
-rw-r--r--vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go21
-rw-r--r--vendor/github.com/blevesearch/bleve/search.go41
-rw-r--r--vendor/github.com/blevesearch/bleve/search/collector/topn.go30
-rw-r--r--vendor/github.com/blevesearch/bleve/search/query/date_range.go31
-rw-r--r--vendor/github.com/blevesearch/bleve/search/query/disjunction.go6
-rw-r--r--vendor/github.com/blevesearch/bleve/search/query/geo_boundingpolygon.go94
-rw-r--r--vendor/github.com/blevesearch/bleve/search/query/query.go9
-rw-r--r--vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go103
-rw-r--r--vendor/github.com/blevesearch/bleve/search/search.go83
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go40
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go172
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go11
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go110
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go53
-rw-r--r--vendor/github.com/blevesearch/bleve/search/sort.go29
-rw-r--r--vendor/github.com/blevesearch/segment/.travis.yml3
46 files changed, 1378 insertions, 536 deletions
diff --git a/vendor/github.com/blevesearch/bleve/.travis.yml b/vendor/github.com/blevesearch/bleve/.travis.yml
index 35f7b60f23..e00e7b9948 100644
--- a/vendor/github.com/blevesearch/bleve/.travis.yml
+++ b/vendor/github.com/blevesearch/bleve/.travis.yml
@@ -3,9 +3,9 @@ sudo: false
language: go
go:
- - "1.9.x"
- "1.10.x"
- "1.11.x"
+ - "1.12.x"
script:
- go get golang.org/x/tools/cmd/cover
@@ -15,7 +15,12 @@ script:
- gvt restore
- go test -race -v $(go list ./... | grep -v vendor/)
- go vet $(go list ./... | grep -v vendor/)
- - errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/)
+ - go test ./test -v -indexType scorch
+ - if [[ ${TRAVIS_GO_VERSION} =~ ^1\.10 ]]; then
+ echo "errcheck skipped for go version" $TRAVIS_GO_VERSION;
+ else
+ errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/);
+ fi
- docs/project-code-coverage.sh
- docs/build_children.sh
diff --git a/vendor/github.com/blevesearch/bleve/document/field_text.go b/vendor/github.com/blevesearch/bleve/document/field_text.go
index c8e871c9d5..6bd74c7127 100644
--- a/vendor/github.com/blevesearch/bleve/document/field_text.go
+++ b/vendor/github.com/blevesearch/bleve/document/field_text.go
@@ -86,6 +86,10 @@ func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
return fieldLength, tokenFreqs
}
+func (t *TextField) Analyzer() *analysis.Analyzer {
+ return t.analyzer
+}
+
func (t *TextField) Value() []byte {
return t.value
}
diff --git a/vendor/github.com/blevesearch/bleve/geo/geo.go b/vendor/github.com/blevesearch/bleve/geo/geo.go
index 86861b4f3b..583451e308 100644
--- a/vendor/github.com/blevesearch/bleve/geo/geo.go
+++ b/vendor/github.com/blevesearch/bleve/geo/geo.go
@@ -37,6 +37,12 @@ var geoTolerance = 1E-6
var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0
var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0
+// Point represents a geo point.
+type Point struct {
+ Lon float64
+ Lat float64
+}
+
// MortonHash computes the morton hash value for the provided geo point
// This point is ordered as lon, lat.
func MortonHash(lon, lat float64) uint64 {
@@ -168,3 +174,35 @@ func checkLongitude(longitude float64) error {
}
return nil
}
+
+func BoundingRectangleForPolygon(polygon []Point) (
+ float64, float64, float64, float64, error) {
+ err := checkLongitude(polygon[0].Lon)
+ if err != nil {
+ return 0, 0, 0, 0, err
+ }
+ err = checkLatitude(polygon[0].Lat)
+ if err != nil {
+ return 0, 0, 0, 0, err
+ }
+ maxY, minY := polygon[0].Lat, polygon[0].Lat
+ maxX, minX := polygon[0].Lon, polygon[0].Lon
+ for i := 1; i < len(polygon); i++ {
+ err := checkLongitude(polygon[i].Lon)
+ if err != nil {
+ return 0, 0, 0, 0, err
+ }
+ err = checkLatitude(polygon[i].Lat)
+ if err != nil {
+ return 0, 0, 0, 0, err
+ }
+
+ maxY = math.Max(maxY, polygon[i].Lat)
+ minY = math.Min(minY, polygon[i].Lat)
+
+ maxX = math.Max(maxX, polygon[i].Lon)
+ minX = math.Min(minX, polygon[i].Lon)
+ }
+
+ return minX, maxY, maxX, minY, nil
+}
diff --git a/vendor/github.com/blevesearch/bleve/geo/geohash.go b/vendor/github.com/blevesearch/bleve/geo/geohash.go
index 35db720c0f..d3d4dfa8b5 100644
--- a/vendor/github.com/blevesearch/bleve/geo/geohash.go
+++ b/vendor/github.com/blevesearch/bleve/geo/geohash.go
@@ -1,32 +1,21 @@
-// The code here was obtained from:
-// https://github.com/mmcloughlin/geohash
-
-// The MIT License (MIT)
-// Copyright (c) 2015 Michael McLoughlin
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-
-// The above copyright notice and this permission notice shall be included in all
-// copies or substantial portions of the Software.
-
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
+// Copyright (c) 2019 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// This implementation is inspired from the geohash-js
+// ref: https://github.com/davetroy/geohash-js
package geo
-import (
- "math"
-)
-
// encoding encapsulates an encoding defined by a given base32 alphabet.
type encoding struct {
enc string
@@ -47,128 +36,76 @@ func newEncoding(encoder string) *encoding {
return e
}
-// Decode string into bits of a 64-bit word. The string s may be at most 12
-// characters.
-func (e *encoding) decode(s string) uint64 {
- x := uint64(0)
- for i := 0; i < len(s); i++ {
- x = (x << 5) | uint64(e.dec[s[i]])
- }
- return x
-}
-
-// Encode bits of 64-bit word into a string.
-func (e *encoding) encode(x uint64) string {
- b := [12]byte{}
- for i := 0; i < 12; i++ {
- b[11-i] = e.enc[x&0x1f]
- x >>= 5
- }
- return string(b[:])
-}
-
-// Base32Encoding with the Geohash alphabet.
+// base32encoding with the Geohash alphabet.
var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz")
-// BoundingBox returns the region encoded by the given string geohash.
-func geoBoundingBox(hash string) geoBox {
- bits := uint(5 * len(hash))
- inthash := base32encoding.decode(hash)
- return geoBoundingBoxIntWithPrecision(inthash, bits)
-}
-
-// Box represents a rectangle in latitude/longitude space.
-type geoBox struct {
- minLat float64
- maxLat float64
- minLng float64
- maxLng float64
-}
-
-// Round returns a point inside the box, making an effort to round to minimal
-// precision.
-func (b geoBox) round() (lat, lng float64) {
- x := maxDecimalPower(b.maxLat - b.minLat)
- lat = math.Ceil(b.minLat/x) * x
- x = maxDecimalPower(b.maxLng - b.minLng)
- lng = math.Ceil(b.minLng/x) * x
- return
-}
-
-// precalculated for performance
-var exp232 = math.Exp2(32)
-
-// errorWithPrecision returns the error range in latitude and longitude for in
-// integer geohash with bits of precision.
-func errorWithPrecision(bits uint) (latErr, lngErr float64) {
- b := int(bits)
- latBits := b / 2
- lngBits := b - latBits
- latErr = math.Ldexp(180.0, -latBits)
- lngErr = math.Ldexp(360.0, -lngBits)
- return
-}
-
-// minDecimalPlaces returns the minimum number of decimal places such that
-// there must exist an number with that many places within any range of width
-// r. This is intended for returning minimal precision coordinates inside a
-// box.
-func maxDecimalPower(r float64) float64 {
- m := int(math.Floor(math.Log10(r)))
- return math.Pow10(m)
-}
-
-// Encode the position of x within the range -r to +r as a 32-bit integer.
-func encodeRange(x, r float64) uint32 {
- p := (x + r) / (2 * r)
- return uint32(p * exp232)
-}
-
-// Decode the 32-bit range encoding X back to a value in the range -r to +r.
-func decodeRange(X uint32, r float64) float64 {
- p := float64(X) / exp232
- x := 2*r*p - r
- return x
-}
-
-// Squash the even bitlevels of X into a 32-bit word. Odd bitlevels of X are
-// ignored, and may take any value.
-func squash(X uint64) uint32 {
- X &= 0x5555555555555555
- X = (X | (X >> 1)) & 0x3333333333333333
- X = (X | (X >> 2)) & 0x0f0f0f0f0f0f0f0f
- X = (X | (X >> 4)) & 0x00ff00ff00ff00ff
- X = (X | (X >> 8)) & 0x0000ffff0000ffff
- X = (X | (X >> 16)) & 0x00000000ffffffff
- return uint32(X)
-}
+var masks = []uint64{16, 8, 4, 2, 1}
+
+// DecodeGeoHash decodes the string geohash faster with
+// higher precision. This api is in experimental phase.
+func DecodeGeoHash(geoHash string) (float64, float64) {
+ even := true
+ lat := []float64{-90.0, 90.0}
+ lon := []float64{-180.0, 180.0}
+
+ for i := 0; i < len(geoHash); i++ {
+ cd := uint64(base32encoding.dec[geoHash[i]])
+ for j := 0; j < 5; j++ {
+ if even {
+ if cd&masks[j] > 0 {
+ lon[0] = (lon[0] + lon[1]) / 2
+ } else {
+ lon[1] = (lon[0] + lon[1]) / 2
+ }
+ } else {
+ if cd&masks[j] > 0 {
+ lat[0] = (lat[0] + lat[1]) / 2
+ } else {
+ lat[1] = (lat[0] + lat[1]) / 2
+ }
+ }
+ even = !even
+ }
+ }
-// Deinterleave the bits of X into 32-bit words containing the even and odd
-// bitlevels of X, respectively.
-func deinterleave(X uint64) (uint32, uint32) {
- return squash(X), squash(X >> 1)
+ return (lat[0] + lat[1]) / 2, (lon[0] + lon[1]) / 2
}
-// BoundingBoxIntWithPrecision returns the region encoded by the integer
-// geohash with the specified precision.
-func geoBoundingBoxIntWithPrecision(hash uint64, bits uint) geoBox {
- fullHash := hash << (64 - bits)
- latInt, lngInt := deinterleave(fullHash)
- lat := decodeRange(latInt, 90)
- lng := decodeRange(lngInt, 180)
- latErr, lngErr := errorWithPrecision(bits)
- return geoBox{
- minLat: lat,
- maxLat: lat + latErr,
- minLng: lng,
- maxLng: lng + lngErr,
+func EncodeGeoHash(lat, lon float64) string {
+ even := true
+ lats := []float64{-90.0, 90.0}
+ lons := []float64{-180.0, 180.0}
+ precision := 12
+ var ch, bit uint64
+ var geoHash string
+
+ for len(geoHash) < precision {
+ if even {
+ mid := (lons[0] + lons[1]) / 2
+ if lon > mid {
+ ch |= masks[bit]
+ lons[0] = mid
+ } else {
+ lons[1] = mid
+ }
+ } else {
+ mid := (lats[0] + lats[1]) / 2
+ if lat > mid {
+ ch |= masks[bit]
+ lats[0] = mid
+ } else {
+ lats[1] = mid
+ }
+ }
+ even = !even
+ if bit < 4 {
+ bit++
+ } else {
+ geoHash += string(base32encoding.enc[ch])
+ ch = 0
+ bit = 0
+ }
}
-}
-
-// ----------------------------------------------------------------------
-// Decode the string geohash to a (lat, lng) point.
-func GeoHashDecode(hash string) (lat, lng float64) {
- box := geoBoundingBox(hash)
- return box.round()
+ return geoHash
}
diff --git a/vendor/github.com/blevesearch/bleve/geo/parse.go b/vendor/github.com/blevesearch/bleve/geo/parse.go
index 0511fea7b6..5d833d9110 100644
--- a/vendor/github.com/blevesearch/bleve/geo/parse.go
+++ b/vendor/github.com/blevesearch/bleve/geo/parse.go
@@ -85,7 +85,7 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
}
} else {
// geohash
- lat, lon = GeoHashDecode(geoStr)
+ lat, lon = DecodeGeoHash(geoStr)
foundLat = true
foundLon = true
}
diff --git a/vendor/github.com/blevesearch/bleve/index.go b/vendor/github.com/blevesearch/bleve/index.go
index 99357eee01..ef6ede9343 100644
--- a/vendor/github.com/blevesearch/bleve/index.go
+++ b/vendor/github.com/blevesearch/bleve/index.go
@@ -117,6 +117,8 @@ func (b *Batch) String() string {
// be re-used in the future.
func (b *Batch) Reset() {
b.internal.Reset()
+ b.lastDocSize = 0
+ b.totalSize = 0
}
func (b *Batch) Merge(o *Batch) {
diff --git a/vendor/github.com/blevesearch/bleve/index/index.go b/vendor/github.com/blevesearch/bleve/index/index.go
index 6aa444cfd8..3e866f3aab 100644
--- a/vendor/github.com/blevesearch/bleve/index/index.go
+++ b/vendor/github.com/blevesearch/bleve/index/index.go
@@ -121,6 +121,10 @@ type IndexReaderOnly interface {
FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error)
}
+type IndexReaderContains interface {
+ FieldDictContains(field string) (FieldDictContains, error)
+}
+
// FieldTerms contains the terms used by a document, keyed by field
type FieldTerms map[string][]string
@@ -230,6 +234,10 @@ type FieldDict interface {
Close() error
}
+type FieldDictContains interface {
+ Contains(key []byte) (bool, error)
+}
+
// DocIDReader is the interface exposing enumeration of documents identifiers.
// Close the reader to release associated resources.
type DocIDReader interface {
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go b/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go
index 2d04bd38e5..ac627796f5 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go
@@ -376,6 +376,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
fileSegments++
}
}
+
}
// before the newMerge introduction, need to clean the newly
@@ -392,7 +393,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
}
}
}
-
// In case where all the docs in the newly merged segment getting
// deleted by the time we reach here, can skip the introduction.
if nextMerge.new != nil &&
@@ -424,7 +424,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
newSnapshot.AddRef() // 1 ref for the nextMerge.notify response
newSnapshot.updateSize()
-
s.rootLock.Lock()
// swap in new index snapshot
newSnapshot.epoch = s.nextSnapshotEpoch
@@ -502,7 +501,6 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
}
newSnapshot.updateSize()
-
// swap in new snapshot
rootPrev := s.root
s.root = newSnapshot
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/merge.go b/vendor/github.com/blevesearch/bleve/index/scorch/merge.go
index bcbf5b7106..d7144772fd 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/merge.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/merge.go
@@ -18,6 +18,7 @@ import (
"encoding/json"
"fmt"
"os"
+ "strings"
"sync/atomic"
"time"
@@ -151,13 +152,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
return nil
}
-
atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1)
atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))
// process tasks in serial for now
var notifications []chan *IndexSnapshot
+ var filenames []string
for _, task := range resultMergePlan.Tasks {
if len(task.Segments) == 0 {
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
@@ -182,6 +183,12 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
segmentsToMerge = append(segmentsToMerge, zapSeg)
docsToDrop = append(docsToDrop, segSnapshot.deleted)
}
+ // track the files getting merged for unsetting the
+ // removal ineligibility. This helps to unflip files
+ // even with fast merger, slow persister work flows.
+ path := zapSeg.Path()
+ filenames = append(filenames,
+ strings.TrimPrefix(path, s.path+string(os.PathSeparator)))
}
}
}
@@ -221,6 +228,11 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
return err
}
+ err = zap.ValidateMerge(segmentsToMerge, nil, docsToDrop, seg.(*zap.Segment))
+ if err != nil {
+ s.unmarkIneligibleForRemoval(filename)
+ return fmt.Errorf("merge validation failed: %v", err)
+ }
oldNewDocNums = make(map[uint64][]uint64)
for i, segNewDocNums := range newDocNums {
oldNewDocNums[task.Segments[i].Id()] = segNewDocNums
@@ -263,6 +275,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
}
}
+ // once all the newly merged segment introductions are done,
+ // its safe to unflip the removal ineligibility for the replaced
+ // older segments
+ for _, f := range filenames {
+ s.unmarkIneligibleForRemoval(f)
+ }
+
return nil
}
@@ -311,6 +330,10 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
return nil, 0, err
}
+ err = zap.ValidateMerge(nil, sbs, sbsDrops, seg.(*zap.Segment))
+ if err != nil {
+ return nil, 0, fmt.Errorf("in-memory merge validation failed: %v", err)
+ }
// update persisted stats
atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count())
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/persister.go b/vendor/github.com/blevesearch/bleve/index/scorch/persister.go
index 349ccdc0e9..064e9e6a85 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/persister.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/persister.go
@@ -90,6 +90,9 @@ func (s *Scorch) persisterLoop() {
var persistWatchers []*epochWatcher
var lastPersistedEpoch, lastMergedEpoch uint64
var ew *epochWatcher
+
+ var unpersistedCallbacks []index.BatchCallback
+
po, err := s.parsePersisterOptions()
if err != nil {
s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err))
@@ -111,7 +114,6 @@ OUTER:
if ew != nil && ew.epoch > lastMergedEpoch {
lastMergedEpoch = ew.epoch
}
-
lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch,
lastMergedEpoch, persistWatchers, po)
@@ -150,11 +152,25 @@ OUTER:
_ = ourSnapshot.DecRef()
break OUTER
}
+
+ // save this current snapshot's persistedCallbacks, to invoke during
+ // the retry attempt
+ unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
+
s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err))
_ = ourSnapshot.DecRef()
atomic.AddUint64(&s.stats.TotPersistLoopErr, 1)
continue OUTER
}
+
+ if unpersistedCallbacks != nil {
+ // in the event of this being a retry attempt for persisting a snapshot
+ // that had earlier failed, prepend the persistedCallbacks associated
+ // with earlier segment(s) to the latest persistedCallbacks
+ ourPersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
+ unpersistedCallbacks = nil
+ }
+
for i := range ourPersistedCallbacks {
ourPersistedCallbacks[i](err)
}
@@ -179,7 +195,6 @@ OUTER:
s.fireEvent(EventKindPersisterProgress, time.Since(startTime))
if changed {
- s.removeOldData()
atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1)
continue OUTER
}
@@ -230,20 +245,19 @@ func notifyMergeWatchers(lastPersistedEpoch uint64,
return watchersNext
}
-func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64,
- persistWatchers []*epochWatcher, po *persisterOptions) (uint64, []*epochWatcher) {
+func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64,
+ lastMergedEpoch uint64, persistWatchers []*epochWatcher,
+ po *persisterOptions) (uint64, []*epochWatcher) {
- // first, let the watchers proceed if they lag behind
+ // First, let the watchers proceed if they lag behind
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
- // check the merger lag by counting the segment files on disk,
+ // Check the merger lag by counting the segment files on disk,
+ numFilesOnDisk, _ := s.diskFileStats()
+
// On finding fewer files on disk, persister takes a short pause
// for sufficient in-memory segments to pile up for the next
// memory merge cum persist loop.
- // On finding too many files on disk, persister pause until the merger
- // catches up to reduce the segment file count under the threshold.
- // But if there is memory pressure, then skip this sleep maneuvers.
- numFilesOnDisk, _ := s.diskFileStats()
if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) &&
po.PersisterNapTimeMSec > 0 && s.paused() == 0 {
select {
@@ -261,6 +275,17 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastM
return lastMergedEpoch, persistWatchers
}
+ // Finding too many files on disk could be due to two reasons.
+ // 1. Too many older snapshots awaiting the clean up.
+ // 2. The merger could be lagging behind on merging the disk files.
+ if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) {
+ s.removeOldData()
+ numFilesOnDisk, _ = s.diskFileStats()
+ }
+
+ // Persister pause until the merger catches up to reduce the segment
+ // file count under the threshold.
+ // But if there is memory pressure, then skip this sleep maneuvers.
OUTER:
for po.PersisterNapUnderNumFiles > 0 &&
numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) &&
@@ -661,13 +686,13 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) {
}
func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
+
rv := &IndexSnapshot{
parent: s,
internal: make(map[string][]byte),
refs: 1,
creator: "loadSnapshot",
}
-
var running uint64
c := snapshot.Cursor()
for k, _ := c.First(); k != nil; k, _ = c.Next() {
@@ -703,7 +728,6 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
running += segmentSnapshot.segment.Count()
}
}
-
return rv, nil
}
@@ -750,12 +774,11 @@ func (s *Scorch) removeOldData() {
if err != nil {
s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err))
}
+ atomic.AddUint64(&s.stats.TotSnapshotsRemovedFromMetaStore, uint64(removed))
- if removed > 0 {
- err = s.removeOldZapFiles()
- if err != nil {
- s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
- }
+ err = s.removeOldZapFiles()
+ if err != nil {
+ s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
}
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go b/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go
index 3f3d8bffce..44a97d1ea6 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go
@@ -41,12 +41,14 @@ const Version uint8 = 2
var ErrClosed = fmt.Errorf("scorch closed")
type Scorch struct {
+ nextSegmentID uint64
+ stats Stats
+ iStats internalStats
+
readOnly bool
version uint8
config map[string]interface{}
analysisQueue *index.AnalysisQueue
- stats Stats
- nextSegmentID uint64
path string
unsafeBatch bool
@@ -73,8 +75,6 @@ type Scorch struct {
onEvent func(event Event)
onAsyncError func(err error)
- iStats internalStats
-
pauseLock sync.RWMutex
pauseCount uint64
@@ -312,7 +312,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
// FIXME could sort ids list concurrent with analysis?
- if len(batch.IndexOps) > 0 {
+ if numUpdates > 0 {
go func() {
for _, doc := range batch.IndexOps {
if doc != nil {
@@ -490,6 +490,9 @@ func (s *Scorch) StatsMap() map[string]interface{} {
m["CurOnDiskBytes"] = numBytesUsedDisk
m["CurOnDiskFiles"] = numFilesOnDisk
+ s.rootLock.RLock()
+ m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval))
+ s.rootLock.RUnlock()
// TODO: consider one day removing these backwards compatible
// names for apps using the old names
m["updates"] = m["TotUpdates"]
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go
index 165a01bc16..fdc407a747 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go
@@ -91,12 +91,20 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte,
return &EmptyDictionaryIterator{}
}
+func (e *EmptyDictionary) Contains(key []byte) (bool, error) {
+ return false, nil
+}
+
type EmptyDictionaryIterator struct{}
func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) {
return nil, nil
}
+func (e *EmptyDictionaryIterator) Contains(key []byte) (bool, error) {
+ return false, nil
+}
+
func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) {
return nil, nil
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go
index a4836ebf8a..55299d8f7a 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go
@@ -19,7 +19,10 @@
package segment
-import "fmt"
+import (
+ "errors"
+ "fmt"
+)
const (
MaxVarintSize = 9
@@ -92,3 +95,82 @@ func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) {
}
return b[length:], v, nil
}
+
+// ------------------------------------------------------------
+
+type MemUvarintReader struct {
+ C int // index of next byte to read from S
+ S []byte
+}
+
+func NewMemUvarintReader(s []byte) *MemUvarintReader {
+ return &MemUvarintReader{S: s}
+}
+
+// Len returns the number of unread bytes.
+func (r *MemUvarintReader) Len() int {
+ n := len(r.S) - r.C
+ if n < 0 {
+ return 0
+ }
+ return n
+}
+
+var ErrMemUvarintReaderOverflow = errors.New("MemUvarintReader overflow")
+
+// ReadUvarint reads an encoded uint64. The original code this was
+// based on is at encoding/binary/ReadUvarint().
+func (r *MemUvarintReader) ReadUvarint() (uint64, error) {
+ var x uint64
+ var s uint
+ var C = r.C
+ var S = r.S
+
+ for {
+ b := S[C]
+ C++
+
+ if b < 0x80 {
+ r.C = C
+
+ // why 63? The original code had an 'i += 1' loop var and
+ // checked for i > 9 || i == 9 ...; but, we no longer
+ // check for the i var, but instead check here for s,
+ // which is incremented by 7. So, 7*9 == 63.
+ //
+ // why the "extra" >= check? The normal case is that s <
+ // 63, so we check this single >= guard first so that we
+ // hit the normal, nil-error return pathway sooner.
+ if s >= 63 && (s > 63 || s == 63 && b > 1) {
+ return 0, ErrMemUvarintReaderOverflow
+ }
+
+ return x | uint64(b)<<s, nil
+ }
+
+ x |= uint64(b&0x7f) << s
+ s += 7
+ }
+}
+
+// SkipUvarint skips ahead one encoded uint64.
+func (r *MemUvarintReader) SkipUvarint() {
+ for {
+ b := r.S[r.C]
+ r.C++
+
+ if b < 0x80 {
+ return
+ }
+ }
+}
+
+// SkipBytes skips a count number of bytes.
+func (r *MemUvarintReader) SkipBytes(count int) {
+ r.C = r.C + count
+}
+
+func (r *MemUvarintReader) Reset(s []byte) {
+ r.C = 0
+ r.S = s
+}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go
index 3aa151d64d..3a31f41498 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go
@@ -55,7 +55,7 @@ func LiteralPrefix(s *syntax.Regexp) string {
s = s.Sub[0]
}
- if s.Op == syntax.OpLiteral {
+ if s.Op == syntax.OpLiteral && (s.Flags&syntax.FoldCase == 0) {
return string(s.Rune)
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go
index b94d6f979f..34c2bc2048 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go
@@ -59,6 +59,8 @@ type TermDictionary interface {
AutomatonIterator(a vellum.Automaton,
startKeyInclusive, endKeyExclusive []byte) DictionaryIterator
OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator
+
+ Contains(key []byte) (bool, error)
}
type DictionaryIterator interface {
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go
index 91bfd4e24e..c02333cee0 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go
@@ -16,6 +16,7 @@ package zap
import (
"bufio"
+ "github.com/couchbase/vellum"
"math"
"os"
)
@@ -137,6 +138,7 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32,
docValueOffset: docValueOffset,
dictLocs: dictLocs,
fieldDvReaders: make(map[uint16]*docValueReader),
+ fieldFSTs: make(map[uint16]*vellum.FST),
}
sb.updateSize()
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go
index 2c0e1bf2ad..ad4a8f8dc5 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go
@@ -95,6 +95,10 @@ func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap)
return rv
}
+func (d *Dictionary) Contains(key []byte) (bool, error) {
+ return d.fst.Contains(key)
+}
+
// Iterator returns an iterator for this dictionary
func (d *Dictionary) Iterator() segment.DictionaryIterator {
rv := &DictionaryIterator{
@@ -143,11 +147,14 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator
}
// need to increment the end position to be inclusive
- endBytes := []byte(end)
- if endBytes[len(endBytes)-1] < 0xff {
- endBytes[len(endBytes)-1]++
- } else {
- endBytes = append(endBytes, 0xff)
+ var endBytes []byte
+ if len(end) > 0 {
+ endBytes = []byte(end)
+ if endBytes[len(endBytes)-1] < 0xff {
+ endBytes[len(endBytes)-1]++
+ } else {
+ endBytes = append(endBytes, 0xff)
+ }
}
if d.fst != nil {
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go
index bcc0f94728..a819ca239f 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go
@@ -39,7 +39,7 @@ type docNumTermsVisitor func(docNum uint64, terms []byte) error
type docVisitState struct {
dvrs map[uint16]*docValueReader
- segment *Segment
+ segment *SegmentBase
}
type docValueReader struct {
@@ -88,8 +88,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string,
fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) {
// get the docValue offset for the given fields
if fieldDvLocStart == fieldNotUninverted {
- return nil, fmt.Errorf("loadFieldDocValueReader: "+
- "no docValues found for field: %s", field)
+ // no docValues found, nothing to do
+ return nil, nil
}
// read the number of chunks, and chunk offsets position
@@ -101,6 +101,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string,
chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8])
// acquire position of chunk offsets
chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen
+ } else {
+ return nil, fmt.Errorf("loadFieldDocValueReader: fieldDvLoc too small: %d-%d", fieldDvLocEnd, fieldDvLocStart)
}
fdvIter := &docValueReader{
@@ -250,7 +252,7 @@ func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) {
// VisitDocumentFieldTerms is an implementation of the
// DocumentFieldTermVisitable interface
-func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
+func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) (
segment.DocVisitState, error) {
dvs, ok := dvsIn.(*docVisitState)
@@ -289,7 +291,7 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil {
// check if the chunk is already loaded
if docInChunk != dvr.curChunkNumber() {
- err := dvr.loadDvChunk(docInChunk, &s.SegmentBase)
+ err := dvr.loadDvChunk(docInChunk, s)
if err != nil {
return dvs, err
}
@@ -304,6 +306,6 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
// VisitableDocValueFields returns the list of fields with
// persisted doc value terms ready to be visitable using the
// VisitDocumentFieldTerms method.
-func (s *Segment) VisitableDocValueFields() ([]string, error) {
+func (s *SegmentBase) VisitableDocValueFields() ([]string, error) {
return s.fieldDvNames, nil
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go
index 4ef222c1a2..50bd7207a5 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go
@@ -31,6 +31,14 @@ import (
var DefaultFileMergerBufferSize = 1024 * 1024
+// ValidateMerge can be set by applications to perform additional checks
+// on a new segment produced by a merge, by default this does nothing.
+// Caller should provide EITHER segments or memSegments, but not both.
+// This API is experimental and may be removed at any time.
+var ValidateMerge = func(segments []*Segment, memSegments []*SegmentBase, drops []*roaring.Bitmap, newSegment *Segment) error {
+ return nil
+}
+
const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc
// Merge takes a slice of zap segments and bit masks describing which
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go
index 22b69913e4..c108ec16dd 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go
@@ -33,6 +33,14 @@ var NewSegmentBufferNumResultsBump int = 100
var NewSegmentBufferNumResultsFactor float64 = 1.0
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0
+// ValidateDocFields can be set by applications to perform additional checks
+// on fields in a document being added to a new segment, by default it does
+// nothing.
+// This API is experimental and may be removed at any time.
+var ValidateDocFields = func(field document.Field) error {
+ return nil
+}
+
// AnalysisResultsToSegmentBase produces an in-memory zap-encoded
// SegmentBase from analysis results
func AnalysisResultsToSegmentBase(results []*index.AnalysisResult,
@@ -521,6 +529,11 @@ func (s *interim) writeStoredFields() (
if opts.IncludeDocValues() {
s.IncludeDocValues[fieldID] = true
}
+
+ err := ValidateDocFields(field)
+ if err != nil {
+ return 0, err
+ }
}
var curr int
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go
index 26378c27e0..4c43fdb9b9 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go
@@ -15,10 +15,8 @@
package zap
import (
- "bytes"
"encoding/binary"
"fmt"
- "io"
"math"
"reflect"
@@ -192,7 +190,7 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
}
rv.postings = p
- rv.includeFreqNorm = includeFreq || includeNorm
+ rv.includeFreqNorm = includeFreq || includeNorm || includeLocs
rv.includeLocs = includeLocs
if p.normBits1Hit != 0 {
@@ -264,18 +262,17 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
// Count returns the number of items on this postings list
func (p *PostingsList) Count() uint64 {
- var n uint64
+ var n, e uint64
if p.normBits1Hit != 0 {
n = 1
+ if p.except != nil && p.except.Contains(uint32(p.docNum1Hit)) {
+ e = 1
+ }
} else if p.postings != nil {
n = p.postings.GetCardinality()
- }
- var e uint64
- if p.except != nil {
- e = p.except.GetCardinality()
- }
- if n <= e {
- return 0
+ if p.except != nil {
+ e = p.postings.AndCardinality(p.except)
+ }
}
return n - e
}
@@ -327,16 +324,16 @@ func (rv *PostingsList) init1Hit(fstVal uint64) error {
// PostingsIterator provides a way to iterate through the postings list
type PostingsIterator struct {
postings *PostingsList
- all roaring.IntIterable
- Actual roaring.IntIterable
+ all roaring.IntPeekable
+ Actual roaring.IntPeekable
ActualBM *roaring.Bitmap
currChunk uint32
currChunkFreqNorm []byte
currChunkLoc []byte
- freqNormReader *bytes.Reader
- locReader *bytes.Reader
+ freqNormReader *segment.MemUvarintReader
+ locReader *segment.MemUvarintReader
freqChunkOffsets []uint64
freqChunkStart uint64
@@ -387,7 +384,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
end += e
i.currChunkFreqNorm = i.postings.sb.mem[start:end]
if i.freqNormReader == nil {
- i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm)
+ i.freqNormReader = segment.NewMemUvarintReader(i.currChunkFreqNorm)
} else {
i.freqNormReader.Reset(i.currChunkFreqNorm)
}
@@ -405,7 +402,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
end += e
i.currChunkLoc = i.postings.sb.mem[start:end]
if i.locReader == nil {
- i.locReader = bytes.NewReader(i.currChunkLoc)
+ i.locReader = segment.NewMemUvarintReader(i.currChunkLoc)
} else {
i.locReader.Reset(i.currChunkLoc)
}
@@ -420,18 +417,34 @@ func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) {
return 1, i.normBits1Hit, false, nil
}
- freqHasLocs, err := binary.ReadUvarint(i.freqNormReader)
+ freqHasLocs, err := i.freqNormReader.ReadUvarint()
if err != nil {
return 0, 0, false, fmt.Errorf("error reading frequency: %v", err)
}
+
freq, hasLocs := decodeFreqHasLocs(freqHasLocs)
- normBits, err := binary.ReadUvarint(i.freqNormReader)
+ normBits, err := i.freqNormReader.ReadUvarint()
if err != nil {
return 0, 0, false, fmt.Errorf("error reading norm: %v", err)
}
- return freq, normBits, hasLocs, err
+ return freq, normBits, hasLocs, nil
+}
+
+func (i *PostingsIterator) skipFreqNormReadHasLocs() (bool, error) {
+ if i.normBits1Hit != 0 {
+ return false, nil
+ }
+
+ freqHasLocs, err := i.freqNormReader.ReadUvarint()
+ if err != nil {
+ return false, fmt.Errorf("error reading freqHasLocs: %v", err)
+ }
+
+ i.freqNormReader.SkipUvarint() // Skip normBits.
+
+ return freqHasLocs&0x01 != 0, nil // See decodeFreqHasLocs() / hasLocs.
}
func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 {
@@ -449,58 +462,53 @@ func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) {
}
// readLocation processes all the integers on the stream representing a single
-// location. if you care about it, pass in a non-nil location struct, and we
-// will fill it. if you don't care about it, pass in nil and we safely consume
-// the contents.
+// location.
func (i *PostingsIterator) readLocation(l *Location) error {
// read off field
- fieldID, err := binary.ReadUvarint(i.locReader)
+ fieldID, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location field: %v", err)
}
// read off pos
- pos, err := binary.ReadUvarint(i.locReader)
+ pos, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location pos: %v", err)
}
// read off start
- start, err := binary.ReadUvarint(i.locReader)
+ start, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location start: %v", err)
}
// read off end
- end, err := binary.ReadUvarint(i.locReader)
+ end, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location end: %v", err)
}
// read off num array pos
- numArrayPos, err := binary.ReadUvarint(i.locReader)
+ numArrayPos, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location num array pos: %v", err)
}
- // group these together for less branching
- if l != nil {
- l.field = i.postings.sb.fieldsInv[fieldID]
- l.pos = pos
- l.start = start
- l.end = end
- if cap(l.ap) < int(numArrayPos) {
- l.ap = make([]uint64, int(numArrayPos))
- } else {
- l.ap = l.ap[:int(numArrayPos)]
- }
+ l.field = i.postings.sb.fieldsInv[fieldID]
+ l.pos = pos
+ l.start = start
+ l.end = end
+
+ if cap(l.ap) < int(numArrayPos) {
+ l.ap = make([]uint64, int(numArrayPos))
+ } else {
+ l.ap = l.ap[:int(numArrayPos)]
}
// read off array positions
for k := 0; k < int(numArrayPos); k++ {
- ap, err := binary.ReadUvarint(i.locReader)
+ ap, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading array position: %v", err)
}
- if l != nil {
- l.ap[k] = ap
- }
+
+ l.ap[k] = ap
}
return nil
@@ -557,7 +565,7 @@ func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, err
}
rv.locs = i.nextSegmentLocs[:0]
- numLocsBytes, err := binary.ReadUvarint(i.locReader)
+ numLocsBytes, err := i.locReader.ReadUvarint()
if err != nil {
return nil, fmt.Errorf("error reading location numLocsBytes: %v", err)
}
@@ -613,17 +621,14 @@ func (i *PostingsIterator) nextBytes() (
if hasLocs {
startLoc := len(i.currChunkLoc) - i.locReader.Len()
- numLocsBytes, err := binary.ReadUvarint(i.locReader)
+ numLocsBytes, err := i.locReader.ReadUvarint()
if err != nil {
return 0, 0, 0, nil, nil,
fmt.Errorf("error reading location nextBytes numLocs: %v", err)
}
// skip over all the location bytes
- _, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent)
- if err != nil {
- return 0, 0, 0, nil, nil, err
- }
+ i.locReader.SkipBytes(int(numLocsBytes))
endLoc := len(i.currChunkLoc) - i.locReader.Len()
bytesLoc = i.currChunkLoc[startLoc:endLoc]
@@ -657,14 +662,14 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool,
return i.nextDocNumAtOrAfterClean(atOrAfter)
}
- n := i.Actual.Next()
- for uint64(n) < atOrAfter && i.Actual.HasNext() {
- n = i.Actual.Next()
- }
- if uint64(n) < atOrAfter {
+ i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
+
+ if !i.Actual.HasNext() {
// couldn't find anything
return 0, false, nil
}
+
+ n := i.Actual.Next()
allN := i.all.Next()
nChunk := n / i.postings.sb.chunkFactor
@@ -701,23 +706,20 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool,
// no deletions) where the all bitmap is the same as the actual bitmap
func (i *PostingsIterator) nextDocNumAtOrAfterClean(
atOrAfter uint64) (uint64, bool, error) {
- n := i.Actual.Next()
if !i.includeFreqNorm {
- for uint64(n) < atOrAfter && i.Actual.HasNext() {
- n = i.Actual.Next()
- }
+ i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
- if uint64(n) < atOrAfter {
+ if !i.Actual.HasNext() {
return 0, false, nil // couldn't find anything
}
- return uint64(n), true, nil
+ return uint64(i.Actual.Next()), true, nil
}
// freq-norm's needed, so maintain freq-norm chunk reader
sameChunkNexts := 0 // # of times we called Next() in the same chunk
-
+ n := i.Actual.Next()
nChunk := n / i.postings.sb.chunkFactor
for uint64(n) < atOrAfter && i.Actual.HasNext() {
@@ -764,22 +766,19 @@ func (i *PostingsIterator) currChunkNext(nChunk uint32) error {
}
// read off freq/offsets even though we don't care about them
- _, _, hasLocs, err := i.readFreqNormHasLocs()
+ hasLocs, err := i.skipFreqNormReadHasLocs()
if err != nil {
return err
}
if i.includeLocs && hasLocs {
- numLocsBytes, err := binary.ReadUvarint(i.locReader)
+ numLocsBytes, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location numLocsBytes: %v", err)
}
// skip over all the location bytes
- _, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent)
- if err != nil {
- return err
- }
+ i.locReader.SkipBytes(int(numLocsBytes))
}
return nil
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go
index 7ba28c2366..5aa33a26c9 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go
@@ -20,8 +20,8 @@ import (
"fmt"
"io"
"os"
- "reflect"
"sync"
+ "unsafe"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/segment"
@@ -35,7 +35,7 @@ var reflectStaticSizeSegmentBase int
func init() {
var sb SegmentBase
- reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size())
+ reflectStaticSizeSegmentBase = int(unsafe.Sizeof(sb))
}
// Open returns a zap impl of a segment
@@ -56,6 +56,7 @@ func Open(path string) (segment.Segment, error) {
mem: mm[0 : len(mm)-FooterSize],
fieldsMap: make(map[string]uint16),
fieldDvReaders: make(map[uint16]*docValueReader),
+ fieldFSTs: make(map[uint16]*vellum.FST),
},
f: f,
mm: mm,
@@ -101,6 +102,9 @@ type SegmentBase struct {
fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field
fieldDvNames []string // field names cached in fieldDvReaders
size uint64
+
+ m sync.Mutex
+ fieldFSTs map[uint16]*vellum.FST
}
func (sb *SegmentBase) Size() int {
@@ -258,19 +262,27 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) {
dictStart := sb.dictLocs[rv.fieldID]
if dictStart > 0 {
- // read the length of the vellum data
- vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
- fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
- if fstBytes != nil {
+ var ok bool
+ sb.m.Lock()
+ if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok {
+ // read the length of the vellum data
+ vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
+ fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
rv.fst, err = vellum.Load(fstBytes)
if err != nil {
+ sb.m.Unlock()
return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err)
}
- rv.fstReader, err = rv.fst.Reader()
- if err != nil {
- return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err)
- }
+
+ sb.fieldFSTs[rv.fieldID] = rv.fst
}
+
+ sb.m.Unlock()
+ rv.fstReader, err = rv.fst.Reader()
+ if err != nil {
+ return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err)
+ }
+
}
}
@@ -527,7 +539,7 @@ func (s *Segment) DictAddr(field string) (uint64, error) {
}
func (s *SegmentBase) loadDvReaders() error {
- if s.docValueOffset == fieldNotUninverted {
+ if s.docValueOffset == fieldNotUninverted || s.numDocs == 0 {
return nil
}
@@ -546,7 +558,10 @@ func (s *SegmentBase) loadDvReaders() error {
}
read += uint64(n)
- fieldDvReader, _ := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd)
+ fieldDvReader, err := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd)
+ if err != nil {
+ return err
+ }
if fieldDvReader != nil {
s.fieldDvReaders[uint16(fieldID)] = fieldDvReader
s.fieldDvNames = append(s.fieldDvNames, field)
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go
index 8babb31fa4..47cc809b21 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go
@@ -28,13 +28,14 @@ import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/couchbase/vellum"
- lev2 "github.com/couchbase/vellum/levenshtein2"
+ lev "github.com/couchbase/vellum/levenshtein"
)
// re usable, threadsafe levenshtein builders
-var lb1, lb2 *lev2.LevenshteinAutomatonBuilder
+var lb1, lb2 *lev.LevenshteinAutomatonBuilder
type asynchSegmentResult struct {
+ dict segment.TermDictionary
dictItr segment.DictionaryIterator
index int
@@ -51,11 +52,11 @@ func init() {
var is interface{} = IndexSnapshot{}
reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size())
var err error
- lb1, err = lev2.NewLevenshteinAutomatonBuilder(1, true)
+ lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true)
if err != nil {
panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err))
}
- lb2, err = lev2.NewLevenshteinAutomatonBuilder(2, true)
+ lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true)
if err != nil {
panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err))
}
@@ -126,7 +127,9 @@ func (i *IndexSnapshot) updateSize() {
}
}
-func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
+func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string,
+ makeItr func(i segment.TermDictionary) segment.DictionaryIterator,
+ randomLookup bool) (*IndexSnapshotFieldDict, error) {
results := make(chan *asynchSegmentResult)
for index, segment := range i.segment {
@@ -135,7 +138,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
if err != nil {
results <- &asynchSegmentResult{err: err}
} else {
- results <- &asynchSegmentResult{dictItr: makeItr(dict)}
+ if randomLookup {
+ results <- &asynchSegmentResult{dict: dict}
+ } else {
+ results <- &asynchSegmentResult{dictItr: makeItr(dict)}
+ }
}
}(index, segment)
}
@@ -150,14 +157,20 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
if asr.err != nil && err == nil {
err = asr.err
} else {
- next, err2 := asr.dictItr.Next()
- if err2 != nil && err == nil {
- err = err2
- }
- if next != nil {
+ if !randomLookup {
+ next, err2 := asr.dictItr.Next()
+ if err2 != nil && err == nil {
+ err = err2
+ }
+ if next != nil {
+ rv.cursors = append(rv.cursors, &segmentDictCursor{
+ itr: asr.dictItr,
+ curr: *next,
+ })
+ }
+ } else {
rv.cursors = append(rv.cursors, &segmentDictCursor{
- itr: asr.dictItr,
- curr: *next,
+ dict: asr.dict,
})
}
}
@@ -166,8 +179,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
if err != nil {
return nil, err
}
- // prepare heap
- heap.Init(rv)
+
+ if !randomLookup {
+ // prepare heap
+ heap.Init(rv)
+ }
return rv, nil
}
@@ -175,21 +191,21 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.Iterator()
- })
+ }, false)
}
func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte,
endTerm []byte) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.RangeIterator(string(startTerm), string(endTerm))
- })
+ }, false)
}
func (i *IndexSnapshot) FieldDictPrefix(field string,
termPrefix []byte) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.PrefixIterator(string(termPrefix))
- })
+ }, false)
}
func (i *IndexSnapshot) FieldDictRegexp(field string,
@@ -204,7 +220,7 @@ func (i *IndexSnapshot) FieldDictRegexp(field string,
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.AutomatonIterator(a, prefixBeg, prefixEnd)
- })
+ }, false)
}
func (i *IndexSnapshot) getLevAutomaton(term string,
@@ -232,14 +248,18 @@ func (i *IndexSnapshot) FieldDictFuzzy(field string,
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.AutomatonIterator(a, prefixBeg, prefixEnd)
- })
+ }, false)
}
func (i *IndexSnapshot) FieldDictOnly(field string,
onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.OnlyIterator(onlyTerms, includeCount)
- })
+ }, false)
+}
+
+func (i *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) {
+ return i.newIndexSnapshotFieldDict(field, nil, true)
}
func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go
index abd3bde8c1..47486c2554 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go
@@ -22,6 +22,7 @@ import (
)
type segmentDictCursor struct {
+ dict segment.TermDictionary
itr segment.DictionaryIterator
curr index.DictEntry
}
@@ -91,3 +92,17 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
func (i *IndexSnapshotFieldDict) Close() error {
return nil
}
+
+func (i *IndexSnapshotFieldDict) Contains(key []byte) (bool, error) {
+ if len(i.cursors) == 0 {
+ return false, nil
+ }
+
+ for _, cursor := range i.cursors {
+ if found, _ := cursor.dict.Contains(key); found {
+ return true, nil
+ }
+ }
+
+ return false, nil
+}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go
index f3a2c56a98..96742b4f94 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go
@@ -183,9 +183,9 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
}
type cachedDocs struct {
+ size uint64
m sync.Mutex // As the cache is asynchronously prepared, need a lock
cache map[string]*cachedFieldDocs // Keyed by field
- size uint64
}
func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error {
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/stats.go b/vendor/github.com/blevesearch/bleve/index/scorch/stats.go
index 2eb832f2cf..6549fddf51 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/stats.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/stats.go
@@ -107,6 +107,9 @@ type Stats struct {
TotFileMergeIntroductionsDone uint64
TotFileMergeIntroductionsSkipped uint64
+ CurFilesIneligibleForRemoval uint64
+ TotSnapshotsRemovedFromMetaStore uint64
+
TotMemMergeBeg uint64
TotMemMergeErr uint64
TotMemMergeDone uint64
diff --git a/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go b/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go
index e4bc3d8f02..24f5aae949 100644
--- a/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go
+++ b/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go
@@ -415,7 +415,6 @@ func (udc *UpsideDownCouch) Close() error {
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
// do analysis before acquiring write lock
analysisStart := time.Now()
- numPlainTextBytes := doc.NumPlainTextBytes()
resultChan := make(chan *index.AnalysisResult)
aw := index.NewAnalysisWork(udc, doc, resultChan)
@@ -452,6 +451,11 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
return
}
+ return udc.UpdateWithAnalysis(doc, result, backIndexRow)
+}
+
+func (udc *UpsideDownCouch) UpdateWithAnalysis(doc *document.Document,
+ result *index.AnalysisResult, backIndexRow *BackIndexRow) (err error) {
// start a writer for this update
indexStart := time.Now()
var kvwriter store.KVWriter
@@ -490,7 +494,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
if err == nil {
atomic.AddUint64(&udc.stats.updates, 1)
- atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
+ atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, doc.NumPlainTextBytes())
} else {
atomic.AddUint64(&udc.stats.errors, 1)
}
@@ -797,6 +801,10 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []
}
func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
+ persistedCallback := batch.PersistedCallback()
+ if persistedCallback != nil {
+ defer persistedCallback(err)
+ }
analysisStart := time.Now()
resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))
@@ -810,7 +818,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
}
}
- if len(batch.IndexOps) > 0 {
+ if numUpdates > 0 {
go func() {
for _, doc := range batch.IndexOps {
if doc != nil {
@@ -961,10 +969,6 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
atomic.AddUint64(&udc.stats.errors, 1)
}
- persistedCallback := batch.PersistedCallback()
- if persistedCallback != nil {
- persistedCallback(err)
- }
return
}
diff --git a/vendor/github.com/blevesearch/bleve/index_alias_impl.go b/vendor/github.com/blevesearch/bleve/index_alias_impl.go
index 335fcade2e..4366fc7956 100644
--- a/vendor/github.com/blevesearch/bleve/index_alias_impl.go
+++ b/vendor/github.com/blevesearch/bleve/index_alias_impl.go
@@ -434,6 +434,8 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest {
Sort: req.Sort.Copy(),
IncludeLocations: req.IncludeLocations,
Score: req.Score,
+ SearchAfter: req.SearchAfter,
+ SearchBefore: req.SearchBefore,
}
return &rv
}
@@ -451,6 +453,14 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
searchStart := time.Now()
asyncResults := make(chan *asyncSearchResult, len(indexes))
+ var reverseQueryExecution bool
+ if req.SearchBefore != nil {
+ reverseQueryExecution = true
+ req.Sort.Reverse()
+ req.SearchAfter = req.SearchBefore
+ req.SearchBefore = nil
+ }
+
// run search on each index in separate go routine
var waitGroup sync.WaitGroup
@@ -503,7 +513,7 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
// sort all hits with the requested order
if len(req.Sort) > 0 {
- sorter := newMultiSearchHitSorter(req.Sort, sr.Hits)
+ sorter := newSearchHitSorter(req.Sort, sr.Hits)
sort.Sort(sorter)
}
@@ -524,6 +534,17 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
sr.Facets.Fixup(name, fr.Size)
}
+ if reverseQueryExecution {
+ // reverse the sort back to the original
+ req.Sort.Reverse()
+ // resort using the original order
+ mhs := newSearchHitSorter(req.Sort, sr.Hits)
+ sort.Sort(mhs)
+ // reset request
+ req.SearchBefore = req.SearchAfter
+ req.SearchAfter = nil
+ }
+
// fix up original request
sr.Request = req
searchDuration := time.Since(searchStart)
@@ -581,26 +602,3 @@ func (f *indexAliasImplFieldDict) Close() error {
defer f.index.mutex.RUnlock()
return f.fieldDict.Close()
}
-
-type multiSearchHitSorter struct {
- hits search.DocumentMatchCollection
- sort search.SortOrder
- cachedScoring []bool
- cachedDesc []bool
-}
-
-func newMultiSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *multiSearchHitSorter {
- return &multiSearchHitSorter{
- sort: sort,
- hits: hits,
- cachedScoring: sort.CacheIsScore(),
- cachedDesc: sort.CacheDescending(),
- }
-}
-
-func (m *multiSearchHitSorter) Len() int { return len(m.hits) }
-func (m *multiSearchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] }
-func (m *multiSearchHitSorter) Less(i, j int) bool {
- c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j])
- return c < 0
-}
diff --git a/vendor/github.com/blevesearch/bleve/index_impl.go b/vendor/github.com/blevesearch/bleve/index_impl.go
index fe61b8064a..6324d960eb 100644
--- a/vendor/github.com/blevesearch/bleve/index_impl.go
+++ b/vendor/github.com/blevesearch/bleve/index_impl.go
@@ -19,6 +19,7 @@ import (
"encoding/json"
"fmt"
"os"
+ "sort"
"sync"
"sync/atomic"
"time"
@@ -442,7 +443,20 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
return nil, ErrorIndexClosed
}
- collector := collector.NewTopNCollector(req.Size, req.From, req.Sort)
+ var reverseQueryExecution bool
+ if req.SearchBefore != nil {
+ reverseQueryExecution = true
+ req.Sort.Reverse()
+ req.SearchAfter = req.SearchBefore
+ req.SearchBefore = nil
+ }
+
+ var coll *collector.TopNCollector
+ if req.SearchAfter != nil {
+ coll = collector.NewTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter)
+ } else {
+ coll = collector.NewTopNCollector(req.Size, req.From, req.Sort)
+ }
// open a reader for this search
indexReader, err := i.i.Reader()
@@ -494,10 +508,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
facetsBuilder.Add(facetName, facetBuilder)
}
}
- collector.SetFacetsBuilder(facetsBuilder)
+ coll.SetFacetsBuilder(facetsBuilder)
}
- memNeeded := memNeededForSearch(req, searcher, collector)
+ memNeeded := memNeededForSearch(req, searcher, coll)
if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil {
if cbF, ok := cb.(SearchQueryStartCallbackFn); ok {
err = cbF(memNeeded)
@@ -515,12 +529,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}
}
- err = collector.Collect(ctx, searcher, indexReader)
+ err = coll.Collect(ctx, searcher, indexReader)
if err != nil {
return nil, err
}
- hits := collector.Results()
+ hits := coll.Results()
var highlighter highlight.Highlighter
@@ -542,71 +556,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}
for _, hit := range hits {
- if len(req.Fields) > 0 || highlighter != nil {
- doc, err := indexReader.Document(hit.ID)
- if err == nil && doc != nil {
- if len(req.Fields) > 0 {
- fieldsToLoad := deDuplicate(req.Fields)
- for _, f := range fieldsToLoad {
- for _, docF := range doc.Fields {
- if f == "*" || docF.Name() == f {
- var value interface{}
- switch docF := docF.(type) {
- case *document.TextField:
- value = string(docF.Value())
- case *document.NumericField:
- num, err := docF.Number()
- if err == nil {
- value = num
- }
- case *document.DateTimeField:
- datetime, err := docF.DateTime()
- if err == nil {
- value = datetime.Format(time.RFC3339)
- }
- case *document.BooleanField:
- boolean, err := docF.Boolean()
- if err == nil {
- value = boolean
- }
- case *document.GeoPointField:
- lon, err := docF.Lon()
- if err == nil {
- lat, err := docF.Lat()
- if err == nil {
- value = []float64{lon, lat}
- }
- }
- }
- if value != nil {
- hit.AddFieldValue(docF.Name(), value)
- }
- }
- }
- }
- }
- if highlighter != nil {
- highlightFields := req.Highlight.Fields
- if highlightFields == nil {
- // add all fields with matches
- highlightFields = make([]string, 0, len(hit.Locations))
- for k := range hit.Locations {
- highlightFields = append(highlightFields, k)
- }
- }
- for _, hf := range highlightFields {
- highlighter.BestFragmentsInField(hit, doc, hf, 1)
- }
- }
- } else if doc == nil {
- // unexpected case, a doc ID that was found as a search hit
- // was unable to be found during document lookup
- return nil, ErrorIndexReadInconsistency
- }
- }
if i.name != "" {
hit.Index = i.name
}
+ err = LoadAndHighlightFields(hit, req, i.name, indexReader, highlighter)
+ if err != nil {
+ return nil, err
+ }
}
atomic.AddUint64(&i.stats.searches, 1)
@@ -618,6 +574,17 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
logger.Printf("slow search took %s - %v", searchDuration, req)
}
+ if reverseQueryExecution {
+ // reverse the sort back to the original
+ req.Sort.Reverse()
+ // resort using the original order
+ mhs := newSearchHitSorter(req.Sort, hits)
+ sort.Sort(mhs)
+ // reset request
+ req.SearchBefore = req.SearchAfter
+ req.SearchAfter = nil
+ }
+
return &SearchResult{
Status: &SearchStatus{
Total: 1,
@@ -625,13 +592,82 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
},
Request: req,
Hits: hits,
- Total: collector.Total(),
- MaxScore: collector.MaxScore(),
+ Total: coll.Total(),
+ MaxScore: coll.MaxScore(),
Took: searchDuration,
- Facets: collector.FacetResults(),
+ Facets: coll.FacetResults(),
}, nil
}
+func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
+ indexName string, r index.IndexReader,
+ highlighter highlight.Highlighter) error {
+ if len(req.Fields) > 0 || highlighter != nil {
+ doc, err := r.Document(hit.ID)
+ if err == nil && doc != nil {
+ if len(req.Fields) > 0 {
+ fieldsToLoad := deDuplicate(req.Fields)
+ for _, f := range fieldsToLoad {
+ for _, docF := range doc.Fields {
+ if f == "*" || docF.Name() == f {
+ var value interface{}
+ switch docF := docF.(type) {
+ case *document.TextField:
+ value = string(docF.Value())
+ case *document.NumericField:
+ num, err := docF.Number()
+ if err == nil {
+ value = num
+ }
+ case *document.DateTimeField:
+ datetime, err := docF.DateTime()
+ if err == nil {
+ value = datetime.Format(time.RFC3339)
+ }
+ case *document.BooleanField:
+ boolean, err := docF.Boolean()
+ if err == nil {
+ value = boolean
+ }
+ case *document.GeoPointField:
+ lon, err := docF.Lon()
+ if err == nil {
+ lat, err := docF.Lat()
+ if err == nil {
+ value = []float64{lon, lat}
+ }
+ }
+ }
+ if value != nil {
+ hit.AddFieldValue(docF.Name(), value)
+ }
+ }
+ }
+ }
+ }
+ if highlighter != nil {
+ highlightFields := req.Highlight.Fields
+ if highlightFields == nil {
+ // add all fields with matches
+ highlightFields = make([]string, 0, len(hit.Locations))
+ for k := range hit.Locations {
+ highlightFields = append(highlightFields, k)
+ }
+ }
+ for _, hf := range highlightFields {
+ highlighter.BestFragmentsInField(hit, doc, hf, 1)
+ }
+ }
+ } else if doc == nil {
+ // unexpected case, a doc ID that was found as a search hit
+ // was unable to be found during document lookup
+ return ErrorIndexReadInconsistency
+ }
+ }
+
+ return nil
+}
+
// Fields returns the name of all the fields this
// Index has operated on.
func (i *indexImpl) Fields() (fields []string, err error) {
@@ -854,3 +890,26 @@ func deDuplicate(fields []string) []string {
}
return ret
}
+
+type searchHitSorter struct {
+ hits search.DocumentMatchCollection
+ sort search.SortOrder
+ cachedScoring []bool
+ cachedDesc []bool
+}
+
+func newSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *searchHitSorter {
+ return &searchHitSorter{
+ sort: sort,
+ hits: hits,
+ cachedScoring: sort.CacheIsScore(),
+ cachedDesc: sort.CacheDescending(),
+ }
+}
+
+func (m *searchHitSorter) Len() int { return len(m.hits) }
+func (m *searchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] }
+func (m *searchHitSorter) Less(i, j int) bool {
+ c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j])
+ return c < 0
+}
diff --git a/vendor/github.com/blevesearch/bleve/mapping/document.go b/vendor/github.com/blevesearch/bleve/mapping/document.go
index f950b59bef..15cb6b5fa1 100644
--- a/vendor/github.com/blevesearch/bleve/mapping/document.go
+++ b/vendor/github.com/blevesearch/bleve/mapping/document.go
@@ -525,19 +525,27 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
if !propertyValue.IsNil() {
switch property := property.(type) {
case encoding.TextMarshaler:
-
- txt, err := property.MarshalText()
- if err == nil && subDocMapping != nil {
- // index by explicit mapping
+ // ONLY process TextMarshaler if there is an explicit mapping
+			// AND all of the fields are of type text
+ // OTHERWISE process field without TextMarshaler
+ if subDocMapping != nil {
+ allFieldsText := true
for _, fieldMapping := range subDocMapping.Fields {
- if fieldMapping.Type == "text" {
- fieldMapping.processString(string(txt), pathString, path, indexes, context)
+ if fieldMapping.Type != "text" {
+ allFieldsText = false
+ break
}
}
- } else {
- dm.walkDocument(property, path, indexes, context)
+ txt, err := property.MarshalText()
+ if err == nil && allFieldsText {
+ txtStr := string(txt)
+ for _, fieldMapping := range subDocMapping.Fields {
+ fieldMapping.processString(txtStr, pathString, path, indexes, context)
+ }
+ return
+ }
}
-
+ dm.walkDocument(property, path, indexes, context)
default:
dm.walkDocument(property, path, indexes, context)
}
diff --git a/vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go b/vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go
index 76ea001ba7..29bd0fc5c1 100644
--- a/vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go
+++ b/vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go
@@ -23,12 +23,26 @@ const ShiftStartInt64 byte = 0x20
type PrefixCoded []byte
func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
+ rv, _, err := NewPrefixCodedInt64Prealloc(in, shift, nil)
+ return rv, err
+}
+
+func NewPrefixCodedInt64Prealloc(in int64, shift uint, prealloc []byte) (
+ rv PrefixCoded, preallocRest []byte, err error) {
if shift > 63 {
- return nil, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift)
+ return nil, prealloc, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift)
}
nChars := ((63 - shift) / 7) + 1
- rv := make(PrefixCoded, nChars+1)
+
+ size := int(nChars + 1)
+ if len(prealloc) >= size {
+ rv = PrefixCoded(prealloc[0:size])
+ preallocRest = prealloc[size:]
+ } else {
+ rv = make(PrefixCoded, size)
+ }
+
rv[0] = ShiftStartInt64 + byte(shift)
sortableBits := int64(uint64(in) ^ 0x8000000000000000)
@@ -40,7 +54,8 @@ func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
nChars--
sortableBits = int64(uint64(sortableBits) >> 7)
}
- return rv, nil
+
+ return rv, preallocRest, nil
}
func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded {
diff --git a/vendor/github.com/blevesearch/bleve/search.go b/vendor/github.com/blevesearch/bleve/search.go
index ebd69971ef..b337edc9e4 100644
--- a/vendor/github.com/blevesearch/bleve/search.go
+++ b/vendor/github.com/blevesearch/bleve/search.go
@@ -262,6 +262,8 @@ func (h *HighlightRequest) AddField(field string) {
// result score explanations.
// Sort describes the desired order for the results to be returned.
// Score controls the kind of scoring performed
+// SearchAfter supports deep paging by providing a minimum sort key
+// SearchBefore supports deep paging by providing a maximum sort key
//
// A special field named "*" can be used to return all fields.
type SearchRequest struct {
@@ -275,6 +277,8 @@ type SearchRequest struct {
Sort search.SortOrder `json:"sort"`
IncludeLocations bool `json:"includeLocations"`
Score string `json:"score,omitempty"`
+ SearchAfter []string `json:"search_after"`
+ SearchBefore []string `json:"search_before"`
}
func (r *SearchRequest) Validate() error {
@@ -285,6 +289,27 @@ func (r *SearchRequest) Validate() error {
}
}
+ if r.SearchAfter != nil && r.SearchBefore != nil {
+ return fmt.Errorf("cannot use search after and search before together")
+ }
+
+ if r.SearchAfter != nil {
+ if r.From != 0 {
+ return fmt.Errorf("cannot use search after with from !=0")
+ }
+ if len(r.SearchAfter) != len(r.Sort) {
+ return fmt.Errorf("search after must have same size as sort order")
+ }
+ }
+ if r.SearchBefore != nil {
+ if r.From != 0 {
+ return fmt.Errorf("cannot use search before with from !=0")
+ }
+ if len(r.SearchBefore) != len(r.Sort) {
+ return fmt.Errorf("search before must have same size as sort order")
+ }
+ }
+
return r.Facets.Validate()
}
@@ -311,6 +336,18 @@ func (r *SearchRequest) SortByCustom(order search.SortOrder) {
r.Sort = order
}
+// SetSearchAfter sets the request to skip over hits with a sort
+// value less than or equal to the provided sort after key
+func (r *SearchRequest) SetSearchAfter(after []string) {
+ r.SearchAfter = after
+}
+
+// SetSearchBefore sets the request to skip over hits with a sort
+// value greater than or equal to the provided sort before key
+func (r *SearchRequest) SetSearchBefore(before []string) {
+ r.SearchBefore = before
+}
+
// UnmarshalJSON deserializes a JSON representation of
// a SearchRequest
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
@@ -325,6 +362,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
Sort []json.RawMessage `json:"sort"`
IncludeLocations bool `json:"includeLocations"`
Score string `json:"score"`
+ SearchAfter []string `json:"search_after"`
+ SearchBefore []string `json:"search_before"`
}
err := json.Unmarshal(input, &temp)
@@ -352,6 +391,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
r.Facets = temp.Facets
r.IncludeLocations = temp.IncludeLocations
r.Score = temp.Score
+ r.SearchAfter = temp.SearchAfter
+ r.SearchBefore = temp.SearchBefore
r.Query, err = query.ParseQuery(temp.Q)
if err != nil {
return err
diff --git a/vendor/github.com/blevesearch/bleve/search/collector/topn.go b/vendor/github.com/blevesearch/bleve/search/collector/topn.go
index 378a7b114a..a027a12c22 100644
--- a/vendor/github.com/blevesearch/bleve/search/collector/topn.go
+++ b/vendor/github.com/blevesearch/bleve/search/collector/topn.go
@@ -69,6 +69,7 @@ type TopNCollector struct {
lowestMatchOutsideResults *search.DocumentMatch
updateFieldVisitor index.DocumentFieldTermVisitor
dvReader index.DocValueReader
+ searchAfter *search.DocumentMatch
}
// CheckDoneEvery controls how frequently we check the context deadline
@@ -78,6 +79,21 @@ const CheckDoneEvery = uint64(1024)
// skipping over the first 'skip' hits
// ordering hits by the provided sort order
func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
+ return newTopNCollector(size, skip, sort)
+}
+
+// NewTopNCollectorAfter builds a collector to find the top 'size' hits
+// whose sort keys come after the provided 'after' key,
+// ordering hits by the provided sort order
+func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector {
+ rv := newTopNCollector(size, 0, sort)
+ rv.searchAfter = &search.DocumentMatch{
+ Sort: after,
+ }
+ return rv
+}
+
+func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
hc := &TopNCollector{size: size, skip: skip, sort: sort}
// pre-allocate space on the store to avoid reslicing
@@ -141,6 +157,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
searchContext := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)),
Collector: hc,
+ IndexReader: reader,
}
hc.dvReader, err = reader.DocValueReader(hc.neededFields)
@@ -265,6 +282,19 @@ func MakeTopNDocumentMatchHandler(
if d == nil {
return nil
}
+
+ // support search after based pagination,
+ // if this hit is <= the search after sort key
+ // we should skip it
+ if hc.searchAfter != nil {
+ // exact sort order matches use hit number to break tie
+ // but we want to allow for exact match, so we pretend
+ hc.searchAfter.HitNumber = d.HitNumber
+ if hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.searchAfter) <= 0 {
+ return nil
+ }
+ }
+
// optimization, we track lowest sorting hit already removed from heap
// with this one comparison, we can avoid all heap operations if
// this hit would have been added and then immediately removed
diff --git a/vendor/github.com/blevesearch/bleve/search/query/date_range.go b/vendor/github.com/blevesearch/bleve/search/query/date_range.go
index ff67a7bb70..3ac0322f55 100644
--- a/vendor/github.com/blevesearch/bleve/search/query/date_range.go
+++ b/vendor/github.com/blevesearch/bleve/search/query/date_range.go
@@ -41,6 +41,14 @@ type BleveQueryTime struct {
time.Time
}
+var MinRFC3339CompatibleTime time.Time
+var MaxRFC3339CompatibleTime time.Time
+
+func init() {
+ MinRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "1677-12-01T00:00:00Z")
+ MaxRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "2262-04-11T11:59:59Z")
+}
+
func queryTimeFromString(t string) (time.Time, error) {
dateTimeParser, err := cache.DateTimeParserNamed(QueryDateTimeParser)
if err != nil {
@@ -143,10 +151,20 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) {
min := math.Inf(-1)
max := math.Inf(1)
if !q.Start.IsZero() {
- min = numeric.Int64ToFloat64(q.Start.UnixNano())
+ if !isDatetimeCompatible(q.Start) {
+ // overflow
+ return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start)
+ }
+ startInt64 := q.Start.UnixNano()
+ min = numeric.Int64ToFloat64(startInt64)
}
if !q.End.IsZero() {
- max = numeric.Int64ToFloat64(q.End.UnixNano())
+ if !isDatetimeCompatible(q.End) {
+ // overflow
+ return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End)
+ }
+ endInt64 := q.End.UnixNano()
+ max = numeric.Int64ToFloat64(endInt64)
}
return &min, &max, nil
@@ -162,3 +180,12 @@ func (q *DateRangeQuery) Validate() error {
}
return nil
}
+
+func isDatetimeCompatible(t BleveQueryTime) bool {
+ if QueryDateTimeFormat == time.RFC3339 &&
+ (t.Before(MinRFC3339CompatibleTime) || t.After(MaxRFC3339CompatibleTime)) {
+ return false
+ }
+
+ return true
+}
diff --git a/vendor/github.com/blevesearch/bleve/search/query/disjunction.go b/vendor/github.com/blevesearch/bleve/search/query/disjunction.go
index 2bc1d70441..a1fc1439a6 100644
--- a/vendor/github.com/blevesearch/bleve/search/query/disjunction.go
+++ b/vendor/github.com/blevesearch/bleve/search/query/disjunction.go
@@ -80,12 +80,6 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
if len(ss) < 1 {
return searcher.NewMatchNoneSearcher(i)
- } else if len(ss) == 1 && int(q.Min) == ss[0].Min() {
- // apply optimization only if both conditions below are satisfied:
- // - disjunction searcher has only 1 child searcher
- // - parent searcher's min setting is equal to child searcher's min
-
- return ss[0], nil
}
return searcher.NewDisjunctionSearcher(i, ss, q.Min, options)
diff --git a/vendor/github.com/blevesearch/bleve/search/query/geo_boundingpolygon.go b/vendor/github.com/blevesearch/bleve/search/query/geo_boundingpolygon.go
new file mode 100644
index 0000000000..41c7f7f3ab
--- /dev/null
+++ b/vendor/github.com/blevesearch/bleve/search/query/geo_boundingpolygon.go
@@ -0,0 +1,94 @@
+// Copyright (c) 2019 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package query
+
+import (
+ "encoding/json"
+ "fmt"
+
+ "github.com/blevesearch/bleve/geo"
+ "github.com/blevesearch/bleve/index"
+ "github.com/blevesearch/bleve/mapping"
+ "github.com/blevesearch/bleve/search"
+ "github.com/blevesearch/bleve/search/searcher"
+)
+
+type GeoBoundingPolygonQuery struct {
+ Points []geo.Point `json:"polygon_points"`
+ FieldVal string `json:"field,omitempty"`
+ BoostVal *Boost `json:"boost,omitempty"`
+}
+
+func NewGeoBoundingPolygonQuery(points []geo.Point) *GeoBoundingPolygonQuery {
+ return &GeoBoundingPolygonQuery{
+ Points: points}
+}
+
+func (q *GeoBoundingPolygonQuery) SetBoost(b float64) {
+ boost := Boost(b)
+ q.BoostVal = &boost
+}
+
+func (q *GeoBoundingPolygonQuery) Boost() float64 {
+ return q.BoostVal.Value()
+}
+
+func (q *GeoBoundingPolygonQuery) SetField(f string) {
+ q.FieldVal = f
+}
+
+func (q *GeoBoundingPolygonQuery) Field() string {
+ return q.FieldVal
+}
+
+func (q *GeoBoundingPolygonQuery) Searcher(i index.IndexReader,
+ m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
+ field := q.FieldVal
+ if q.FieldVal == "" {
+ field = m.DefaultSearchField()
+ }
+
+ return searcher.NewGeoBoundedPolygonSearcher(i, q.Points, field, q.BoostVal.Value(), options)
+}
+
+func (q *GeoBoundingPolygonQuery) Validate() error {
+ return nil
+}
+
+func (q *GeoBoundingPolygonQuery) UnmarshalJSON(data []byte) error {
+ tmp := struct {
+ Points []interface{} `json:"polygon_points"`
+ FieldVal string `json:"field,omitempty"`
+ BoostVal *Boost `json:"boost,omitempty"`
+ }{}
+ err := json.Unmarshal(data, &tmp)
+ if err != nil {
+ return err
+ }
+
+ q.Points = make([]geo.Point, 0, len(tmp.Points))
+ for _, i := range tmp.Points {
+ // now use our generic point parsing code from the geo package
+ lon, lat, found := geo.ExtractGeoPoint(i)
+ if !found {
+ return fmt.Errorf("geo polygon point: %v is not in a valid format", i)
+ }
+ q.Points = append(q.Points, geo.Point{Lon: lon, Lat: lat})
+ }
+
+ q.FieldVal = tmp.FieldVal
+ q.BoostVal = tmp.BoostVal
+ return nil
+}
diff --git a/vendor/github.com/blevesearch/bleve/search/query/query.go b/vendor/github.com/blevesearch/bleve/search/query/query.go
index c7c1eefb80..18aca228d4 100644
--- a/vendor/github.com/blevesearch/bleve/search/query/query.go
+++ b/vendor/github.com/blevesearch/bleve/search/query/query.go
@@ -273,6 +273,15 @@ func ParseQuery(input []byte) (Query, error) {
}
return &rv, nil
}
+ _, hasPoints := tmp["polygon_points"]
+ if hasPoints {
+ var rv GeoBoundingPolygonQuery
+ err := json.Unmarshal(input, &rv)
+ if err != nil {
+ return nil, err
+ }
+ return &rv, nil
+ }
return nil, fmt.Errorf("unknown query type")
}
diff --git a/vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go b/vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go
index 5544f2d011..718de2ea5e 100644
--- a/vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go
+++ b/vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go
@@ -40,6 +40,7 @@ type TermQueryScorer struct {
idf float64
options search.SearcherOptions
idfExplanation *search.Explanation
+ includeScore bool
queryNorm float64
queryWeight float64
queryWeightExplanation *search.Explanation
@@ -62,14 +63,15 @@ func (s *TermQueryScorer) Size() int {
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
rv := TermQueryScorer{
- queryTerm: string(queryTerm),
- queryField: queryField,
- queryBoost: queryBoost,
- docTerm: docTerm,
- docTotal: docTotal,
- idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
- options: options,
- queryWeight: 1.0,
+ queryTerm: string(queryTerm),
+ queryField: queryField,
+ queryBoost: queryBoost,
+ docTerm: docTerm,
+ docTotal: docTotal,
+ idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
+ options: options,
+ queryWeight: 1.0,
+ includeScore: options.Score != "none",
}
if options.Explain {
@@ -113,56 +115,61 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
}
func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch {
- var scoreExplanation *search.Explanation
-
- // need to compute score
- var tf float64
- if termMatch.Freq < MaxSqrtCache {
- tf = SqrtCache[int(termMatch.Freq)]
- } else {
- tf = math.Sqrt(float64(termMatch.Freq))
- }
- score := tf * termMatch.Norm * s.idf
-
- if s.options.Explain {
- childrenExplanations := make([]*search.Explanation, 3)
- childrenExplanations[0] = &search.Explanation{
- Value: tf,
- Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
- }
- childrenExplanations[1] = &search.Explanation{
- Value: termMatch.Norm,
- Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
- }
- childrenExplanations[2] = s.idfExplanation
- scoreExplanation = &search.Explanation{
- Value: score,
- Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID),
- Children: childrenExplanations,
+ rv := ctx.DocumentMatchPool.Get()
+ // perform any score computations only when needed
+ if s.includeScore || s.options.Explain {
+ var scoreExplanation *search.Explanation
+ var tf float64
+ if termMatch.Freq < MaxSqrtCache {
+ tf = SqrtCache[int(termMatch.Freq)]
+ } else {
+ tf = math.Sqrt(float64(termMatch.Freq))
}
- }
+ score := tf * termMatch.Norm * s.idf
- // if the query weight isn't 1, multiply
- if s.queryWeight != 1.0 {
- score = score * s.queryWeight
if s.options.Explain {
- childExplanations := make([]*search.Explanation, 2)
- childExplanations[0] = s.queryWeightExplanation
- childExplanations[1] = scoreExplanation
+ childrenExplanations := make([]*search.Explanation, 3)
+ childrenExplanations[0] = &search.Explanation{
+ Value: tf,
+ Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
+ }
+ childrenExplanations[1] = &search.Explanation{
+ Value: termMatch.Norm,
+ Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
+ }
+ childrenExplanations[2] = s.idfExplanation
scoreExplanation = &search.Explanation{
Value: score,
- Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID),
- Children: childExplanations,
+ Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID),
+ Children: childrenExplanations,
}
}
+
+ // if the query weight isn't 1, multiply
+ if s.queryWeight != 1.0 {
+ score = score * s.queryWeight
+ if s.options.Explain {
+ childExplanations := make([]*search.Explanation, 2)
+ childExplanations[0] = s.queryWeightExplanation
+ childExplanations[1] = scoreExplanation
+ scoreExplanation = &search.Explanation{
+ Value: score,
+ Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID),
+ Children: childExplanations,
+ }
+ }
+ }
+
+ if s.includeScore {
+ rv.Score = score
+ }
+
+ if s.options.Explain {
+ rv.Expl = scoreExplanation
+ }
}
- rv := ctx.DocumentMatchPool.Get()
rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...)
- rv.Score = score
- if s.options.Explain {
- rv.Expl = scoreExplanation
- }
if len(termMatch.Vectors) > 0 {
if cap(rv.FieldTermLocations) < len(termMatch.Vectors) {
diff --git a/vendor/github.com/blevesearch/bleve/search/search.go b/vendor/github.com/blevesearch/bleve/search/search.go
index f8a282d165..8ed23de454 100644
--- a/vendor/github.com/blevesearch/bleve/search/search.go
+++ b/vendor/github.com/blevesearch/bleve/search/search.go
@@ -17,6 +17,7 @@ package search
import (
"fmt"
"reflect"
+ "sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/size"
@@ -49,6 +50,24 @@ func (ap ArrayPositions) Equals(other ArrayPositions) bool {
return true
}
+func (ap ArrayPositions) Compare(other ArrayPositions) int {
+ for i, p := range ap {
+ if i >= len(other) {
+ return 1
+ }
+ if p < other[i] {
+ return -1
+ }
+ if p > other[i] {
+ return 1
+ }
+ }
+ if len(ap) < len(other) {
+ return -1
+ }
+ return 0
+}
+
type Location struct {
// Pos is the position of the term within the field, starting at 1
Pos uint64 `json:"pos"`
@@ -68,6 +87,46 @@ func (l *Location) Size() int {
type Locations []*Location
+func (p Locations) Len() int { return len(p) }
+func (p Locations) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
+
+func (p Locations) Less(i, j int) bool {
+ c := p[i].ArrayPositions.Compare(p[j].ArrayPositions)
+ if c < 0 {
+ return true
+ }
+ if c > 0 {
+ return false
+ }
+ return p[i].Pos < p[j].Pos
+}
+
+func (p Locations) Dedupe() Locations { // destructive!
+ if len(p) <= 1 {
+ return p
+ }
+
+ sort.Sort(p)
+
+ slow := 0
+
+ for _, pfast := range p {
+ pslow := p[slow]
+ if pslow.Pos == pfast.Pos &&
+ pslow.Start == pfast.Start &&
+ pslow.End == pfast.End &&
+ pslow.ArrayPositions.Equals(pfast.ArrayPositions) {
+ continue // duplicate, so only move fast ahead
+ }
+
+ slow++
+
+ p[slow] = pfast
+ }
+
+ return p[:slow+1]
+}
+
type TermLocationMap map[string]Locations
func (t TermLocationMap) AddLocation(term string, location *Location) {
@@ -208,6 +267,7 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
var lastField string
var tlm TermLocationMap
+ var needsDedupe bool
for i, ftl := range dm.FieldTermLocations {
if lastField != ftl.Field {
@@ -231,7 +291,19 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...)
}
- tlm[ftl.Term] = append(tlm[ftl.Term], loc)
+ locs := tlm[ftl.Term]
+
+ // if the loc is before or at the last location, then there
+ // might be duplicates that need to be deduplicated
+ if !needsDedupe && len(locs) > 0 {
+ last := locs[len(locs)-1]
+ cmp := loc.ArrayPositions.Compare(last.ArrayPositions)
+ if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) {
+ needsDedupe = true
+ }
+ }
+
+ tlm[ftl.Term] = append(locs, loc)
dm.FieldTermLocations[i] = FieldTermLocation{ // recycle
Location: Location{
@@ -239,6 +311,14 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
},
}
}
+
+ if needsDedupe {
+ for _, tlm := range dm.Locations {
+ for term, locs := range tlm {
+ tlm[term] = locs.Dedupe()
+ }
+ }
+ }
}
dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle
@@ -279,6 +359,7 @@ type SearcherOptions struct {
type SearchContext struct {
DocumentMatchPool *DocumentMatchPool
Collector Collector
+ IndexReader index.IndexReader
}
func (sc *SearchContext) Size() int {
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go
index bbbced4795..7f0bfa4246 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go
@@ -45,6 +45,7 @@ type BooleanSearcher struct {
scorer *scorer.ConjunctionQueryScorer
matches []*search.DocumentMatch
initialized bool
+ done bool
}
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) {
@@ -207,6 +208,10 @@ func (s *BooleanSearcher) SetQueryNorm(qnorm float64) {
func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
+ if s.done {
+ return nil, nil
+ }
+
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
@@ -320,11 +325,19 @@ func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch
}
}
+ if rv == nil {
+ s.done = true
+ }
+
return rv, nil
}
func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
+ if s.done {
+ return nil, nil
+ }
+
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
@@ -332,14 +345,8 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
}
}
- // Advance the searchers only if the currentID cursor is trailing the lookup ID,
- // additionally if the mustNotSearcher has been initialized, ensure that the
- // cursor used to track the mustNotSearcher (currMustNot, which isn't tracked by
- // currentID) is trailing the lookup ID as well - for in the case where currentID
- // is nil and currMustNot is already at or ahead of the lookup ID, we MUST NOT
- // advance the currentID or the currMustNot cursors.
- if (s.currentID == nil || s.currentID.Compare(ID) < 0) &&
- (s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0) {
+ // Advance the searcher only if the cursor is trailing the lookup ID
+ if s.currentID == nil || s.currentID.Compare(ID) < 0 {
var err error
if s.mustSearcher != nil {
if s.currMust != nil {
@@ -362,12 +369,17 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
}
if s.mustNotSearcher != nil {
- if s.currMustNot != nil {
- ctx.DocumentMatchPool.Put(s.currMustNot)
- }
- s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID)
- if err != nil {
- return nil, err
+ // Additional check for mustNotSearcher, whose cursor isn't tracked by
+ // currentID to prevent it from moving when the searcher's tracked
+ // position is already ahead of or at the requested ID.
+ if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 {
+ if s.currMustNot != nil {
+ ctx.DocumentMatchPool.Put(s.currMustNot)
+ }
+ s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID)
+ if err != nil {
+ return nil, err
+ }
}
}
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go
index 289e416782..38cb6467fb 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go
@@ -22,6 +22,11 @@ import (
"github.com/blevesearch/bleve/search"
)
+type filterFunc func(key []byte) bool
+
+var GeoBitsShift1 = (geo.GeoBits << 1)
+var GeoBitsShift1Minus1 = GeoBitsShift1 - 1
+
func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
maxLon, maxLat float64, field string, boost float64,
options search.SearcherOptions, checkBoundaries bool) (
@@ -36,8 +41,11 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
}
// do math to produce list of terms needed for this search
- onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, (geo.GeoBits<<1)-1,
- minLon, minLat, maxLon, maxLat, checkBoundaries)
+ onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(0, GeoBitsShift1Minus1,
+ minLon, minLat, maxLon, maxLat, checkBoundaries, indexReader, field)
+ if err != nil {
+ return nil, err
+ }
var onBoundarySearcher search.Searcher
dvReader, err := indexReader.DocValueReader([]string{field})
@@ -94,59 +102,123 @@ var geoMaxShift = document.GeoPrecisionStep * 4
var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2
func ComputeGeoRange(term uint64, shift uint,
- sminLon, sminLat, smaxLon, smaxLat float64,
- checkBoundaries bool) (
- onBoundary [][]byte, notOnBoundary [][]byte) {
- split := term | uint64(0x1)<<shift
- var upperMax uint64
- if shift < 63 {
- upperMax = term | ((uint64(1) << (shift + 1)) - 1)
- } else {
- upperMax = 0xffffffffffffffff
+ sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool,
+ indexReader index.IndexReader, field string) (
+ onBoundary [][]byte, notOnBoundary [][]byte, err error) {
+ preallocBytesLen := 32
+ preallocBytes := make([]byte, preallocBytesLen)
+
+ makePrefixCoded := func(in int64, shift uint) (rv numeric.PrefixCoded) {
+ if len(preallocBytes) <= 0 {
+ preallocBytesLen = preallocBytesLen * 2
+ preallocBytes = make([]byte, preallocBytesLen)
+ }
+
+ rv, preallocBytes, err =
+ numeric.NewPrefixCodedInt64Prealloc(in, shift, preallocBytes)
+
+ return rv
+ }
+
+ var fieldDict index.FieldDictContains
+ var isIndexed filterFunc
+ if irr, ok := indexReader.(index.IndexReaderContains); ok {
+ fieldDict, err = irr.FieldDictContains(field)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ isIndexed = func(term []byte) bool {
+ found, err := fieldDict.Contains(term)
+ return err == nil && found
+ }
}
- lowerMax := split - 1
- onBoundary, notOnBoundary = relateAndRecurse(term, lowerMax, shift,
- sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
- plusOnBoundary, plusNotOnBoundary := relateAndRecurse(split, upperMax, shift,
- sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
- onBoundary = append(onBoundary, plusOnBoundary...)
- notOnBoundary = append(notOnBoundary, plusNotOnBoundary...)
- return
-}
-func relateAndRecurse(start, end uint64, res uint,
- sminLon, sminLat, smaxLon, smaxLat float64,
- checkBoundaries bool) (
- onBoundary [][]byte, notOnBoundary [][]byte) {
- minLon := geo.MortonUnhashLon(start)
- minLat := geo.MortonUnhashLat(start)
- maxLon := geo.MortonUnhashLon(end)
- maxLat := geo.MortonUnhashLat(end)
-
- level := ((geo.GeoBits << 1) - res) >> 1
-
- within := res%document.GeoPrecisionStep == 0 &&
- geo.RectWithin(minLon, minLat, maxLon, maxLat,
- sminLon, sminLat, smaxLon, smaxLat)
- if within || (level == geoDetailLevel &&
- geo.RectIntersects(minLon, minLat, maxLon, maxLat,
- sminLon, sminLat, smaxLon, smaxLat)) {
- if !within && checkBoundaries {
- return [][]byte{
- numeric.MustNewPrefixCodedInt64(int64(start), res),
- }, nil
+ defer func() {
+ if fieldDict != nil {
+ if fd, ok := fieldDict.(index.FieldDict); ok {
+ cerr := fd.Close()
+ if cerr != nil {
+ err = cerr
+ }
+ }
}
- return nil,
- [][]byte{
- numeric.MustNewPrefixCodedInt64(int64(start), res),
+ }()
+
+ if isIndexed == nil {
+ isIndexed = func(term []byte) bool {
+ if indexReader != nil {
+ reader, err := indexReader.TermFieldReader(term, field, false, false, false)
+ if err != nil || reader == nil {
+ return false
+ }
+ if reader.Count() == 0 {
+ _ = reader.Close()
+ return false
+ }
+ _ = reader.Close()
}
- } else if level < geoDetailLevel &&
- geo.RectIntersects(minLon, minLat, maxLon, maxLat,
- sminLon, sminLat, smaxLon, smaxLat) {
- return ComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat,
- checkBoundaries)
+ return true
+ }
}
- return nil, nil
+
+ var computeGeoRange func(term uint64, shift uint) // declare for recursion
+
+ relateAndRecurse := func(start, end uint64, res, level uint) {
+ minLon := geo.MortonUnhashLon(start)
+ minLat := geo.MortonUnhashLat(start)
+ maxLon := geo.MortonUnhashLon(end)
+ maxLat := geo.MortonUnhashLat(end)
+
+ within := res%document.GeoPrecisionStep == 0 &&
+ geo.RectWithin(minLon, minLat, maxLon, maxLat,
+ sminLon, sminLat, smaxLon, smaxLat)
+ if within || (level == geoDetailLevel &&
+ geo.RectIntersects(minLon, minLat, maxLon, maxLat,
+ sminLon, sminLat, smaxLon, smaxLat)) {
+ codedTerm := makePrefixCoded(int64(start), res)
+ if isIndexed(codedTerm) {
+ if !within && checkBoundaries {
+ onBoundary = append(onBoundary, codedTerm)
+ } else {
+ notOnBoundary = append(notOnBoundary, codedTerm)
+ }
+ }
+ } else if level < geoDetailLevel &&
+ geo.RectIntersects(minLon, minLat, maxLon, maxLat,
+ sminLon, sminLat, smaxLon, smaxLat) {
+ computeGeoRange(start, res-1)
+ }
+ }
+
+ computeGeoRange = func(term uint64, shift uint) {
+ if err != nil {
+ return
+ }
+
+ split := term | uint64(0x1)<<shift
+ var upperMax uint64
+ if shift < 63 {
+ upperMax = term | ((uint64(1) << (shift + 1)) - 1)
+ } else {
+ upperMax = 0xffffffffffffffff
+ }
+
+ lowerMax := split - 1
+
+ level := (GeoBitsShift1 - shift) >> 1
+
+ relateAndRecurse(term, lowerMax, shift, level)
+ relateAndRecurse(split, upperMax, shift, level)
+ }
+
+ computeGeoRange(term, shift)
+
+ if err != nil {
+ return nil, nil, err
+ }
+
+ return onBoundary, notOnBoundary, err
}
func buildRectFilter(dvReader index.DocValueReader, field string,
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go
index a15c194e86..b01ae6a0af 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go
@@ -34,7 +34,7 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
// build a searcher for the box
boxSearcher, err := boxSearcher(indexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
- field, boost, options)
+ field, boost, options, false)
if err != nil {
return nil, err
}
@@ -54,19 +54,20 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
// two boxes joined through a disjunction searcher
func boxSearcher(indexReader index.IndexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64,
- field string, boost float64, options search.SearcherOptions) (
+ field string, boost float64, options search.SearcherOptions, checkBoundaries bool) (
search.Searcher, error) {
if bottomRightLon < topLeftLon {
// cross date line, rewrite as two parts
leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
-180, bottomRightLat, bottomRightLon, topLeftLat,
- field, boost, options, false)
+ field, boost, options, checkBoundaries)
if err != nil {
return nil, err
}
rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
- topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, false)
+ topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options,
+ checkBoundaries)
if err != nil {
_ = leftSearcher.Close()
return nil, err
@@ -85,7 +86,7 @@ func boxSearcher(indexReader index.IndexReader,
// build geoboundinggox searcher for that bounding box
boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost,
- options, false)
+ options, checkBoundaries)
if err != nil {
return nil, err
}
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go
new file mode 100644
index 0000000000..3bb47519d0
--- /dev/null
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go
@@ -0,0 +1,110 @@
+// Copyright (c) 2019 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package searcher
+
+import (
+ "github.com/blevesearch/bleve/geo"
+ "github.com/blevesearch/bleve/index"
+ "github.com/blevesearch/bleve/numeric"
+ "github.com/blevesearch/bleve/search"
+ "math"
+)
+
+func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader,
+ polygon []geo.Point, field string, boost float64,
+ options search.SearcherOptions) (search.Searcher, error) {
+
+ // compute the bounding box enclosing the polygon
+ topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err :=
+ geo.BoundingRectangleForPolygon(polygon)
+ if err != nil {
+ return nil, err
+ }
+
+ // build a searcher for the bounding box on the polygon
+ boxSearcher, err := boxSearcher(indexReader,
+ topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
+ field, boost, options, true)
+ if err != nil {
+ return nil, err
+ }
+
+ dvReader, err := indexReader.DocValueReader([]string{field})
+ if err != nil {
+ return nil, err
+ }
+
+ // wrap it in a filtering searcher that checks for the polygon inclusivity
+ return NewFilteringSearcher(boxSearcher,
+ buildPolygonFilter(dvReader, field, polygon)), nil
+}
+
+const float64EqualityThreshold = 1e-6
+
+func almostEqual(a, b float64) bool {
+ return math.Abs(a-b) <= float64EqualityThreshold
+}
+
+// buildPolygonFilter returns true if the point lies inside the
+// polygon. It is based on the ray-casting technique as referred
+// here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html
+func buildPolygonFilter(dvReader index.DocValueReader, field string,
+ polygon []geo.Point) FilterFunc {
+ return func(d *search.DocumentMatch) bool {
+ var lon, lat float64
+ var found bool
+
+ err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
+ // only consider the values which are shifted 0
+ prefixCoded := numeric.PrefixCoded(term)
+ shift, err := prefixCoded.Shift()
+ if err == nil && shift == 0 {
+ i64, err := prefixCoded.Int64()
+ if err == nil {
+ lon = geo.MortonUnhashLon(uint64(i64))
+ lat = geo.MortonUnhashLat(uint64(i64))
+ found = true
+ }
+ }
+ })
+
+ // Note: this approach works for points which are strictly inside
+ // the polygon. ie it might fail for certain points on the polygon boundaries.
+ if err == nil && found {
+ nVertices := len(polygon)
+ var inside bool
+ // check for a direct vertex match
+ if almostEqual(polygon[0].Lat, lat) &&
+ almostEqual(polygon[0].Lon, lon) {
+ return true
+ }
+
+ for i := 1; i < nVertices; i++ {
+ if almostEqual(polygon[i].Lat, lat) &&
+ almostEqual(polygon[i].Lon, lon) {
+ return true
+ }
+ if (polygon[i].Lat > lat) != (polygon[i-1].Lat > lat) &&
+ lon < (polygon[i-1].Lon-polygon[i].Lon)*(lat-polygon[i].Lat)/
+ (polygon[i-1].Lat-polygon[i].Lat)+polygon[i].Lon {
+ inside = !inside
+ }
+ }
+ return inside
+
+ }
+ return false
+ }
+}
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go
index e52ef9a825..83107f0201 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go
@@ -53,20 +53,49 @@ func NewNumericRangeSearcher(indexReader index.IndexReader,
if !*inclusiveMax && maxInt64 != math.MinInt64 {
maxInt64--
}
+
+ var fieldDict index.FieldDictContains
+ var isIndexed filterFunc
+ var err error
+ if irr, ok := indexReader.(index.IndexReaderContains); ok {
+ fieldDict, err = irr.FieldDictContains(field)
+ if err != nil {
+ return nil, err
+ }
+
+ isIndexed = func(term []byte) bool {
+ found, err := fieldDict.Contains(term)
+ return err == nil && found
+ }
+ }
+
// FIXME hard-coded precision, should match field declaration
termRanges := splitInt64Range(minInt64, maxInt64, 4)
- terms := termRanges.Enumerate()
+ terms := termRanges.Enumerate(isIndexed)
+ if fieldDict != nil {
+ if fd, ok := fieldDict.(index.FieldDict); ok {
+ cerr := fd.Close()
+ if cerr != nil {
+ err = cerr
+ }
+ }
+ }
+
if len(terms) < 1 {
// cannot return MatchNoneSearcher because of interaction with
// commit f391b991c20f02681bacd197afc6d8aed444e132
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
true)
}
- var err error
- terms, err = filterCandidateTerms(indexReader, terms, field)
- if err != nil {
- return nil, err
+
+ // for upside_down
+ if isIndexed == nil {
+ terms, err = filterCandidateTerms(indexReader, terms, field)
+ if err != nil {
+ return nil, err
+ }
}
+
if tooManyClauses(len(terms)) {
return nil, tooManyClausesErr(len(terms))
}
@@ -125,11 +154,17 @@ type termRange struct {
endTerm []byte
}
-func (t *termRange) Enumerate() [][]byte {
+func (t *termRange) Enumerate(filter filterFunc) [][]byte {
var rv [][]byte
next := t.startTerm
for bytes.Compare(next, t.endTerm) <= 0 {
- rv = append(rv, next)
+ if filter != nil {
+ if filter(next) {
+ rv = append(rv, next)
+ }
+ } else {
+ rv = append(rv, next)
+ }
next = incrementBytes(next)
}
return rv
@@ -150,10 +185,10 @@ func incrementBytes(in []byte) []byte {
type termRanges []*termRange
-func (tr termRanges) Enumerate() [][]byte {
+func (tr termRanges) Enumerate(filter filterFunc) [][]byte {
var rv [][]byte
for _, tri := range tr {
- trie := tri.Enumerate()
+ trie := tri.Enumerate(filter)
rv = append(rv, trie...)
}
return rv
diff --git a/vendor/github.com/blevesearch/bleve/search/sort.go b/vendor/github.com/blevesearch/bleve/search/sort.go
index e17f707879..6e4ed80fa2 100644
--- a/vendor/github.com/blevesearch/bleve/search/sort.go
+++ b/vendor/github.com/blevesearch/bleve/search/sort.go
@@ -38,6 +38,8 @@ type SearchSort interface {
RequiresScoring() bool
RequiresFields() []string
+ Reverse()
+
Copy() SearchSort
}
@@ -293,6 +295,12 @@ func (so SortOrder) CacheDescending() []bool {
return rv
}
+func (so SortOrder) Reverse() {
+ for _, soi := range so {
+ soi.Reverse()
+ }
+}
+
// SortFieldType lets you control some internal sort behavior
// normally leaving this to the zero-value of SortFieldAuto is fine
type SortFieldType int
@@ -492,6 +500,15 @@ func (s *SortField) Copy() SearchSort {
return &rv
}
+func (s *SortField) Reverse() {
+ s.Desc = !s.Desc
+ if s.Missing == SortFieldMissingFirst {
+ s.Missing = SortFieldMissingLast
+ } else {
+ s.Missing = SortFieldMissingFirst
+ }
+}
+
// SortDocID will sort results by the document identifier
type SortDocID struct {
Desc bool
@@ -533,6 +550,10 @@ func (s *SortDocID) Copy() SearchSort {
return &rv
}
+func (s *SortDocID) Reverse() {
+ s.Desc = !s.Desc
+}
+
// SortScore will sort results by the document match score
type SortScore struct {
Desc bool
@@ -574,6 +595,10 @@ func (s *SortScore) Copy() SearchSort {
return &rv
}
+func (s *SortScore) Reverse() {
+ s.Desc = !s.Desc
+}
+
var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0))
// NewSortGeoDistance creates SearchSort instance for sorting documents by
@@ -705,6 +730,10 @@ func (s *SortGeoDistance) Copy() SearchSort {
return &rv
}
+func (s *SortGeoDistance) Reverse() {
+ s.Desc = !s.Desc
+}
+
type BytesSlice [][]byte
func (p BytesSlice) Len() int { return len(p) }
diff --git a/vendor/github.com/blevesearch/segment/.travis.yml b/vendor/github.com/blevesearch/segment/.travis.yml
index d032f234ef..b9d58e7c18 100644
--- a/vendor/github.com/blevesearch/segment/.travis.yml
+++ b/vendor/github.com/blevesearch/segment/.travis.yml
@@ -1,10 +1,9 @@
language: go
go:
- - 1.4
+ - 1.7
script:
- - go get golang.org/x/tools/cmd/vet
- go get golang.org/x/tools/cmd/cover
- go get github.com/mattn/goveralls
- go test -v -covermode=count -coverprofile=profile.out