summaryrefslogtreecommitdiffstats
path: root/vendor
diff options
context:
space:
mode:
authorLunny Xiao <xiaolunwen@gmail.com>2019-02-18 08:50:26 +0800
committertechknowlogick <matti@mdranta.net>2019-02-17 19:50:26 -0500
commita380cfd8e03148a05859a7496d235fa14bde4796 (patch)
tree9ef2f4b66804e73e242d0d07fd30769898a0ca23 /vendor
parent11e316654e523bd668a20e1e6a95da3f5b9b22de (diff)
downloadgitea-a380cfd8e03148a05859a7496d235fa14bde4796.tar.gz
gitea-a380cfd8e03148a05859a7496d235fa14bde4796.zip
Update bleve dependency to latest master revision (#6100)
* update bleve to master b17287a86f6cac923a5d886e10618df994eeb54b6724eac2e3b8dde89cfbe3a2 * remove unused pkg from dep file * change bleve from master to recent revision
Diffstat (limited to 'vendor')
-rw-r--r--vendor/github.com/Smerity/govarint/LICENSE22
-rw-r--r--vendor/github.com/Smerity/govarint/govarint.go229
-rw-r--r--vendor/github.com/blevesearch/bleve/analysis/freq.go41
-rw-r--r--vendor/github.com/blevesearch/bleve/analysis/token/camelcase/parser.go8
-rw-r--r--vendor/github.com/blevesearch/bleve/analysis/token/unique/unique.go2
-rw-r--r--vendor/github.com/blevesearch/bleve/document/document.go29
-rw-r--r--vendor/github.com/blevesearch/bleve/document/field.go2
-rw-r--r--vendor/github.com/blevesearch/bleve/document/field_boolean.go16
-rw-r--r--vendor/github.com/blevesearch/bleve/document/field_composite.go25
-rw-r--r--vendor/github.com/blevesearch/bleve/document/field_datetime.go15
-rw-r--r--vendor/github.com/blevesearch/bleve/document/field_geopoint.go15
-rw-r--r--vendor/github.com/blevesearch/bleve/document/field_numeric.go15
-rw-r--r--vendor/github.com/blevesearch/bleve/document/field_text.go16
-rw-r--r--vendor/github.com/blevesearch/bleve/geo/geohash.go174
-rw-r--r--vendor/github.com/blevesearch/bleve/geo/parse.go43
-rw-r--r--vendor/github.com/blevesearch/bleve/index.go35
-rw-r--r--vendor/github.com/blevesearch/bleve/index/analysis.go19
-rw-r--r--vendor/github.com/blevesearch/bleve/index/index.go124
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/introducer.go272
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/merge.go144
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/merge_plan.go23
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/optimize.go420
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/persister.go285
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/reader.go110
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/scorch.go267
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go40
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/build.go321
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/dict.go103
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/posting.go178
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/segment.go289
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go75
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go43
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go542
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/contentcoder.go131
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/count.go10
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go151
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go254
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/enumerator.go16
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/intcoder.go83
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go568
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go826
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go801
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go232
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/write.go22
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go402
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go17
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_doc.go13
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_tfr.go85
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go2
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go140
-rw-r--r--vendor/github.com/blevesearch/bleve/index/scorch/stats.go156
-rw-r--r--vendor/github.com/blevesearch/bleve/index/store/boltdb/iterator.go2
-rw-r--r--vendor/github.com/blevesearch/bleve/index/store/boltdb/reader.go2
-rw-r--r--vendor/github.com/blevesearch/bleve/index/store/boltdb/store.go8
-rw-r--r--vendor/github.com/blevesearch/bleve/index/upsidedown/index_reader.go23
-rw-r--r--vendor/github.com/blevesearch/bleve/index/upsidedown/reader.go39
-rw-r--r--vendor/github.com/blevesearch/bleve/index/upsidedown/row.go31
-rw-r--r--vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go25
-rw-r--r--vendor/github.com/blevesearch/bleve/index_alias_impl.go1
-rw-r--r--vendor/github.com/blevesearch/bleve/index_impl.go107
-rw-r--r--vendor/github.com/blevesearch/bleve/index_meta.go3
-rw-r--r--vendor/github.com/blevesearch/bleve/mapping/document.go14
-rw-r--r--vendor/github.com/blevesearch/bleve/mapping/index.go2
-rw-r--r--vendor/github.com/blevesearch/bleve/mapping/reflect.go3
-rw-r--r--vendor/github.com/blevesearch/bleve/numeric/bin.go2
-rw-r--r--vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go4
-rw-r--r--vendor/github.com/blevesearch/bleve/search.go77
-rw-r--r--vendor/github.com/blevesearch/bleve/search/collector.go20
-rw-r--r--vendor/github.com/blevesearch/bleve/search/collector/heap.go4
-rw-r--r--vendor/github.com/blevesearch/bleve/search/collector/list.go5
-rw-r--r--vendor/github.com/blevesearch/bleve/search/collector/slice.go4
-rw-r--r--vendor/github.com/blevesearch/bleve/search/collector/topn.go147
-rw-r--r--vendor/github.com/blevesearch/bleve/search/explanation.go21
-rw-r--r--vendor/github.com/blevesearch/bleve/search/facet/facet_builder_datetime.go29
-rw-r--r--vendor/github.com/blevesearch/bleve/search/facet/facet_builder_numeric.go29
-rw-r--r--vendor/github.com/blevesearch/bleve/search/facet/facet_builder_terms.go21
-rw-r--r--vendor/github.com/blevesearch/bleve/search/facets_builder.go56
-rw-r--r--vendor/github.com/blevesearch/bleve/search/levenshtein.go17
-rw-r--r--vendor/github.com/blevesearch/bleve/search/pool.go11
-rw-r--r--vendor/github.com/blevesearch/bleve/search/query/conjunction.go2
-rw-r--r--vendor/github.com/blevesearch/bleve/search/query/disjunction.go11
-rw-r--r--vendor/github.com/blevesearch/bleve/search/query/query.go12
-rw-r--r--vendor/github.com/blevesearch/bleve/search/query/query_string_lex.go1
-rw-r--r--vendor/github.com/blevesearch/bleve/search/query/regexp.go37
-rw-r--r--vendor/github.com/blevesearch/bleve/search/query/wildcard.go23
-rw-r--r--vendor/github.com/blevesearch/bleve/search/scorer/scorer_conjunction.go25
-rw-r--r--vendor/github.com/blevesearch/bleve/search/scorer/scorer_constant.go19
-rw-r--r--vendor/github.com/blevesearch/bleve/search/scorer/scorer_disjunction.go24
-rw-r--r--vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go84
-rw-r--r--vendor/github.com/blevesearch/bleve/search/search.go155
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go109
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_conjunction.go57
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction.go263
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction_heap.go343
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction_slice.go298
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_docid.go16
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_filter.go15
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_fuzzy.go49
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go36
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go35
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_match_all.go16
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_match_none.go14
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_multi_term.go8
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go21
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_phrase.go167
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_regexp.go48
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_term.go51
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_term_prefix.go11
-rw-r--r--vendor/github.com/blevesearch/bleve/search/searcher/search_term_range.go6
-rw-r--r--vendor/github.com/blevesearch/bleve/search/sort.go69
-rw-r--r--vendor/github.com/blevesearch/bleve/search/util.go27
-rw-r--r--vendor/github.com/blevesearch/bleve/size/sizes.go59
-rw-r--r--vendor/github.com/boltdb/bolt/bolt_mips64le.go11
-rw-r--r--vendor/github.com/boltdb/bolt/bolt_mipsle.go11
-rw-r--r--vendor/github.com/boltdb/bolt/freelist.go252
-rw-r--r--vendor/github.com/couchbase/vellum/automaton.go2
-rw-r--r--vendor/github.com/couchbase/vellum/builder.go159
-rw-r--r--vendor/github.com/couchbase/vellum/decoder_v1.go4
-rw-r--r--vendor/github.com/couchbase/vellum/fst.go50
-rw-r--r--vendor/github.com/couchbase/vellum/fst_iterator.go131
-rw-r--r--vendor/github.com/couchbase/vellum/levenshtein2/LICENSE203
-rw-r--r--vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go125
-rw-r--r--vendor/github.com/couchbase/vellum/levenshtein2/dfa.go250
-rw-r--r--vendor/github.com/couchbase/vellum/levenshtein2/levenshtein.go64
-rw-r--r--vendor/github.com/couchbase/vellum/levenshtein2/levenshtein_nfa.go292
-rw-r--r--vendor/github.com/couchbase/vellum/levenshtein2/parametric_dfa.go349
-rw-r--r--vendor/github.com/couchbase/vellum/regexp/compile.go79
-rw-r--r--vendor/github.com/couchbase/vellum/regexp/dfa.go38
-rw-r--r--vendor/github.com/couchbase/vellum/regexp/inst.go2
-rw-r--r--vendor/github.com/couchbase/vellum/regexp/regexp.go10
-rw-r--r--vendor/github.com/couchbase/vellum/registry.go36
-rw-r--r--vendor/github.com/couchbase/vellum/utf8/utf8.go98
-rw-r--r--vendor/github.com/etcd-io/bbolt/LICENSE (renamed from vendor/github.com/boltdb/bolt/LICENSE)0
-rw-r--r--vendor/github.com/etcd-io/bbolt/bolt_386.go (renamed from vendor/github.com/boltdb/bolt/bolt_386.go)2
-rw-r--r--vendor/github.com/etcd-io/bbolt/bolt_amd64.go (renamed from vendor/github.com/boltdb/bolt/bolt_amd64.go)2
-rw-r--r--vendor/github.com/etcd-io/bbolt/bolt_arm.go (renamed from vendor/github.com/boltdb/bolt/bolt_arm.go)2
-rw-r--r--vendor/github.com/etcd-io/bbolt/bolt_arm64.go (renamed from vendor/github.com/boltdb/bolt/bolt_arm64.go)2
-rw-r--r--vendor/github.com/etcd-io/bbolt/bolt_linux.go (renamed from vendor/github.com/boltdb/bolt/bolt_linux.go)2
-rw-r--r--vendor/github.com/etcd-io/bbolt/bolt_mips64x.go (renamed from vendor/github.com/boltdb/bolt/bolt_mips64.go)9
-rw-r--r--vendor/github.com/etcd-io/bbolt/bolt_mipsx.go (renamed from vendor/github.com/boltdb/bolt/bolt_mips.go)7
-rw-r--r--vendor/github.com/etcd-io/bbolt/bolt_openbsd.go (renamed from vendor/github.com/boltdb/bolt/bolt_openbsd.go)2
-rw-r--r--vendor/github.com/etcd-io/bbolt/bolt_ppc.go (renamed from vendor/github.com/boltdb/bolt/bolt_ppc.go)5
-rw-r--r--vendor/github.com/etcd-io/bbolt/bolt_ppc64.go (renamed from vendor/github.com/boltdb/bolt/bolt_ppc64.go)2
-rw-r--r--vendor/github.com/etcd-io/bbolt/bolt_ppc64le.go (renamed from vendor/github.com/boltdb/bolt/bolt_ppc64le.go)2
-rw-r--r--vendor/github.com/etcd-io/bbolt/bolt_s390x.go (renamed from vendor/github.com/boltdb/bolt/bolt_s390x.go)2
-rw-r--r--vendor/github.com/etcd-io/bbolt/bolt_unix.go (renamed from vendor/github.com/boltdb/bolt/bolt_unix.go)42
-rw-r--r--vendor/github.com/etcd-io/bbolt/bolt_unix_solaris.go (renamed from vendor/github.com/boltdb/bolt/bolt_unix_solaris.go)44
-rw-r--r--vendor/github.com/etcd-io/bbolt/bolt_windows.go (renamed from vendor/github.com/boltdb/bolt/bolt_windows.go)57
-rw-r--r--vendor/github.com/etcd-io/bbolt/boltsync_unix.go (renamed from vendor/github.com/boltdb/bolt/boltsync_unix.go)2
-rw-r--r--vendor/github.com/etcd-io/bbolt/bucket.go (renamed from vendor/github.com/boltdb/bolt/bucket.go)16
-rw-r--r--vendor/github.com/etcd-io/bbolt/cursor.go (renamed from vendor/github.com/boltdb/bolt/cursor.go)10
-rw-r--r--vendor/github.com/etcd-io/bbolt/db.go (renamed from vendor/github.com/boltdb/bolt/db.go)251
-rw-r--r--vendor/github.com/etcd-io/bbolt/doc.go (renamed from vendor/github.com/boltdb/bolt/doc.go)4
-rw-r--r--vendor/github.com/etcd-io/bbolt/errors.go (renamed from vendor/github.com/boltdb/bolt/errors.go)2
-rw-r--r--vendor/github.com/etcd-io/bbolt/freelist.go370
-rw-r--r--vendor/github.com/etcd-io/bbolt/freelist_hmap.go178
-rw-r--r--vendor/github.com/etcd-io/bbolt/node.go (renamed from vendor/github.com/boltdb/bolt/node.go)4
-rw-r--r--vendor/github.com/etcd-io/bbolt/page.go (renamed from vendor/github.com/boltdb/bolt/page.go)2
-rw-r--r--vendor/github.com/etcd-io/bbolt/tx.go (renamed from vendor/github.com/boltdb/bolt/tx.go)85
159 files changed, 9816 insertions, 4125 deletions
diff --git a/vendor/github.com/Smerity/govarint/LICENSE b/vendor/github.com/Smerity/govarint/LICENSE
deleted file mode 100644
index be09cac865..0000000000
--- a/vendor/github.com/Smerity/govarint/LICENSE
+++ /dev/null
@@ -1,22 +0,0 @@
-The MIT License (MIT)
-
-Copyright (c) 2015 Stephen Merity
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
diff --git a/vendor/github.com/Smerity/govarint/govarint.go b/vendor/github.com/Smerity/govarint/govarint.go
deleted file mode 100644
index 61328a337b..0000000000
--- a/vendor/github.com/Smerity/govarint/govarint.go
+++ /dev/null
@@ -1,229 +0,0 @@
-package govarint
-
-import "encoding/binary"
-import "io"
-
-type U32VarintEncoder interface {
- PutU32(x uint32) int
- Close()
-}
-
-type U32VarintDecoder interface {
- GetU32() (uint32, error)
-}
-
-///
-
-type U64VarintEncoder interface {
- PutU64(x uint64) int
- Close()
-}
-
-type U64VarintDecoder interface {
- GetU64() (uint64, error)
-}
-
-///
-
-type U32GroupVarintEncoder struct {
- w io.Writer
- index int
- store [4]uint32
- temp [17]byte
-}
-
-func NewU32GroupVarintEncoder(w io.Writer) *U32GroupVarintEncoder { return &U32GroupVarintEncoder{w: w} }
-
-func (b *U32GroupVarintEncoder) Flush() (int, error) {
- // TODO: Is it more efficient to have a tailored version that's called only in Close()?
- // If index is zero, there are no integers to flush
- if b.index == 0 {
- return 0, nil
- }
- // In the case we're flushing (the group isn't of size four), the non-values should be zero
- // This ensures the unused entries are all zero in the sizeByte
- for i := b.index; i < 4; i++ {
- b.store[i] = 0
- }
- length := 1
- // We need to reset the size byte to zero as we only bitwise OR into it, we don't overwrite it
- b.temp[0] = 0
- for i, x := range b.store {
- size := byte(0)
- shifts := []byte{24, 16, 8, 0}
- for _, shift := range shifts {
- // Always writes at least one byte -- the first one (shift = 0)
- // Will write more bytes until the rest of the integer is all zeroes
- if (x>>shift) != 0 || shift == 0 {
- size += 1
- b.temp[length] = byte(x >> shift)
- length += 1
- }
- }
- // We store the size in two of the eight bits in the first byte (sizeByte)
- // 0 means there is one byte in total, hence why we subtract one from size
- b.temp[0] |= (size - 1) << (uint8(3-i) * 2)
- }
- // If we're flushing without a full group of four, remove the unused bytes we computed
- // This enables us to realize it's a partial group on decoding thanks to EOF
- if b.index != 4 {
- length -= 4 - b.index
- }
- _, err := b.w.Write(b.temp[:length])
- return length, err
-}
-
-func (b *U32GroupVarintEncoder) PutU32(x uint32) (int, error) {
- bytesWritten := 0
- b.store[b.index] = x
- b.index += 1
- if b.index == 4 {
- n, err := b.Flush()
- if err != nil {
- return n, err
- }
- bytesWritten += n
- b.index = 0
- }
- return bytesWritten, nil
-}
-
-func (b *U32GroupVarintEncoder) Close() {
- // On Close, we flush any remaining values that might not have been in a full group
- b.Flush()
-}
-
-///
-
-type U32GroupVarintDecoder struct {
- r io.ByteReader
- group [4]uint32
- pos int
- finished bool
- capacity int
-}
-
-func NewU32GroupVarintDecoder(r io.ByteReader) *U32GroupVarintDecoder {
- return &U32GroupVarintDecoder{r: r, pos: 4, capacity: 4}
-}
-
-func (b *U32GroupVarintDecoder) getGroup() error {
- // We should always receive a sizeByte if there are more values to read
- sizeByte, err := b.r.ReadByte()
- if err != nil {
- return err
- }
- // Calculate the size of the four incoming 32 bit integers
- // 0b00 means 1 byte to read, 0b01 = 2, etc
- b.group[0] = uint32((sizeByte >> 6) & 3)
- b.group[1] = uint32((sizeByte >> 4) & 3)
- b.group[2] = uint32((sizeByte >> 2) & 3)
- b.group[3] = uint32(sizeByte & 3)
- //
- for index, size := range b.group {
- b.group[index] = 0
- // Any error that occurs in earlier byte reads should be repeated at the end one
- // Hence we only catch and report the final ReadByte's error
- var err error
- switch size {
- case 0:
- var x byte
- x, err = b.r.ReadByte()
- b.group[index] = uint32(x)
- case 1:
- var x, y byte
- x, _ = b.r.ReadByte()
- y, err = b.r.ReadByte()
- b.group[index] = uint32(x)<<8 | uint32(y)
- case 2:
- var x, y, z byte
- x, _ = b.r.ReadByte()
- y, _ = b.r.ReadByte()
- z, err = b.r.ReadByte()
- b.group[index] = uint32(x)<<16 | uint32(y)<<8 | uint32(z)
- case 3:
- var x, y, z, zz byte
- x, _ = b.r.ReadByte()
- y, _ = b.r.ReadByte()
- z, _ = b.r.ReadByte()
- zz, err = b.r.ReadByte()
- b.group[index] = uint32(x)<<24 | uint32(y)<<16 | uint32(z)<<8 | uint32(zz)
- }
- if err != nil {
- if err == io.EOF {
- // If we hit EOF here, we have found a partial group
- // We've return any valid entries we have read and return EOF once we run out
- b.capacity = index
- b.finished = true
- break
- } else {
- return err
- }
- }
- }
- // Reset the pos pointer to the beginning of the read values
- b.pos = 0
- return nil
-}
-
-func (b *U32GroupVarintDecoder) GetU32() (uint32, error) {
- // Check if we have any more values to give out - if not, let's get them
- if b.pos == b.capacity {
- // If finished is set, there is nothing else to do
- if b.finished {
- return 0, io.EOF
- }
- err := b.getGroup()
- if err != nil {
- return 0, err
- }
- }
- // Increment pointer and return the value stored at that point
- b.pos += 1
- return b.group[b.pos-1], nil
-}
-
-///
-
-type Base128Encoder struct {
- w io.Writer
- tmpBytes []byte
-}
-
-func NewU32Base128Encoder(w io.Writer) *Base128Encoder {
- return &Base128Encoder{w: w, tmpBytes: make([]byte, binary.MaxVarintLen32)}
-}
-func NewU64Base128Encoder(w io.Writer) *Base128Encoder {
- return &Base128Encoder{w: w, tmpBytes: make([]byte, binary.MaxVarintLen64)}
-}
-
-func (b *Base128Encoder) PutU32(x uint32) (int, error) {
- writtenBytes := binary.PutUvarint(b.tmpBytes, uint64(x))
- return b.w.Write(b.tmpBytes[:writtenBytes])
-}
-
-func (b *Base128Encoder) PutU64(x uint64) (int, error) {
- writtenBytes := binary.PutUvarint(b.tmpBytes, x)
- return b.w.Write(b.tmpBytes[:writtenBytes])
-}
-
-func (b *Base128Encoder) Close() {
-}
-
-///
-
-type Base128Decoder struct {
- r io.ByteReader
-}
-
-func NewU32Base128Decoder(r io.ByteReader) *Base128Decoder { return &Base128Decoder{r: r} }
-func NewU64Base128Decoder(r io.ByteReader) *Base128Decoder { return &Base128Decoder{r: r} }
-
-func (b *Base128Decoder) GetU32() (uint32, error) {
- v, err := binary.ReadUvarint(b.r)
- return uint32(v), err
-}
-
-func (b *Base128Decoder) GetU64() (uint64, error) {
- return binary.ReadUvarint(b.r)
-}
diff --git a/vendor/github.com/blevesearch/bleve/analysis/freq.go b/vendor/github.com/blevesearch/bleve/analysis/freq.go
index e1ca2cd6fd..198c149b2b 100644
--- a/vendor/github.com/blevesearch/bleve/analysis/freq.go
+++ b/vendor/github.com/blevesearch/bleve/analysis/freq.go
@@ -14,6 +14,22 @@
package analysis
+import (
+ "reflect"
+
+ "github.com/blevesearch/bleve/size"
+)
+
+var reflectStaticSizeTokenLocation int
+var reflectStaticSizeTokenFreq int
+
+func init() {
+ var tl TokenLocation
+ reflectStaticSizeTokenLocation = int(reflect.TypeOf(tl).Size())
+ var tf TokenFreq
+ reflectStaticSizeTokenFreq = int(reflect.TypeOf(tf).Size())
+}
+
// TokenLocation represents one occurrence of a term at a particular location in
// a field. Start, End and Position have the same meaning as in analysis.Token.
// Field and ArrayPositions identify the field value in the source document.
@@ -26,6 +42,12 @@ type TokenLocation struct {
Position int
}
+func (tl *TokenLocation) Size() int {
+ rv := reflectStaticSizeTokenLocation
+ rv += len(tl.ArrayPositions) * size.SizeOfUint64
+ return rv
+}
+
// TokenFreq represents all the occurrences of a term in all fields of a
// document.
type TokenFreq struct {
@@ -34,6 +56,15 @@ type TokenFreq struct {
frequency int
}
+func (tf *TokenFreq) Size() int {
+ rv := reflectStaticSizeTokenFreq
+ rv += len(tf.Term)
+ for _, loc := range tf.Locations {
+ rv += loc.Size()
+ }
+ return rv
+}
+
func (tf *TokenFreq) Frequency() int {
return tf.frequency
}
@@ -42,6 +73,16 @@ func (tf *TokenFreq) Frequency() int {
// fields.
type TokenFrequencies map[string]*TokenFreq
+func (tfs TokenFrequencies) Size() int {
+ rv := size.SizeOfMap
+ rv += len(tfs) * (size.SizeOfString + size.SizeOfPtr)
+ for k, v := range tfs {
+ rv += len(k)
+ rv += v.Size()
+ }
+ return rv
+}
+
func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies) {
// walk the new token frequencies
for tfk, tf := range other {
diff --git a/vendor/github.com/blevesearch/bleve/analysis/token/camelcase/parser.go b/vendor/github.com/blevesearch/bleve/analysis/token/camelcase/parser.go
index d691e56463..ff4ce2fea7 100644
--- a/vendor/github.com/blevesearch/bleve/analysis/token/camelcase/parser.go
+++ b/vendor/github.com/blevesearch/bleve/analysis/token/camelcase/parser.go
@@ -46,11 +46,11 @@ type Parser struct {
index int
}
-func NewParser(len, position, index int) *Parser {
+func NewParser(length, position, index int) *Parser {
return &Parser{
- bufferLen: len,
- buffer: make([]rune, 0, len),
- tokens: make([]*analysis.Token, 0, len),
+ bufferLen: length,
+ buffer: make([]rune, 0, length),
+ tokens: make([]*analysis.Token, 0, length),
position: position,
index: index,
}
diff --git a/vendor/github.com/blevesearch/bleve/analysis/token/unique/unique.go b/vendor/github.com/blevesearch/bleve/analysis/token/unique/unique.go
index f0d96c5048..c60e8c9793 100644
--- a/vendor/github.com/blevesearch/bleve/analysis/token/unique/unique.go
+++ b/vendor/github.com/blevesearch/bleve/analysis/token/unique/unique.go
@@ -21,7 +21,7 @@ import (
const Name = "unique"
-// UniqueTermFilter retains only the tokens which mark the first occurence of
+// UniqueTermFilter retains only the tokens which mark the first occurrence of
// a term. Tokens whose term appears in a preceding token are dropped.
type UniqueTermFilter struct{}
diff --git a/vendor/github.com/blevesearch/bleve/document/document.go b/vendor/github.com/blevesearch/bleve/document/document.go
index c37585c661..6ac17b9ab7 100644
--- a/vendor/github.com/blevesearch/bleve/document/document.go
+++ b/vendor/github.com/blevesearch/bleve/document/document.go
@@ -14,7 +14,19 @@
package document
-import "fmt"
+import (
+ "fmt"
+ "reflect"
+
+ "github.com/blevesearch/bleve/size"
+)
+
+var reflectStaticSizeDocument int
+
+func init() {
+ var d Document
+ reflectStaticSizeDocument = int(reflect.TypeOf(d).Size())
+}
type Document struct {
ID string `json:"id"`
@@ -30,6 +42,21 @@ func NewDocument(id string) *Document {
}
}
+func (d *Document) Size() int {
+ sizeInBytes := reflectStaticSizeDocument + size.SizeOfPtr +
+ len(d.ID)
+
+ for _, entry := range d.Fields {
+ sizeInBytes += entry.Size()
+ }
+
+ for _, entry := range d.CompositeFields {
+ sizeInBytes += entry.Size()
+ }
+
+ return sizeInBytes
+}
+
func (d *Document) AddField(f Field) *Document {
switch f := f.(type) {
case *CompositeField:
diff --git a/vendor/github.com/blevesearch/bleve/document/field.go b/vendor/github.com/blevesearch/bleve/document/field.go
index c17f81e5d4..2fe9166985 100644
--- a/vendor/github.com/blevesearch/bleve/document/field.go
+++ b/vendor/github.com/blevesearch/bleve/document/field.go
@@ -36,4 +36,6 @@ type Field interface {
// that this field represents - this is a common metric for tracking
// the rate of indexing
NumPlainTextBytes() uint64
+
+ Size() int
}
diff --git a/vendor/github.com/blevesearch/bleve/document/field_boolean.go b/vendor/github.com/blevesearch/bleve/document/field_boolean.go
index c226374c07..6864b16f44 100644
--- a/vendor/github.com/blevesearch/bleve/document/field_boolean.go
+++ b/vendor/github.com/blevesearch/bleve/document/field_boolean.go
@@ -16,10 +16,19 @@ package document
import (
"fmt"
+ "reflect"
"github.com/blevesearch/bleve/analysis"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeBooleanField int
+
+func init() {
+ var f BooleanField
+ reflectStaticSizeBooleanField = int(reflect.TypeOf(f).Size())
+}
+
const DefaultBooleanIndexingOptions = StoreField | IndexField | DocValues
type BooleanField struct {
@@ -30,6 +39,13 @@ type BooleanField struct {
numPlainTextBytes uint64
}
+func (b *BooleanField) Size() int {
+ return reflectStaticSizeBooleanField + size.SizeOfPtr +
+ len(b.name) +
+ len(b.arrayPositions)*size.SizeOfUint64 +
+ len(b.value)
+}
+
func (b *BooleanField) Name() string {
return b.name
}
diff --git a/vendor/github.com/blevesearch/bleve/document/field_composite.go b/vendor/github.com/blevesearch/bleve/document/field_composite.go
index b41b1b8ed9..a8285880fd 100644
--- a/vendor/github.com/blevesearch/bleve/document/field_composite.go
+++ b/vendor/github.com/blevesearch/bleve/document/field_composite.go
@@ -15,9 +15,19 @@
package document
import (
+ "reflect"
+
"github.com/blevesearch/bleve/analysis"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeCompositeField int
+
+func init() {
+ var cf CompositeField
+ reflectStaticSizeCompositeField = int(reflect.TypeOf(cf).Size())
+}
+
const DefaultCompositeIndexingOptions = IndexField
type CompositeField struct {
@@ -54,6 +64,21 @@ func NewCompositeFieldWithIndexingOptions(name string, defaultInclude bool, incl
return rv
}
+func (c *CompositeField) Size() int {
+ sizeInBytes := reflectStaticSizeCompositeField + size.SizeOfPtr +
+ len(c.name)
+
+ for k, _ := range c.includedFields {
+ sizeInBytes += size.SizeOfString + len(k) + size.SizeOfBool
+ }
+
+ for k, _ := range c.excludedFields {
+ sizeInBytes += size.SizeOfString + len(k) + size.SizeOfBool
+ }
+
+ return sizeInBytes
+}
+
func (c *CompositeField) Name() string {
return c.name
}
diff --git a/vendor/github.com/blevesearch/bleve/document/field_datetime.go b/vendor/github.com/blevesearch/bleve/document/field_datetime.go
index 1db068c87b..583b44cdeb 100644
--- a/vendor/github.com/blevesearch/bleve/document/field_datetime.go
+++ b/vendor/github.com/blevesearch/bleve/document/field_datetime.go
@@ -17,12 +17,21 @@ package document
import (
"fmt"
"math"
+ "reflect"
"time"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/numeric"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeDateTimeField int
+
+func init() {
+ var f DateTimeField
+ reflectStaticSizeDateTimeField = int(reflect.TypeOf(f).Size())
+}
+
const DefaultDateTimeIndexingOptions = StoreField | IndexField | DocValues
const DefaultDateTimePrecisionStep uint = 4
@@ -37,6 +46,12 @@ type DateTimeField struct {
numPlainTextBytes uint64
}
+func (n *DateTimeField) Size() int {
+ return reflectStaticSizeDateTimeField + size.SizeOfPtr +
+ len(n.name) +
+ len(n.arrayPositions)*size.SizeOfUint64
+}
+
func (n *DateTimeField) Name() string {
return n.name
}
diff --git a/vendor/github.com/blevesearch/bleve/document/field_geopoint.go b/vendor/github.com/blevesearch/bleve/document/field_geopoint.go
index f508b36254..91fe23f96e 100644
--- a/vendor/github.com/blevesearch/bleve/document/field_geopoint.go
+++ b/vendor/github.com/blevesearch/bleve/document/field_geopoint.go
@@ -16,12 +16,21 @@ package document
import (
"fmt"
+ "reflect"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/numeric"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeGeoPointField int
+
+func init() {
+ var f GeoPointField
+ reflectStaticSizeGeoPointField = int(reflect.TypeOf(f).Size())
+}
+
var GeoPrecisionStep uint = 9
type GeoPointField struct {
@@ -32,6 +41,12 @@ type GeoPointField struct {
numPlainTextBytes uint64
}
+func (n *GeoPointField) Size() int {
+ return reflectStaticSizeGeoPointField + size.SizeOfPtr +
+ len(n.name) +
+ len(n.arrayPositions)*size.SizeOfUint64
+}
+
func (n *GeoPointField) Name() string {
return n.name
}
diff --git a/vendor/github.com/blevesearch/bleve/document/field_numeric.go b/vendor/github.com/blevesearch/bleve/document/field_numeric.go
index e32993c887..46c685e84e 100644
--- a/vendor/github.com/blevesearch/bleve/document/field_numeric.go
+++ b/vendor/github.com/blevesearch/bleve/document/field_numeric.go
@@ -16,11 +16,20 @@ package document
import (
"fmt"
+ "reflect"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/numeric"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeNumericField int
+
+func init() {
+ var f NumericField
+ reflectStaticSizeNumericField = int(reflect.TypeOf(f).Size())
+}
+
const DefaultNumericIndexingOptions = StoreField | IndexField | DocValues
const DefaultPrecisionStep uint = 4
@@ -33,6 +42,12 @@ type NumericField struct {
numPlainTextBytes uint64
}
+func (n *NumericField) Size() int {
+ return reflectStaticSizeNumericField + size.SizeOfPtr +
+ len(n.name) +
+ len(n.arrayPositions)*size.SizeOfPtr
+}
+
func (n *NumericField) Name() string {
return n.name
}
diff --git a/vendor/github.com/blevesearch/bleve/document/field_text.go b/vendor/github.com/blevesearch/bleve/document/field_text.go
index 5f7a3ab648..c8e871c9d5 100644
--- a/vendor/github.com/blevesearch/bleve/document/field_text.go
+++ b/vendor/github.com/blevesearch/bleve/document/field_text.go
@@ -16,10 +16,19 @@ package document
import (
"fmt"
+ "reflect"
"github.com/blevesearch/bleve/analysis"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeTextField int
+
+func init() {
+ var f TextField
+ reflectStaticSizeTextField = int(reflect.TypeOf(f).Size())
+}
+
const DefaultTextIndexingOptions = IndexField | DocValues
type TextField struct {
@@ -31,6 +40,13 @@ type TextField struct {
numPlainTextBytes uint64
}
+func (t *TextField) Size() int {
+ return reflectStaticSizeTextField + size.SizeOfPtr +
+ len(t.name) +
+ len(t.arrayPositions)*size.SizeOfUint64 +
+ len(t.value)
+}
+
func (t *TextField) Name() string {
return t.name
}
diff --git a/vendor/github.com/blevesearch/bleve/geo/geohash.go b/vendor/github.com/blevesearch/bleve/geo/geohash.go
new file mode 100644
index 0000000000..35db720c0f
--- /dev/null
+++ b/vendor/github.com/blevesearch/bleve/geo/geohash.go
@@ -0,0 +1,174 @@
+// The code here was obtained from:
+// https://github.com/mmcloughlin/geohash
+
+// The MIT License (MIT)
+// Copyright (c) 2015 Michael McLoughlin
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+package geo
+
+import (
+ "math"
+)
+
+// encoding encapsulates an encoding defined by a given base32 alphabet.
+type encoding struct {
+ enc string
+ dec [256]byte
+}
+
+// newEncoding constructs a new encoding defined by the given alphabet,
+// which must be a 32-byte string.
+func newEncoding(encoder string) *encoding {
+ e := new(encoding)
+ e.enc = encoder
+ for i := 0; i < len(e.dec); i++ {
+ e.dec[i] = 0xff
+ }
+ for i := 0; i < len(encoder); i++ {
+ e.dec[encoder[i]] = byte(i)
+ }
+ return e
+}
+
+// Decode string into bits of a 64-bit word. The string s may be at most 12
+// characters.
+func (e *encoding) decode(s string) uint64 {
+ x := uint64(0)
+ for i := 0; i < len(s); i++ {
+ x = (x << 5) | uint64(e.dec[s[i]])
+ }
+ return x
+}
+
+// Encode bits of 64-bit word into a string.
+func (e *encoding) encode(x uint64) string {
+ b := [12]byte{}
+ for i := 0; i < 12; i++ {
+ b[11-i] = e.enc[x&0x1f]
+ x >>= 5
+ }
+ return string(b[:])
+}
+
+// Base32Encoding with the Geohash alphabet.
+var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz")
+
+// BoundingBox returns the region encoded by the given string geohash.
+func geoBoundingBox(hash string) geoBox {
+ bits := uint(5 * len(hash))
+ inthash := base32encoding.decode(hash)
+ return geoBoundingBoxIntWithPrecision(inthash, bits)
+}
+
+// Box represents a rectangle in latitude/longitude space.
+type geoBox struct {
+ minLat float64
+ maxLat float64
+ minLng float64
+ maxLng float64
+}
+
+// Round returns a point inside the box, making an effort to round to minimal
+// precision.
+func (b geoBox) round() (lat, lng float64) {
+ x := maxDecimalPower(b.maxLat - b.minLat)
+ lat = math.Ceil(b.minLat/x) * x
+ x = maxDecimalPower(b.maxLng - b.minLng)
+ lng = math.Ceil(b.minLng/x) * x
+ return
+}
+
+// precalculated for performance
+var exp232 = math.Exp2(32)
+
+// errorWithPrecision returns the error range in latitude and longitude for in
+// integer geohash with bits of precision.
+func errorWithPrecision(bits uint) (latErr, lngErr float64) {
+ b := int(bits)
+ latBits := b / 2
+ lngBits := b - latBits
+ latErr = math.Ldexp(180.0, -latBits)
+ lngErr = math.Ldexp(360.0, -lngBits)
+ return
+}
+
+// minDecimalPlaces returns the minimum number of decimal places such that
+// there must exist an number with that many places within any range of width
+// r. This is intended for returning minimal precision coordinates inside a
+// box.
+func maxDecimalPower(r float64) float64 {
+ m := int(math.Floor(math.Log10(r)))
+ return math.Pow10(m)
+}
+
+// Encode the position of x within the range -r to +r as a 32-bit integer.
+func encodeRange(x, r float64) uint32 {
+ p := (x + r) / (2 * r)
+ return uint32(p * exp232)
+}
+
+// Decode the 32-bit range encoding X back to a value in the range -r to +r.
+func decodeRange(X uint32, r float64) float64 {
+ p := float64(X) / exp232
+ x := 2*r*p - r
+ return x
+}
+
+// Squash the even bitlevels of X into a 32-bit word. Odd bitlevels of X are
+// ignored, and may take any value.
+func squash(X uint64) uint32 {
+ X &= 0x5555555555555555
+ X = (X | (X >> 1)) & 0x3333333333333333
+ X = (X | (X >> 2)) & 0x0f0f0f0f0f0f0f0f
+ X = (X | (X >> 4)) & 0x00ff00ff00ff00ff
+ X = (X | (X >> 8)) & 0x0000ffff0000ffff
+ X = (X | (X >> 16)) & 0x00000000ffffffff
+ return uint32(X)
+}
+
+// Deinterleave the bits of X into 32-bit words containing the even and odd
+// bitlevels of X, respectively.
+func deinterleave(X uint64) (uint32, uint32) {
+ return squash(X), squash(X >> 1)
+}
+
+// BoundingBoxIntWithPrecision returns the region encoded by the integer
+// geohash with the specified precision.
+func geoBoundingBoxIntWithPrecision(hash uint64, bits uint) geoBox {
+ fullHash := hash << (64 - bits)
+ latInt, lngInt := deinterleave(fullHash)
+ lat := decodeRange(latInt, 90)
+ lng := decodeRange(lngInt, 180)
+ latErr, lngErr := errorWithPrecision(bits)
+ return geoBox{
+ minLat: lat,
+ maxLat: lat + latErr,
+ minLng: lng,
+ maxLng: lng + lngErr,
+ }
+}
+
+// ----------------------------------------------------------------------
+
+// Decode the string geohash to a (lat, lng) point.
+func GeoHashDecode(hash string) (lat, lng float64) {
+ box := geoBoundingBox(hash)
+ return box.round()
+}
diff --git a/vendor/github.com/blevesearch/bleve/geo/parse.go b/vendor/github.com/blevesearch/bleve/geo/parse.go
index 04a57538d6..0511fea7b6 100644
--- a/vendor/github.com/blevesearch/bleve/geo/parse.go
+++ b/vendor/github.com/blevesearch/bleve/geo/parse.go
@@ -16,6 +16,7 @@ package geo
import (
"reflect"
+ "strconv"
"strings"
)
@@ -24,6 +25,8 @@ import (
// Container:
// slice length 2 (GeoJSON)
// first element lon, second element lat
+// string (coordinates separated by comma, or a geohash)
+// first element lat, second element lon
// map[string]interface{}
// exact keys lat and lon or lng
// struct
@@ -36,10 +39,14 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
var foundLon, foundLat bool
thingVal := reflect.ValueOf(thing)
+ if !thingVal.IsValid() {
+ return lon, lat, false
+ }
+
thingTyp := thingVal.Type()
// is it a slice
- if thingVal.IsValid() && thingVal.Kind() == reflect.Slice {
+ if thingVal.Kind() == reflect.Slice {
// must be length 2
if thingVal.Len() == 2 {
first := thingVal.Index(0)
@@ -55,6 +62,35 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
}
}
+ // is it a string
+ if thingVal.Kind() == reflect.String {
+ geoStr := thingVal.Interface().(string)
+ if strings.Contains(geoStr, ",") {
+ // geo point with coordinates split by comma
+ points := strings.Split(geoStr, ",")
+ for i, point := range points {
+ // trim any leading or trailing white spaces
+ points[i] = strings.TrimSpace(point)
+ }
+ if len(points) == 2 {
+ var err error
+ lat, err = strconv.ParseFloat(points[0], 64)
+ if err == nil {
+ foundLat = true
+ }
+ lon, err = strconv.ParseFloat(points[1], 64)
+ if err == nil {
+ foundLon = true
+ }
+ }
+ } else {
+ // geohash
+ lat, lon = GeoHashDecode(geoStr)
+ foundLat = true
+ foundLon = true
+ }
+ }
+
// is it a map
if l, ok := thing.(map[string]interface{}); ok {
if lval, ok := l["lon"]; ok {
@@ -68,7 +104,7 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
}
// now try reflection on struct fields
- if thingVal.IsValid() && thingVal.Kind() == reflect.Struct {
+ if thingVal.Kind() == reflect.Struct {
for i := 0; i < thingVal.NumField(); i++ {
fieldName := thingTyp.Field(i).Name
if strings.HasPrefix(strings.ToLower(fieldName), "lon") {
@@ -113,6 +149,9 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
// extract numeric value (if possible) and returns a float64
func extractNumericVal(v interface{}) (float64, bool) {
val := reflect.ValueOf(v)
+ if !val.IsValid() {
+ return 0, false
+ }
typ := val.Type()
switch typ.Kind() {
case reflect.Float32, reflect.Float64:
diff --git a/vendor/github.com/blevesearch/bleve/index.go b/vendor/github.com/blevesearch/bleve/index.go
index ea7b3832ac..99357eee01 100644
--- a/vendor/github.com/blevesearch/bleve/index.go
+++ b/vendor/github.com/blevesearch/bleve/index.go
@@ -21,6 +21,7 @@ import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/mapping"
+ "github.com/blevesearch/bleve/size"
)
// A Batch groups together multiple Index and Delete
@@ -32,6 +33,9 @@ import (
type Batch struct {
index Index
internal *index.Batch
+
+ lastDocSize uint64
+ totalSize uint64
}
// Index adds the specified index operation to the
@@ -47,9 +51,22 @@ func (b *Batch) Index(id string, data interface{}) error {
return err
}
b.internal.Update(doc)
+
+ b.lastDocSize = uint64(doc.Size() +
+ len(id) + size.SizeOfString) // overhead from internal
+ b.totalSize += b.lastDocSize
+
return nil
}
+func (b *Batch) LastDocSize() uint64 {
+ return b.lastDocSize
+}
+
+func (b *Batch) TotalDocsSize() uint64 {
+ return b.totalSize
+}
+
// IndexAdvanced adds the specified index operation to the
// batch which skips the mapping. NOTE: the bleve Index is not updated
// until the batch is executed.
@@ -102,6 +119,24 @@ func (b *Batch) Reset() {
b.internal.Reset()
}
+func (b *Batch) Merge(o *Batch) {
+ if o != nil && o.internal != nil {
+ b.internal.Merge(o.internal)
+ if o.LastDocSize() > 0 {
+ b.lastDocSize = o.LastDocSize()
+ }
+ b.totalSize = uint64(b.internal.TotalDocSize())
+ }
+}
+
+func (b *Batch) SetPersistedCallback(f index.BatchCallback) {
+ b.internal.SetPersistedCallback(f)
+}
+
+func (b *Batch) PersistedCallback() index.BatchCallback {
+ return b.internal.PersistedCallback()
+}
+
// An Index implements all the indexing and searching
// capabilities of bleve. An Index can be created
// using the New() and Open() methods.
diff --git a/vendor/github.com/blevesearch/bleve/index/analysis.go b/vendor/github.com/blevesearch/bleve/index/analysis.go
index 840dad97ae..82883af019 100644
--- a/vendor/github.com/blevesearch/bleve/index/analysis.go
+++ b/vendor/github.com/blevesearch/bleve/index/analysis.go
@@ -15,10 +15,20 @@
package index
import (
+ "reflect"
+
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/document"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeAnalysisResult int
+
+func init() {
+ var ar AnalysisResult
+ reflectStaticSizeAnalysisResult = int(reflect.TypeOf(ar).Size())
+}
+
type IndexRow interface {
KeySize() int
KeyTo([]byte) (int, error)
@@ -39,6 +49,15 @@ type AnalysisResult struct {
Length []int
}
+func (a *AnalysisResult) Size() int {
+ rv := reflectStaticSizeAnalysisResult
+ for _, analyzedI := range a.Analyzed {
+ rv += analyzedI.Size()
+ }
+ rv += len(a.Length) * size.SizeOfInt
+ return rv
+}
+
type AnalysisWork struct {
i Index
d *document.Document
diff --git a/vendor/github.com/blevesearch/bleve/index/index.go b/vendor/github.com/blevesearch/bleve/index/index.go
index 9870b41726..6aa444cfd8 100644
--- a/vendor/github.com/blevesearch/bleve/index/index.go
+++ b/vendor/github.com/blevesearch/bleve/index/index.go
@@ -18,11 +18,23 @@ import (
"bytes"
"encoding/json"
"fmt"
+ "reflect"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index/store"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeTermFieldDoc int
+var reflectStaticSizeTermFieldVector int
+
+func init() {
+ var tfd TermFieldDoc
+ reflectStaticSizeTermFieldDoc = int(reflect.TypeOf(tfd).Size())
+ var tfv TermFieldVector
+ reflectStaticSizeTermFieldVector = int(reflect.TypeOf(tfv).Size())
+}
+
var ErrorUnknownStorageType = fmt.Errorf("unknown storage type")
type Index interface {
@@ -68,6 +80,8 @@ type IndexReader interface {
Document(id string) (*document.Document, error)
DocumentVisitFieldTerms(id IndexInternalID, fields []string, visitor DocumentFieldTermVisitor) error
+ DocValueReader(fields []string) (DocValueReader, error)
+
Fields() ([]string, error)
GetInternal(key []byte) ([]byte, error)
@@ -84,6 +98,29 @@ type IndexReader interface {
Close() error
}
+// The Regexp interface defines the subset of the regexp.Regexp API
+// methods that are used by bleve indexes, allowing callers to pass in
+// alternate implementations.
+type Regexp interface {
+ FindStringIndex(s string) (loc []int)
+
+ LiteralPrefix() (prefix string, complete bool)
+
+ String() string
+}
+
+type IndexReaderRegexp interface {
+ FieldDictRegexp(field string, regex string) (FieldDict, error)
+}
+
+type IndexReaderFuzzy interface {
+ FieldDictFuzzy(field string, term string, fuzziness int, prefix string) (FieldDict, error)
+}
+
+type IndexReaderOnly interface {
+ FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error)
+}
+
// FieldTerms contains the terms used by a document, keyed by field
type FieldTerms map[string][]string
@@ -115,6 +152,11 @@ type TermFieldVector struct {
End uint64
}
+func (tfv *TermFieldVector) Size() int {
+ return reflectStaticSizeTermFieldVector + size.SizeOfPtr +
+ len(tfv.Field) + len(tfv.ArrayPositions)*size.SizeOfUint64
+}
+
// IndexInternalID is an opaque document identifier interal to the index impl
type IndexInternalID []byte
@@ -134,14 +176,27 @@ type TermFieldDoc struct {
Vectors []*TermFieldVector
}
+func (tfd *TermFieldDoc) Size() int {
+ sizeInBytes := reflectStaticSizeTermFieldDoc + size.SizeOfPtr +
+ len(tfd.Term) + len(tfd.ID)
+
+ for _, entry := range tfd.Vectors {
+ sizeInBytes += entry.Size()
+ }
+
+ return sizeInBytes
+}
+
// Reset allows an already allocated TermFieldDoc to be reused
func (tfd *TermFieldDoc) Reset() *TermFieldDoc {
// remember the []byte used for the ID
id := tfd.ID
+ vectors := tfd.Vectors
// idiom to copy over from empty TermFieldDoc (0 allocations)
*tfd = TermFieldDoc{}
// reuse the []byte already allocated (and reset len to 0)
tfd.ID = id[:0]
+ tfd.Vectors = vectors[:0]
return tfd
}
@@ -161,6 +216,8 @@ type TermFieldReader interface {
// Count returns the number of documents contains the term in this field.
Count() uint64
Close() error
+
+ Size() int
}
type DictEntry struct {
@@ -185,12 +242,18 @@ type DocIDReader interface {
// will start there instead. If ID is greater than or equal to the end of
// the range, Next() call will return io.EOF.
Advance(ID IndexInternalID) (IndexInternalID, error)
+
+ Size() int
+
Close() error
}
+type BatchCallback func(error)
+
type Batch struct {
- IndexOps map[string]*document.Document
- InternalOps map[string][]byte
+ IndexOps map[string]*document.Document
+ InternalOps map[string][]byte
+ persistedCallback BatchCallback
}
func NewBatch() *Batch {
@@ -216,6 +279,14 @@ func (b *Batch) DeleteInternal(key []byte) {
b.InternalOps[string(key)] = nil
}
+func (b *Batch) SetPersistedCallback(f BatchCallback) {
+ b.persistedCallback = f
+}
+
+func (b *Batch) PersistedCallback() BatchCallback {
+ return b.persistedCallback
+}
+
func (b *Batch) String() string {
rv := fmt.Sprintf("Batch (%d ops, %d internal ops)\n", len(b.IndexOps), len(b.InternalOps))
for k, v := range b.IndexOps {
@@ -238,4 +309,53 @@ func (b *Batch) String() string {
func (b *Batch) Reset() {
b.IndexOps = make(map[string]*document.Document)
b.InternalOps = make(map[string][]byte)
+ b.persistedCallback = nil
+}
+
+func (b *Batch) Merge(o *Batch) {
+ for k, v := range o.IndexOps {
+ b.IndexOps[k] = v
+ }
+ for k, v := range o.InternalOps {
+ b.InternalOps[k] = v
+ }
+}
+
+func (b *Batch) TotalDocSize() int {
+ var s int
+ for k, v := range b.IndexOps {
+ if v != nil {
+ s += v.Size() + size.SizeOfString
+ }
+ s += len(k)
+ }
+ return s
+}
+
+// Optimizable represents an optional interface that implementable by
+// optimizable resources (e.g., TermFieldReaders, Searchers). These
+// optimizable resources are provided the same OptimizableContext
+// instance, so that they can coordinate via dynamic interface
+// casting.
+type Optimizable interface {
+ Optimize(kind string, octx OptimizableContext) (OptimizableContext, error)
+}
+
+// Represents a result of optimization -- see the Finish() method.
+type Optimized interface{}
+
+type OptimizableContext interface {
+ // Once all the optimzable resources have been provided the same
+ // OptimizableContext instance, the optimization preparations are
+ // finished or completed via the Finish() method.
+ //
+ // Depending on the optimization being performed, the Finish()
+ // method might return a non-nil Optimized instance. For example,
+ // the Optimized instance might represent an optimized
+ // TermFieldReader instance.
+ Finish() (Optimized, error)
+}
+
+type DocValueReader interface {
+ VisitDocValues(id IndexInternalID, visitor DocumentFieldTermVisitor) error
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go b/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go
index 1a7d656ca7..2d04bd38e5 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/introducer.go
@@ -19,7 +19,9 @@ import (
"sync/atomic"
"github.com/RoaringBitmap/roaring"
+ "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
+ "github.com/blevesearch/bleve/index/scorch/segment/zap"
)
type segmentIntroduction struct {
@@ -29,8 +31,14 @@ type segmentIntroduction struct {
ids []string
internal map[string][]byte
- applied chan error
- persisted chan error
+ applied chan error
+ persisted chan error
+ persistedCallback index.BatchCallback
+}
+
+type persistIntroduction struct {
+ persisted map[uint64]segment.Segment
+ applied notificationChan
}
type epochWatcher struct {
@@ -48,6 +56,8 @@ func (s *Scorch) mainLoop() {
var epochWatchers []*epochWatcher
OUTER:
for {
+ atomic.AddUint64(&s.stats.TotIntroduceLoop, 1)
+
select {
case <-s.closeCh:
break OUTER
@@ -64,6 +74,9 @@ OUTER:
continue OUTER
}
+ case persist := <-s.persists:
+ s.introducePersist(persist)
+
case revertTo := <-s.revertToSnapshots:
err := s.revertToSnapshot(revertTo)
if err != nil {
@@ -92,32 +105,38 @@ OUTER:
}
func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
- // acquire lock
- s.rootLock.Lock()
+ atomic.AddUint64(&s.stats.TotIntroduceSegmentBeg, 1)
+ defer atomic.AddUint64(&s.stats.TotIntroduceSegmentEnd, 1)
+
+ s.rootLock.RLock()
+ root := s.root
+ root.AddRef()
+ s.rootLock.RUnlock()
+
+ defer func() { _ = root.DecRef() }()
- nsegs := len(s.root.segment)
+ nsegs := len(root.segment)
// prepare new index snapshot
newSnapshot := &IndexSnapshot{
parent: s,
segment: make([]*SegmentSnapshot, 0, nsegs+1),
offsets: make([]uint64, 0, nsegs+1),
- internal: make(map[string][]byte, len(s.root.internal)),
- epoch: s.nextSnapshotEpoch,
+ internal: make(map[string][]byte, len(root.internal)),
refs: 1,
+ creator: "introduceSegment",
}
- s.nextSnapshotEpoch++
// iterate through current segments
var running uint64
- for i := range s.root.segment {
+ var docsToPersistCount, memSegments, fileSegments uint64
+ for i := range root.segment {
// see if optimistic work included this segment
- delta, ok := next.obsoletes[s.root.segment[i].id]
+ delta, ok := next.obsoletes[root.segment[i].id]
if !ok {
var err error
- delta, err = s.root.segment[i].segment.DocNumbers(next.ids)
+ delta, err = root.segment[i].segment.DocNumbers(next.ids)
if err != nil {
- s.rootLock.Unlock()
next.applied <- fmt.Errorf("error computing doc numbers: %v", err)
close(next.applied)
_ = newSnapshot.DecRef()
@@ -126,43 +145,60 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
}
newss := &SegmentSnapshot{
- id: s.root.segment[i].id,
- segment: s.root.segment[i].segment,
- cachedDocs: s.root.segment[i].cachedDocs,
+ id: root.segment[i].id,
+ segment: root.segment[i].segment,
+ cachedDocs: root.segment[i].cachedDocs,
+ creator: root.segment[i].creator,
}
// apply new obsoletions
- if s.root.segment[i].deleted == nil {
+ if root.segment[i].deleted == nil {
newss.deleted = delta
} else {
- newss.deleted = roaring.Or(s.root.segment[i].deleted, delta)
+ newss.deleted = roaring.Or(root.segment[i].deleted, delta)
+ }
+ if newss.deleted.IsEmpty() {
+ newss.deleted = nil
}
// check for live size before copying
if newss.LiveSize() > 0 {
newSnapshot.segment = append(newSnapshot.segment, newss)
- s.root.segment[i].segment.AddRef()
+ root.segment[i].segment.AddRef()
newSnapshot.offsets = append(newSnapshot.offsets, running)
- running += s.root.segment[i].Count()
+ running += newss.segment.Count()
+ }
+
+ if isMemorySegment(root.segment[i]) {
+ docsToPersistCount += root.segment[i].Count()
+ memSegments++
+ } else {
+ fileSegments++
}
}
+ atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
+ atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
+ atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
+
// append new segment, if any, to end of the new index snapshot
if next.data != nil {
newSegmentSnapshot := &SegmentSnapshot{
id: next.id,
segment: next.data, // take ownership of next.data's ref-count
cachedDocs: &cachedDocs{cache: nil},
+ creator: "introduceSegment",
}
newSnapshot.segment = append(newSnapshot.segment, newSegmentSnapshot)
newSnapshot.offsets = append(newSnapshot.offsets, running)
// increment numItemsIntroduced which tracks the number of items
// queued for persistence.
- atomic.AddUint64(&s.stats.numItemsIntroduced, newSegmentSnapshot.Count())
+ atomic.AddUint64(&s.stats.TotIntroducedItems, newSegmentSnapshot.Count())
+ atomic.AddUint64(&s.stats.TotIntroducedSegmentsBatch, 1)
}
// copy old values
- for key, oldVal := range s.root.internal {
+ for key, oldVal := range root.internal {
newSnapshot.internal[key] = oldVal
}
// set new values and apply deletes
@@ -173,12 +209,21 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
delete(newSnapshot.internal, key)
}
}
+
+ newSnapshot.updateSize()
+ s.rootLock.Lock()
if next.persisted != nil {
s.rootPersisted = append(s.rootPersisted, next.persisted)
}
+ if next.persistedCallback != nil {
+ s.persistedCallbacks = append(s.persistedCallbacks, next.persistedCallback)
+ }
// swap in new index snapshot
+ newSnapshot.epoch = s.nextSnapshotEpoch
+ s.nextSnapshotEpoch++
rootPrev := s.root
s.root = newSnapshot
+ atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
// release lock
s.rootLock.Unlock()
@@ -191,42 +236,113 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
return nil
}
-func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
- // acquire lock
+func (s *Scorch) introducePersist(persist *persistIntroduction) {
+ atomic.AddUint64(&s.stats.TotIntroducePersistBeg, 1)
+ defer atomic.AddUint64(&s.stats.TotIntroducePersistEnd, 1)
+
s.rootLock.Lock()
+ root := s.root
+ root.AddRef()
+ nextSnapshotEpoch := s.nextSnapshotEpoch
+ s.nextSnapshotEpoch++
+ s.rootLock.Unlock()
- // prepare new index snapshot
- currSize := len(s.root.segment)
- newSize := currSize + 1 - len(nextMerge.old)
+ defer func() { _ = root.DecRef() }()
+
+ newIndexSnapshot := &IndexSnapshot{
+ parent: s,
+ epoch: nextSnapshotEpoch,
+ segment: make([]*SegmentSnapshot, len(root.segment)),
+ offsets: make([]uint64, len(root.offsets)),
+ internal: make(map[string][]byte, len(root.internal)),
+ refs: 1,
+ creator: "introducePersist",
+ }
+
+ var docsToPersistCount, memSegments, fileSegments uint64
+ for i, segmentSnapshot := range root.segment {
+ // see if this segment has been replaced
+ if replacement, ok := persist.persisted[segmentSnapshot.id]; ok {
+ newSegmentSnapshot := &SegmentSnapshot{
+ id: segmentSnapshot.id,
+ segment: replacement,
+ deleted: segmentSnapshot.deleted,
+ cachedDocs: segmentSnapshot.cachedDocs,
+ creator: "introducePersist",
+ }
+ newIndexSnapshot.segment[i] = newSegmentSnapshot
+ delete(persist.persisted, segmentSnapshot.id)
+
+ // update items persisted incase of a new segment snapshot
+ atomic.AddUint64(&s.stats.TotPersistedItems, newSegmentSnapshot.Count())
+ atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
+ fileSegments++
+ } else {
+ newIndexSnapshot.segment[i] = root.segment[i]
+ newIndexSnapshot.segment[i].segment.AddRef()
+
+ if isMemorySegment(root.segment[i]) {
+ docsToPersistCount += root.segment[i].Count()
+ memSegments++
+ } else {
+ fileSegments++
+ }
+ }
+ newIndexSnapshot.offsets[i] = root.offsets[i]
+ }
+
+ for k, v := range root.internal {
+ newIndexSnapshot.internal[k] = v
+ }
+
+ atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
+ atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
+ atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
+ newIndexSnapshot.updateSize()
+ s.rootLock.Lock()
+ rootPrev := s.root
+ s.root = newIndexSnapshot
+ atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
+ s.rootLock.Unlock()
- // empty segments deletion
- if nextMerge.new == nil {
- newSize--
+ if rootPrev != nil {
+ _ = rootPrev.DecRef()
}
+ close(persist.applied)
+}
+
+func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
+ atomic.AddUint64(&s.stats.TotIntroduceMergeBeg, 1)
+ defer atomic.AddUint64(&s.stats.TotIntroduceMergeEnd, 1)
+
+ s.rootLock.RLock()
+ root := s.root
+ root.AddRef()
+ s.rootLock.RUnlock()
+
+ defer func() { _ = root.DecRef() }()
+
newSnapshot := &IndexSnapshot{
parent: s,
- segment: make([]*SegmentSnapshot, 0, newSize),
- offsets: make([]uint64, 0, newSize),
- internal: s.root.internal,
- epoch: s.nextSnapshotEpoch,
+ internal: root.internal,
refs: 1,
+ creator: "introduceMerge",
}
- s.nextSnapshotEpoch++
// iterate through current segments
newSegmentDeleted := roaring.NewBitmap()
- var running uint64
- for i := range s.root.segment {
- segmentID := s.root.segment[i].id
+ var running, docsToPersistCount, memSegments, fileSegments uint64
+ for i := range root.segment {
+ segmentID := root.segment[i].id
if segSnapAtMerge, ok := nextMerge.old[segmentID]; ok {
// this segment is going away, see if anything else was deleted since we started the merge
- if segSnapAtMerge != nil && s.root.segment[i].deleted != nil {
+ if segSnapAtMerge != nil && root.segment[i].deleted != nil {
// assume all these deletes are new
- deletedSince := s.root.segment[i].deleted
+ deletedSince := root.segment[i].deleted
// if we already knew about some of them, remove
if segSnapAtMerge.deleted != nil {
- deletedSince = roaring.AndNot(s.root.segment[i].deleted, segSnapAtMerge.deleted)
+ deletedSince = roaring.AndNot(root.segment[i].deleted, segSnapAtMerge.deleted)
}
deletedSinceItr := deletedSince.Iterator()
for deletedSinceItr.HasNext() {
@@ -240,18 +356,25 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
// segments left behind in old map after processing
// the root segments would be the obsolete segment set
delete(nextMerge.old, segmentID)
-
- } else if s.root.segment[i].LiveSize() > 0 {
+ } else if root.segment[i].LiveSize() > 0 {
// this segment is staying
newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{
- id: s.root.segment[i].id,
- segment: s.root.segment[i].segment,
- deleted: s.root.segment[i].deleted,
- cachedDocs: s.root.segment[i].cachedDocs,
+ id: root.segment[i].id,
+ segment: root.segment[i].segment,
+ deleted: root.segment[i].deleted,
+ cachedDocs: root.segment[i].cachedDocs,
+ creator: root.segment[i].creator,
})
- s.root.segment[i].segment.AddRef()
+ root.segment[i].segment.AddRef()
newSnapshot.offsets = append(newSnapshot.offsets, running)
- running += s.root.segment[i].Count()
+ running += root.segment[i].segment.Count()
+
+ if isMemorySegment(root.segment[i]) {
+ docsToPersistCount += root.segment[i].Count()
+ memSegments++
+ } else {
+ fileSegments++
+ }
}
}
@@ -269,6 +392,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
}
}
}
+
// In case where all the docs in the newly merged segment getting
// deleted by the time we reach here, can skip the introduction.
if nextMerge.new != nil &&
@@ -279,15 +403,35 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
segment: nextMerge.new, // take ownership for nextMerge.new's ref-count
deleted: newSegmentDeleted,
cachedDocs: &cachedDocs{cache: nil},
+ creator: "introduceMerge",
})
newSnapshot.offsets = append(newSnapshot.offsets, running)
+ atomic.AddUint64(&s.stats.TotIntroducedSegmentsMerge, 1)
+
+ switch nextMerge.new.(type) {
+ case *zap.SegmentBase:
+ docsToPersistCount += nextMerge.new.Count() - newSegmentDeleted.GetCardinality()
+ memSegments++
+ case *zap.Segment:
+ fileSegments++
+ }
}
+ atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
+ atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
+ atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
+
newSnapshot.AddRef() // 1 ref for the nextMerge.notify response
- // swap in new segment
+ newSnapshot.updateSize()
+
+ s.rootLock.Lock()
+ // swap in new index snapshot
+ newSnapshot.epoch = s.nextSnapshotEpoch
+ s.nextSnapshotEpoch++
rootPrev := s.root
s.root = newSnapshot
+ atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
// release lock
s.rootLock.Unlock()
@@ -301,6 +445,9 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
}
func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
+ atomic.AddUint64(&s.stats.TotIntroduceRevertBeg, 1)
+ defer atomic.AddUint64(&s.stats.TotIntroduceRevertEnd, 1)
+
if revertTo.snapshot == nil {
err := fmt.Errorf("Cannot revert to a nil snapshot")
revertTo.applied <- err
@@ -318,9 +465,11 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
internal: revertTo.snapshot.internal,
epoch: s.nextSnapshotEpoch,
refs: 1,
+ creator: "revertToSnapshot",
}
s.nextSnapshotEpoch++
+ var docsToPersistCount, memSegments, fileSegments uint64
// iterate through segments
for i, segmentSnapshot := range revertTo.snapshot.segment {
newSnapshot.segment[i] = &SegmentSnapshot{
@@ -328,21 +477,37 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
segment: segmentSnapshot.segment,
deleted: segmentSnapshot.deleted,
cachedDocs: segmentSnapshot.cachedDocs,
+ creator: segmentSnapshot.creator,
}
newSnapshot.segment[i].segment.AddRef()
// remove segment from ineligibleForRemoval map
filename := zapFileName(segmentSnapshot.id)
delete(s.ineligibleForRemoval, filename)
+
+ if isMemorySegment(segmentSnapshot) {
+ docsToPersistCount += segmentSnapshot.Count()
+ memSegments++
+ } else {
+ fileSegments++
+ }
}
+ atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
+ atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
+ atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
+
if revertTo.persisted != nil {
s.rootPersisted = append(s.rootPersisted, revertTo.persisted)
}
+ newSnapshot.updateSize()
+
// swap in new snapshot
rootPrev := s.root
s.root = newSnapshot
+
+ atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
// release lock
s.rootLock.Unlock()
@@ -354,3 +519,12 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
return nil
}
+
+func isMemorySegment(s *SegmentSnapshot) bool {
+ switch s.segment.(type) {
+ case *zap.SegmentBase:
+ return true
+ default:
+ return false
+ }
+}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/merge.go b/vendor/github.com/blevesearch/bleve/index/scorch/merge.go
index ad756588a6..bcbf5b7106 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/merge.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/merge.go
@@ -15,9 +15,7 @@
package scorch
import (
- "bytes"
"encoding/json"
-
"fmt"
"os"
"sync/atomic"
@@ -40,16 +38,20 @@ func (s *Scorch) mergerLoop() {
OUTER:
for {
+ atomic.AddUint64(&s.stats.TotFileMergeLoopBeg, 1)
+
select {
case <-s.closeCh:
break OUTER
default:
// check to see if there is a new snapshot to persist
- s.rootLock.RLock()
+ s.rootLock.Lock()
ourSnapshot := s.root
ourSnapshot.AddRef()
- s.rootLock.RUnlock()
+ atomic.StoreUint64(&s.iStats.mergeSnapshotSize, uint64(ourSnapshot.Size()))
+ atomic.StoreUint64(&s.iStats.mergeEpoch, ourSnapshot.epoch)
+ s.rootLock.Unlock()
if ourSnapshot.epoch != lastEpochMergePlanned {
startTime := time.Now()
@@ -57,12 +59,21 @@ OUTER:
// lets get started
err := s.planMergeAtSnapshot(ourSnapshot, mergePlannerOptions)
if err != nil {
+ atomic.StoreUint64(&s.iStats.mergeEpoch, 0)
+ if err == segment.ErrClosed {
+ // index has been closed
+ _ = ourSnapshot.DecRef()
+ break OUTER
+ }
s.fireAsyncError(fmt.Errorf("merging err: %v", err))
_ = ourSnapshot.DecRef()
+ atomic.AddUint64(&s.stats.TotFileMergeLoopErr, 1)
continue OUTER
}
lastEpochMergePlanned = ourSnapshot.epoch
+ atomic.StoreUint64(&s.stats.LastMergedEpoch, ourSnapshot.epoch)
+
s.fireEvent(EventKindMergerProgress, time.Since(startTime))
}
_ = ourSnapshot.DecRef()
@@ -88,7 +99,10 @@ OUTER:
case <-ew.notifyCh:
}
}
+
+ atomic.AddUint64(&s.stats.TotFileMergeLoopEnd, 1)
}
+
s.asyncTasks.Done()
}
@@ -105,6 +119,11 @@ func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions,
if err != nil {
return &mergePlannerOptions, err
}
+
+ err = mergeplan.ValidateMergePlannerOptions(&mergePlannerOptions)
+ if err != nil {
+ return nil, err
+ }
}
return &mergePlannerOptions, nil
}
@@ -119,32 +138,45 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
}
}
+ atomic.AddUint64(&s.stats.TotFileMergePlan, 1)
+
// give this list to the planner
resultMergePlan, err := mergeplan.Plan(onlyZapSnapshots, options)
if err != nil {
+ atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1)
return fmt.Errorf("merge planning err: %v", err)
}
if resultMergePlan == nil {
// nothing to do
+ atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
return nil
}
+ atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1)
+
+ atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))
+
// process tasks in serial for now
var notifications []chan *IndexSnapshot
for _, task := range resultMergePlan.Tasks {
if len(task.Segments) == 0 {
+ atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
continue
}
+ atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegments, uint64(len(task.Segments)))
+
oldMap := make(map[uint64]*SegmentSnapshot)
newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
segmentsToMerge := make([]*zap.Segment, 0, len(task.Segments))
docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments))
+
for _, planSegment := range task.Segments {
if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok {
oldMap[segSnapshot.id] = segSnapshot
if zapSeg, ok := segSnapshot.segment.(*zap.Segment); ok {
if segSnapshot.LiveSize() == 0 {
+ atomic.AddUint64(&s.stats.TotFileMergeSegmentsEmpty, 1)
oldMap[segSnapshot.id] = nil
} else {
segmentsToMerge = append(segmentsToMerge, zapSeg)
@@ -155,32 +187,53 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
}
var oldNewDocNums map[uint64][]uint64
- var segment segment.Segment
+ var seg segment.Segment
if len(segmentsToMerge) > 0 {
filename := zapFileName(newSegmentID)
s.markIneligibleForRemoval(filename)
path := s.path + string(os.PathSeparator) + filename
- newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024)
+
+ fileMergeZapStartTime := time.Now()
+
+ atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
+ newDocNums, _, err := zap.Merge(segmentsToMerge, docsToDrop, path,
+ DefaultChunkFactor, s.closeCh, s)
+ atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)
+
+ fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime))
+ atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime)
+ if atomic.LoadUint64(&s.stats.MaxFileMergeZapTime) < fileMergeZapTime {
+ atomic.StoreUint64(&s.stats.MaxFileMergeZapTime, fileMergeZapTime)
+ }
+
if err != nil {
s.unmarkIneligibleForRemoval(filename)
+ atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
+ if err == segment.ErrClosed {
+ return err
+ }
return fmt.Errorf("merging failed: %v", err)
}
- segment, err = zap.Open(path)
+
+ seg, err = zap.Open(path)
if err != nil {
s.unmarkIneligibleForRemoval(filename)
+ atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
return err
}
oldNewDocNums = make(map[uint64][]uint64)
for i, segNewDocNums := range newDocNums {
oldNewDocNums[task.Segments[i].Id()] = segNewDocNums
}
+
+ atomic.AddUint64(&s.stats.TotFileMergeSegments, uint64(len(segmentsToMerge)))
}
sm := &segmentMerge{
id: newSegmentID,
old: oldMap,
oldNewDocNums: oldNewDocNums,
- new: segment,
+ new: seg,
notify: make(chan *IndexSnapshot, 1),
}
notifications = append(notifications, sm.notify)
@@ -188,21 +241,28 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
// give it to the introducer
select {
case <-s.closeCh:
- _ = segment.Close()
- return nil
+ _ = seg.Close()
+ return segment.ErrClosed
case s.merges <- sm:
+ atomic.AddUint64(&s.stats.TotFileMergeIntroductions, 1)
}
+
+ atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1)
}
+
for _, notification := range notifications {
select {
case <-s.closeCh:
- return nil
+ atomic.AddUint64(&s.stats.TotFileMergeIntroductionsSkipped, 1)
+ return segment.ErrClosed
case newSnapshot := <-notification:
+ atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1)
if newSnapshot != nil {
_ = newSnapshot.DecRef()
}
}
}
+
return nil
}
@@ -219,44 +279,48 @@ type segmentMerge struct {
// into the root
func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
sbs []*zap.SegmentBase, sbsDrops []*roaring.Bitmap, sbsIndexes []int,
- chunkFactor uint32) (uint64, *IndexSnapshot, uint64, error) {
- var br bytes.Buffer
+ chunkFactor uint32) (*IndexSnapshot, uint64, error) {
+ atomic.AddUint64(&s.stats.TotMemMergeBeg, 1)
- cr := zap.NewCountHashWriter(&br)
+ memMergeZapStartTime := time.Now()
- newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset,
- docValueOffset, dictLocs, fieldsInv, fieldsMap, err :=
- zap.MergeToWriter(sbs, sbsDrops, chunkFactor, cr)
- if err != nil {
- return 0, nil, 0, err
- }
-
- sb, err := zap.InitSegmentBase(br.Bytes(), cr.Sum32(), chunkFactor,
- fieldsMap, fieldsInv, numDocs, storedIndexOffset, fieldsIndexOffset,
- docValueOffset, dictLocs)
- if err != nil {
- return 0, nil, 0, err
- }
+ atomic.AddUint64(&s.stats.TotMemMergeZapBeg, 1)
newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
-
filename := zapFileName(newSegmentID)
path := s.path + string(os.PathSeparator) + filename
- err = zap.PersistSegmentBase(sb, path)
+
+ newDocNums, _, err :=
+ zap.MergeSegmentBases(sbs, sbsDrops, path, chunkFactor, s.closeCh, s)
+
+ atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1)
+
+ memMergeZapTime := uint64(time.Since(memMergeZapStartTime))
+ atomic.AddUint64(&s.stats.TotMemMergeZapTime, memMergeZapTime)
+ if atomic.LoadUint64(&s.stats.MaxMemMergeZapTime) < memMergeZapTime {
+ atomic.StoreUint64(&s.stats.MaxMemMergeZapTime, memMergeZapTime)
+ }
+
if err != nil {
- return 0, nil, 0, err
+ atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
+ return nil, 0, err
}
- segment, err := zap.Open(path)
+ seg, err := zap.Open(path)
if err != nil {
- return 0, nil, 0, err
+ atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
+ return nil, 0, err
}
+ // update persisted stats
+ atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count())
+ atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
+
sm := &segmentMerge{
id: newSegmentID,
old: make(map[uint64]*SegmentSnapshot),
oldNewDocNums: make(map[uint64][]uint64),
- new: segment,
+ new: seg,
notify: make(chan *IndexSnapshot, 1),
}
@@ -268,15 +332,21 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
select { // send to introducer
case <-s.closeCh:
- _ = segment.DecRef()
- return 0, nil, 0, nil // TODO: return ErrInterruptedClosed?
+ _ = seg.DecRef()
+ return nil, 0, segment.ErrClosed
case s.merges <- sm:
}
select { // wait for introduction to complete
case <-s.closeCh:
- return 0, nil, 0, nil // TODO: return ErrInterruptedClosed?
+ return nil, 0, segment.ErrClosed
case newSnapshot := <-sm.notify:
- return numDocs, newSnapshot, newSegmentID, nil
+ atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs)))
+ atomic.AddUint64(&s.stats.TotMemMergeDone, 1)
+ return newSnapshot, newSegmentID, nil
}
}
+
+func (s *Scorch) ReportBytesWritten(bytesWritten uint64) {
+ atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, bytesWritten)
+}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/merge_plan.go b/vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/merge_plan.go
index 62f643f431..c2a0d3c644 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/merge_plan.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/merge_plan.go
@@ -18,6 +18,7 @@
package mergeplan
import (
+ "errors"
"fmt"
"math"
"sort"
@@ -115,7 +116,15 @@ func (o *MergePlanOptions) RaiseToFloorSegmentSize(s int64) int64 {
return o.FloorSegmentSize
}
-// Suggested default options.
+// MaxSegmentSizeLimit represents the maximum size of a segment,
+// this limit comes with hit-1 optimisation/max encoding limit uint31.
+const MaxSegmentSizeLimit = 1<<31 - 1
+
+// ErrMaxSegmentSizeTooLarge is returned when the size of the segment
+// exceeds the MaxSegmentSizeLimit
+var ErrMaxSegmentSizeTooLarge = errors.New("MaxSegmentSize exceeds the size limit")
+
+// DefaultMergePlanOptions suggests the default options.
var DefaultMergePlanOptions = MergePlanOptions{
MaxSegmentsPerTier: 10,
MaxSegmentSize: 5000000,
@@ -208,14 +217,14 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
if len(roster) > 0 {
rosterScore := scoreSegments(roster, o)
- if len(bestRoster) <= 0 || rosterScore < bestRosterScore {
+ if len(bestRoster) == 0 || rosterScore < bestRosterScore {
bestRoster = roster
bestRosterScore = rosterScore
}
}
}
- if len(bestRoster) <= 0 {
+ if len(bestRoster) == 0 {
return rv, nil
}
@@ -367,3 +376,11 @@ func ToBarChart(prefix string, barMax int, segments []Segment, plan *MergePlan)
return strings.Join(rv, "\n")
}
+
+// ValidateMergePlannerOptions validates the merge planner options
+func ValidateMergePlannerOptions(options *MergePlanOptions) error {
+ if options.MaxSegmentSize > MaxSegmentSizeLimit {
+ return ErrMaxSegmentSizeTooLarge
+ }
+ return nil
+}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/optimize.go b/vendor/github.com/blevesearch/bleve/index/scorch/optimize.go
new file mode 100644
index 0000000000..b33e3be3d4
--- /dev/null
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/optimize.go
@@ -0,0 +1,420 @@
+// Copyright (c) 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package scorch
+
+import (
+ "fmt"
+
+ "github.com/RoaringBitmap/roaring"
+
+ "github.com/blevesearch/bleve/index"
+ "github.com/blevesearch/bleve/index/scorch/segment"
+ "github.com/blevesearch/bleve/index/scorch/segment/zap"
+)
+
+var OptimizeConjunction = true
+var OptimizeConjunctionUnadorned = true
+var OptimizeDisjunctionUnadorned = true
+
+func (s *IndexSnapshotTermFieldReader) Optimize(kind string,
+ octx index.OptimizableContext) (index.OptimizableContext, error) {
+ if OptimizeConjunction && kind == "conjunction" {
+ return s.optimizeConjunction(octx)
+ }
+
+ if OptimizeConjunctionUnadorned && kind == "conjunction:unadorned" {
+ return s.optimizeConjunctionUnadorned(octx)
+ }
+
+ if OptimizeDisjunctionUnadorned && kind == "disjunction:unadorned" {
+ return s.optimizeDisjunctionUnadorned(octx)
+ }
+
+ return octx, nil
+}
+
+var OptimizeDisjunctionUnadornedMinChildCardinality = uint64(256)
+
+// ----------------------------------------------------------------
+
+func (s *IndexSnapshotTermFieldReader) optimizeConjunction(
+ octx index.OptimizableContext) (index.OptimizableContext, error) {
+ if octx == nil {
+ octx = &OptimizeTFRConjunction{snapshot: s.snapshot}
+ }
+
+ o, ok := octx.(*OptimizeTFRConjunction)
+ if !ok {
+ return octx, nil
+ }
+
+ if o.snapshot != s.snapshot {
+ return nil, fmt.Errorf("tried to optimize conjunction across different snapshots")
+ }
+
+ o.tfrs = append(o.tfrs, s)
+
+ return o, nil
+}
+
+type OptimizeTFRConjunction struct {
+ snapshot *IndexSnapshot
+
+ tfrs []*IndexSnapshotTermFieldReader
+}
+
+func (o *OptimizeTFRConjunction) Finish() (index.Optimized, error) {
+ if len(o.tfrs) <= 1 {
+ return nil, nil
+ }
+
+ for i := range o.snapshot.segment {
+ itr0, ok := o.tfrs[0].iterators[i].(*zap.PostingsIterator)
+ if !ok || itr0.ActualBM == nil {
+ continue
+ }
+
+ itr1, ok := o.tfrs[1].iterators[i].(*zap.PostingsIterator)
+ if !ok || itr1.ActualBM == nil {
+ continue
+ }
+
+ bm := roaring.And(itr0.ActualBM, itr1.ActualBM)
+
+ for _, tfr := range o.tfrs[2:] {
+ itr, ok := tfr.iterators[i].(*zap.PostingsIterator)
+ if !ok || itr.ActualBM == nil {
+ continue
+ }
+
+ bm.And(itr.ActualBM)
+ }
+
+ // in this conjunction optimization, the postings iterators
+ // will all share the same AND'ed together actual bitmap. The
+ // regular conjunction searcher machinery will still be used,
+ // but the underlying bitmap will be smaller.
+ for _, tfr := range o.tfrs {
+ itr, ok := tfr.iterators[i].(*zap.PostingsIterator)
+ if ok && itr.ActualBM != nil {
+ itr.ActualBM = bm
+ itr.Actual = bm.Iterator()
+ }
+ }
+ }
+
+ return nil, nil
+}
+
+// ----------------------------------------------------------------
+
+// An "unadorned" conjunction optimization is appropriate when
+// additional or subsidiary information like freq-norm's and
+// term-vectors are not required, and instead only the internal-id's
+// are needed.
+func (s *IndexSnapshotTermFieldReader) optimizeConjunctionUnadorned(
+ octx index.OptimizableContext) (index.OptimizableContext, error) {
+ if octx == nil {
+ octx = &OptimizeTFRConjunctionUnadorned{snapshot: s.snapshot}
+ }
+
+ o, ok := octx.(*OptimizeTFRConjunctionUnadorned)
+ if !ok {
+ return nil, nil
+ }
+
+ if o.snapshot != s.snapshot {
+ return nil, fmt.Errorf("tried to optimize unadorned conjunction across different snapshots")
+ }
+
+ o.tfrs = append(o.tfrs, s)
+
+ return o, nil
+}
+
+type OptimizeTFRConjunctionUnadorned struct {
+ snapshot *IndexSnapshot
+
+ tfrs []*IndexSnapshotTermFieldReader
+}
+
+var OptimizeTFRConjunctionUnadornedTerm = []byte("<conjunction:unadorned>")
+var OptimizeTFRConjunctionUnadornedField = "*"
+
+// Finish of an unadorned conjunction optimization will compute a
+// termFieldReader with an "actual" bitmap that represents the
+// constituent bitmaps AND'ed together. This termFieldReader cannot
+// provide any freq-norm or termVector associated information.
+func (o *OptimizeTFRConjunctionUnadorned) Finish() (rv index.Optimized, err error) {
+ if len(o.tfrs) <= 1 {
+ return nil, nil
+ }
+
+ // We use an artificial term and field because the optimized
+ // termFieldReader can represent multiple terms and fields.
+ oTFR := &IndexSnapshotTermFieldReader{
+ term: OptimizeTFRConjunctionUnadornedTerm,
+ field: OptimizeTFRConjunctionUnadornedField,
+ snapshot: o.snapshot,
+ iterators: make([]segment.PostingsIterator, len(o.snapshot.segment)),
+ segmentOffset: 0,
+ includeFreq: false,
+ includeNorm: false,
+ includeTermVectors: false,
+ }
+
+ var actualBMs []*roaring.Bitmap // Collected from regular posting lists.
+
+OUTER:
+ for i := range o.snapshot.segment {
+ actualBMs = actualBMs[:0]
+
+ var docNum1HitLast uint64
+ var docNum1HitLastOk bool
+
+ for _, tfr := range o.tfrs {
+ if _, ok := tfr.iterators[i].(*segment.EmptyPostingsIterator); ok {
+ // An empty postings iterator means the entire AND is empty.
+ oTFR.iterators[i] = segment.AnEmptyPostingsIterator
+ continue OUTER
+ }
+
+ itr, ok := tfr.iterators[i].(*zap.PostingsIterator)
+ if !ok {
+ // We optimize zap postings iterators only.
+ return nil, nil
+ }
+
+ // If the postings iterator is "1-hit" optimized, then we
+ // can perform several optimizations up-front here.
+ docNum1Hit, ok := itr.DocNum1Hit()
+ if ok {
+ if docNum1Hit == zap.DocNum1HitFinished {
+ // An empty docNum here means the entire AND is empty.
+ oTFR.iterators[i] = segment.AnEmptyPostingsIterator
+ continue OUTER
+ }
+
+ if docNum1HitLastOk && docNum1HitLast != docNum1Hit {
+ // The docNum1Hit doesn't match the previous
+ // docNum1HitLast, so the entire AND is empty.
+ oTFR.iterators[i] = segment.AnEmptyPostingsIterator
+ continue OUTER
+ }
+
+ docNum1HitLast = docNum1Hit
+ docNum1HitLastOk = true
+
+ continue
+ }
+
+ if itr.ActualBM == nil {
+ // An empty actual bitmap means the entire AND is empty.
+ oTFR.iterators[i] = segment.AnEmptyPostingsIterator
+ continue OUTER
+ }
+
+ // Collect the actual bitmap for more processing later.
+ actualBMs = append(actualBMs, itr.ActualBM)
+ }
+
+ if docNum1HitLastOk {
+ // We reach here if all the 1-hit optimized posting
+ // iterators had the same 1-hit docNum, so we can check if
+ // our collected actual bitmaps also have that docNum.
+ for _, bm := range actualBMs {
+ if !bm.Contains(uint32(docNum1HitLast)) {
+ // The docNum1Hit isn't in one of our actual
+ // bitmaps, so the entire AND is empty.
+ oTFR.iterators[i] = segment.AnEmptyPostingsIterator
+ continue OUTER
+ }
+ }
+
+ // The actual bitmaps and docNum1Hits all contain or have
+ // the same 1-hit docNum, so that's our AND'ed result.
+ oTFR.iterators[i], err = zap.PostingsIteratorFrom1Hit(
+ docNum1HitLast, zap.NormBits1Hit, false, false)
+ if err != nil {
+ return nil, nil
+ }
+
+ continue OUTER
+ }
+
+ if len(actualBMs) == 0 {
+ // If we've collected no actual bitmaps at this point,
+ // then the entire AND is empty.
+ oTFR.iterators[i] = segment.AnEmptyPostingsIterator
+ continue OUTER
+ }
+
+ if len(actualBMs) == 1 {
+ // If we've only 1 actual bitmap, then that's our result.
+ oTFR.iterators[i], err = zap.PostingsIteratorFromBitmap(
+ actualBMs[0], false, false)
+ if err != nil {
+ return nil, nil
+ }
+
+ continue OUTER
+ }
+
+ // Else, AND together our collected bitmaps as our result.
+ bm := roaring.And(actualBMs[0], actualBMs[1])
+
+ for _, actualBM := range actualBMs[2:] {
+ bm.And(actualBM)
+ }
+
+ oTFR.iterators[i], err = zap.PostingsIteratorFromBitmap(
+ bm, false, false)
+ if err != nil {
+ return nil, nil
+ }
+ }
+
+ return oTFR, nil
+}
+
+// ----------------------------------------------------------------
+
+// An "unadorned" disjunction optimization is appropriate when
+// additional or subsidiary information like freq-norm's and
+// term-vectors are not required, and instead only the internal-id's
+// are needed.
+func (s *IndexSnapshotTermFieldReader) optimizeDisjunctionUnadorned(
+ octx index.OptimizableContext) (index.OptimizableContext, error) {
+ if octx == nil {
+ octx = &OptimizeTFRDisjunctionUnadorned{snapshot: s.snapshot}
+ }
+
+ o, ok := octx.(*OptimizeTFRDisjunctionUnadorned)
+ if !ok {
+ return nil, nil
+ }
+
+ if o.snapshot != s.snapshot {
+ return nil, fmt.Errorf("tried to optimize unadorned disjunction across different snapshots")
+ }
+
+ o.tfrs = append(o.tfrs, s)
+
+ return o, nil
+}
+
+type OptimizeTFRDisjunctionUnadorned struct {
+ snapshot *IndexSnapshot
+
+ tfrs []*IndexSnapshotTermFieldReader
+}
+
+var OptimizeTFRDisjunctionUnadornedTerm = []byte("<disjunction:unadorned>")
+var OptimizeTFRDisjunctionUnadornedField = "*"
+
+// Finish of an unadorned disjunction optimization will compute a
+// termFieldReader with an "actual" bitmap that represents the
+// constituent bitmaps OR'ed together. This termFieldReader cannot
+// provide any freq-norm or termVector associated information.
+func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err error) {
+ if len(o.tfrs) <= 1 {
+ return nil, nil
+ }
+
+ for i := range o.snapshot.segment {
+ var cMax uint64
+
+ for _, tfr := range o.tfrs {
+ itr, ok := tfr.iterators[i].(*zap.PostingsIterator)
+ if !ok {
+ return nil, nil
+ }
+
+ if itr.ActualBM != nil {
+ c := itr.ActualBM.GetCardinality()
+ if cMax < c {
+ cMax = c
+ }
+ }
+ }
+
+ // Heuristic to skip the optimization if all the constituent
+ // bitmaps are too small, where the processing & resource
+ // overhead to create the OR'ed bitmap outweighs the benefit.
+ if cMax < OptimizeDisjunctionUnadornedMinChildCardinality {
+ return nil, nil
+ }
+ }
+
+ // We use an artificial term and field because the optimized
+ // termFieldReader can represent multiple terms and fields.
+ oTFR := &IndexSnapshotTermFieldReader{
+ term: OptimizeTFRDisjunctionUnadornedTerm,
+ field: OptimizeTFRDisjunctionUnadornedField,
+ snapshot: o.snapshot,
+ iterators: make([]segment.PostingsIterator, len(o.snapshot.segment)),
+ segmentOffset: 0,
+ includeFreq: false,
+ includeNorm: false,
+ includeTermVectors: false,
+ }
+
+ var docNums []uint32 // Collected docNum's from 1-hit posting lists.
+ var actualBMs []*roaring.Bitmap // Collected from regular posting lists.
+
+ for i := range o.snapshot.segment {
+ docNums = docNums[:0]
+ actualBMs = actualBMs[:0]
+
+ for _, tfr := range o.tfrs {
+ itr, ok := tfr.iterators[i].(*zap.PostingsIterator)
+ if !ok {
+ return nil, nil
+ }
+
+ docNum, ok := itr.DocNum1Hit()
+ if ok {
+ docNums = append(docNums, uint32(docNum))
+ continue
+ }
+
+ if itr.ActualBM != nil {
+ actualBMs = append(actualBMs, itr.ActualBM)
+ }
+ }
+
+ var bm *roaring.Bitmap
+ if len(actualBMs) > 2 {
+ bm = roaring.HeapOr(actualBMs...)
+ } else if len(actualBMs) == 2 {
+ bm = roaring.Or(actualBMs[0], actualBMs[1])
+ } else if len(actualBMs) == 1 {
+ bm = actualBMs[0].Clone()
+ }
+
+ if bm == nil {
+ bm = roaring.New()
+ }
+
+ bm.AddMany(docNums)
+
+ oTFR.iterators[i], err = zap.PostingsIteratorFromBitmap(bm, false, false)
+ if err != nil {
+ return nil, nil
+ }
+ }
+
+ return oTFR, nil
+}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/persister.go b/vendor/github.com/blevesearch/bleve/index/scorch/persister.go
index c21bb14394..349ccdc0e9 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/persister.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/persister.go
@@ -16,9 +16,12 @@ package scorch
import (
"bytes"
+ "encoding/binary"
+ "encoding/json"
"fmt"
"io/ioutil"
"log"
+ "math"
"os"
"path/filepath"
"strconv"
@@ -27,23 +30,57 @@ import (
"time"
"github.com/RoaringBitmap/roaring"
+ "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/index/scorch/segment/zap"
- "github.com/boltdb/bolt"
+ bolt "github.com/etcd-io/bbolt"
)
var DefaultChunkFactor uint32 = 1024
-// Arbitrary number, need to make it configurable.
-// Lower values like 10/making persister really slow
-// doesn't work well as it is creating more files to
-// persist for in next persist iteration and spikes the # FDs.
-// Ideal value should let persister also proceed at
-// an optimum pace so that the merger can skip
-// many intermediate snapshots.
-// This needs to be based on empirical data.
-// TODO - may need to revisit this approach/value.
-var epochDistance = uint64(5)
+// DefaultPersisterNapTimeMSec is kept to zero as this helps in direct
+// persistence of segments with the default safe batch option.
+// If the default safe batch option results in high number of
+// files on disk, then users may initialise this configuration parameter
+// with higher values so that the persister will nap a bit within it's
+// work loop to favour better in-memory merging of segments to result
+// in fewer segment files on disk. But that may come with an indexing
+// performance overhead.
+// Unsafe batch users are advised to override this to higher value
+// for better performance especially with high data density.
+var DefaultPersisterNapTimeMSec int = 0 // ms
+
+// DefaultPersisterNapUnderNumFiles helps in controlling the pace of
+// persister. At times of a slow merger progress with heavy file merging
+// operations, its better to pace down the persister for letting the merger
+// to catch up within a range defined by this parameter.
+// Fewer files on disk (as per the merge plan) would result in keeping the
+// file handle usage under limit, faster disk merger and a healthier index.
+// Its been observed that such a loosely sync'ed introducer-persister-merger
+// trio results in better overall performance.
+var DefaultPersisterNapUnderNumFiles int = 1000
+
+var DefaultMemoryPressurePauseThreshold uint64 = math.MaxUint64
+
+type persisterOptions struct {
+ // PersisterNapTimeMSec controls the wait/delay injected into
+ // persistence workloop to improve the chances for
+ // a healthier and heavier in-memory merging
+ PersisterNapTimeMSec int
+
+ // PersisterNapTimeMSec > 0, and the number of files is less than
+ // PersisterNapUnderNumFiles, then the persister will sleep
+ // PersisterNapTimeMSec amount of time to improve the chances for
+ // a healthier and heavier in-memory merging
+ PersisterNapUnderNumFiles int
+
+ // MemoryPressurePauseThreshold let persister to have a better leeway
+ // for prudently performing the memory merge of segments on a memory
+ // pressure situation. Here the config value is an upper threshold
+ // for the number of paused application threads. The default value would
+ // be a very high number to always favour the merging of memory segments.
+ MemoryPressurePauseThreshold uint64
+}
type notificationChan chan struct{}
@@ -53,8 +90,17 @@ func (s *Scorch) persisterLoop() {
var persistWatchers []*epochWatcher
var lastPersistedEpoch, lastMergedEpoch uint64
var ew *epochWatcher
+ po, err := s.parsePersisterOptions()
+ if err != nil {
+ s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err))
+ s.asyncTasks.Done()
+ return
+ }
+
OUTER:
for {
+ atomic.AddUint64(&s.stats.TotPersistLoopBeg, 1)
+
select {
case <-s.closeCh:
break OUTER
@@ -65,11 +111,13 @@ OUTER:
if ew != nil && ew.epoch > lastMergedEpoch {
lastMergedEpoch = ew.epoch
}
- persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch,
- &lastMergedEpoch, persistWatchers)
+
+ lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch,
+ lastMergedEpoch, persistWatchers, po)
var ourSnapshot *IndexSnapshot
var ourPersisted []chan error
+ var ourPersistedCallbacks []index.BatchCallback
// check to see if there is a new snapshot to persist
s.rootLock.Lock()
@@ -78,13 +126,17 @@ OUTER:
ourSnapshot.AddRef()
ourPersisted = s.rootPersisted
s.rootPersisted = nil
+ ourPersistedCallbacks = s.persistedCallbacks
+ s.persistedCallbacks = nil
+ atomic.StoreUint64(&s.iStats.persistSnapshotSize, uint64(ourSnapshot.Size()))
+ atomic.StoreUint64(&s.iStats.persistEpoch, ourSnapshot.epoch)
}
s.rootLock.Unlock()
if ourSnapshot != nil {
startTime := time.Now()
- err := s.persistSnapshot(ourSnapshot)
+ err := s.persistSnapshot(ourSnapshot, po)
for _, ch := range ourPersisted {
if err != nil {
ch <- err
@@ -92,10 +144,22 @@ OUTER:
close(ch)
}
if err != nil {
+ atomic.StoreUint64(&s.iStats.persistEpoch, 0)
+ if err == segment.ErrClosed {
+ // index has been closed
+ _ = ourSnapshot.DecRef()
+ break OUTER
+ }
s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err))
_ = ourSnapshot.DecRef()
+ atomic.AddUint64(&s.stats.TotPersistLoopErr, 1)
continue OUTER
}
+ for i := range ourPersistedCallbacks {
+ ourPersistedCallbacks[i](err)
+ }
+
+ atomic.StoreUint64(&s.stats.LastPersistedEpoch, ourSnapshot.epoch)
lastPersistedEpoch = ourSnapshot.epoch
for _, ew := range persistWatchers {
@@ -115,6 +179,8 @@ OUTER:
s.fireEvent(EventKindPersisterProgress, time.Since(startTime))
if changed {
+ s.removeOldData()
+ atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1)
continue OUTER
}
}
@@ -133,17 +199,21 @@ OUTER:
s.removeOldData() // might as well cleanup while waiting
+ atomic.AddUint64(&s.stats.TotPersistLoopWait, 1)
+
select {
case <-s.closeCh:
break OUTER
case <-w.notifyCh:
// woken up, next loop should pick up work
- continue OUTER
+ atomic.AddUint64(&s.stats.TotPersistLoopWaitNotified, 1)
case ew = <-s.persisterNotifier:
// if the watchers are already caught up then let them wait,
// else let them continue to do the catch up
persistWatchers = append(persistWatchers, ew)
}
+
+ atomic.AddUint64(&s.stats.TotPersistLoopEnd, 1)
}
}
@@ -160,38 +230,95 @@ func notifyMergeWatchers(lastPersistedEpoch uint64,
return watchersNext
}
-func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch *uint64,
- persistWatchers []*epochWatcher) []*epochWatcher {
+func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64,
+ persistWatchers []*epochWatcher, po *persisterOptions) (uint64, []*epochWatcher) {
// first, let the watchers proceed if they lag behind
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
+ // check the merger lag by counting the segment files on disk,
+ // On finding fewer files on disk, persister takes a short pause
+ // for sufficient in-memory segments to pile up for the next
+ // memory merge cum persist loop.
+ // On finding too many files on disk, persister pause until the merger
+ // catches up to reduce the segment file count under the threshold.
+ // But if there is memory pressure, then skip this sleep maneuvers.
+ numFilesOnDisk, _ := s.diskFileStats()
+ if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) &&
+ po.PersisterNapTimeMSec > 0 && s.paused() == 0 {
+ select {
+ case <-s.closeCh:
+ case <-time.After(time.Millisecond * time.Duration(po.PersisterNapTimeMSec)):
+ atomic.AddUint64(&s.stats.TotPersisterNapPauseCompleted, 1)
+
+ case ew := <-s.persisterNotifier:
+ // unblock the merger in meantime
+ persistWatchers = append(persistWatchers, ew)
+ lastMergedEpoch = ew.epoch
+ persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
+ atomic.AddUint64(&s.stats.TotPersisterMergerNapBreak, 1)
+ }
+ return lastMergedEpoch, persistWatchers
+ }
+
OUTER:
- // check for slow merger and await until the merger catch up
- for lastPersistedEpoch > *lastMergedEpoch+epochDistance {
+ for po.PersisterNapUnderNumFiles > 0 &&
+ numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) &&
+ lastMergedEpoch < lastPersistedEpoch {
+ atomic.AddUint64(&s.stats.TotPersisterSlowMergerPause, 1)
select {
case <-s.closeCh:
break OUTER
case ew := <-s.persisterNotifier:
persistWatchers = append(persistWatchers, ew)
- *lastMergedEpoch = ew.epoch
+ lastMergedEpoch = ew.epoch
}
+ atomic.AddUint64(&s.stats.TotPersisterSlowMergerResume, 1)
+
// let the watchers proceed if they lag behind
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
+
+ numFilesOnDisk, _ = s.diskFileStats()
}
- return persistWatchers
+ return lastMergedEpoch, persistWatchers
}
-func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error {
- persisted, err := s.persistSnapshotMaybeMerge(snapshot)
- if err != nil {
- return err
+func (s *Scorch) parsePersisterOptions() (*persisterOptions, error) {
+ po := persisterOptions{
+ PersisterNapTimeMSec: DefaultPersisterNapTimeMSec,
+ PersisterNapUnderNumFiles: DefaultPersisterNapUnderNumFiles,
+ MemoryPressurePauseThreshold: DefaultMemoryPressurePauseThreshold,
}
- if persisted {
- return nil
+ if v, ok := s.config["scorchPersisterOptions"]; ok {
+ b, err := json.Marshal(v)
+ if err != nil {
+ return &po, err
+ }
+
+ err = json.Unmarshal(b, &po)
+ if err != nil {
+ return &po, err
+ }
+ }
+ return &po, nil
+}
+
+func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot,
+ po *persisterOptions) error {
+ // Perform in-memory segment merging only when the memory pressure is
+ // below the configured threshold, else the persister performs the
+ // direct persistence of segments.
+ if s.paused() < po.MemoryPressurePauseThreshold {
+ persisted, err := s.persistSnapshotMaybeMerge(snapshot)
+ if err != nil {
+ return err
+ }
+ if persisted {
+ return nil
+ }
}
return s.persistSnapshotDirect(snapshot)
@@ -224,7 +351,7 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
return false, nil
}
- _, newSnapshot, newSegmentID, err := s.mergeSegmentBases(
+ newSnapshot, newSegmentID, err := s.mergeSegmentBases(
snapshot, sbs, sbsDrops, sbsIndexes, DefaultChunkFactor)
if err != nil {
return false, err
@@ -249,6 +376,7 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
segment: make([]*SegmentSnapshot, 0, len(snapshot.segment)),
internal: snapshot.internal,
epoch: snapshot.epoch,
+ creator: "persistSnapshotMaybeMerge",
}
// copy to the equiv the segments that weren't replaced
@@ -301,6 +429,22 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
return err
}
+ // persist meta values
+ metaBucket, err := snapshotBucket.CreateBucketIfNotExists(boltMetaDataKey)
+ if err != nil {
+ return err
+ }
+ err = metaBucket.Put([]byte("type"), []byte(zap.Type))
+ if err != nil {
+ return err
+ }
+ buf := make([]byte, binary.MaxVarintLen32)
+ binary.BigEndian.PutUint32(buf, zap.Version)
+ err = metaBucket.Put([]byte("version"), buf)
+ if err != nil {
+ return err
+ }
+
// persist internal values
internalBucket, err := snapshotBucket.CreateBucketIfNotExists(boltInternalKey)
if err != nil {
@@ -390,44 +534,21 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
}
}
- s.rootLock.Lock()
- newIndexSnapshot := &IndexSnapshot{
- parent: s,
- epoch: s.nextSnapshotEpoch,
- segment: make([]*SegmentSnapshot, len(s.root.segment)),
- offsets: make([]uint64, len(s.root.offsets)),
- internal: make(map[string][]byte, len(s.root.internal)),
- refs: 1,
- }
- s.nextSnapshotEpoch++
- for i, segmentSnapshot := range s.root.segment {
- // see if this segment has been replaced
- if replacement, ok := newSegments[segmentSnapshot.id]; ok {
- newSegmentSnapshot := &SegmentSnapshot{
- id: segmentSnapshot.id,
- segment: replacement,
- deleted: segmentSnapshot.deleted,
- cachedDocs: segmentSnapshot.cachedDocs,
- }
- newIndexSnapshot.segment[i] = newSegmentSnapshot
- delete(newSegments, segmentSnapshot.id)
- // update items persisted incase of a new segment snapshot
- atomic.AddUint64(&s.stats.numItemsPersisted, newSegmentSnapshot.Count())
- } else {
- newIndexSnapshot.segment[i] = s.root.segment[i]
- newIndexSnapshot.segment[i].segment.AddRef()
- }
- newIndexSnapshot.offsets[i] = s.root.offsets[i]
+ persist := &persistIntroduction{
+ persisted: newSegments,
+ applied: make(notificationChan),
}
- for k, v := range s.root.internal {
- newIndexSnapshot.internal[k] = v
+
+ select {
+ case <-s.closeCh:
+ return segment.ErrClosed
+ case s.persists <- persist:
}
- rootPrev := s.root
- s.root = newIndexSnapshot
- s.rootLock.Unlock()
- if rootPrev != nil {
- _ = rootPrev.DecRef()
+ select {
+ case <-s.closeCh:
+ return segment.ErrClosed
+ case <-persist.applied:
}
}
@@ -462,6 +583,7 @@ var boltSnapshotsBucket = []byte{'s'}
var boltPathKey = []byte{'p'}
var boltDeletedKey = []byte{'d'}
var boltInternalKey = []byte{'i'}
+var boltMetaDataKey = []byte{'m'}
func (s *Scorch) loadFromBolt() error {
return s.rootBolt.View(func(tx *bolt.Tx) error {
@@ -478,19 +600,19 @@ func (s *Scorch) loadFromBolt() error {
continue
}
if foundRoot {
- s.eligibleForRemoval = append(s.eligibleForRemoval, snapshotEpoch)
+ s.AddEligibleForRemoval(snapshotEpoch)
continue
}
snapshot := snapshots.Bucket(k)
if snapshot == nil {
log.Printf("snapshot key, but bucket missing %x, continuing", k)
- s.eligibleForRemoval = append(s.eligibleForRemoval, snapshotEpoch)
+ s.AddEligibleForRemoval(snapshotEpoch)
continue
}
indexSnapshot, err := s.loadSnapshot(snapshot)
if err != nil {
log.Printf("unable to load snapshot, %v, continuing", err)
- s.eligibleForRemoval = append(s.eligibleForRemoval, snapshotEpoch)
+ s.AddEligibleForRemoval(snapshotEpoch)
continue
}
indexSnapshot.epoch = snapshotEpoch
@@ -500,13 +622,16 @@ func (s *Scorch) loadFromBolt() error {
return err
}
s.nextSegmentID++
- s.nextSnapshotEpoch = snapshotEpoch + 1
s.rootLock.Lock()
- if s.root != nil {
- _ = s.root.DecRef()
- }
+ s.nextSnapshotEpoch = snapshotEpoch + 1
+ rootPrev := s.root
s.root = indexSnapshot
s.rootLock.Unlock()
+
+ if rootPrev != nil {
+ _ = rootPrev.DecRef()
+ }
+
foundRoot = true
}
return nil
@@ -524,7 +649,7 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) {
snapshotKey := segment.EncodeUvarintAscending(nil, epoch)
snapshot := snapshots.Bucket(snapshotKey)
if snapshot == nil {
- return nil
+ return fmt.Errorf("snapshot with epoch: %v - doesn't exist", epoch)
}
rv, err = s.loadSnapshot(snapshot)
return err
@@ -536,12 +661,13 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) {
}
func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
-
rv := &IndexSnapshot{
parent: s,
internal: make(map[string][]byte),
refs: 1,
+ creator: "loadSnapshot",
}
+
var running uint64
c := snapshot.Cursor()
for k, _ := c.First(); k != nil; k, _ = c.Next() {
@@ -556,7 +682,7 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
_ = rv.DecRef()
return nil, err
}
- } else {
+ } else if k[0] != boltMetaDataKey[0] {
segmentBucket := snapshot.Bucket(k)
if segmentBucket == nil {
_ = rv.DecRef()
@@ -577,6 +703,7 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
running += segmentSnapshot.segment.Count()
}
}
+
return rv, nil
}
@@ -604,7 +731,9 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro
_ = segment.Close()
return nil, fmt.Errorf("error reading deleted bytes: %v", err)
}
- rv.deleted = deletedBitmap
+ if !deletedBitmap.IsEmpty() {
+ rv.deleted = deletedBitmap
+ }
}
return rv, nil
@@ -643,14 +772,14 @@ func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) {
return 0, err
}
- if len(persistedEpochs) <= NumSnapshotsToKeep {
+ if len(persistedEpochs) <= s.numSnapshotsToKeep {
// we need to keep everything
return 0, nil
}
// make a map of epochs to protect from deletion
- protectedEpochs := make(map[uint64]struct{}, NumSnapshotsToKeep)
- for _, epoch := range persistedEpochs[0:NumSnapshotsToKeep] {
+ protectedEpochs := make(map[uint64]struct{}, s.numSnapshotsToKeep)
+ for _, epoch := range persistedEpochs[0:s.numSnapshotsToKeep] {
protectedEpochs[epoch] = struct{}{}
}
@@ -668,7 +797,7 @@ func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) {
s.eligibleForRemoval = newEligible
s.rootLock.Unlock()
- if len(epochsToRemove) <= 0 {
+ if len(epochsToRemove) == 0 {
return 0, nil
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/reader.go b/vendor/github.com/blevesearch/bleve/index/scorch/reader.go
deleted file mode 100644
index 365ecb6706..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/reader.go
+++ /dev/null
@@ -1,110 +0,0 @@
-// Copyright (c) 2017 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package scorch
-
-import (
- "github.com/blevesearch/bleve/document"
- "github.com/blevesearch/bleve/index"
-)
-
-type Reader struct {
- root *IndexSnapshot // Owns 1 ref-count on the index snapshot.
-}
-
-func (r *Reader) TermFieldReader(term []byte, field string, includeFreq,
- includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
- return r.root.TermFieldReader(term, field, includeFreq, includeNorm, includeTermVectors)
-}
-
-// DocIDReader returns an iterator over all doc ids
-// The caller must close returned instance to release associated resources.
-func (r *Reader) DocIDReaderAll() (index.DocIDReader, error) {
- return r.root.DocIDReaderAll()
-}
-
-func (r *Reader) DocIDReaderOnly(ids []string) (index.DocIDReader, error) {
- return r.root.DocIDReaderOnly(ids)
-}
-
-func (r *Reader) FieldDict(field string) (index.FieldDict, error) {
- return r.root.FieldDict(field)
-}
-
-// FieldDictRange is currently defined to include the start and end terms
-func (r *Reader) FieldDictRange(field string, startTerm []byte,
- endTerm []byte) (index.FieldDict, error) {
- return r.root.FieldDictRange(field, startTerm, endTerm)
-}
-
-func (r *Reader) FieldDictPrefix(field string,
- termPrefix []byte) (index.FieldDict, error) {
- return r.root.FieldDictPrefix(field, termPrefix)
-}
-
-func (r *Reader) Document(id string) (*document.Document, error) {
- return r.root.Document(id)
-}
-func (r *Reader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string,
- visitor index.DocumentFieldTermVisitor) error {
- return r.root.DocumentVisitFieldTerms(id, fields, visitor)
-}
-
-func (r *Reader) Fields() ([]string, error) {
- return r.root.Fields()
-}
-
-func (r *Reader) GetInternal(key []byte) ([]byte, error) {
- return r.root.GetInternal(key)
-}
-
-func (r *Reader) DocCount() (uint64, error) {
- return r.root.DocCount()
-}
-
-func (r *Reader) ExternalID(id index.IndexInternalID) (string, error) {
- return r.root.ExternalID(id)
-}
-
-func (r *Reader) InternalID(id string) (index.IndexInternalID, error) {
- return r.root.InternalID(id)
-}
-
-func (r *Reader) DumpAll() chan interface{} {
- rv := make(chan interface{})
- go func() {
- close(rv)
- }()
- return rv
-}
-
-func (r *Reader) DumpDoc(id string) chan interface{} {
- rv := make(chan interface{})
- go func() {
- close(rv)
- }()
- return rv
-}
-
-func (r *Reader) DumpFields() chan interface{} {
- rv := make(chan interface{})
- go func() {
- close(rv)
- }()
- return rv
-}
-
-func (r *Reader) Close() error {
- return r.root.DecRef()
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go b/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go
index f539313d1c..3f3d8bffce 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/scorch.go
@@ -17,6 +17,7 @@ package scorch
import (
"encoding/json"
"fmt"
+ "io/ioutil"
"os"
"sync"
"sync/atomic"
@@ -27,23 +28,24 @@ import (
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
- "github.com/blevesearch/bleve/index/scorch/segment/mem"
"github.com/blevesearch/bleve/index/scorch/segment/zap"
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/registry"
- "github.com/boltdb/bolt"
+ bolt "github.com/etcd-io/bbolt"
)
const Name = "scorch"
-const Version uint8 = 1
+const Version uint8 = 2
+
+var ErrClosed = fmt.Errorf("scorch closed")
type Scorch struct {
readOnly bool
version uint8
config map[string]interface{}
analysisQueue *index.AnalysisQueue
- stats *Stats
+ stats Stats
nextSegmentID uint64
path string
@@ -52,12 +54,15 @@ type Scorch struct {
rootLock sync.RWMutex
root *IndexSnapshot // holds 1 ref-count on the root
rootPersisted []chan error // closed when root is persisted
+ persistedCallbacks []index.BatchCallback
nextSnapshotEpoch uint64
eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC.
ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet.
+ numSnapshotsToKeep int
closeCh chan struct{}
introductions chan *segmentIntroduction
+ persists chan *persistIntroduction
merges chan *segmentMerge
introducerNotifier chan *epochWatcher
revertToSnapshots chan *snapshotReversion
@@ -67,6 +72,23 @@ type Scorch struct {
onEvent func(event Event)
onAsyncError func(err error)
+
+ iStats internalStats
+
+ pauseLock sync.RWMutex
+
+ pauseCount uint64
+}
+
+type internalStats struct {
+ persistEpoch uint64
+ persistSnapshotSize uint64
+ mergeEpoch uint64
+ mergeSnapshotSize uint64
+ newSegBufBytesAdded uint64
+ newSegBufBytesRemoved uint64
+ analysisBytesAdded uint64
+ analysisBytesRemoved uint64
}
func NewScorch(storeName string,
@@ -80,8 +102,7 @@ func NewScorch(storeName string,
closeCh: make(chan struct{}),
ineligibleForRemoval: map[string]bool{},
}
- rv.stats = &Stats{i: rv}
- rv.root = &IndexSnapshot{parent: rv, refs: 1}
+ rv.root = &IndexSnapshot{parent: rv, refs: 1, creator: "NewScorch"}
ro, ok := config["read_only"].(bool)
if ok {
rv.readOnly = ro
@@ -101,9 +122,30 @@ func NewScorch(storeName string,
return rv, nil
}
+func (s *Scorch) paused() uint64 {
+ s.pauseLock.Lock()
+ pc := s.pauseCount
+ s.pauseLock.Unlock()
+ return pc
+}
+
+func (s *Scorch) incrPause() {
+ s.pauseLock.Lock()
+ s.pauseCount++
+ s.pauseLock.Unlock()
+}
+
+func (s *Scorch) decrPause() {
+ s.pauseLock.Lock()
+ s.pauseCount--
+ s.pauseLock.Unlock()
+}
+
func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) {
if s.onEvent != nil {
+ s.incrPause()
s.onEvent(Event{Kind: kind, Scorch: s, Duration: dur})
+ s.decrPause()
}
}
@@ -111,6 +153,7 @@ func (s *Scorch) fireAsyncError(err error) {
if s.onAsyncError != nil {
s.onAsyncError(err)
}
+ atomic.AddUint64(&s.stats.TotOnErrors, 1)
}
func (s *Scorch) Open() error {
@@ -172,7 +215,10 @@ func (s *Scorch) openBolt() error {
}
}
+ atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, uint64(len(s.root.segment)))
+
s.introductions = make(chan *segmentIntroduction)
+ s.persists = make(chan *persistIntroduction)
s.merges = make(chan *segmentMerge)
s.introducerNotifier = make(chan *epochWatcher, 1)
s.revertToSnapshots = make(chan *snapshotReversion)
@@ -186,6 +232,17 @@ func (s *Scorch) openBolt() error {
}
}
+ s.numSnapshotsToKeep = NumSnapshotsToKeep
+ if v, ok := s.config["numSnapshotsToKeep"]; ok {
+ var t int
+ if t, err = parseToInteger(v); err != nil {
+ return fmt.Errorf("numSnapshotsToKeep parse err: %v", err)
+ }
+ if t > 0 {
+ s.numSnapshotsToKeep = t
+ }
+ }
+
return nil
}
@@ -255,65 +312,83 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
// FIXME could sort ids list concurrent with analysis?
- go func() {
- for _, doc := range batch.IndexOps {
- if doc != nil {
- aw := index.NewAnalysisWork(s, doc, resultChan)
- // put the work on the queue
- s.analysisQueue.Queue(aw)
+ if len(batch.IndexOps) > 0 {
+ go func() {
+ for _, doc := range batch.IndexOps {
+ if doc != nil {
+ aw := index.NewAnalysisWork(s, doc, resultChan)
+ // put the work on the queue
+ s.analysisQueue.Queue(aw)
+ }
}
- }
- }()
+ }()
+ }
// wait for analysis result
analysisResults := make([]*index.AnalysisResult, int(numUpdates))
var itemsDeQueued uint64
+ var totalAnalysisSize int
for itemsDeQueued < numUpdates {
result := <-resultChan
+ resultSize := result.Size()
+ atomic.AddUint64(&s.iStats.analysisBytesAdded, uint64(resultSize))
+ totalAnalysisSize += resultSize
analysisResults[itemsDeQueued] = result
itemsDeQueued++
}
close(resultChan)
+ defer atomic.AddUint64(&s.iStats.analysisBytesRemoved, uint64(totalAnalysisSize))
+
+ atomic.AddUint64(&s.stats.TotAnalysisTime, uint64(time.Since(start)))
- atomic.AddUint64(&s.stats.analysisTime, uint64(time.Since(start)))
+ indexStart := time.Now()
// notify handlers that we're about to introduce a segment
s.fireEvent(EventKindBatchIntroductionStart, 0)
var newSegment segment.Segment
+ var bufBytes uint64
if len(analysisResults) > 0 {
- newSegment, err = zap.NewSegmentBase(mem.NewFromAnalyzedDocs(analysisResults), DefaultChunkFactor)
+ newSegment, bufBytes, err = zap.AnalysisResultsToSegmentBase(analysisResults, DefaultChunkFactor)
if err != nil {
return err
}
+ atomic.AddUint64(&s.iStats.newSegBufBytesAdded, bufBytes)
+ } else {
+ atomic.AddUint64(&s.stats.TotBatchesEmpty, 1)
}
- err = s.prepareSegment(newSegment, ids, batch.InternalOps)
+ err = s.prepareSegment(newSegment, ids, batch.InternalOps, batch.PersistedCallback())
if err != nil {
if newSegment != nil {
_ = newSegment.Close()
}
- atomic.AddUint64(&s.stats.errors, 1)
+ atomic.AddUint64(&s.stats.TotOnErrors, 1)
} else {
- atomic.AddUint64(&s.stats.updates, numUpdates)
- atomic.AddUint64(&s.stats.deletes, numDeletes)
- atomic.AddUint64(&s.stats.batches, 1)
- atomic.AddUint64(&s.stats.numPlainTextBytesIndexed, numPlainTextBytes)
+ atomic.AddUint64(&s.stats.TotUpdates, numUpdates)
+ atomic.AddUint64(&s.stats.TotDeletes, numDeletes)
+ atomic.AddUint64(&s.stats.TotBatches, 1)
+ atomic.AddUint64(&s.stats.TotIndexedPlainTextBytes, numPlainTextBytes)
}
+
+ atomic.AddUint64(&s.iStats.newSegBufBytesRemoved, bufBytes)
+ atomic.AddUint64(&s.stats.TotIndexTime, uint64(time.Since(indexStart)))
+
return err
}
func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
- internalOps map[string][]byte) error {
+ internalOps map[string][]byte, persistedCallback index.BatchCallback) error {
// new introduction
introduction := &segmentIntroduction{
- id: atomic.AddUint64(&s.nextSegmentID, 1),
- data: newSegment,
- ids: ids,
- obsoletes: make(map[uint64]*roaring.Bitmap),
- internal: internalOps,
- applied: make(chan error),
+ id: atomic.AddUint64(&s.nextSegmentID, 1),
+ data: newSegment,
+ ids: ids,
+ obsoletes: make(map[uint64]*roaring.Bitmap),
+ internal: internalOps,
+ applied: make(chan error),
+ persistedCallback: persistedCallback,
}
if !s.unsafeBatch {
@@ -326,6 +401,8 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
root.AddRef()
s.rootLock.RUnlock()
+ defer func() { _ = root.DecRef() }()
+
for _, seg := range root.segment {
delta, err := seg.segment.DocNumbers(ids)
if err != nil {
@@ -334,7 +411,7 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
introduction.obsoletes[seg.id] = delta
}
- _ = root.DecRef()
+ introStartTime := time.Now()
s.introductions <- introduction
@@ -348,6 +425,12 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
err = <-introduction.persisted
}
+ introTime := uint64(time.Since(introStartTime))
+ atomic.AddUint64(&s.stats.TotBatchIntroTime, introTime)
+ if atomic.LoadUint64(&s.stats.MaxBatchIntroTime) < introTime {
+ atomic.StoreUint64(&s.stats.MaxBatchIntroTime, introTime)
+ }
+
return err
}
@@ -366,18 +449,69 @@ func (s *Scorch) DeleteInternal(key []byte) error {
// Reader returns a low-level accessor on the index data. Close it to
// release associated resources.
func (s *Scorch) Reader() (index.IndexReader, error) {
+ return s.currentSnapshot(), nil
+}
+
+func (s *Scorch) currentSnapshot() *IndexSnapshot {
s.rootLock.RLock()
- rv := &Reader{root: s.root}
- rv.root.AddRef()
+ rv := s.root
+ if rv != nil {
+ rv.AddRef()
+ }
s.rootLock.RUnlock()
- return rv, nil
+ return rv
}
func (s *Scorch) Stats() json.Marshaler {
- return s.stats
+ return &s.stats
}
+
+func (s *Scorch) diskFileStats() (uint64, uint64) {
+ var numFilesOnDisk, numBytesUsedDisk uint64
+ if s.path != "" {
+ finfos, err := ioutil.ReadDir(s.path)
+ if err == nil {
+ for _, finfo := range finfos {
+ if !finfo.IsDir() {
+ numBytesUsedDisk += uint64(finfo.Size())
+ numFilesOnDisk++
+ }
+ }
+ }
+ }
+ return numFilesOnDisk, numBytesUsedDisk
+}
+
func (s *Scorch) StatsMap() map[string]interface{} {
- m, _ := s.stats.statsMap()
+ m := s.stats.ToMap()
+
+ numFilesOnDisk, numBytesUsedDisk := s.diskFileStats()
+
+ m["CurOnDiskBytes"] = numBytesUsedDisk
+ m["CurOnDiskFiles"] = numFilesOnDisk
+
+ // TODO: consider one day removing these backwards compatible
+ // names for apps using the old names
+ m["updates"] = m["TotUpdates"]
+ m["deletes"] = m["TotDeletes"]
+ m["batches"] = m["TotBatches"]
+ m["errors"] = m["TotOnErrors"]
+ m["analysis_time"] = m["TotAnalysisTime"]
+ m["index_time"] = m["TotIndexTime"]
+ m["term_searchers_started"] = m["TotTermSearchersStarted"]
+ m["term_searchers_finished"] = m["TotTermSearchersFinished"]
+ m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"]
+ m["num_items_introduced"] = m["TotIntroducedItems"]
+ m["num_items_persisted"] = m["TotPersistedItems"]
+ m["num_recs_to_persist"] = m["TotItemsToPersist"]
+ m["num_bytes_used_disk"] = m["CurOnDiskBytes"]
+ m["num_files_on_disk"] = m["CurOnDiskFiles"]
+ m["num_root_memorysegments"] = m["TotMemorySegmentsAtRoot"]
+ m["num_root_filesegments"] = m["TotFileSegmentsAtRoot"]
+ m["num_persister_nap_pause_completed"] = m["TotPersisterNapPauseCompleted"]
+ m["num_persister_nap_merger_break"] = m["TotPersisterMergerNapBreak"]
+ m["total_compaction_written_bytes"] = m["TotFileMergeWrittenBytes"]
+
return m
}
@@ -394,7 +528,7 @@ func (s *Scorch) Analyze(d *document.Document) *index.AnalysisResult {
rv.Analyzed[i] = tokenFreqs
rv.Length[i] = fieldLength
- if len(d.CompositeFields) > 0 {
+ if len(d.CompositeFields) > 0 && field.Name() != "_id" {
// see if any of the composite fields need this
for _, compositeField := range d.CompositeFields {
compositeField.Compose(field.Name(), fieldLength, tokenFreqs)
@@ -418,20 +552,43 @@ func (s *Scorch) AddEligibleForRemoval(epoch uint64) {
s.rootLock.Unlock()
}
-func (s *Scorch) MemoryUsed() uint64 {
- var memUsed uint64
- s.rootLock.RLock()
- if s.root != nil {
- for _, segmentSnapshot := range s.root.segment {
- memUsed += 8 /* size of id -> uint64 */ +
- segmentSnapshot.segment.SizeInBytes()
- if segmentSnapshot.deleted != nil {
- memUsed += segmentSnapshot.deleted.GetSizeInBytes()
- }
- memUsed += segmentSnapshot.cachedDocs.sizeInBytes()
- }
+func (s *Scorch) MemoryUsed() (memUsed uint64) {
+ indexSnapshot := s.currentSnapshot()
+ if indexSnapshot == nil {
+ return
}
- s.rootLock.RUnlock()
+
+ defer func() {
+ _ = indexSnapshot.Close()
+ }()
+
+ // Account for current root snapshot overhead
+ memUsed += uint64(indexSnapshot.Size())
+
+ // Account for snapshot that the persister may be working on
+ persistEpoch := atomic.LoadUint64(&s.iStats.persistEpoch)
+ persistSnapshotSize := atomic.LoadUint64(&s.iStats.persistSnapshotSize)
+ if persistEpoch != 0 && indexSnapshot.epoch > persistEpoch {
+ // the snapshot that the persister is working on isn't the same as
+ // the current snapshot
+ memUsed += persistSnapshotSize
+ }
+
+ // Account for snapshot that the merger may be working on
+ mergeEpoch := atomic.LoadUint64(&s.iStats.mergeEpoch)
+ mergeSnapshotSize := atomic.LoadUint64(&s.iStats.mergeSnapshotSize)
+ if mergeEpoch != 0 && indexSnapshot.epoch > mergeEpoch {
+ // the snapshot that the merger is working on isn't the same as
+ // the current snapshot
+ memUsed += mergeSnapshotSize
+ }
+
+ memUsed += (atomic.LoadUint64(&s.iStats.newSegBufBytesAdded) -
+ atomic.LoadUint64(&s.iStats.newSegBufBytesRemoved))
+
+ memUsed += (atomic.LoadUint64(&s.iStats.analysisBytesAdded) -
+ atomic.LoadUint64(&s.iStats.analysisBytesRemoved))
+
return memUsed
}
@@ -450,3 +607,15 @@ func (s *Scorch) unmarkIneligibleForRemoval(filename string) {
func init() {
registry.RegisterIndexType(Name, NewScorch)
}
+
+func parseToInteger(i interface{}) (int, error) {
+ switch v := i.(type) {
+ case float64:
+ return int(v), nil
+ case int:
+ return v, nil
+
+ default:
+ return 0, fmt.Errorf("expects int or float64 value")
+ }
+}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go
index 83454644da..165a01bc16 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go
@@ -17,6 +17,7 @@ package segment
import (
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
+ "github.com/couchbase/vellum"
)
type EmptySegment struct{}
@@ -29,6 +30,10 @@ func (e *EmptySegment) VisitDocument(num uint64, visitor DocumentFieldValueVisit
return nil
}
+func (e *EmptySegment) DocID(num uint64) ([]byte, error) {
+ return nil, nil
+}
+
func (e *EmptySegment) Count() uint64 {
return 0
}
@@ -46,6 +51,10 @@ func (e *EmptySegment) Close() error {
return nil
}
+func (e *EmptySegment) Size() uint64 {
+ return 0
+}
+
func (e *EmptySegment) AddRef() {
}
@@ -55,8 +64,8 @@ func (e *EmptySegment) DecRef() error {
type EmptyDictionary struct{}
-func (e *EmptyDictionary) PostingsList(term string,
- except *roaring.Bitmap) (PostingsList, error) {
+func (e *EmptyDictionary) PostingsList(term []byte,
+ except *roaring.Bitmap, prealloc PostingsList) (PostingsList, error) {
return &EmptyPostingsList{}, nil
}
@@ -72,18 +81,37 @@ func (e *EmptyDictionary) RangeIterator(start, end string) DictionaryIterator {
return &EmptyDictionaryIterator{}
}
+func (e *EmptyDictionary) AutomatonIterator(a vellum.Automaton,
+ startKeyInclusive, endKeyExclusive []byte) DictionaryIterator {
+ return &EmptyDictionaryIterator{}
+}
+
+func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte,
+ includeCount bool) DictionaryIterator {
+ return &EmptyDictionaryIterator{}
+}
+
type EmptyDictionaryIterator struct{}
func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) {
return nil, nil
}
+func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) {
+ return nil, nil
+}
+
type EmptyPostingsList struct{}
-func (e *EmptyPostingsList) Iterator() PostingsIterator {
+func (e *EmptyPostingsList) Iterator(includeFreq, includeNorm, includeLocations bool,
+ prealloc PostingsIterator) PostingsIterator {
return &EmptyPostingsIterator{}
}
+func (e *EmptyPostingsList) Size() int {
+ return 0
+}
+
func (e *EmptyPostingsList) Count() uint64 {
return 0
}
@@ -93,3 +121,9 @@ type EmptyPostingsIterator struct{}
func (e *EmptyPostingsIterator) Next() (Posting, error) {
return nil, nil
}
+
+func (e *EmptyPostingsIterator) Size() int {
+ return 0
+}
+
+var AnEmptyPostingsIterator = &EmptyPostingsIterator{}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/build.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/build.go
deleted file mode 100644
index 57d60dc890..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/build.go
+++ /dev/null
@@ -1,321 +0,0 @@
-// Copyright (c) 2017 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package mem
-
-import (
- "math"
- "sort"
-
- "github.com/RoaringBitmap/roaring"
- "github.com/blevesearch/bleve/analysis"
- "github.com/blevesearch/bleve/document"
- "github.com/blevesearch/bleve/index"
-)
-
-// NewFromAnalyzedDocs places the analyzed document mutations into a new segment
-func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment {
- s := New()
-
- // ensure that _id field get fieldID 0
- s.getOrDefineField("_id")
-
- // fill Dicts/DictKeys and preallocate memory
- s.initializeDict(results)
-
- // walk each doc
- for _, result := range results {
- s.processDocument(result)
- }
-
- // go back and sort the dictKeys
- for _, dict := range s.DictKeys {
- sort.Strings(dict)
- }
-
- // compute memory usage of segment
- s.updateSizeInBytes()
-
- // professional debugging
- //
- // log.Printf("fields: %v\n", s.FieldsMap)
- // log.Printf("fieldsInv: %v\n", s.FieldsInv)
- // log.Printf("fieldsLoc: %v\n", s.FieldsLoc)
- // log.Printf("dicts: %v\n", s.Dicts)
- // log.Printf("dict keys: %v\n", s.DictKeys)
- // for i, posting := range s.Postings {
- // log.Printf("posting %d: %v\n", i, posting)
- // }
- // for i, freq := range s.Freqs {
- // log.Printf("freq %d: %v\n", i, freq)
- // }
- // for i, norm := range s.Norms {
- // log.Printf("norm %d: %v\n", i, norm)
- // }
- // for i, field := range s.Locfields {
- // log.Printf("field %d: %v\n", i, field)
- // }
- // for i, start := range s.Locstarts {
- // log.Printf("start %d: %v\n", i, start)
- // }
- // for i, end := range s.Locends {
- // log.Printf("end %d: %v\n", i, end)
- // }
- // for i, pos := range s.Locpos {
- // log.Printf("pos %d: %v\n", i, pos)
- // }
- // for i, apos := range s.Locarraypos {
- // log.Printf("apos %d: %v\n", i, apos)
- // }
- // log.Printf("stored: %v\n", s.Stored)
- // log.Printf("stored types: %v\n", s.StoredTypes)
- // log.Printf("stored pos: %v\n", s.StoredPos)
-
- return s
-}
-
-// fill Dicts/DictKeys and preallocate memory for postings
-func (s *Segment) initializeDict(results []*index.AnalysisResult) {
- var numPostingsLists int
-
- numTermsPerPostingsList := make([]int, 0, 64) // Keyed by postings list id.
- numLocsPerPostingsList := make([]int, 0, 64) // Keyed by postings list id.
-
- var numTokenFrequencies int
- var totLocs int
-
- // initial scan for all fieldID's to sort them
- for _, result := range results {
- for _, field := range result.Document.CompositeFields {
- s.getOrDefineField(field.Name())
- }
- for _, field := range result.Document.Fields {
- s.getOrDefineField(field.Name())
- }
- }
- sort.Strings(s.FieldsInv[1:]) // keep _id as first field
- s.FieldsMap = make(map[string]uint16, len(s.FieldsInv))
- for fieldID, fieldName := range s.FieldsInv {
- s.FieldsMap[fieldName] = uint16(fieldID + 1)
- }
-
- processField := func(fieldID uint16, tfs analysis.TokenFrequencies) {
- for term, tf := range tfs {
- pidPlus1, exists := s.Dicts[fieldID][term]
- if !exists {
- numPostingsLists++
- pidPlus1 = uint64(numPostingsLists)
- s.Dicts[fieldID][term] = pidPlus1
- s.DictKeys[fieldID] = append(s.DictKeys[fieldID], term)
- numTermsPerPostingsList = append(numTermsPerPostingsList, 0)
- numLocsPerPostingsList = append(numLocsPerPostingsList, 0)
- }
- pid := pidPlus1 - 1
- numTermsPerPostingsList[pid] += 1
- numLocsPerPostingsList[pid] += len(tf.Locations)
- totLocs += len(tf.Locations)
- }
- numTokenFrequencies += len(tfs)
- }
-
- for _, result := range results {
- // walk each composite field
- for _, field := range result.Document.CompositeFields {
- fieldID := uint16(s.getOrDefineField(field.Name()))
- _, tf := field.Analyze()
- processField(fieldID, tf)
- }
-
- // walk each field
- for i, field := range result.Document.Fields {
- fieldID := uint16(s.getOrDefineField(field.Name()))
- tf := result.Analyzed[i]
- processField(fieldID, tf)
- }
- }
-
- s.Postings = make([]*roaring.Bitmap, numPostingsLists)
- for i := 0; i < numPostingsLists; i++ {
- s.Postings[i] = roaring.New()
- }
- s.PostingsLocs = make([]*roaring.Bitmap, numPostingsLists)
- for i := 0; i < numPostingsLists; i++ {
- s.PostingsLocs[i] = roaring.New()
- }
-
- // Preallocate big, contiguous backing arrays.
- auint64Backing := make([][]uint64, numPostingsLists*4+totLocs) // For Freqs, Locstarts, Locends, Locpos, sub-Locarraypos.
- uint64Backing := make([]uint64, numTokenFrequencies+totLocs*3) // For sub-Freqs, sub-Locstarts, sub-Locends, sub-Locpos.
- float32Backing := make([]float32, numTokenFrequencies) // For sub-Norms.
- uint16Backing := make([]uint16, totLocs) // For sub-Locfields.
-
- // Point top-level slices to the backing arrays.
- s.Freqs = auint64Backing[0:numPostingsLists]
- auint64Backing = auint64Backing[numPostingsLists:]
-
- s.Norms = make([][]float32, numPostingsLists)
-
- s.Locfields = make([][]uint16, numPostingsLists)
-
- s.Locstarts = auint64Backing[0:numPostingsLists]
- auint64Backing = auint64Backing[numPostingsLists:]
-
- s.Locends = auint64Backing[0:numPostingsLists]
- auint64Backing = auint64Backing[numPostingsLists:]
-
- s.Locpos = auint64Backing[0:numPostingsLists]
- auint64Backing = auint64Backing[numPostingsLists:]
-
- s.Locarraypos = make([][][]uint64, numPostingsLists)
-
- // Point sub-slices to the backing arrays.
- for pid, numTerms := range numTermsPerPostingsList {
- s.Freqs[pid] = uint64Backing[0:0]
- uint64Backing = uint64Backing[numTerms:]
-
- s.Norms[pid] = float32Backing[0:0]
- float32Backing = float32Backing[numTerms:]
- }
-
- for pid, numLocs := range numLocsPerPostingsList {
- s.Locfields[pid] = uint16Backing[0:0]
- uint16Backing = uint16Backing[numLocs:]
-
- s.Locstarts[pid] = uint64Backing[0:0]
- uint64Backing = uint64Backing[numLocs:]
-
- s.Locends[pid] = uint64Backing[0:0]
- uint64Backing = uint64Backing[numLocs:]
-
- s.Locpos[pid] = uint64Backing[0:0]
- uint64Backing = uint64Backing[numLocs:]
-
- s.Locarraypos[pid] = auint64Backing[0:0]
- auint64Backing = auint64Backing[numLocs:]
- }
-}
-
-func (s *Segment) processDocument(result *index.AnalysisResult) {
- // used to collate information across fields
- docMap := make(map[uint16]analysis.TokenFrequencies, len(s.FieldsMap))
- fieldLens := make(map[uint16]int, len(s.FieldsMap))
-
- docNum := uint64(s.addDocument())
-
- processField := func(field uint16, name string, l int, tf analysis.TokenFrequencies) {
- fieldLens[field] += l
- if existingFreqs, ok := docMap[field]; ok {
- existingFreqs.MergeAll(name, tf)
- } else {
- docMap[field] = tf
- }
- }
-
- storeField := func(docNum uint64, field uint16, typ byte, val []byte, pos []uint64) {
- s.Stored[docNum][field] = append(s.Stored[docNum][field], val)
- s.StoredTypes[docNum][field] = append(s.StoredTypes[docNum][field], typ)
- s.StoredPos[docNum][field] = append(s.StoredPos[docNum][field], pos)
- }
-
- // walk each composite field
- for _, field := range result.Document.CompositeFields {
- fieldID := uint16(s.getOrDefineField(field.Name()))
- l, tf := field.Analyze()
- processField(fieldID, field.Name(), l, tf)
- }
-
- // walk each field
- for i, field := range result.Document.Fields {
- fieldID := uint16(s.getOrDefineField(field.Name()))
- l := result.Length[i]
- tf := result.Analyzed[i]
- processField(fieldID, field.Name(), l, tf)
- if field.Options().IsStored() {
- storeField(docNum, fieldID, encodeFieldType(field), field.Value(), field.ArrayPositions())
- }
-
- if field.Options().IncludeDocValues() {
- s.DocValueFields[fieldID] = true
- }
- }
-
- // now that its been rolled up into docMap, walk that
- for fieldID, tokenFrequencies := range docMap {
- for term, tokenFreq := range tokenFrequencies {
- pid := s.Dicts[fieldID][term] - 1
- bs := s.Postings[pid]
- bs.AddInt(int(docNum))
- s.Freqs[pid] = append(s.Freqs[pid], uint64(tokenFreq.Frequency()))
- s.Norms[pid] = append(s.Norms[pid], float32(1.0/math.Sqrt(float64(fieldLens[fieldID]))))
- locationBS := s.PostingsLocs[pid]
- if len(tokenFreq.Locations) > 0 {
- locationBS.AddInt(int(docNum))
- for _, loc := range tokenFreq.Locations {
- var locf = fieldID
- if loc.Field != "" {
- locf = uint16(s.getOrDefineField(loc.Field))
- }
- s.Locfields[pid] = append(s.Locfields[pid], locf)
- s.Locstarts[pid] = append(s.Locstarts[pid], uint64(loc.Start))
- s.Locends[pid] = append(s.Locends[pid], uint64(loc.End))
- s.Locpos[pid] = append(s.Locpos[pid], uint64(loc.Position))
- if len(loc.ArrayPositions) > 0 {
- s.Locarraypos[pid] = append(s.Locarraypos[pid], loc.ArrayPositions)
- } else {
- s.Locarraypos[pid] = append(s.Locarraypos[pid], nil)
- }
- }
- }
- }
- }
-}
-
-func (s *Segment) getOrDefineField(name string) int {
- fieldIDPlus1, ok := s.FieldsMap[name]
- if !ok {
- fieldIDPlus1 = uint16(len(s.FieldsInv) + 1)
- s.FieldsMap[name] = fieldIDPlus1
- s.FieldsInv = append(s.FieldsInv, name)
- s.Dicts = append(s.Dicts, make(map[string]uint64))
- s.DictKeys = append(s.DictKeys, make([]string, 0))
- }
- return int(fieldIDPlus1 - 1)
-}
-
-func (s *Segment) addDocument() int {
- docNum := len(s.Stored)
- s.Stored = append(s.Stored, map[uint16][][]byte{})
- s.StoredTypes = append(s.StoredTypes, map[uint16][]byte{})
- s.StoredPos = append(s.StoredPos, map[uint16][][]uint64{})
- return docNum
-}
-
-func encodeFieldType(f document.Field) byte {
- fieldType := byte('x')
- switch f.(type) {
- case *document.TextField:
- fieldType = 't'
- case *document.NumericField:
- fieldType = 'n'
- case *document.DateTimeField:
- fieldType = 'd'
- case *document.BooleanField:
- fieldType = 'b'
- case *document.GeoPointField:
- fieldType = 'g'
- case *document.CompositeField:
- fieldType = 'c'
- }
- return fieldType
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/dict.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/dict.go
deleted file mode 100644
index cf92ef71f6..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/dict.go
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright (c) 2017 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package mem
-
-import (
- "sort"
- "strings"
-
- "github.com/RoaringBitmap/roaring"
- "github.com/blevesearch/bleve/index"
- "github.com/blevesearch/bleve/index/scorch/segment"
-)
-
-// Dictionary is the in-memory representation of the term dictionary
-type Dictionary struct {
- segment *Segment
- field string
- fieldID uint16
-}
-
-// PostingsList returns the postings list for the specified term
-func (d *Dictionary) PostingsList(term string,
- except *roaring.Bitmap) (segment.PostingsList, error) {
- return &PostingsList{
- dictionary: d,
- term: term,
- postingsID: d.segment.Dicts[d.fieldID][term],
- except: except,
- }, nil
-}
-
-// Iterator returns an iterator for this dictionary
-func (d *Dictionary) Iterator() segment.DictionaryIterator {
- return &DictionaryIterator{
- d: d,
- }
-}
-
-// PrefixIterator returns an iterator which only visits terms having the
-// the specified prefix
-func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator {
- offset := sort.SearchStrings(d.segment.DictKeys[d.fieldID], prefix)
- return &DictionaryIterator{
- d: d,
- prefix: prefix,
- offset: offset,
- }
-}
-
-// RangeIterator returns an iterator which only visits terms between the
-// start and end terms. NOTE: bleve.index API specifies the end is inclusive.
-func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator {
- offset := sort.SearchStrings(d.segment.DictKeys[d.fieldID], start)
- return &DictionaryIterator{
- d: d,
- offset: offset,
- end: end,
- }
-}
-
-// DictionaryIterator is an iterator for term dictionary
-type DictionaryIterator struct {
- d *Dictionary
- prefix string
- end string
- offset int
-
- dictEntry index.DictEntry // reused across Next()'s
-}
-
-// Next returns the next entry in the dictionary
-func (d *DictionaryIterator) Next() (*index.DictEntry, error) {
- if d.offset > len(d.d.segment.DictKeys[d.d.fieldID])-1 {
- return nil, nil
- }
- next := d.d.segment.DictKeys[d.d.fieldID][d.offset]
- // check prefix
- if d.prefix != "" && !strings.HasPrefix(next, d.prefix) {
- return nil, nil
- }
- // check end (bleve.index API demands inclusive end)
- if d.end != "" && next > d.end {
- return nil, nil
- }
-
- d.offset++
- postingID := d.d.segment.Dicts[d.d.fieldID][next]
- d.dictEntry.Term = next
- d.dictEntry.Count = d.d.segment.Postings[postingID-1].GetCardinality()
- return &d.dictEntry, nil
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/posting.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/posting.go
deleted file mode 100644
index d91a005615..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/posting.go
+++ /dev/null
@@ -1,178 +0,0 @@
-// Copyright (c) 2017 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package mem
-
-import (
- "github.com/RoaringBitmap/roaring"
- "github.com/blevesearch/bleve/index/scorch/segment"
-)
-
-// PostingsList is an in-memory represenation of a postings list
-type PostingsList struct {
- dictionary *Dictionary
- term string
- postingsID uint64
- except *roaring.Bitmap
-}
-
-// Count returns the number of items on this postings list
-func (p *PostingsList) Count() uint64 {
- var rv uint64
- if p.postingsID > 0 {
- rv = p.dictionary.segment.Postings[p.postingsID-1].GetCardinality()
- if p.except != nil {
- except := p.except.GetCardinality()
- if except > rv {
- // avoid underflow
- except = rv
- }
- rv -= except
- }
- }
- return rv
-}
-
-// Iterator returns an iterator for this postings list
-func (p *PostingsList) Iterator() segment.PostingsIterator {
- rv := &PostingsIterator{
- postings: p,
- }
- if p.postingsID > 0 {
- allbits := p.dictionary.segment.Postings[p.postingsID-1]
- rv.locations = p.dictionary.segment.PostingsLocs[p.postingsID-1]
- rv.all = allbits.Iterator()
- if p.except != nil {
- allExcept := allbits.Clone()
- allExcept.AndNot(p.except)
- rv.actual = allExcept.Iterator()
- } else {
- rv.actual = allbits.Iterator()
- }
- }
-
- return rv
-}
-
-// PostingsIterator provides a way to iterate through the postings list
-type PostingsIterator struct {
- postings *PostingsList
- all roaring.IntIterable
- locations *roaring.Bitmap
- offset int
- locoffset int
- actual roaring.IntIterable
-}
-
-// Next returns the next posting on the postings list, or nil at the end
-func (i *PostingsIterator) Next() (segment.Posting, error) {
- if i.actual == nil || !i.actual.HasNext() {
- return nil, nil
- }
- n := i.actual.Next()
- allN := i.all.Next()
-
- // n is the next actual hit (excluding some postings)
- // allN is the next hit in the full postings
- // if they don't match, adjust offsets to factor in item we're skipping over
- // incr the all iterator, and check again
- for allN != n {
- i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset])
- i.offset++
- allN = i.all.Next()
- }
- rv := &Posting{
- iterator: i,
- docNum: uint64(n),
- offset: i.offset,
- locoffset: i.locoffset,
- hasLoc: i.locations.Contains(n),
- }
-
- i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset])
- i.offset++
- return rv, nil
-}
-
-// Posting is a single entry in a postings list
-type Posting struct {
- iterator *PostingsIterator
- docNum uint64
- offset int
- locoffset int
- hasLoc bool
-}
-
-// Number returns the document number of this posting in this segment
-func (p *Posting) Number() uint64 {
- return p.docNum
-}
-
-// Frequency returns the frequence of occurance of this term in this doc/field
-func (p *Posting) Frequency() uint64 {
- return p.iterator.postings.dictionary.segment.Freqs[p.iterator.postings.postingsID-1][p.offset]
-}
-
-// Norm returns the normalization factor for this posting
-func (p *Posting) Norm() float64 {
- return float64(p.iterator.postings.dictionary.segment.Norms[p.iterator.postings.postingsID-1][p.offset])
-}
-
-// Locations returns the location information for each occurance
-func (p *Posting) Locations() []segment.Location {
- if !p.hasLoc {
- return nil
- }
- freq := int(p.Frequency())
- rv := make([]segment.Location, freq)
- for i := 0; i < freq; i++ {
- rv[i] = &Location{
- p: p,
- offset: p.locoffset + i,
- }
- }
- return rv
-}
-
-// Location represents the location of a single occurance
-type Location struct {
- p *Posting
- offset int
-}
-
-// Field returns the name of the field (useful in composite fields to know
-// which original field the value came from)
-func (l *Location) Field() string {
- return l.p.iterator.postings.dictionary.segment.FieldsInv[l.p.iterator.postings.dictionary.segment.Locfields[l.p.iterator.postings.postingsID-1][l.offset]]
-}
-
-// Start returns the start byte offset of this occurance
-func (l *Location) Start() uint64 {
- return l.p.iterator.postings.dictionary.segment.Locstarts[l.p.iterator.postings.postingsID-1][l.offset]
-}
-
-// End returns the end byte offset of this occurance
-func (l *Location) End() uint64 {
- return l.p.iterator.postings.dictionary.segment.Locends[l.p.iterator.postings.postingsID-1][l.offset]
-}
-
-// Pos returns the 1-based phrase position of this occurance
-func (l *Location) Pos() uint64 {
- return l.p.iterator.postings.dictionary.segment.Locpos[l.p.iterator.postings.postingsID-1][l.offset]
-}
-
-// ArrayPositions returns the array position vector associated with this occurance
-func (l *Location) ArrayPositions() []uint64 {
- return l.p.iterator.postings.dictionary.segment.Locarraypos[l.p.iterator.postings.postingsID-1][l.offset]
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/segment.go
deleted file mode 100644
index 04bdb368ac..0000000000
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/segment.go
+++ /dev/null
@@ -1,289 +0,0 @@
-// Copyright (c) 2017 Couchbase, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package mem
-
-import (
- "fmt"
-
- "github.com/RoaringBitmap/roaring"
- "github.com/blevesearch/bleve/index/scorch/segment"
-)
-
-// _id field is always guaranteed to have fieldID of 0
-const idFieldID uint16 = 0
-
-// KNOWN ISSUES
-// - LIMITATION - we decided whether or not to store term vectors for a field
-// at the segment level, based on the first definition of a
-// field we see. in normal bleve usage this is fine, all
-// instances of a field definition will be the same. however,
-// advanced users may violate this and provide unique field
-// definitions with each document. this segment does not
-// support this usage.
-
-// TODO
-// - need better testing of multiple docs, iterating freqs, locations and
-// and verifying the correct results are returned
-
-// Segment is an in memory implementation of scorch.Segment
-type Segment struct {
-
- // FieldsMap adds 1 to field id to avoid zero value issues
- // name -> field id + 1
- FieldsMap map[string]uint16
-
- // FieldsInv is the inverse of FieldsMap
- // field id -> name
- FieldsInv []string
-
- // Term dictionaries for each field
- // field id -> term -> postings list id + 1
- Dicts []map[string]uint64
-
- // Terms for each field, where terms are sorted ascending
- // field id -> []term
- DictKeys [][]string
-
- // Postings list
- // postings list id -> bitmap by docNum
- Postings []*roaring.Bitmap
-
- // Postings list has locations
- PostingsLocs []*roaring.Bitmap
-
- // Term frequencies
- // postings list id -> Freqs (one for each hit in bitmap)
- Freqs [][]uint64
-
- // Field norms
- // postings list id -> Norms (one for each hit in bitmap)
- Norms [][]float32
-
- // Field/start/end/pos/locarraypos
- // postings list id -> start/end/pos/locarraypos (one for each freq)
- Locfields [][]uint16
- Locstarts [][]uint64
- Locends [][]uint64
- Locpos [][]uint64
- Locarraypos [][][]uint64
-
- // Stored field values
- // docNum -> field id -> slice of values (each value []byte)
- Stored []map[uint16][][]byte
-
- // Stored field types
- // docNum -> field id -> slice of types (each type byte)
- StoredTypes []map[uint16][]byte
-
- // Stored field array positions
- // docNum -> field id -> slice of array positions (each is []uint64)
- StoredPos []map[uint16][][]uint64
-
- // For storing the docValue persisted fields
- DocValueFields map[uint16]bool
-
- // Footprint of the segment, updated when analyzed document mutations
- // are added into the segment
- sizeInBytes uint64
-}
-
-// New builds a new empty Segment
-func New() *Segment {
- return &Segment{
- FieldsMap: map[string]uint16{},
- DocValueFields: map[uint16]bool{},
- }
-}
-
-func (s *Segment) updateSizeInBytes() {
- var sizeInBytes uint64
-
- // FieldsMap, FieldsInv
- for k, _ := range s.FieldsMap {
- sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 +
- 2 /* size of uint16 */)
- }
- // overhead from the data structures
- sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice)
-
- // Dicts, DictKeys
- for _, entry := range s.Dicts {
- for k, _ := range entry {
- sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 +
- 8 /* size of uint64 */)
- }
- // overhead from the data structures
- sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice)
- }
- sizeInBytes += (segment.SizeOfSlice * 2)
-
- // Postings, PostingsLocs
- for i := 0; i < len(s.Postings); i++ {
- sizeInBytes += (s.Postings[i].GetSizeInBytes() + segment.SizeOfPointer) +
- (s.PostingsLocs[i].GetSizeInBytes() + segment.SizeOfPointer)
- }
- sizeInBytes += (segment.SizeOfSlice * 2)
-
- // Freqs, Norms
- for i := 0; i < len(s.Freqs); i++ {
- sizeInBytes += uint64(len(s.Freqs[i])*8 /* size of uint64 */ +
- len(s.Norms[i])*4 /* size of float32 */) +
- (segment.SizeOfSlice * 2)
- }
- sizeInBytes += (segment.SizeOfSlice * 2)
-
- // Location data
- for i := 0; i < len(s.Locfields); i++ {
- sizeInBytes += uint64(len(s.Locfields[i])*2 /* size of uint16 */ +
- len(s.Locstarts[i])*8 /* size of uint64 */ +
- len(s.Locends[i])*8 /* size of uint64 */ +
- len(s.Locpos[i])*8 /* size of uint64 */)
-
- for j := 0; j < len(s.Locarraypos[i]); j++ {
- sizeInBytes += uint64(len(s.Locarraypos[i][j])*8 /* size of uint64 */) +
- segment.SizeOfSlice
- }
-
- sizeInBytes += (segment.SizeOfSlice * 5)
- }
- sizeInBytes += (segment.SizeOfSlice * 5)
-
- // Stored data
- for i := 0; i < len(s.Stored); i++ {
- for _, v := range s.Stored[i] {
- sizeInBytes += uint64(2 /* size of uint16 */)
- for _, arr := range v {
- sizeInBytes += uint64(len(arr)) + segment.SizeOfSlice
- }
- sizeInBytes += segment.SizeOfSlice
- }
-
- for _, v := range s.StoredTypes[i] {
- sizeInBytes += uint64(2 /* size of uint16 */ +len(v)) + segment.SizeOfSlice
- }
-
- for _, v := range s.StoredPos[i] {
- sizeInBytes += uint64(2 /* size of uint16 */)
- for _, arr := range v {
- sizeInBytes += uint64(len(arr)*8 /* size of uint64 */) +
- segment.SizeOfSlice
- }
- sizeInBytes += segment.SizeOfSlice
- }
-
- // overhead from map(s) within Stored, StoredTypes, StoredPos
- sizeInBytes += (segment.SizeOfMap * 3)
- }
- // overhead from data structures: Stored, StoredTypes, StoredPos
- sizeInBytes += (segment.SizeOfSlice * 3)
-
- // DocValueFields
- sizeInBytes += uint64(len(s.DocValueFields)*3 /* size of uint16 + bool */) +
- segment.SizeOfMap
-
- // SizeInBytes
- sizeInBytes += uint64(8)
-
- s.sizeInBytes = sizeInBytes
-}
-
-func (s *Segment) SizeInBytes() uint64 {
- return s.sizeInBytes
-}
-
-func (s *Segment) AddRef() {
-}
-
-func (s *Segment) DecRef() error {
- return nil
-}
-
-// Fields returns the field names used in this segment
-func (s *Segment) Fields() []string {
- return s.FieldsInv
-}
-
-// VisitDocument invokes the DocFieldValueVistor for each stored field
-// for the specified doc number
-func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error {
- // ensure document number exists
- if int(num) > len(s.Stored)-1 {
- return nil
- }
- docFields := s.Stored[int(num)]
- st := s.StoredTypes[int(num)]
- sp := s.StoredPos[int(num)]
- for field, values := range docFields {
- for i, value := range values {
- keepGoing := visitor(s.FieldsInv[field], st[field][i], value, sp[field][i])
- if !keepGoing {
- return nil
- }
- }
- }
- return nil
-}
-
-func (s *Segment) getField(name string) (int, error) {
- fieldID, ok := s.FieldsMap[name]
- if !ok {
- return 0, fmt.Errorf("no field named %s", name)
- }
- return int(fieldID - 1), nil
-}
-
-// Dictionary returns the term dictionary for the specified field
-func (s *Segment) Dictionary(field string) (segment.TermDictionary, error) {
- fieldID, err := s.getField(field)
- if err != nil {
- // no such field, return empty dictionary
- return &segment.EmptyDictionary{}, nil
- }
- return &Dictionary{
- segment: s,
- field: field,
- fieldID: uint16(fieldID),
- }, nil
-}
-
-// Count returns the number of documents in this segment
-// (this has no notion of deleted docs)
-func (s *Segment) Count() uint64 {
- return uint64(len(s.Stored))
-}
-
-// DocNumbers returns a bitset corresponding to the doc numbers of all the
-// provided _id strings
-func (s *Segment) DocNumbers(ids []string) (*roaring.Bitmap, error) {
- rv := roaring.New()
-
- // guard against empty segment
- if len(s.FieldsMap) > 0 {
- idDictionary := s.Dicts[idFieldID]
-
- for _, id := range ids {
- postingID := idDictionary[id]
- if postingID > 0 {
- rv.Or(s.Postings[postingID-1])
- }
- }
- }
- return rv, nil
-}
-
-// Close releases all resources associated with this segment
-func (s *Segment) Close() error {
- return nil
-}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go
new file mode 100644
index 0000000000..3aa151d64d
--- /dev/null
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go
@@ -0,0 +1,75 @@
+// Copyright (c) 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package segment
+
+import (
+ "regexp/syntax"
+
+ "github.com/couchbase/vellum/regexp"
+)
+
+func ParseRegexp(pattern string) (a *regexp.Regexp, prefixBeg, prefixEnd []byte, err error) {
+ // TODO: potential optimization where syntax.Regexp supports a Simplify() API?
+
+ parsed, err := syntax.Parse(pattern, syntax.Perl)
+ if err != nil {
+ return nil, nil, nil, err
+ }
+
+ re, err := regexp.NewParsedWithLimit(pattern, parsed, regexp.DefaultLimit)
+ if err != nil {
+ return nil, nil, nil, err
+ }
+
+ prefix := LiteralPrefix(parsed)
+ if prefix != "" {
+ prefixBeg := []byte(prefix)
+ prefixEnd := IncrementBytes(prefixBeg)
+ return re, prefixBeg, prefixEnd, nil
+ }
+
+ return re, nil, nil, nil
+}
+
+// Returns the literal prefix given the parse tree for a regexp
+func LiteralPrefix(s *syntax.Regexp) string {
+ // traverse the left-most branch in the parse tree as long as the
+ // node represents a concatenation
+ for s != nil && s.Op == syntax.OpConcat {
+ if len(s.Sub) < 1 {
+ return ""
+ }
+
+ s = s.Sub[0]
+ }
+
+ if s.Op == syntax.OpLiteral {
+ return string(s.Rune)
+ }
+
+ return "" // no literal prefix
+}
+
+func IncrementBytes(in []byte) []byte {
+ rv := make([]byte, len(in))
+ copy(rv, in)
+ for i := len(rv) - 1; i >= 0; i-- {
+ rv[i] = rv[i] + 1
+ if rv[i] != 0 {
+ return rv // didn't overflow, so stop
+ }
+ }
+ return nil // overflowed
+}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go
index d5435ab96b..b94d6f979f 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go
@@ -15,15 +15,14 @@
package segment
import (
+ "fmt"
+
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
+ "github.com/couchbase/vellum"
)
-// Overhead from go data structures when deployed on a 64-bit system.
-const SizeOfMap uint64 = 8
-const SizeOfPointer uint64 = 8
-const SizeOfSlice uint64 = 24
-const SizeOfString uint64 = 16
+var ErrClosed = fmt.Errorf("index closed")
// DocumentFieldValueVisitor defines a callback to be visited for each
// stored field value. The return value determines if the visitor
@@ -34,6 +33,9 @@ type Segment interface {
Dictionary(field string) (TermDictionary, error)
VisitDocument(num uint64, visitor DocumentFieldValueVisitor) error
+
+ DocID(num uint64) ([]byte, error)
+
Count() uint64
DocNumbers([]string) (*roaring.Bitmap, error)
@@ -42,18 +44,21 @@ type Segment interface {
Close() error
- SizeInBytes() uint64
+ Size() int
AddRef()
DecRef() error
}
type TermDictionary interface {
- PostingsList(term string, except *roaring.Bitmap) (PostingsList, error)
+ PostingsList(term []byte, except *roaring.Bitmap, prealloc PostingsList) (PostingsList, error)
Iterator() DictionaryIterator
PrefixIterator(prefix string) DictionaryIterator
RangeIterator(start, end string) DictionaryIterator
+ AutomatonIterator(a vellum.Automaton,
+ startKeyInclusive, endKeyExclusive []byte) DictionaryIterator
+ OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator
}
type DictionaryIterator interface {
@@ -61,7 +66,9 @@ type DictionaryIterator interface {
}
type PostingsList interface {
- Iterator() PostingsIterator
+ Iterator(includeFreq, includeNorm, includeLocations bool, prealloc PostingsIterator) PostingsIterator
+
+ Size() int
Count() uint64
@@ -77,6 +84,14 @@ type PostingsIterator interface {
// implementations may return a shared instance to reduce memory
// allocations.
Next() (Posting, error)
+
+ // Advance will return the posting with the specified doc number
+ // or if there is no such posting, the next posting.
+ // Callers MUST NOT attempt to pass a docNum that is less than or
+ // equal to the currently visited posting doc Num.
+ Advance(docNum uint64) (Posting, error)
+
+ Size() int
}
type Posting interface {
@@ -86,6 +101,8 @@ type Posting interface {
Norm() float64
Locations() []Location
+
+ Size() int
}
type Location interface {
@@ -94,6 +111,7 @@ type Location interface {
End() uint64
Pos() uint64
ArrayPositions() []uint64
+ Size() int
}
// DocumentFieldTermVisitable is implemented by various scorch segment
@@ -101,10 +119,17 @@ type Location interface {
// postings or other indexed values.
type DocumentFieldTermVisitable interface {
VisitDocumentFieldTerms(localDocNum uint64, fields []string,
- visitor index.DocumentFieldTermVisitor) error
+ visitor index.DocumentFieldTermVisitor, optional DocVisitState) (DocVisitState, error)
// VisitableDocValueFields implementation should return
// the list of fields which are document value persisted and
// therefore visitable by the above VisitDocumentFieldTerms method.
VisitableDocValueFields() ([]string, error)
}
+
+type DocVisitState interface {
+}
+
+type StatsReporter interface {
+ ReportBytesWritten(bytesWritten uint64)
+}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go
index 72357ae7d7..91bfd4e24e 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go
@@ -16,19 +16,13 @@ package zap
import (
"bufio"
- "bytes"
- "encoding/binary"
"math"
"os"
- "sort"
-
- "github.com/Smerity/govarint"
- "github.com/blevesearch/bleve/index/scorch/segment/mem"
- "github.com/couchbase/vellum"
- "github.com/golang/snappy"
)
-const version uint32 = 3
+const Version uint32 = 11
+
+const Type string = "zap"
const fieldNotUninverted = math.MaxUint64
@@ -82,219 +76,39 @@ func PersistSegmentBase(sb *SegmentBase, path string) error {
return nil
}
-// PersistSegment takes the in-memory segment and persists it to
-// the specified path in the zap file format.
-func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) error {
- flag := os.O_RDWR | os.O_CREATE
-
- f, err := os.OpenFile(path, flag, 0600)
- if err != nil {
- return err
- }
-
- cleanup := func() {
- _ = f.Close()
- _ = os.Remove(path)
- }
-
- // buffer the output
- br := bufio.NewWriter(f)
-
- // wrap it for counting (tracking offsets)
- cr := NewCountHashWriter(br)
-
- numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, _, err :=
- persistBase(memSegment, cr, chunkFactor)
- if err != nil {
- cleanup()
- return err
- }
-
- err = persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset,
- chunkFactor, cr.Sum32(), cr)
- if err != nil {
- cleanup()
- return err
- }
-
- err = br.Flush()
- if err != nil {
- cleanup()
- return err
- }
-
- err = f.Sync()
- if err != nil {
- cleanup()
- return err
- }
-
- err = f.Close()
- if err != nil {
- cleanup()
- return err
- }
-
- return nil
-}
-
-func persistBase(memSegment *mem.Segment, cr *CountHashWriter, chunkFactor uint32) (
- numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64,
- dictLocs []uint64, err error) {
- docValueOffset = uint64(fieldNotUninverted)
-
- if len(memSegment.Stored) > 0 {
- storedIndexOffset, err = persistStored(memSegment, cr)
- if err != nil {
- return 0, 0, 0, 0, nil, err
- }
-
- freqOffsets, locOffsets, err := persistPostingDetails(memSegment, cr, chunkFactor)
- if err != nil {
- return 0, 0, 0, 0, nil, err
- }
-
- postingsListLocs, err := persistPostingsLocs(memSegment, cr)
- if err != nil {
- return 0, 0, 0, 0, nil, err
- }
-
- postingsLocs, err := persistPostingsLists(memSegment, cr, postingsListLocs, freqOffsets, locOffsets)
- if err != nil {
- return 0, 0, 0, 0, nil, err
- }
-
- dictLocs, err = persistDictionary(memSegment, cr, postingsLocs)
- if err != nil {
- return 0, 0, 0, 0, nil, err
- }
-
- docValueOffset, err = persistFieldDocValues(memSegment, cr, chunkFactor)
- if err != nil {
- return 0, 0, 0, 0, nil, err
- }
- } else {
- dictLocs = make([]uint64, len(memSegment.FieldsInv))
- }
-
- fieldsIndexOffset, err = persistFields(memSegment.FieldsInv, cr, dictLocs)
- if err != nil {
- return 0, 0, 0, 0, nil, err
- }
-
- return uint64(len(memSegment.Stored)), storedIndexOffset, fieldsIndexOffset, docValueOffset,
- dictLocs, nil
-}
-
-func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error) {
- var curr int
- var metaBuf bytes.Buffer
- var data, compressed []byte
-
- metaEncoder := govarint.NewU64Base128Encoder(&metaBuf)
-
- docNumOffsets := make(map[int]uint64, len(memSegment.Stored))
-
- for docNum, storedValues := range memSegment.Stored {
- if docNum != 0 {
- // reset buffer if necessary
- curr = 0
- metaBuf.Reset()
- data = data[:0]
- compressed = compressed[:0]
- }
-
- st := memSegment.StoredTypes[docNum]
- sp := memSegment.StoredPos[docNum]
-
- // encode fields in order
- for fieldID := range memSegment.FieldsInv {
- if storedFieldValues, ok := storedValues[uint16(fieldID)]; ok {
- stf := st[uint16(fieldID)]
- spf := sp[uint16(fieldID)]
-
- var err2 error
- curr, data, err2 = persistStoredFieldValues(fieldID,
- storedFieldValues, stf, spf, curr, metaEncoder, data)
- if err2 != nil {
- return 0, err2
- }
- }
- }
-
- metaEncoder.Close()
- metaBytes := metaBuf.Bytes()
-
- // compress the data
- compressed = snappy.Encode(compressed, data)
-
- // record where we're about to start writing
- docNumOffsets[docNum] = uint64(w.Count())
-
- // write out the meta len and compressed data len
- _, err := writeUvarints(w, uint64(len(metaBytes)), uint64(len(compressed)))
- if err != nil {
- return 0, err
- }
-
- // now write the meta
- _, err = w.Write(metaBytes)
- if err != nil {
- return 0, err
- }
- // now write the compressed data
- _, err = w.Write(compressed)
- if err != nil {
- return 0, err
- }
- }
-
- // return value is the start of the stored index
- rv := uint64(w.Count())
- // now write out the stored doc index
- for docNum := range memSegment.Stored {
- err := binary.Write(w, binary.BigEndian, docNumOffsets[docNum])
- if err != nil {
- return 0, err
- }
- }
-
- return rv, nil
-}
-
func persistStoredFieldValues(fieldID int,
storedFieldValues [][]byte, stf []byte, spf [][]uint64,
- curr int, metaEncoder *govarint.Base128Encoder, data []byte) (
+ curr int, metaEncode varintEncoder, data []byte) (
int, []byte, error) {
for i := 0; i < len(storedFieldValues); i++ {
// encode field
- _, err := metaEncoder.PutU64(uint64(fieldID))
+ _, err := metaEncode(uint64(fieldID))
if err != nil {
return 0, nil, err
}
// encode type
- _, err = metaEncoder.PutU64(uint64(stf[i]))
+ _, err = metaEncode(uint64(stf[i]))
if err != nil {
return 0, nil, err
}
// encode start offset
- _, err = metaEncoder.PutU64(uint64(curr))
+ _, err = metaEncode(uint64(curr))
if err != nil {
return 0, nil, err
}
// end len
- _, err = metaEncoder.PutU64(uint64(len(storedFieldValues[i])))
+ _, err = metaEncode(uint64(len(storedFieldValues[i])))
if err != nil {
return 0, nil, err
}
// encode number of array pos
- _, err = metaEncoder.PutU64(uint64(len(spf[i])))
+ _, err = metaEncode(uint64(len(spf[i])))
if err != nil {
return 0, nil, err
}
// encode all array positions
for _, pos := range spf[i] {
- _, err = metaEncoder.PutU64(pos)
+ _, err = metaEncode(pos)
if err != nil {
return 0, nil, err
}
@@ -307,337 +121,6 @@ func persistStoredFieldValues(fieldID int,
return curr, data, nil
}
-func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFactor uint32) ([]uint64, []uint64, error) {
- var freqOffsets, locOfffsets []uint64
- tfEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
- for postingID := range memSegment.Postings {
- if postingID != 0 {
- tfEncoder.Reset()
- }
- freqs := memSegment.Freqs[postingID]
- norms := memSegment.Norms[postingID]
- postingsListItr := memSegment.Postings[postingID].Iterator()
- var offset int
- for postingsListItr.HasNext() {
-
- docNum := uint64(postingsListItr.Next())
-
- // put freq
- err := tfEncoder.Add(docNum, freqs[offset])
- if err != nil {
- return nil, nil, err
- }
-
- // put norm
- norm := norms[offset]
- normBits := math.Float32bits(norm)
- err = tfEncoder.Add(docNum, uint64(normBits))
- if err != nil {
- return nil, nil, err
- }
-
- offset++
- }
-
- // record where this postings freq info starts
- freqOffsets = append(freqOffsets, uint64(w.Count()))
-
- tfEncoder.Close()
- _, err := tfEncoder.Write(w)
- if err != nil {
- return nil, nil, err
- }
-
- }
-
- // now do it again for the locations
- locEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
- for postingID := range memSegment.Postings {
- if postingID != 0 {
- locEncoder.Reset()
- }
- freqs := memSegment.Freqs[postingID]
- locfields := memSegment.Locfields[postingID]
- locpos := memSegment.Locpos[postingID]
- locstarts := memSegment.Locstarts[postingID]
- locends := memSegment.Locends[postingID]
- locarraypos := memSegment.Locarraypos[postingID]
- postingsListItr := memSegment.Postings[postingID].Iterator()
- var offset int
- var locOffset int
- for postingsListItr.HasNext() {
- docNum := uint64(postingsListItr.Next())
- for i := 0; i < int(freqs[offset]); i++ {
- if len(locfields) > 0 {
- // put field
- err := locEncoder.Add(docNum, uint64(locfields[locOffset]))
- if err != nil {
- return nil, nil, err
- }
-
- // put pos
- err = locEncoder.Add(docNum, locpos[locOffset])
- if err != nil {
- return nil, nil, err
- }
-
- // put start
- err = locEncoder.Add(docNum, locstarts[locOffset])
- if err != nil {
- return nil, nil, err
- }
-
- // put end
- err = locEncoder.Add(docNum, locends[locOffset])
- if err != nil {
- return nil, nil, err
- }
-
- // put the number of array positions to follow
- num := len(locarraypos[locOffset])
- err = locEncoder.Add(docNum, uint64(num))
- if err != nil {
- return nil, nil, err
- }
-
- // put each array position
- for _, pos := range locarraypos[locOffset] {
- err = locEncoder.Add(docNum, pos)
- if err != nil {
- return nil, nil, err
- }
- }
- }
- locOffset++
- }
- offset++
- }
-
- // record where this postings loc info starts
- locOfffsets = append(locOfffsets, uint64(w.Count()))
- locEncoder.Close()
- _, err := locEncoder.Write(w)
- if err != nil {
- return nil, nil, err
- }
- }
- return freqOffsets, locOfffsets, nil
-}
-
-func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) {
- rv = make([]uint64, 0, len(memSegment.PostingsLocs))
- var reuseBuf bytes.Buffer
- reuseBufVarint := make([]byte, binary.MaxVarintLen64)
- for postingID := range memSegment.PostingsLocs {
- // record where we start this posting loc
- rv = append(rv, uint64(w.Count()))
- // write out the length and bitmap
- _, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w, &reuseBuf, reuseBufVarint)
- if err != nil {
- return nil, err
- }
- }
- return rv, nil
-}
-
-func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter,
- postingsListLocs, freqOffsets, locOffsets []uint64) (rv []uint64, err error) {
- rv = make([]uint64, 0, len(memSegment.Postings))
- var reuseBuf bytes.Buffer
- reuseBufVarint := make([]byte, binary.MaxVarintLen64)
- for postingID := range memSegment.Postings {
- // record where we start this posting list
- rv = append(rv, uint64(w.Count()))
-
- // write out the term info, loc info, and loc posting list offset
- _, err = writeUvarints(w, freqOffsets[postingID],
- locOffsets[postingID], postingsListLocs[postingID])
- if err != nil {
- return nil, err
- }
-
- // write out the length and bitmap
- _, err = writeRoaringWithLen(memSegment.Postings[postingID], w, &reuseBuf, reuseBufVarint)
- if err != nil {
- return nil, err
- }
- }
- return rv, nil
-}
-
-func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs []uint64) ([]uint64, error) {
- rv := make([]uint64, 0, len(memSegment.DictKeys))
-
- varintBuf := make([]byte, binary.MaxVarintLen64)
-
- var buffer bytes.Buffer
- for fieldID, fieldTerms := range memSegment.DictKeys {
- if fieldID != 0 {
- buffer.Reset()
- }
-
- // start a new vellum for this field
- builder, err := vellum.New(&buffer, nil)
- if err != nil {
- return nil, err
- }
-
- dict := memSegment.Dicts[fieldID]
- // now walk the dictionary in order of fieldTerms (already sorted)
- for _, fieldTerm := range fieldTerms {
- postingID := dict[fieldTerm] - 1
- postingsAddr := postingsLocs[postingID]
- err = builder.Insert([]byte(fieldTerm), postingsAddr)
- if err != nil {
- return nil, err
- }
- }
- err = builder.Close()
- if err != nil {
- return nil, err
- }
-
- // record where this dictionary starts
- rv = append(rv, uint64(w.Count()))
-
- vellumData := buffer.Bytes()
-
- // write out the length of the vellum data
- n := binary.PutUvarint(varintBuf, uint64(len(vellumData)))
- _, err = w.Write(varintBuf[:n])
- if err != nil {
- return nil, err
- }
-
- // write this vellum to disk
- _, err = w.Write(vellumData)
- if err != nil {
- return nil, err
- }
- }
-
- return rv, nil
-}
-
-type docIDRange []uint64
-
-func (a docIDRange) Len() int { return len(a) }
-func (a docIDRange) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
-func (a docIDRange) Less(i, j int) bool { return a[i] < a[j] }
-
-func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
- chunkFactor uint32) (map[uint16]uint64, error) {
- fieldChunkOffsets := make(map[uint16]uint64, len(memSegment.FieldsInv))
- fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
-
- for fieldID := range memSegment.DocValueFields {
- field := memSegment.FieldsInv[fieldID]
- docTermMap := make(map[uint64][]byte, 0)
- dict, err := memSegment.Dictionary(field)
- if err != nil {
- return nil, err
- }
-
- dictItr := dict.Iterator()
- next, err := dictItr.Next()
- for err == nil && next != nil {
- postings, err1 := dict.PostingsList(next.Term, nil)
- if err1 != nil {
- return nil, err
- }
-
- postingsItr := postings.Iterator()
- nextPosting, err2 := postingsItr.Next()
- for err2 == nil && nextPosting != nil {
- docNum := nextPosting.Number()
- docTermMap[docNum] = append(docTermMap[docNum], []byte(next.Term)...)
- docTermMap[docNum] = append(docTermMap[docNum], termSeparator)
- nextPosting, err2 = postingsItr.Next()
- }
- if err2 != nil {
- return nil, err2
- }
-
- next, err = dictItr.Next()
- }
-
- if err != nil {
- return nil, err
- }
- // sort wrt to docIDs
- var docNumbers docIDRange
- for k := range docTermMap {
- docNumbers = append(docNumbers, k)
- }
- sort.Sort(docNumbers)
-
- for _, docNum := range docNumbers {
- err = fdvEncoder.Add(docNum, docTermMap[docNum])
- if err != nil {
- return nil, err
- }
- }
-
- fieldChunkOffsets[fieldID] = uint64(w.Count())
- err = fdvEncoder.Close()
- if err != nil {
- return nil, err
- }
- // persist the doc value details for this field
- _, err = fdvEncoder.Write(w)
- if err != nil {
- return nil, err
- }
- // reseting encoder for the next field
- fdvEncoder.Reset()
- }
-
- return fieldChunkOffsets, nil
-}
-
-func persistFieldDocValues(memSegment *mem.Segment, w *CountHashWriter,
- chunkFactor uint32) (uint64, error) {
- fieldDvOffsets, err := persistDocValues(memSegment, w, chunkFactor)
- if err != nil {
- return 0, err
- }
-
- fieldDocValuesOffset := uint64(w.Count())
- buf := make([]byte, binary.MaxVarintLen64)
- offset := uint64(0)
- ok := true
- for fieldID := range memSegment.FieldsInv {
- // if the field isn't configured for docValue, then mark
- // the offset accordingly
- if offset, ok = fieldDvOffsets[uint16(fieldID)]; !ok {
- offset = fieldNotUninverted
- }
- n := binary.PutUvarint(buf, uint64(offset))
- _, err := w.Write(buf[:n])
- if err != nil {
- return 0, err
- }
- }
-
- return fieldDocValuesOffset, nil
-}
-
-func NewSegmentBase(memSegment *mem.Segment, chunkFactor uint32) (*SegmentBase, error) {
- var br bytes.Buffer
-
- cr := NewCountHashWriter(&br)
-
- numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs, err :=
- persistBase(memSegment, cr, chunkFactor)
- if err != nil {
- return nil, err
- }
-
- return InitSegmentBase(br.Bytes(), cr.Sum32(), chunkFactor,
- memSegment.FieldsMap, memSegment.FieldsInv, numDocs,
- storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs)
-}
-
func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32,
fieldsMap map[string]uint16, fieldsInv []string, numDocs uint64,
storedIndexOffset uint64, fieldsIndexOffset uint64, docValueOffset uint64,
@@ -653,10 +136,11 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32,
fieldsIndexOffset: fieldsIndexOffset,
docValueOffset: docValueOffset,
dictLocs: dictLocs,
- fieldDvIterMap: make(map[uint16]*docValueIterator),
+ fieldDvReaders: make(map[uint16]*docValueReader),
}
+ sb.updateSize()
- err := sb.loadDvIterators()
+ err := sb.loadDvReaders()
if err != nil {
return nil, err
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/contentcoder.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/contentcoder.go
index 83457146ec..b9ff8179b3 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/contentcoder.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/contentcoder.go
@@ -18,41 +18,56 @@ import (
"bytes"
"encoding/binary"
"io"
+ "reflect"
"github.com/golang/snappy"
)
+var reflectStaticSizeMetaData int
+
+func init() {
+ var md MetaData
+ reflectStaticSizeMetaData = int(reflect.TypeOf(md).Size())
+}
+
var termSeparator byte = 0xff
var termSeparatorSplitSlice = []byte{termSeparator}
type chunkedContentCoder struct {
- final []byte
- chunkSize uint64
- currChunk uint64
- chunkLens []uint64
+ final []byte
+ chunkSize uint64
+ currChunk uint64
+ chunkLens []uint64
+
+ w io.Writer
+ progressiveWrite bool
+
chunkMetaBuf bytes.Buffer
chunkBuf bytes.Buffer
chunkMeta []MetaData
+
+ compressed []byte // temp buf for snappy compression
}
// MetaData represents the data information inside a
// chunk.
type MetaData struct {
- DocNum uint64 // docNum of the data inside the chunk
- DocDvLoc uint64 // starting offset for a given docid
- DocDvLen uint64 // length of data inside the chunk for the given docid
+ DocNum uint64 // docNum of the data inside the chunk
+ DocDvOffset uint64 // offset of data inside the chunk for the given docid
}
// newChunkedContentCoder returns a new chunk content coder which
// packs data into chunks based on the provided chunkSize
-func newChunkedContentCoder(chunkSize uint64,
- maxDocNum uint64) *chunkedContentCoder {
+func newChunkedContentCoder(chunkSize uint64, maxDocNum uint64,
+ w io.Writer, progressiveWrite bool) *chunkedContentCoder {
total := maxDocNum/chunkSize + 1
rv := &chunkedContentCoder{
- chunkSize: chunkSize,
- chunkLens: make([]uint64, total),
- chunkMeta: make([]MetaData, 0, total),
+ chunkSize: chunkSize,
+ chunkLens: make([]uint64, total),
+ chunkMeta: make([]MetaData, 0, total),
+ w: w,
+ progressiveWrite: progressiveWrite,
}
return rv
@@ -88,7 +103,7 @@ func (c *chunkedContentCoder) flushContents() error {
// write out the metaData slice
for _, meta := range c.chunkMeta {
- _, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvLoc, meta.DocDvLen)
+ _, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvOffset)
if err != nil {
return err
}
@@ -98,10 +113,19 @@ func (c *chunkedContentCoder) flushContents() error {
metaData := c.chunkMetaBuf.Bytes()
c.final = append(c.final, c.chunkMetaBuf.Bytes()...)
// write the compressed data to the final data
- compressedData := snappy.Encode(nil, c.chunkBuf.Bytes())
- c.final = append(c.final, compressedData...)
+ c.compressed = snappy.Encode(c.compressed[:cap(c.compressed)], c.chunkBuf.Bytes())
+ c.final = append(c.final, c.compressed...)
+
+ c.chunkLens[c.currChunk] = uint64(len(c.compressed) + len(metaData))
+
+ if c.progressiveWrite {
+ _, err := c.w.Write(c.final)
+ if err != nil {
+ return err
+ }
+ c.final = c.final[:0]
+ }
- c.chunkLens[c.currChunk] = uint64(len(compressedData) + len(metaData))
return nil
}
@@ -122,7 +146,7 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
c.currChunk = chunk
}
- // mark the starting offset for this doc
+ // get the starting offset for this doc
dvOffset := c.chunkBuf.Len()
dvSize, err := c.chunkBuf.Write(vals)
if err != nil {
@@ -130,38 +154,77 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error {
}
c.chunkMeta = append(c.chunkMeta, MetaData{
- DocNum: docNum,
- DocDvLoc: uint64(dvOffset),
- DocDvLen: uint64(dvSize),
+ DocNum: docNum,
+ DocDvOffset: uint64(dvOffset + dvSize),
})
return nil
}
// Write commits all the encoded chunked contents to the provided writer.
-func (c *chunkedContentCoder) Write(w io.Writer) (int, error) {
+//
+// | ..... data ..... | chunk offsets (varints)
+// | position of chunk offsets (uint64) | number of offsets (uint64) |
+//
+func (c *chunkedContentCoder) Write() (int, error) {
var tw int
- buf := make([]byte, binary.MaxVarintLen64)
- // write out the number of chunks
- n := binary.PutUvarint(buf, uint64(len(c.chunkLens)))
- nw, err := w.Write(buf[:n])
- tw += nw
- if err != nil {
- return tw, err
+
+ if c.final != nil {
+ // write out the data section first
+ nw, err := c.w.Write(c.final)
+ tw += nw
+ if err != nil {
+ return tw, err
+ }
+ }
+
+ chunkOffsetsStart := uint64(tw)
+
+ if cap(c.final) < binary.MaxVarintLen64 {
+ c.final = make([]byte, binary.MaxVarintLen64)
+ } else {
+ c.final = c.final[0:binary.MaxVarintLen64]
}
- // write out the chunk lens
- for _, chunkLen := range c.chunkLens {
- n := binary.PutUvarint(buf, uint64(chunkLen))
- nw, err = w.Write(buf[:n])
+ chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens)
+ // write out the chunk offsets
+ for _, chunkOffset := range chunkOffsets {
+ n := binary.PutUvarint(c.final, chunkOffset)
+ nw, err := c.w.Write(c.final[:n])
tw += nw
if err != nil {
return tw, err
}
}
- // write out the data
- nw, err = w.Write(c.final)
+
+ chunkOffsetsLen := uint64(tw) - chunkOffsetsStart
+
+ c.final = c.final[0:8]
+ // write out the length of chunk offsets
+ binary.BigEndian.PutUint64(c.final, chunkOffsetsLen)
+ nw, err := c.w.Write(c.final)
+ tw += nw
+ if err != nil {
+ return tw, err
+ }
+
+ // write out the number of chunks
+ binary.BigEndian.PutUint64(c.final, uint64(len(c.chunkLens)))
+ nw, err = c.w.Write(c.final)
tw += nw
if err != nil {
return tw, err
}
+
+ c.final = c.final[:0]
+
return tw, nil
}
+
+// ReadDocValueBoundary elicits the start, end offsets from a
+// metaData header slice
+func ReadDocValueBoundary(chunk int, metaHeaders []MetaData) (uint64, uint64) {
+ var start uint64
+ if chunk > 0 {
+ start = metaHeaders[chunk-1].DocDvOffset
+ }
+ return start, metaHeaders[chunk].DocDvOffset
+}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/count.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/count.go
index d75e83c032..50290f8882 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/count.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/count.go
@@ -17,6 +17,8 @@ package zap
import (
"hash/crc32"
"io"
+
+ "github.com/blevesearch/bleve/index/scorch/segment"
)
// CountHashWriter is a wrapper around a Writer which counts the number of
@@ -25,6 +27,7 @@ type CountHashWriter struct {
w io.Writer
crc uint32
n int
+ s segment.StatsReporter
}
// NewCountHashWriter returns a CountHashWriter which wraps the provided Writer
@@ -32,11 +35,18 @@ func NewCountHashWriter(w io.Writer) *CountHashWriter {
return &CountHashWriter{w: w}
}
+func NewCountHashWriterWithStatsReporter(w io.Writer, s segment.StatsReporter) *CountHashWriter {
+ return &CountHashWriter{w: w, s: s}
+}
+
// Write writes the provided bytes to the wrapped writer and counts the bytes
func (c *CountHashWriter) Write(b []byte) (int, error) {
n, err := c.w.Write(b)
c.crc = crc32.Update(c.crc, crc32.IEEETable, b[:n])
c.n += n
+ if c.s != nil {
+ c.s.ReportBytesWritten(uint64(n))
+ }
return n, err
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go
index e5d7126866..2c0e1bf2ad 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go
@@ -15,38 +15,51 @@
package zap
import (
+ "bytes"
"fmt"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/couchbase/vellum"
- "github.com/couchbase/vellum/regexp"
)
// Dictionary is the zap representation of the term dictionary
type Dictionary struct {
- sb *SegmentBase
- field string
- fieldID uint16
- fst *vellum.FST
+ sb *SegmentBase
+ field string
+ fieldID uint16
+ fst *vellum.FST
+ fstReader *vellum.Reader
}
// PostingsList returns the postings list for the specified term
-func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) {
- return d.postingsList([]byte(term), except, nil)
+func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap,
+ prealloc segment.PostingsList) (segment.PostingsList, error) {
+ var preallocPL *PostingsList
+ pl, ok := prealloc.(*PostingsList)
+ if ok && pl != nil {
+ preallocPL = pl
+ }
+ return d.postingsList(term, except, preallocPL)
}
func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
- if d.fst == nil {
+ if d.fstReader == nil {
+ if rv == nil || rv == emptyPostingsList {
+ return emptyPostingsList, nil
+ }
return d.postingsListInit(rv, except), nil
}
- postingsOffset, exists, err := d.fst.Get(term)
+ postingsOffset, exists, err := d.fstReader.Get(term)
if err != nil {
return nil, fmt.Errorf("vellum err: %v", err)
}
if !exists {
+ if rv == nil || rv == emptyPostingsList {
+ return emptyPostingsList, nil
+ }
return d.postingsListInit(rv, except), nil
}
@@ -65,10 +78,17 @@ func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roari
}
func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList {
- if rv == nil {
+ if rv == nil || rv == emptyPostingsList {
rv = &PostingsList{}
} else {
+ postings := rv.postings
+ if postings != nil {
+ postings.Clear()
+ }
+
*rv = PostingsList{} // clear the struct
+
+ rv.postings = postings
}
rv.sb = d.sb
rv.except = except
@@ -85,6 +105,8 @@ func (d *Dictionary) Iterator() segment.DictionaryIterator {
itr, err := d.fst.Iterator(nil, nil)
if err == nil {
rv.itr = itr
+ } else if err != vellum.ErrIteratorDone {
+ rv.err = err
}
}
@@ -98,13 +120,15 @@ func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator {
d: d,
}
+ kBeg := []byte(prefix)
+ kEnd := segment.IncrementBytes(kBeg)
+
if d.fst != nil {
- r, err := regexp.New(prefix + ".*")
+ itr, err := d.fst.Iterator(kBeg, kEnd)
if err == nil {
- itr, err := d.fst.Search(r, nil, nil)
- if err == nil {
- rv.itr = itr
- }
+ rv.itr = itr
+ } else if err != vellum.ErrIteratorDone {
+ rv.err = err
}
}
@@ -130,36 +154,103 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator
itr, err := d.fst.Iterator([]byte(start), endBytes)
if err == nil {
rv.itr = itr
+ } else if err != vellum.ErrIteratorDone {
+ rv.err = err
+ }
+ }
+
+ return rv
+}
+
+// AutomatonIterator returns an iterator which only visits terms
+// having the the vellum automaton and start/end key range
+func (d *Dictionary) AutomatonIterator(a vellum.Automaton,
+ startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator {
+ rv := &DictionaryIterator{
+ d: d,
+ }
+
+ if d.fst != nil {
+ itr, err := d.fst.Search(a, startKeyInclusive, endKeyExclusive)
+ if err == nil {
+ rv.itr = itr
+ } else if err != vellum.ErrIteratorDone {
+ rv.err = err
+ }
+ }
+
+ return rv
+}
+
+func (d *Dictionary) OnlyIterator(onlyTerms [][]byte,
+ includeCount bool) segment.DictionaryIterator {
+
+ rv := &DictionaryIterator{
+ d: d,
+ omitCount: !includeCount,
+ }
+
+ var buf bytes.Buffer
+ builder, err := vellum.New(&buf, nil)
+ if err != nil {
+ rv.err = err
+ return rv
+ }
+ for _, term := range onlyTerms {
+ err = builder.Insert(term, 0)
+ if err != nil {
+ rv.err = err
+ return rv
}
}
+ err = builder.Close()
+ if err != nil {
+ rv.err = err
+ return rv
+ }
+
+ onlyFST, err := vellum.Load(buf.Bytes())
+ if err != nil {
+ rv.err = err
+ return rv
+ }
+
+ itr, err := d.fst.Search(onlyFST, nil, nil)
+ if err == nil {
+ rv.itr = itr
+ } else if err != vellum.ErrIteratorDone {
+ rv.err = err
+ }
return rv
}
// DictionaryIterator is an iterator for term dictionary
type DictionaryIterator struct {
- d *Dictionary
- itr vellum.Iterator
- err error
- tmp PostingsList
+ d *Dictionary
+ itr vellum.Iterator
+ err error
+ tmp PostingsList
+ entry index.DictEntry
+ omitCount bool
}
// Next returns the next entry in the dictionary
func (i *DictionaryIterator) Next() (*index.DictEntry, error) {
- if i.itr == nil || i.err == vellum.ErrIteratorDone {
- return nil, nil
- } else if i.err != nil {
+ if i.err != nil && i.err != vellum.ErrIteratorDone {
return nil, i.err
+ } else if i.itr == nil || i.err == vellum.ErrIteratorDone {
+ return nil, nil
}
term, postingsOffset := i.itr.Current()
- i.err = i.tmp.read(postingsOffset, i.d)
- if i.err != nil {
- return nil, i.err
- }
- rv := &index.DictEntry{
- Term: string(term),
- Count: i.tmp.Count(),
+ i.entry.Term = string(term)
+ if !i.omitCount {
+ i.err = i.tmp.read(postingsOffset, i.d)
+ if i.err != nil {
+ return nil, i.err
+ }
+ i.entry.Count = i.tmp.Count()
}
i.err = i.itr.Next()
- return rv, nil
+ return &i.entry, nil
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go
index 0514bd307c..bcc0f94728 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go
@@ -19,93 +19,129 @@ import (
"encoding/binary"
"fmt"
"math"
+ "reflect"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
+ "github.com/blevesearch/bleve/size"
"github.com/golang/snappy"
)
-type docValueIterator struct {
+var reflectStaticSizedocValueReader int
+
+func init() {
+ var dvi docValueReader
+ reflectStaticSizedocValueReader = int(reflect.TypeOf(dvi).Size())
+}
+
+type docNumTermsVisitor func(docNum uint64, terms []byte) error
+
+type docVisitState struct {
+ dvrs map[uint16]*docValueReader
+ segment *Segment
+}
+
+type docValueReader struct {
field string
curChunkNum uint64
- numChunks uint64
- chunkLens []uint64
+ chunkOffsets []uint64
dvDataLoc uint64
curChunkHeader []MetaData
curChunkData []byte // compressed data cache
+ uncompressed []byte // temp buf for snappy decompression
}
-func (di *docValueIterator) sizeInBytes() uint64 {
- // curChunkNum, numChunks, dvDataLoc --> uint64
- sizeInBytes := 24
-
- // field
- sizeInBytes += (len(di.field) + int(segment.SizeOfString))
+func (di *docValueReader) size() int {
+ return reflectStaticSizedocValueReader + size.SizeOfPtr +
+ len(di.field) +
+ len(di.chunkOffsets)*size.SizeOfUint64 +
+ len(di.curChunkHeader)*reflectStaticSizeMetaData +
+ len(di.curChunkData)
+}
- // chunkLens, curChunkHeader
- sizeInBytes += len(di.chunkLens)*8 +
- len(di.curChunkHeader)*24 +
- int(segment.SizeOfSlice*2) /* overhead from slices */
+func (di *docValueReader) cloneInto(rv *docValueReader) *docValueReader {
+ if rv == nil {
+ rv = &docValueReader{}
+ }
- // curChunkData is mmap'ed, not included
+ rv.field = di.field
+ rv.curChunkNum = math.MaxUint64
+ rv.chunkOffsets = di.chunkOffsets // immutable, so it's sharable
+ rv.dvDataLoc = di.dvDataLoc
+ rv.curChunkHeader = rv.curChunkHeader[:0]
+ rv.curChunkData = nil
+ rv.uncompressed = rv.uncompressed[:0]
- return uint64(sizeInBytes)
+ return rv
}
-func (di *docValueIterator) fieldName() string {
+func (di *docValueReader) fieldName() string {
return di.field
}
-func (di *docValueIterator) curChunkNumber() uint64 {
+func (di *docValueReader) curChunkNumber() uint64 {
return di.curChunkNum
}
-func (s *SegmentBase) loadFieldDocValueIterator(field string,
- fieldDvLoc uint64) (*docValueIterator, error) {
+func (s *SegmentBase) loadFieldDocValueReader(field string,
+ fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) {
// get the docValue offset for the given fields
- if fieldDvLoc == fieldNotUninverted {
- return nil, fmt.Errorf("loadFieldDocValueIterator: "+
+ if fieldDvLocStart == fieldNotUninverted {
+ return nil, fmt.Errorf("loadFieldDocValueReader: "+
"no docValues found for field: %s", field)
}
- // read the number of chunks, chunk lengths
- var offset, clen uint64
- numChunks, read := binary.Uvarint(s.mem[fieldDvLoc : fieldDvLoc+binary.MaxVarintLen64])
- if read <= 0 {
- return nil, fmt.Errorf("failed to read the field "+
- "doc values for field %s", field)
+ // read the number of chunks, and chunk offsets position
+ var numChunks, chunkOffsetsPosition uint64
+
+ if fieldDvLocEnd-fieldDvLocStart > 16 {
+ numChunks = binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-8 : fieldDvLocEnd])
+ // read the length of chunk offsets
+ chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8])
+ // acquire position of chunk offsets
+ chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen
}
- offset += uint64(read)
- fdvIter := &docValueIterator{
- curChunkNum: math.MaxUint64,
- field: field,
- chunkLens: make([]uint64, int(numChunks)),
+ fdvIter := &docValueReader{
+ curChunkNum: math.MaxUint64,
+ field: field,
+ chunkOffsets: make([]uint64, int(numChunks)),
}
+
+ // read the chunk offsets
+ var offset uint64
for i := 0; i < int(numChunks); i++ {
- clen, read = binary.Uvarint(s.mem[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64])
+ loc, read := binary.Uvarint(s.mem[chunkOffsetsPosition+offset : chunkOffsetsPosition+offset+binary.MaxVarintLen64])
if read <= 0 {
- return nil, fmt.Errorf("corrupted chunk length during segment load")
+ return nil, fmt.Errorf("corrupted chunk offset during segment load")
}
- fdvIter.chunkLens[i] = clen
+ fdvIter.chunkOffsets[i] = loc
offset += uint64(read)
}
- fdvIter.dvDataLoc = fieldDvLoc + offset
+ // set the data offset
+ fdvIter.dvDataLoc = fieldDvLocStart
+
return fdvIter, nil
}
-func (di *docValueIterator) loadDvChunk(chunkNumber,
- localDocNum uint64, s *SegmentBase) error {
+func (di *docValueReader) loadDvChunk(chunkNumber uint64, s *SegmentBase) error {
// advance to the chunk where the docValues
// reside for the given docNum
- destChunkDataLoc := di.dvDataLoc
- for i := 0; i < int(chunkNumber); i++ {
- destChunkDataLoc += di.chunkLens[i]
+ destChunkDataLoc, curChunkEnd := di.dvDataLoc, di.dvDataLoc
+ start, end := readChunkBoundary(int(chunkNumber), di.chunkOffsets)
+ if start >= end {
+ di.curChunkHeader = di.curChunkHeader[:0]
+ di.curChunkData = nil
+ di.curChunkNum = chunkNumber
+ di.uncompressed = di.uncompressed[:0]
+ return nil
}
- curChunkSize := di.chunkLens[chunkNumber]
+ destChunkDataLoc += start
+ curChunkEnd += end
+
// read the number of docs reside in the chunk
numDocs, read := binary.Uvarint(s.mem[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64])
if read <= 0 {
@@ -114,38 +150,81 @@ func (di *docValueIterator) loadDvChunk(chunkNumber,
chunkMetaLoc := destChunkDataLoc + uint64(read)
offset := uint64(0)
- di.curChunkHeader = make([]MetaData, int(numDocs))
+ if cap(di.curChunkHeader) < int(numDocs) {
+ di.curChunkHeader = make([]MetaData, int(numDocs))
+ } else {
+ di.curChunkHeader = di.curChunkHeader[:int(numDocs)]
+ }
for i := 0; i < int(numDocs); i++ {
di.curChunkHeader[i].DocNum, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
offset += uint64(read)
- di.curChunkHeader[i].DocDvLoc, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
- offset += uint64(read)
- di.curChunkHeader[i].DocDvLen, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
+ di.curChunkHeader[i].DocDvOffset, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64])
offset += uint64(read)
}
compressedDataLoc := chunkMetaLoc + offset
- dataLength := destChunkDataLoc + curChunkSize - compressedDataLoc
+ dataLength := curChunkEnd - compressedDataLoc
di.curChunkData = s.mem[compressedDataLoc : compressedDataLoc+dataLength]
di.curChunkNum = chunkNumber
+ di.uncompressed = di.uncompressed[:0]
+ return nil
+}
+
+func (di *docValueReader) iterateAllDocValues(s *SegmentBase, visitor docNumTermsVisitor) error {
+ for i := 0; i < len(di.chunkOffsets); i++ {
+ err := di.loadDvChunk(uint64(i), s)
+ if err != nil {
+ return err
+ }
+ if di.curChunkData == nil || len(di.curChunkHeader) == 0 {
+ continue
+ }
+
+ // uncompress the already loaded data
+ uncompressed, err := snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData)
+ if err != nil {
+ return err
+ }
+ di.uncompressed = uncompressed
+
+ start := uint64(0)
+ for _, entry := range di.curChunkHeader {
+ err = visitor(entry.DocNum, uncompressed[start:entry.DocDvOffset])
+ if err != nil {
+ return err
+ }
+
+ start = entry.DocDvOffset
+ }
+ }
+
return nil
}
-func (di *docValueIterator) visitDocValues(docNum uint64,
+func (di *docValueReader) visitDocValues(docNum uint64,
visitor index.DocumentFieldTermVisitor) error {
// binary search the term locations for the docNum
- start, length := di.getDocValueLocs(docNum)
- if start == math.MaxUint64 || length == math.MaxUint64 {
+ start, end := di.getDocValueLocs(docNum)
+ if start == math.MaxUint64 || end == math.MaxUint64 || start == end {
return nil
}
- // uncompress the already loaded data
- uncompressed, err := snappy.Decode(nil, di.curChunkData)
- if err != nil {
- return err
+
+ var uncompressed []byte
+ var err error
+ // use the uncompressed copy if available
+ if len(di.uncompressed) > 0 {
+ uncompressed = di.uncompressed
+ } else {
+ // uncompress the already loaded data
+ uncompressed, err = snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData)
+ if err != nil {
+ return err
+ }
+ di.uncompressed = uncompressed
}
// pick the terms for the given docNum
- uncompressed = uncompressed[start : start+length]
+ uncompressed = uncompressed[start:end]
for {
i := bytes.Index(uncompressed, termSeparatorSplitSlice)
if i < 0 {
@@ -159,55 +238,72 @@ func (di *docValueIterator) visitDocValues(docNum uint64,
return nil
}
-func (di *docValueIterator) getDocValueLocs(docNum uint64) (uint64, uint64) {
+func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) {
i := sort.Search(len(di.curChunkHeader), func(i int) bool {
return di.curChunkHeader[i].DocNum >= docNum
})
if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocNum == docNum {
- return di.curChunkHeader[i].DocDvLoc, di.curChunkHeader[i].DocDvLen
+ return ReadDocValueBoundary(i, di.curChunkHeader)
}
return math.MaxUint64, math.MaxUint64
}
// VisitDocumentFieldTerms is an implementation of the
// DocumentFieldTermVisitable interface
-func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
- visitor index.DocumentFieldTermVisitor) error {
- fieldIDPlus1 := uint16(0)
- ok := true
+func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
+ visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) (
+ segment.DocVisitState, error) {
+ dvs, ok := dvsIn.(*docVisitState)
+ if !ok || dvs == nil {
+ dvs = &docVisitState{}
+ } else {
+ if dvs.segment != s {
+ dvs.segment = s
+ dvs.dvrs = nil
+ }
+ }
+
+ var fieldIDPlus1 uint16
+ if dvs.dvrs == nil {
+ dvs.dvrs = make(map[uint16]*docValueReader, len(fields))
+ for _, field := range fields {
+ if fieldIDPlus1, ok = s.fieldsMap[field]; !ok {
+ continue
+ }
+ fieldID := fieldIDPlus1 - 1
+ if dvIter, exists := s.fieldDvReaders[fieldID]; exists &&
+ dvIter != nil {
+ dvs.dvrs[fieldID] = dvIter.cloneInto(dvs.dvrs[fieldID])
+ }
+ }
+ }
+
+ // find the chunkNumber where the docValues are stored
+ docInChunk := localDocNum / uint64(s.chunkFactor)
+ var dvr *docValueReader
for _, field := range fields {
if fieldIDPlus1, ok = s.fieldsMap[field]; !ok {
continue
}
- // find the chunkNumber where the docValues are stored
- docInChunk := localDocNum / uint64(s.chunkFactor)
-
- if dvIter, exists := s.fieldDvIterMap[fieldIDPlus1-1]; exists &&
- dvIter != nil {
+ fieldID := fieldIDPlus1 - 1
+ if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil {
// check if the chunk is already loaded
- if docInChunk != dvIter.curChunkNumber() {
- err := dvIter.loadDvChunk(docInChunk, localDocNum, s)
+ if docInChunk != dvr.curChunkNumber() {
+ err := dvr.loadDvChunk(docInChunk, &s.SegmentBase)
if err != nil {
- continue
+ return dvs, err
}
}
- _ = dvIter.visitDocValues(localDocNum, visitor)
+ _ = dvr.visitDocValues(localDocNum, visitor)
}
}
- return nil
+ return dvs, nil
}
// VisitableDocValueFields returns the list of fields with
// persisted doc value terms ready to be visitable using the
// VisitDocumentFieldTerms method.
func (s *Segment) VisitableDocValueFields() ([]string, error) {
- var rv []string
- for fieldID, field := range s.fieldsInv {
- if dvIter, ok := s.fieldDvIterMap[uint16(fieldID)]; ok &&
- dvIter != nil {
- rv = append(rv, field)
- }
- }
- return rv, nil
+ return s.fieldDvNames, nil
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/enumerator.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/enumerator.go
index 3c708dd577..cd6ff73c79 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/enumerator.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/enumerator.go
@@ -46,26 +46,27 @@ func newEnumerator(itrs []vellum.Iterator) (*enumerator, error) {
for i, itr := range rv.itrs {
rv.currKs[i], rv.currVs[i] = itr.Current()
}
- rv.updateMatches()
- if rv.lowK == nil {
+ rv.updateMatches(false)
+ if rv.lowK == nil && len(rv.lowIdxs) == 0 {
return rv, vellum.ErrIteratorDone
}
return rv, nil
}
// updateMatches maintains the low key matches based on the currKs
-func (m *enumerator) updateMatches() {
+func (m *enumerator) updateMatches(skipEmptyKey bool) {
m.lowK = nil
m.lowIdxs = m.lowIdxs[:0]
m.lowCurr = 0
for i, key := range m.currKs {
- if key == nil {
+ if (key == nil && m.currVs[i] == 0) || // in case of empty iterator
+ (len(key) == 0 && skipEmptyKey) { // skip empty keys
continue
}
cmp := bytes.Compare(key, m.lowK)
- if cmp < 0 || m.lowK == nil {
+ if cmp < 0 || len(m.lowIdxs) == 0 {
// reached a new low
m.lowK = key
m.lowIdxs = m.lowIdxs[:0]
@@ -102,9 +103,10 @@ func (m *enumerator) Next() error {
}
m.currKs[vi], m.currVs[vi] = m.itrs[vi].Current()
}
- m.updateMatches()
+ // can skip any empty keys encountered at this point
+ m.updateMatches(true)
}
- if m.lowK == nil {
+ if m.lowK == nil && len(m.lowIdxs) == 0 {
return vellum.ErrIteratorDone
}
return nil
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/intcoder.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/intcoder.go
index b505fec94e..571d06edb6 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/intcoder.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/intcoder.go
@@ -18,16 +18,12 @@ import (
"bytes"
"encoding/binary"
"io"
-
- "github.com/Smerity/govarint"
)
type chunkedIntCoder struct {
final []byte
- maxDocNum uint64
chunkSize uint64
chunkBuf bytes.Buffer
- encoder *govarint.Base128Encoder
chunkLens []uint64
currChunk uint64
@@ -41,11 +37,9 @@ func newChunkedIntCoder(chunkSize uint64, maxDocNum uint64) *chunkedIntCoder {
total := maxDocNum/chunkSize + 1
rv := &chunkedIntCoder{
chunkSize: chunkSize,
- maxDocNum: maxDocNum,
chunkLens: make([]uint64, total),
final: make([]byte, 0, 64),
}
- rv.encoder = govarint.NewU64Base128Encoder(&rv.chunkBuf)
return rv
}
@@ -67,16 +61,18 @@ func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error {
chunk := docNum / c.chunkSize
if chunk != c.currChunk {
// starting a new chunk
- if c.encoder != nil {
- // close out last
- c.Close()
- c.chunkBuf.Reset()
- }
+ c.Close()
+ c.chunkBuf.Reset()
c.currChunk = chunk
}
+ if len(c.buf) < binary.MaxVarintLen64 {
+ c.buf = make([]byte, binary.MaxVarintLen64)
+ }
+
for _, val := range vals {
- _, err := c.encoder.PutU64(val)
+ wb := binary.PutUvarint(c.buf, val)
+ _, err := c.chunkBuf.Write(c.buf[:wb])
if err != nil {
return err
}
@@ -85,13 +81,26 @@ func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error {
return nil
}
+func (c *chunkedIntCoder) AddBytes(docNum uint64, buf []byte) error {
+ chunk := docNum / c.chunkSize
+ if chunk != c.currChunk {
+ // starting a new chunk
+ c.Close()
+ c.chunkBuf.Reset()
+ c.currChunk = chunk
+ }
+
+ _, err := c.chunkBuf.Write(buf)
+ return err
+}
+
// Close indicates you are done calling Add() this allows the final chunk
// to be encoded.
func (c *chunkedIntCoder) Close() {
- c.encoder.Close()
encodingBytes := c.chunkBuf.Bytes()
c.chunkLens[c.currChunk] = uint64(len(encodingBytes))
c.final = append(c.final, encodingBytes...)
+ c.currChunk = uint64(cap(c.chunkLens)) // sentinel to detect double close
}
// Write commits all the encoded chunked integers to the provided writer.
@@ -102,10 +111,13 @@ func (c *chunkedIntCoder) Write(w io.Writer) (int, error) {
}
buf := c.buf
- // write out the number of chunks & each chunkLen
- n := binary.PutUvarint(buf, uint64(len(c.chunkLens)))
- for _, chunkLen := range c.chunkLens {
- n += binary.PutUvarint(buf[n:], uint64(chunkLen))
+ // convert the chunk lengths into chunk offsets
+ chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens)
+
+ // write out the number of chunks & each chunk offsets
+ n := binary.PutUvarint(buf, uint64(len(chunkOffsets)))
+ for _, chunkOffset := range chunkOffsets {
+ n += binary.PutUvarint(buf[n:], chunkOffset)
}
tw, err := w.Write(buf[:n])
@@ -121,3 +133,40 @@ func (c *chunkedIntCoder) Write(w io.Writer) (int, error) {
}
return tw, nil
}
+
+func (c *chunkedIntCoder) FinalSize() int {
+ return len(c.final)
+}
+
+// modifyLengthsToEndOffsets converts the chunk length array
+// to a chunk offset array. The readChunkBoundary
+// will figure out the start and end of every chunk from
+// these offsets. Starting offset of i'th index is stored
+// in i-1'th position except for 0'th index and ending offset
+// is stored at i'th index position.
+// For 0'th element, starting position is always zero.
+// eg:
+// Lens -> 5 5 5 5 => 5 10 15 20
+// Lens -> 0 5 0 5 => 0 5 5 10
+// Lens -> 0 0 0 5 => 0 0 0 5
+// Lens -> 5 0 0 0 => 5 5 5 5
+// Lens -> 0 5 0 0 => 0 5 5 5
+// Lens -> 0 0 5 0 => 0 0 5 5
+func modifyLengthsToEndOffsets(lengths []uint64) []uint64 {
+ var runningOffset uint64
+ var index, i int
+ for i = 1; i <= len(lengths); i++ {
+ runningOffset += lengths[i-1]
+ lengths[index] = runningOffset
+ index++
+ }
+ return lengths
+}
+
+func readChunkBoundary(chunk int, offsets []uint64) (uint64, uint64) {
+ var start uint64
+ if chunk > 0 {
+ start = offsets[chunk-1]
+ }
+ return start, offsets[chunk]
+}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go
index ae8c5b197b..4ef222c1a2 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go
@@ -24,11 +24,13 @@ import (
"sort"
"github.com/RoaringBitmap/roaring"
- "github.com/Smerity/govarint"
+ seg "github.com/blevesearch/bleve/index/scorch/segment"
"github.com/couchbase/vellum"
"github.com/golang/snappy"
)
+var DefaultFileMergerBufferSize = 1024 * 1024
+
const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc
// Merge takes a slice of zap segments and bit masks describing which
@@ -36,12 +38,24 @@ const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc
// remaining data. This new segment is built at the specified path,
// with the provided chunkFactor.
func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
- chunkFactor uint32) ([][]uint64, error) {
+ chunkFactor uint32, closeCh chan struct{}, s seg.StatsReporter) (
+ [][]uint64, uint64, error) {
+ segmentBases := make([]*SegmentBase, len(segments))
+ for segmenti, segment := range segments {
+ segmentBases[segmenti] = &segment.SegmentBase
+ }
+
+ return MergeSegmentBases(segmentBases, drops, path, chunkFactor, closeCh, s)
+}
+
+func MergeSegmentBases(segmentBases []*SegmentBase, drops []*roaring.Bitmap, path string,
+ chunkFactor uint32, closeCh chan struct{}, s seg.StatsReporter) (
+ [][]uint64, uint64, error) {
flag := os.O_RDWR | os.O_CREATE
f, err := os.OpenFile(path, flag, 0600)
if err != nil {
- return nil, err
+ return nil, 0, err
}
cleanup := func() {
@@ -49,54 +63,49 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
_ = os.Remove(path)
}
- segmentBases := make([]*SegmentBase, len(segments))
- for segmenti, segment := range segments {
- segmentBases[segmenti] = &segment.SegmentBase
- }
-
// buffer the output
- br := bufio.NewWriter(f)
+ br := bufio.NewWriterSize(f, DefaultFileMergerBufferSize)
// wrap it for counting (tracking offsets)
- cr := NewCountHashWriter(br)
+ cr := NewCountHashWriterWithStatsReporter(br, s)
newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, _, _, _, err :=
- MergeToWriter(segmentBases, drops, chunkFactor, cr)
+ MergeToWriter(segmentBases, drops, chunkFactor, cr, closeCh)
if err != nil {
cleanup()
- return nil, err
+ return nil, 0, err
}
err = persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset,
docValueOffset, chunkFactor, cr.Sum32(), cr)
if err != nil {
cleanup()
- return nil, err
+ return nil, 0, err
}
err = br.Flush()
if err != nil {
cleanup()
- return nil, err
+ return nil, 0, err
}
err = f.Sync()
if err != nil {
cleanup()
- return nil, err
+ return nil, 0, err
}
err = f.Close()
if err != nil {
cleanup()
- return nil, err
+ return nil, 0, err
}
- return newDocNums, nil
+ return newDocNums, uint64(cr.Count()), nil
}
func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap,
- chunkFactor uint32, cr *CountHashWriter) (
+ chunkFactor uint32, cr *CountHashWriter, closeCh chan struct{}) (
newDocNums [][]uint64,
numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64,
dictLocs []uint64, fieldsInv []string, fieldsMap map[string]uint16,
@@ -108,15 +117,21 @@ func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap,
fieldsMap = mapFields(fieldsInv)
numDocs = computeNewDocCount(segments, drops)
+
+ if isClosed(closeCh) {
+ return nil, 0, 0, 0, 0, nil, nil, nil, seg.ErrClosed
+ }
+
if numDocs > 0 {
storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops,
- fieldsMap, fieldsInv, fieldsSame, numDocs, cr)
+ fieldsMap, fieldsInv, fieldsSame, numDocs, cr, closeCh)
if err != nil {
return nil, 0, 0, 0, 0, nil, nil, nil, err
}
- dictLocs, docValueOffset, err = persistMergedRest(segments, drops, fieldsInv, fieldsMap,
- newDocNums, numDocs, chunkFactor, cr)
+ dictLocs, docValueOffset, err = persistMergedRest(segments, drops,
+ fieldsInv, fieldsMap, fieldsSame,
+ newDocNums, numDocs, chunkFactor, cr, closeCh)
if err != nil {
return nil, 0, 0, 0, 0, nil, nil, nil, err
}
@@ -156,11 +171,10 @@ func computeNewDocCount(segments []*SegmentBase, drops []*roaring.Bitmap) uint64
}
func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
- fieldsInv []string, fieldsMap map[string]uint16, newDocNumsIn [][]uint64,
- newSegDocCount uint64, chunkFactor uint32,
- w *CountHashWriter) ([]uint64, uint64, error) {
+ fieldsInv []string, fieldsMap map[string]uint16, fieldsSame bool,
+ newDocNumsIn [][]uint64, newSegDocCount uint64, chunkFactor uint32,
+ w *CountHashWriter, closeCh chan struct{}) ([]uint64, uint64, error) {
- var bufReuse bytes.Buffer
var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64)
var bufLoc []uint64
@@ -168,28 +182,22 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
var postItr *PostingsIterator
rv := make([]uint64, len(fieldsInv))
- fieldDvLocs := make([]uint64, len(fieldsInv))
+ fieldDvLocsStart := make([]uint64, len(fieldsInv))
+ fieldDvLocsEnd := make([]uint64, len(fieldsInv))
tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1)
locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1)
- // docTermMap is keyed by docNum, where the array impl provides
- // better memory usage behavior than a sparse-friendlier hashmap
- // for when docs have much structural similarity (i.e., every doc
- // has a given field)
- var docTermMap [][]byte
-
var vellumBuf bytes.Buffer
+ newVellum, err := vellum.New(&vellumBuf, nil)
+ if err != nil {
+ return nil, 0, err
+ }
+
+ newRoaring := roaring.NewBitmap()
// for each field
for fieldID, fieldName := range fieldsInv {
- if fieldID != 0 {
- vellumBuf.Reset()
- }
- newVellum, err := vellum.New(&vellumBuf, nil)
- if err != nil {
- return nil, 0, err
- }
// collect FST iterators from all active segments for this field
var newDocNums [][]uint64
@@ -197,7 +205,15 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
var dicts []*Dictionary
var itrs []vellum.Iterator
+ var segmentsInFocus []*SegmentBase
+
for segmentI, segment := range segments {
+
+ // check for the closure in meantime
+ if isClosed(closeCh) {
+ return nil, 0, seg.ErrClosed
+ }
+
dict, err2 := segment.dictionary(fieldName)
if err2 != nil {
return nil, 0, err2
@@ -209,89 +225,63 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
}
if itr != nil {
newDocNums = append(newDocNums, newDocNumsIn[segmentI])
- drops = append(drops, dropsIn[segmentI])
+ if dropsIn[segmentI] != nil && !dropsIn[segmentI].IsEmpty() {
+ drops = append(drops, dropsIn[segmentI])
+ } else {
+ drops = append(drops, nil)
+ }
dicts = append(dicts, dict)
itrs = append(itrs, itr)
+ segmentsInFocus = append(segmentsInFocus, segment)
}
}
}
- if uint64(cap(docTermMap)) < newSegDocCount {
- docTermMap = make([][]byte, newSegDocCount)
- } else {
- docTermMap = docTermMap[0:newSegDocCount]
- for docNum := range docTermMap { // reset the docTermMap
- docTermMap[docNum] = docTermMap[docNum][:0]
- }
- }
-
var prevTerm []byte
- newRoaring := roaring.NewBitmap()
- newRoaringLocs := roaring.NewBitmap()
+ newRoaring.Clear()
- finishTerm := func(term []byte) error {
- if term == nil {
- return nil
+ var lastDocNum, lastFreq, lastNorm uint64
+
+ // determines whether to use "1-hit" encoding optimization
+ // when a term appears in only 1 doc, with no loc info,
+ // has freq of 1, and the docNum fits into 31-bits
+ use1HitEncoding := func(termCardinality uint64) (bool, uint64, uint64) {
+ if termCardinality == uint64(1) && locEncoder.FinalSize() <= 0 {
+ docNum := uint64(newRoaring.Minimum())
+ if under32Bits(docNum) && docNum == lastDocNum && lastFreq == 1 {
+ return true, docNum, lastNorm
+ }
}
+ return false, 0, 0
+ }
+ finishTerm := func(term []byte) error {
tfEncoder.Close()
locEncoder.Close()
- if newRoaring.GetCardinality() > 0 {
- // this field/term actually has hits in the new segment, lets write it down
- freqOffset := uint64(w.Count())
- _, err := tfEncoder.Write(w)
- if err != nil {
- return err
- }
- locOffset := uint64(w.Count())
- _, err = locEncoder.Write(w)
- if err != nil {
- return err
- }
- postingLocOffset := uint64(w.Count())
- _, err = writeRoaringWithLen(newRoaringLocs, w, &bufReuse, bufMaxVarintLen64)
- if err != nil {
- return err
- }
- postingOffset := uint64(w.Count())
-
- // write out the start of the term info
- n := binary.PutUvarint(bufMaxVarintLen64, freqOffset)
- _, err = w.Write(bufMaxVarintLen64[:n])
- if err != nil {
- return err
- }
- // write out the start of the loc info
- n = binary.PutUvarint(bufMaxVarintLen64, locOffset)
- _, err = w.Write(bufMaxVarintLen64[:n])
- if err != nil {
- return err
- }
- // write out the start of the posting locs
- n = binary.PutUvarint(bufMaxVarintLen64, postingLocOffset)
- _, err = w.Write(bufMaxVarintLen64[:n])
- if err != nil {
- return err
- }
- _, err = writeRoaringWithLen(newRoaring, w, &bufReuse, bufMaxVarintLen64)
- if err != nil {
- return err
- }
+ postingsOffset, err := writePostings(newRoaring,
+ tfEncoder, locEncoder, use1HitEncoding, w, bufMaxVarintLen64)
+ if err != nil {
+ return err
+ }
- err = newVellum.Insert(term, postingOffset)
+ if postingsOffset > 0 {
+ err = newVellum.Insert(term, postingsOffset)
if err != nil {
return err
}
}
- newRoaring = roaring.NewBitmap()
- newRoaringLocs = roaring.NewBitmap()
+ newRoaring.Clear()
tfEncoder.Reset()
locEncoder.Reset()
+ lastDocNum = 0
+ lastFreq = 0
+ lastNorm = 0
+
return nil
}
@@ -301,66 +291,39 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
term, itrI, postingsOffset := enumerator.Current()
if !bytes.Equal(prevTerm, term) {
+ // check for the closure in meantime
+ if isClosed(closeCh) {
+ return nil, 0, seg.ErrClosed
+ }
+
// if the term changed, write out the info collected
// for the previous term
- err2 := finishTerm(prevTerm)
- if err2 != nil {
- return nil, 0, err2
+ err = finishTerm(prevTerm)
+ if err != nil {
+ return nil, 0, err
}
}
- var err2 error
- postings, err2 = dicts[itrI].postingsListFromOffset(
+ postings, err = dicts[itrI].postingsListFromOffset(
postingsOffset, drops[itrI], postings)
- if err2 != nil {
- return nil, 0, err2
+ if err != nil {
+ return nil, 0, err
}
- newDocNumsI := newDocNums[itrI]
-
- postItr = postings.iterator(postItr)
- next, err2 := postItr.Next()
- for next != nil && err2 == nil {
- hitNewDocNum := newDocNumsI[next.Number()]
- if hitNewDocNum == docDropped {
- return nil, 0, fmt.Errorf("see hit with dropped doc num")
- }
- newRoaring.Add(uint32(hitNewDocNum))
- // encode norm bits
- norm := next.Norm()
- normBits := math.Float32bits(float32(norm))
- err = tfEncoder.Add(hitNewDocNum, next.Frequency(), uint64(normBits))
- if err != nil {
- return nil, 0, err
- }
- locs := next.Locations()
- if len(locs) > 0 {
- newRoaringLocs.Add(uint32(hitNewDocNum))
- for _, loc := range locs {
- if cap(bufLoc) < 5+len(loc.ArrayPositions()) {
- bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions()))
- }
- args := bufLoc[0:5]
- args[0] = uint64(fieldsMap[loc.Field()] - 1)
- args[1] = loc.Pos()
- args[2] = loc.Start()
- args[3] = loc.End()
- args[4] = uint64(len(loc.ArrayPositions()))
- args = append(args, loc.ArrayPositions()...)
- err = locEncoder.Add(hitNewDocNum, args...)
- if err != nil {
- return nil, 0, err
- }
- }
- }
-
- docTermMap[hitNewDocNum] =
- append(append(docTermMap[hitNewDocNum], term...), termSeparator)
-
- next, err2 = postItr.Next()
+ postItr = postings.iterator(true, true, true, postItr)
+
+ if fieldsSame {
+ // can optimize by copying freq/norm/loc bytes directly
+ lastDocNum, lastFreq, lastNorm, err = mergeTermFreqNormLocsByCopying(
+ term, postItr, newDocNums[itrI], newRoaring,
+ tfEncoder, locEncoder)
+ } else {
+ lastDocNum, lastFreq, lastNorm, bufLoc, err = mergeTermFreqNormLocs(
+ fieldsMap, term, postItr, newDocNums[itrI], newRoaring,
+ tfEncoder, locEncoder, bufLoc)
}
- if err2 != nil {
- return nil, 0, err2
+ if err != nil {
+ return nil, 0, err
}
prevTerm = prevTerm[:0] // copy to prevTerm in case Next() reuses term mem
@@ -368,7 +331,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
err = enumerator.Next()
}
- if err != nil && err != vellum.ErrIteratorDone {
+ if err != vellum.ErrIteratorDone {
return nil, 0, err
}
@@ -400,26 +363,63 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
rv[fieldID] = dictOffset
+ // get the field doc value offset (start)
+ fieldDvLocsStart[fieldID] = uint64(w.Count())
+
// update the field doc values
- fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1)
- for docNum, docTerms := range docTermMap {
- if len(docTerms) > 0 {
- err = fdvEncoder.Add(uint64(docNum), docTerms)
+ fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1, w, true)
+
+ fdvReadersAvailable := false
+ var dvIterClone *docValueReader
+ for segmentI, segment := range segmentsInFocus {
+ // check for the closure in meantime
+ if isClosed(closeCh) {
+ return nil, 0, seg.ErrClosed
+ }
+
+ fieldIDPlus1 := uint16(segment.fieldsMap[fieldName])
+ if dvIter, exists := segment.fieldDvReaders[fieldIDPlus1-1]; exists &&
+ dvIter != nil {
+ fdvReadersAvailable = true
+ dvIterClone = dvIter.cloneInto(dvIterClone)
+ err = dvIterClone.iterateAllDocValues(segment, func(docNum uint64, terms []byte) error {
+ if newDocNums[segmentI][docNum] == docDropped {
+ return nil
+ }
+ err := fdvEncoder.Add(newDocNums[segmentI][docNum], terms)
+ if err != nil {
+ return err
+ }
+ return nil
+ })
if err != nil {
return nil, 0, err
}
}
}
- err = fdvEncoder.Close()
- if err != nil {
- return nil, 0, err
- }
- // get the field doc value offset
- fieldDvLocs[fieldID] = uint64(w.Count())
+ if fdvReadersAvailable {
+ err = fdvEncoder.Close()
+ if err != nil {
+ return nil, 0, err
+ }
+
+ // persist the doc value details for this field
+ _, err = fdvEncoder.Write()
+ if err != nil {
+ return nil, 0, err
+ }
+
+ // get the field doc value offset (end)
+ fieldDvLocsEnd[fieldID] = uint64(w.Count())
+ } else {
+ fieldDvLocsStart[fieldID] = fieldNotUninverted
+ fieldDvLocsEnd[fieldID] = fieldNotUninverted
+ }
- // persist the doc value details for this field
- _, err = fdvEncoder.Write(w)
+ // reset vellum buffer and vellum builder
+ vellumBuf.Reset()
+ err = newVellum.Reset(&vellumBuf)
if err != nil {
return nil, 0, err
}
@@ -428,38 +428,210 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
fieldDvLocsOffset := uint64(w.Count())
buf := bufMaxVarintLen64
- for _, offset := range fieldDvLocs {
- n := binary.PutUvarint(buf, uint64(offset))
+ for i := 0; i < len(fieldDvLocsStart); i++ {
+ n := binary.PutUvarint(buf, fieldDvLocsStart[i])
_, err := w.Write(buf[:n])
if err != nil {
return nil, 0, err
}
+ n = binary.PutUvarint(buf, fieldDvLocsEnd[i])
+ _, err = w.Write(buf[:n])
+ if err != nil {
+ return nil, 0, err
+ }
}
return rv, fieldDvLocsOffset, nil
}
+func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *PostingsIterator,
+ newDocNums []uint64, newRoaring *roaring.Bitmap,
+ tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, bufLoc []uint64) (
+ lastDocNum uint64, lastFreq uint64, lastNorm uint64, bufLocOut []uint64, err error) {
+ next, err := postItr.Next()
+ for next != nil && err == nil {
+ hitNewDocNum := newDocNums[next.Number()]
+ if hitNewDocNum == docDropped {
+ return 0, 0, 0, nil, fmt.Errorf("see hit with dropped docNum")
+ }
+
+ newRoaring.Add(uint32(hitNewDocNum))
+
+ nextFreq := next.Frequency()
+ nextNorm := uint64(math.Float32bits(float32(next.Norm())))
+
+ locs := next.Locations()
+
+ err = tfEncoder.Add(hitNewDocNum,
+ encodeFreqHasLocs(nextFreq, len(locs) > 0), nextNorm)
+ if err != nil {
+ return 0, 0, 0, nil, err
+ }
+
+ if len(locs) > 0 {
+ numBytesLocs := 0
+ for _, loc := range locs {
+ ap := loc.ArrayPositions()
+ numBytesLocs += totalUvarintBytes(uint64(fieldsMap[loc.Field()]-1),
+ loc.Pos(), loc.Start(), loc.End(), uint64(len(ap)), ap)
+ }
+
+ err = locEncoder.Add(hitNewDocNum, uint64(numBytesLocs))
+ if err != nil {
+ return 0, 0, 0, nil, err
+ }
+
+ for _, loc := range locs {
+ ap := loc.ArrayPositions()
+ if cap(bufLoc) < 5+len(ap) {
+ bufLoc = make([]uint64, 0, 5+len(ap))
+ }
+ args := bufLoc[0:5]
+ args[0] = uint64(fieldsMap[loc.Field()] - 1)
+ args[1] = loc.Pos()
+ args[2] = loc.Start()
+ args[3] = loc.End()
+ args[4] = uint64(len(ap))
+ args = append(args, ap...)
+ err = locEncoder.Add(hitNewDocNum, args...)
+ if err != nil {
+ return 0, 0, 0, nil, err
+ }
+ }
+ }
+
+ lastDocNum = hitNewDocNum
+ lastFreq = nextFreq
+ lastNorm = nextNorm
+
+ next, err = postItr.Next()
+ }
+
+ return lastDocNum, lastFreq, lastNorm, bufLoc, err
+}
+
+func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator,
+ newDocNums []uint64, newRoaring *roaring.Bitmap,
+ tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder) (
+ lastDocNum uint64, lastFreq uint64, lastNorm uint64, err error) {
+ nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err :=
+ postItr.nextBytes()
+ for err == nil && len(nextFreqNormBytes) > 0 {
+ hitNewDocNum := newDocNums[nextDocNum]
+ if hitNewDocNum == docDropped {
+ return 0, 0, 0, fmt.Errorf("see hit with dropped doc num")
+ }
+
+ newRoaring.Add(uint32(hitNewDocNum))
+ err = tfEncoder.AddBytes(hitNewDocNum, nextFreqNormBytes)
+ if err != nil {
+ return 0, 0, 0, err
+ }
+
+ if len(nextLocBytes) > 0 {
+ err = locEncoder.AddBytes(hitNewDocNum, nextLocBytes)
+ if err != nil {
+ return 0, 0, 0, err
+ }
+ }
+
+ lastDocNum = hitNewDocNum
+ lastFreq = nextFreq
+ lastNorm = nextNorm
+
+ nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err =
+ postItr.nextBytes()
+ }
+
+ return lastDocNum, lastFreq, lastNorm, err
+}
+
+func writePostings(postings *roaring.Bitmap, tfEncoder, locEncoder *chunkedIntCoder,
+ use1HitEncoding func(uint64) (bool, uint64, uint64),
+ w *CountHashWriter, bufMaxVarintLen64 []byte) (
+ offset uint64, err error) {
+ termCardinality := postings.GetCardinality()
+ if termCardinality <= 0 {
+ return 0, nil
+ }
+
+ if use1HitEncoding != nil {
+ encodeAs1Hit, docNum1Hit, normBits1Hit := use1HitEncoding(termCardinality)
+ if encodeAs1Hit {
+ return FSTValEncode1Hit(docNum1Hit, normBits1Hit), nil
+ }
+ }
+
+ tfOffset := uint64(w.Count())
+ _, err = tfEncoder.Write(w)
+ if err != nil {
+ return 0, err
+ }
+
+ locOffset := uint64(w.Count())
+ _, err = locEncoder.Write(w)
+ if err != nil {
+ return 0, err
+ }
+
+ postingsOffset := uint64(w.Count())
+
+ n := binary.PutUvarint(bufMaxVarintLen64, tfOffset)
+ _, err = w.Write(bufMaxVarintLen64[:n])
+ if err != nil {
+ return 0, err
+ }
+
+ n = binary.PutUvarint(bufMaxVarintLen64, locOffset)
+ _, err = w.Write(bufMaxVarintLen64[:n])
+ if err != nil {
+ return 0, err
+ }
+
+ _, err = writeRoaringWithLen(postings, w, bufMaxVarintLen64)
+ if err != nil {
+ return 0, err
+ }
+
+ return postingsOffset, nil
+}
+
+type varintEncoder func(uint64) (int, error)
+
func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
fieldsMap map[string]uint16, fieldsInv []string, fieldsSame bool, newSegDocCount uint64,
- w *CountHashWriter) (uint64, [][]uint64, error) {
+ w *CountHashWriter, closeCh chan struct{}) (uint64, [][]uint64, error) {
var rv [][]uint64 // The remapped or newDocNums for each segment.
var newDocNum uint64
var curr int
- var metaBuf bytes.Buffer
var data, compressed []byte
-
- metaEncoder := govarint.NewU64Base128Encoder(&metaBuf)
+ var metaBuf bytes.Buffer
+ varBuf := make([]byte, binary.MaxVarintLen64)
+ metaEncode := func(val uint64) (int, error) {
+ wb := binary.PutUvarint(varBuf, val)
+ return metaBuf.Write(varBuf[:wb])
+ }
vals := make([][][]byte, len(fieldsInv))
typs := make([][]byte, len(fieldsInv))
poss := make([][][]uint64, len(fieldsInv))
+ var posBuf []uint64
+
docNumOffsets := make([]uint64, newSegDocCount)
+ vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx)
+ defer visitDocumentCtxPool.Put(vdc)
+
// for each segment
for segI, segment := range segments {
+ // check for the closure in meantime
+ if isClosed(closeCh) {
+ return 0, nil, seg.ErrClosed
+ }
+
segNewDocNums := make([]uint64, segment.numDocs)
dropsI := drops[segI]
@@ -495,7 +667,8 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
curr = 0
metaBuf.Reset()
data = data[:0]
- compressed = compressed[:0]
+
+ posTemp := posBuf
// collect all the data
for i := 0; i < len(fieldsInv); i++ {
@@ -503,42 +676,63 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
typs[i] = typs[i][:0]
poss[i] = poss[i][:0]
}
- err := segment.VisitDocument(docNum, func(field string, typ byte, value []byte, pos []uint64) bool {
+ err := segment.visitDocument(vdc, docNum, func(field string, typ byte, value []byte, pos []uint64) bool {
fieldID := int(fieldsMap[field]) - 1
vals[fieldID] = append(vals[fieldID], value)
typs[fieldID] = append(typs[fieldID], typ)
- poss[fieldID] = append(poss[fieldID], pos)
+
+ // copy array positions to preserve them beyond the scope of this callback
+ var curPos []uint64
+ if len(pos) > 0 {
+ if cap(posTemp) < len(pos) {
+ posBuf = make([]uint64, len(pos)*len(fieldsInv))
+ posTemp = posBuf
+ }
+ curPos = posTemp[0:len(pos)]
+ copy(curPos, pos)
+ posTemp = posTemp[len(pos):]
+ }
+ poss[fieldID] = append(poss[fieldID], curPos)
+
return true
})
if err != nil {
return 0, nil, err
}
- // now walk the fields in order
- for fieldID := range fieldsInv {
- storedFieldValues := vals[int(fieldID)]
+ // _id field special case optimizes ExternalID() lookups
+ idFieldVal := vals[uint16(0)][0]
+ _, err = metaEncode(uint64(len(idFieldVal)))
+ if err != nil {
+ return 0, nil, err
+ }
+
+ // now walk the non-"_id" fields in order
+ for fieldID := 1; fieldID < len(fieldsInv); fieldID++ {
+ storedFieldValues := vals[fieldID]
- stf := typs[int(fieldID)]
- spf := poss[int(fieldID)]
+ stf := typs[fieldID]
+ spf := poss[fieldID]
var err2 error
curr, data, err2 = persistStoredFieldValues(fieldID,
- storedFieldValues, stf, spf, curr, metaEncoder, data)
+ storedFieldValues, stf, spf, curr, metaEncode, data)
if err2 != nil {
return 0, nil, err2
}
}
- metaEncoder.Close()
metaBytes := metaBuf.Bytes()
- compressed = snappy.Encode(compressed, data)
+ compressed = snappy.Encode(compressed[:cap(compressed)], data)
// record where we're about to start writing
docNumOffsets[newDocNum] = uint64(w.Count())
// write out the meta len and compressed data len
- _, err = writeUvarints(w, uint64(len(metaBytes)), uint64(len(compressed)))
+ _, err = writeUvarints(w,
+ uint64(len(metaBytes)),
+ uint64(len(idFieldVal)+len(compressed)))
if err != nil {
return 0, nil, err
}
@@ -547,6 +741,11 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
if err != nil {
return 0, nil, err
}
+ // now write the _id field val (counted as part of the 'compressed' data)
+ _, err = w.Write(idFieldVal)
+ if err != nil {
+ return 0, nil, err
+ }
// now write the compressed data
_, err = w.Write(compressed)
if err != nil {
@@ -644,3 +843,12 @@ func mergeFields(segments []*SegmentBase) (bool, []string) {
return fieldsSame, rv
}
+
+func isClosed(closeCh chan struct{}) bool {
+ select {
+ case <-closeCh:
+ return true
+ default:
+ return false
+ }
+}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go
new file mode 100644
index 0000000000..22b69913e4
--- /dev/null
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go
@@ -0,0 +1,826 @@
+// Copyright (c) 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package zap
+
+import (
+ "bytes"
+ "encoding/binary"
+ "math"
+ "sort"
+ "sync"
+
+ "github.com/RoaringBitmap/roaring"
+ "github.com/blevesearch/bleve/analysis"
+ "github.com/blevesearch/bleve/document"
+ "github.com/blevesearch/bleve/index"
+ "github.com/couchbase/vellum"
+ "github.com/golang/snappy"
+)
+
+var NewSegmentBufferNumResultsBump int = 100
+var NewSegmentBufferNumResultsFactor float64 = 1.0
+var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0
+
+// AnalysisResultsToSegmentBase produces an in-memory zap-encoded
+// SegmentBase from analysis results
+func AnalysisResultsToSegmentBase(results []*index.AnalysisResult,
+ chunkFactor uint32) (*SegmentBase, uint64, error) {
+ s := interimPool.Get().(*interim)
+
+ var br bytes.Buffer
+ if s.lastNumDocs > 0 {
+ // use previous results to initialize the buf with an estimate
+ // size, but note that the interim instance comes from a
+ // global interimPool, so multiple scorch instances indexing
+ // different docs can lead to low quality estimates
+ estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) *
+ NewSegmentBufferNumResultsFactor)
+ estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) *
+ NewSegmentBufferAvgBytesPerDocFactor)
+ br.Grow(estimateAvgBytesPerDoc * estimateNumResults)
+ }
+
+ s.results = results
+ s.chunkFactor = chunkFactor
+ s.w = NewCountHashWriter(&br)
+
+ storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets,
+ err := s.convert()
+ if err != nil {
+ return nil, uint64(0), err
+ }
+
+ sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkFactor,
+ s.FieldsMap, s.FieldsInv, uint64(len(results)),
+ storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets)
+
+ if err == nil && s.reset() == nil {
+ s.lastNumDocs = len(results)
+ s.lastOutSize = len(br.Bytes())
+ interimPool.Put(s)
+ }
+
+ return sb, uint64(len(br.Bytes())), err
+}
+
+var interimPool = sync.Pool{New: func() interface{} { return &interim{} }}
+
+// interim holds temporary working data used while converting from
+// analysis results to a zap-encoded segment
+type interim struct {
+ results []*index.AnalysisResult
+
+ chunkFactor uint32
+
+ w *CountHashWriter
+
+ // FieldsMap adds 1 to field id to avoid zero value issues
+ // name -> field id + 1
+ FieldsMap map[string]uint16
+
+ // FieldsInv is the inverse of FieldsMap
+ // field id -> name
+ FieldsInv []string
+
+ // Term dictionaries for each field
+ // field id -> term -> postings list id + 1
+ Dicts []map[string]uint64
+
+ // Terms for each field, where terms are sorted ascending
+ // field id -> []term
+ DictKeys [][]string
+
+ // Fields whose IncludeDocValues is true
+ // field id -> bool
+ IncludeDocValues []bool
+
+ // postings id -> bitmap of docNums
+ Postings []*roaring.Bitmap
+
+ // postings id -> freq/norm's, one for each docNum in postings
+ FreqNorms [][]interimFreqNorm
+ freqNormsBacking []interimFreqNorm
+
+ // postings id -> locs, one for each freq
+ Locs [][]interimLoc
+ locsBacking []interimLoc
+
+ numTermsPerPostingsList []int // key is postings list id
+ numLocsPerPostingsList []int // key is postings list id
+
+ builder *vellum.Builder
+ builderBuf bytes.Buffer
+
+ metaBuf bytes.Buffer
+
+ tmp0 []byte
+ tmp1 []byte
+
+ lastNumDocs int
+ lastOutSize int
+}
+
+func (s *interim) reset() (err error) {
+ s.results = nil
+ s.chunkFactor = 0
+ s.w = nil
+ s.FieldsMap = nil
+ s.FieldsInv = nil
+ for i := range s.Dicts {
+ s.Dicts[i] = nil
+ }
+ s.Dicts = s.Dicts[:0]
+ for i := range s.DictKeys {
+ s.DictKeys[i] = s.DictKeys[i][:0]
+ }
+ s.DictKeys = s.DictKeys[:0]
+ for i := range s.IncludeDocValues {
+ s.IncludeDocValues[i] = false
+ }
+ s.IncludeDocValues = s.IncludeDocValues[:0]
+ for _, idn := range s.Postings {
+ idn.Clear()
+ }
+ s.Postings = s.Postings[:0]
+ s.FreqNorms = s.FreqNorms[:0]
+ for i := range s.freqNormsBacking {
+ s.freqNormsBacking[i] = interimFreqNorm{}
+ }
+ s.freqNormsBacking = s.freqNormsBacking[:0]
+ s.Locs = s.Locs[:0]
+ for i := range s.locsBacking {
+ s.locsBacking[i] = interimLoc{}
+ }
+ s.locsBacking = s.locsBacking[:0]
+ s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0]
+ s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0]
+ s.builderBuf.Reset()
+ if s.builder != nil {
+ err = s.builder.Reset(&s.builderBuf)
+ }
+ s.metaBuf.Reset()
+ s.tmp0 = s.tmp0[:0]
+ s.tmp1 = s.tmp1[:0]
+ s.lastNumDocs = 0
+ s.lastOutSize = 0
+
+ return err
+}
+
+func (s *interim) grabBuf(size int) []byte {
+ buf := s.tmp0
+ if cap(buf) < size {
+ buf = make([]byte, size)
+ s.tmp0 = buf
+ }
+ return buf[0:size]
+}
+
+type interimStoredField struct {
+ vals [][]byte
+ typs []byte
+ arrayposs [][]uint64 // array positions
+}
+
+type interimFreqNorm struct {
+ freq uint64
+ norm float32
+ numLocs int
+}
+
+type interimLoc struct {
+ fieldID uint16
+ pos uint64
+ start uint64
+ end uint64
+ arrayposs []uint64
+}
+
+func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) {
+ s.FieldsMap = map[string]uint16{}
+
+ s.getOrDefineField("_id") // _id field is fieldID 0
+
+ for _, result := range s.results {
+ for _, field := range result.Document.CompositeFields {
+ s.getOrDefineField(field.Name())
+ }
+ for _, field := range result.Document.Fields {
+ s.getOrDefineField(field.Name())
+ }
+ }
+
+ sort.Strings(s.FieldsInv[1:]) // keep _id as first field
+
+ for fieldID, fieldName := range s.FieldsInv {
+ s.FieldsMap[fieldName] = uint16(fieldID + 1)
+ }
+
+ if cap(s.IncludeDocValues) >= len(s.FieldsInv) {
+ s.IncludeDocValues = s.IncludeDocValues[:len(s.FieldsInv)]
+ } else {
+ s.IncludeDocValues = make([]bool, len(s.FieldsInv))
+ }
+
+ s.prepareDicts()
+
+ for _, dict := range s.DictKeys {
+ sort.Strings(dict)
+ }
+
+ s.processDocuments()
+
+ storedIndexOffset, err := s.writeStoredFields()
+ if err != nil {
+ return 0, 0, 0, nil, err
+ }
+
+ var fdvIndexOffset uint64
+ var dictOffsets []uint64
+
+ if len(s.results) > 0 {
+ fdvIndexOffset, dictOffsets, err = s.writeDicts()
+ if err != nil {
+ return 0, 0, 0, nil, err
+ }
+ } else {
+ dictOffsets = make([]uint64, len(s.FieldsInv))
+ }
+
+ fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets)
+ if err != nil {
+ return 0, 0, 0, nil, err
+ }
+
+ return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil
+}
+
+func (s *interim) getOrDefineField(fieldName string) int {
+ fieldIDPlus1, exists := s.FieldsMap[fieldName]
+ if !exists {
+ fieldIDPlus1 = uint16(len(s.FieldsInv) + 1)
+ s.FieldsMap[fieldName] = fieldIDPlus1
+ s.FieldsInv = append(s.FieldsInv, fieldName)
+
+ s.Dicts = append(s.Dicts, make(map[string]uint64))
+
+ n := len(s.DictKeys)
+ if n < cap(s.DictKeys) {
+ s.DictKeys = s.DictKeys[:n+1]
+ s.DictKeys[n] = s.DictKeys[n][:0]
+ } else {
+ s.DictKeys = append(s.DictKeys, []string(nil))
+ }
+ }
+
+ return int(fieldIDPlus1 - 1)
+}
+
+// fill Dicts and DictKeys from analysis results
+func (s *interim) prepareDicts() {
+ var pidNext int
+
+ var totTFs int
+ var totLocs int
+
+ visitField := func(fieldID uint16, tfs analysis.TokenFrequencies) {
+ dict := s.Dicts[fieldID]
+ dictKeys := s.DictKeys[fieldID]
+
+ for term, tf := range tfs {
+ pidPlus1, exists := dict[term]
+ if !exists {
+ pidNext++
+ pidPlus1 = uint64(pidNext)
+
+ dict[term] = pidPlus1
+ dictKeys = append(dictKeys, term)
+
+ s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0)
+ s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0)
+ }
+
+ pid := pidPlus1 - 1
+
+ s.numTermsPerPostingsList[pid] += 1
+ s.numLocsPerPostingsList[pid] += len(tf.Locations)
+
+ totLocs += len(tf.Locations)
+ }
+
+ totTFs += len(tfs)
+
+ s.DictKeys[fieldID] = dictKeys
+ }
+
+ for _, result := range s.results {
+ // walk each composite field
+ for _, field := range result.Document.CompositeFields {
+ fieldID := uint16(s.getOrDefineField(field.Name()))
+ _, tf := field.Analyze()
+ visitField(fieldID, tf)
+ }
+
+ // walk each field
+ for i, field := range result.Document.Fields {
+ fieldID := uint16(s.getOrDefineField(field.Name()))
+ tf := result.Analyzed[i]
+ visitField(fieldID, tf)
+ }
+ }
+
+ numPostingsLists := pidNext
+
+ if cap(s.Postings) >= numPostingsLists {
+ s.Postings = s.Postings[:numPostingsLists]
+ } else {
+ postings := make([]*roaring.Bitmap, numPostingsLists)
+ copy(postings, s.Postings[:cap(s.Postings)])
+ for i := 0; i < numPostingsLists; i++ {
+ if postings[i] == nil {
+ postings[i] = roaring.New()
+ }
+ }
+ s.Postings = postings
+ }
+
+ if cap(s.FreqNorms) >= numPostingsLists {
+ s.FreqNorms = s.FreqNorms[:numPostingsLists]
+ } else {
+ s.FreqNorms = make([][]interimFreqNorm, numPostingsLists)
+ }
+
+ if cap(s.freqNormsBacking) >= totTFs {
+ s.freqNormsBacking = s.freqNormsBacking[:totTFs]
+ } else {
+ s.freqNormsBacking = make([]interimFreqNorm, totTFs)
+ }
+
+ freqNormsBacking := s.freqNormsBacking
+ for pid, numTerms := range s.numTermsPerPostingsList {
+ s.FreqNorms[pid] = freqNormsBacking[0:0]
+ freqNormsBacking = freqNormsBacking[numTerms:]
+ }
+
+ if cap(s.Locs) >= numPostingsLists {
+ s.Locs = s.Locs[:numPostingsLists]
+ } else {
+ s.Locs = make([][]interimLoc, numPostingsLists)
+ }
+
+ if cap(s.locsBacking) >= totLocs {
+ s.locsBacking = s.locsBacking[:totLocs]
+ } else {
+ s.locsBacking = make([]interimLoc, totLocs)
+ }
+
+ locsBacking := s.locsBacking
+ for pid, numLocs := range s.numLocsPerPostingsList {
+ s.Locs[pid] = locsBacking[0:0]
+ locsBacking = locsBacking[numLocs:]
+ }
+}
+
+func (s *interim) processDocuments() {
+ numFields := len(s.FieldsInv)
+ reuseFieldLens := make([]int, numFields)
+ reuseFieldTFs := make([]analysis.TokenFrequencies, numFields)
+
+ for docNum, result := range s.results {
+ for i := 0; i < numFields; i++ { // clear these for reuse
+ reuseFieldLens[i] = 0
+ reuseFieldTFs[i] = nil
+ }
+
+ s.processDocument(uint64(docNum), result,
+ reuseFieldLens, reuseFieldTFs)
+ }
+}
+
+func (s *interim) processDocument(docNum uint64,
+ result *index.AnalysisResult,
+ fieldLens []int, fieldTFs []analysis.TokenFrequencies) {
+ visitField := func(fieldID uint16, fieldName string,
+ ln int, tf analysis.TokenFrequencies) {
+ fieldLens[fieldID] += ln
+
+ existingFreqs := fieldTFs[fieldID]
+ if existingFreqs != nil {
+ existingFreqs.MergeAll(fieldName, tf)
+ } else {
+ fieldTFs[fieldID] = tf
+ }
+ }
+
+ // walk each composite field
+ for _, field := range result.Document.CompositeFields {
+ fieldID := uint16(s.getOrDefineField(field.Name()))
+ ln, tf := field.Analyze()
+ visitField(fieldID, field.Name(), ln, tf)
+ }
+
+ // walk each field
+ for i, field := range result.Document.Fields {
+ fieldID := uint16(s.getOrDefineField(field.Name()))
+ ln := result.Length[i]
+ tf := result.Analyzed[i]
+ visitField(fieldID, field.Name(), ln, tf)
+ }
+
+ // now that it's been rolled up into fieldTFs, walk that
+ for fieldID, tfs := range fieldTFs {
+ dict := s.Dicts[fieldID]
+ norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))
+
+ for term, tf := range tfs {
+ pid := dict[term] - 1
+ bs := s.Postings[pid]
+ bs.Add(uint32(docNum))
+
+ s.FreqNorms[pid] = append(s.FreqNorms[pid],
+ interimFreqNorm{
+ freq: uint64(tf.Frequency()),
+ norm: norm,
+ numLocs: len(tf.Locations),
+ })
+
+ if len(tf.Locations) > 0 {
+ locs := s.Locs[pid]
+
+ for _, loc := range tf.Locations {
+ var locf = uint16(fieldID)
+ if loc.Field != "" {
+ locf = uint16(s.getOrDefineField(loc.Field))
+ }
+ var arrayposs []uint64
+ if len(loc.ArrayPositions) > 0 {
+ arrayposs = loc.ArrayPositions
+ }
+ locs = append(locs, interimLoc{
+ fieldID: locf,
+ pos: uint64(loc.Position),
+ start: uint64(loc.Start),
+ end: uint64(loc.End),
+ arrayposs: arrayposs,
+ })
+ }
+
+ s.Locs[pid] = locs
+ }
+ }
+ }
+}
+
+func (s *interim) writeStoredFields() (
+ storedIndexOffset uint64, err error) {
+ varBuf := make([]byte, binary.MaxVarintLen64)
+ metaEncode := func(val uint64) (int, error) {
+ wb := binary.PutUvarint(varBuf, val)
+ return s.metaBuf.Write(varBuf[:wb])
+ }
+
+ data, compressed := s.tmp0[:0], s.tmp1[:0]
+ defer func() { s.tmp0, s.tmp1 = data, compressed }()
+
+ // keyed by docNum
+ docStoredOffsets := make([]uint64, len(s.results))
+
+ // keyed by fieldID, for the current doc in the loop
+ docStoredFields := map[uint16]interimStoredField{}
+
+ for docNum, result := range s.results {
+ for fieldID := range docStoredFields { // reset for next doc
+ delete(docStoredFields, fieldID)
+ }
+
+ for _, field := range result.Document.Fields {
+ fieldID := uint16(s.getOrDefineField(field.Name()))
+
+ opts := field.Options()
+
+ if opts.IsStored() {
+ isf := docStoredFields[fieldID]
+ isf.vals = append(isf.vals, field.Value())
+ isf.typs = append(isf.typs, encodeFieldType(field))
+ isf.arrayposs = append(isf.arrayposs, field.ArrayPositions())
+ docStoredFields[fieldID] = isf
+ }
+
+ if opts.IncludeDocValues() {
+ s.IncludeDocValues[fieldID] = true
+ }
+ }
+
+ var curr int
+
+ s.metaBuf.Reset()
+ data = data[:0]
+
+ // _id field special case optimizes ExternalID() lookups
+ idFieldVal := docStoredFields[uint16(0)].vals[0]
+ _, err = metaEncode(uint64(len(idFieldVal)))
+ if err != nil {
+ return 0, err
+ }
+
+ // handle non-"_id" fields
+ for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ {
+ isf, exists := docStoredFields[uint16(fieldID)]
+ if exists {
+ curr, data, err = persistStoredFieldValues(
+ fieldID, isf.vals, isf.typs, isf.arrayposs,
+ curr, metaEncode, data)
+ if err != nil {
+ return 0, err
+ }
+ }
+ }
+
+ metaBytes := s.metaBuf.Bytes()
+
+ compressed = snappy.Encode(compressed[:cap(compressed)], data)
+
+ docStoredOffsets[docNum] = uint64(s.w.Count())
+
+ _, err := writeUvarints(s.w,
+ uint64(len(metaBytes)),
+ uint64(len(idFieldVal)+len(compressed)))
+ if err != nil {
+ return 0, err
+ }
+
+ _, err = s.w.Write(metaBytes)
+ if err != nil {
+ return 0, err
+ }
+
+ _, err = s.w.Write(idFieldVal)
+ if err != nil {
+ return 0, err
+ }
+
+ _, err = s.w.Write(compressed)
+ if err != nil {
+ return 0, err
+ }
+ }
+
+ storedIndexOffset = uint64(s.w.Count())
+
+ for _, docStoredOffset := range docStoredOffsets {
+ err = binary.Write(s.w, binary.BigEndian, docStoredOffset)
+ if err != nil {
+ return 0, err
+ }
+ }
+
+ return storedIndexOffset, nil
+}
+
+func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) {
+ dictOffsets = make([]uint64, len(s.FieldsInv))
+
+ fdvOffsetsStart := make([]uint64, len(s.FieldsInv))
+ fdvOffsetsEnd := make([]uint64, len(s.FieldsInv))
+
+ buf := s.grabBuf(binary.MaxVarintLen64)
+
+ tfEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1))
+ locEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1))
+ fdvEncoder := newChunkedContentCoder(uint64(s.chunkFactor), uint64(len(s.results)-1), s.w, false)
+
+ var docTermMap [][]byte
+
+ if s.builder == nil {
+ s.builder, err = vellum.New(&s.builderBuf, nil)
+ if err != nil {
+ return 0, nil, err
+ }
+ }
+
+ for fieldID, terms := range s.DictKeys {
+ if cap(docTermMap) < len(s.results) {
+ docTermMap = make([][]byte, len(s.results))
+ } else {
+ docTermMap = docTermMap[0:len(s.results)]
+ for docNum := range docTermMap { // reset the docTermMap
+ docTermMap[docNum] = docTermMap[docNum][:0]
+ }
+ }
+
+ dict := s.Dicts[fieldID]
+
+ for _, term := range terms { // terms are already sorted
+ pid := dict[term] - 1
+
+ postingsBS := s.Postings[pid]
+
+ freqNorms := s.FreqNorms[pid]
+ freqNormOffset := 0
+
+ locs := s.Locs[pid]
+ locOffset := 0
+
+ postingsItr := postingsBS.Iterator()
+ for postingsItr.HasNext() {
+ docNum := uint64(postingsItr.Next())
+
+ freqNorm := freqNorms[freqNormOffset]
+
+ err = tfEncoder.Add(docNum,
+ encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0),
+ uint64(math.Float32bits(freqNorm.norm)))
+ if err != nil {
+ return 0, nil, err
+ }
+
+ if freqNorm.numLocs > 0 {
+ numBytesLocs := 0
+ for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
+ numBytesLocs += totalUvarintBytes(
+ uint64(loc.fieldID), loc.pos, loc.start, loc.end,
+ uint64(len(loc.arrayposs)), loc.arrayposs)
+ }
+
+ err = locEncoder.Add(docNum, uint64(numBytesLocs))
+ if err != nil {
+ return 0, nil, err
+ }
+
+ for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
+ err = locEncoder.Add(docNum,
+ uint64(loc.fieldID), loc.pos, loc.start, loc.end,
+ uint64(len(loc.arrayposs)))
+ if err != nil {
+ return 0, nil, err
+ }
+
+ err = locEncoder.Add(docNum, loc.arrayposs...)
+ if err != nil {
+ return 0, nil, err
+ }
+ }
+
+ locOffset += freqNorm.numLocs
+ }
+
+ freqNormOffset++
+
+ docTermMap[docNum] = append(
+ append(docTermMap[docNum], term...),
+ termSeparator)
+ }
+
+ tfEncoder.Close()
+ locEncoder.Close()
+
+ postingsOffset, err :=
+ writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ if postingsOffset > uint64(0) {
+ err = s.builder.Insert([]byte(term), postingsOffset)
+ if err != nil {
+ return 0, nil, err
+ }
+ }
+
+ tfEncoder.Reset()
+ locEncoder.Reset()
+ }
+
+ err = s.builder.Close()
+ if err != nil {
+ return 0, nil, err
+ }
+
+ // record where this dictionary starts
+ dictOffsets[fieldID] = uint64(s.w.Count())
+
+ vellumData := s.builderBuf.Bytes()
+
+ // write out the length of the vellum data
+ n := binary.PutUvarint(buf, uint64(len(vellumData)))
+ _, err = s.w.Write(buf[:n])
+ if err != nil {
+ return 0, nil, err
+ }
+
+ // write this vellum to disk
+ _, err = s.w.Write(vellumData)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ // reset vellum for reuse
+ s.builderBuf.Reset()
+
+ err = s.builder.Reset(&s.builderBuf)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ // write the field doc values
+ if s.IncludeDocValues[fieldID] {
+ for docNum, docTerms := range docTermMap {
+ if len(docTerms) > 0 {
+ err = fdvEncoder.Add(uint64(docNum), docTerms)
+ if err != nil {
+ return 0, nil, err
+ }
+ }
+ }
+ err = fdvEncoder.Close()
+ if err != nil {
+ return 0, nil, err
+ }
+
+ fdvOffsetsStart[fieldID] = uint64(s.w.Count())
+
+ _, err = fdvEncoder.Write()
+ if err != nil {
+ return 0, nil, err
+ }
+
+ fdvOffsetsEnd[fieldID] = uint64(s.w.Count())
+
+ fdvEncoder.Reset()
+ } else {
+ fdvOffsetsStart[fieldID] = fieldNotUninverted
+ fdvOffsetsEnd[fieldID] = fieldNotUninverted
+ }
+ }
+
+ fdvIndexOffset = uint64(s.w.Count())
+
+ for i := 0; i < len(fdvOffsetsStart); i++ {
+ n := binary.PutUvarint(buf, fdvOffsetsStart[i])
+ _, err := s.w.Write(buf[:n])
+ if err != nil {
+ return 0, nil, err
+ }
+ n = binary.PutUvarint(buf, fdvOffsetsEnd[i])
+ _, err = s.w.Write(buf[:n])
+ if err != nil {
+ return 0, nil, err
+ }
+ }
+
+ return fdvIndexOffset, dictOffsets, nil
+}
+
+func encodeFieldType(f document.Field) byte {
+ fieldType := byte('x')
+ switch f.(type) {
+ case *document.TextField:
+ fieldType = 't'
+ case *document.NumericField:
+ fieldType = 'n'
+ case *document.DateTimeField:
+ fieldType = 'd'
+ case *document.BooleanField:
+ fieldType = 'b'
+ case *document.GeoPointField:
+ fieldType = 'g'
+ case *document.CompositeField:
+ fieldType = 'c'
+ }
+ return fieldType
+}
+
+// returns the total # of bytes needed to encode the given uint64's
+// into binary.PutUVarint() encoding
+func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) {
+ n = numUvarintBytes(a)
+ n += numUvarintBytes(b)
+ n += numUvarintBytes(c)
+ n += numUvarintBytes(d)
+ n += numUvarintBytes(e)
+ for _, v := range more {
+ n += numUvarintBytes(v)
+ }
+ return n
+}
+
+// returns # of bytes needed to encode x in binary.PutUvarint() encoding
+func numUvarintBytes(x uint64) (n int) {
+ for x >= 0x80 {
+ x >>= 7
+ n++
+ }
+ return n + 1
+}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go
index d504885d05..26378c27e0 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go
@@ -18,71 +18,245 @@ import (
"bytes"
"encoding/binary"
"fmt"
+ "io"
"math"
+ "reflect"
"github.com/RoaringBitmap/roaring"
- "github.com/Smerity/govarint"
"github.com/blevesearch/bleve/index/scorch/segment"
+ "github.com/blevesearch/bleve/size"
)
-// PostingsList is an in-memory represenation of a postings list
+var reflectStaticSizePostingsList int
+var reflectStaticSizePostingsIterator int
+var reflectStaticSizePosting int
+var reflectStaticSizeLocation int
+
+func init() {
+ var pl PostingsList
+ reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size())
+ var pi PostingsIterator
+ reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size())
+ var p Posting
+ reflectStaticSizePosting = int(reflect.TypeOf(p).Size())
+ var l Location
+ reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
+}
+
+// FST or vellum value (uint64) encoding is determined by the top two
+// highest-order or most significant bits...
+//
+// encoding : MSB
+// name : 63 62 61...to...bit #0 (LSB)
+// ----------+---+---+---------------------------------------------------
+// general : 0 | 0 | 62-bits of postingsOffset.
+// ~ : 0 | 1 | reserved for future.
+// 1-hit : 1 | 0 | 31-bits of positive float31 norm | 31-bits docNum.
+// ~ : 1 | 1 | reserved for future.
+//
+// Encoding "general" is able to handle all cases, where the
+// postingsOffset points to more information about the postings for
+// the term.
+//
+// Encoding "1-hit" is used to optimize a commonly seen case when a
+// term has only a single hit. For example, a term in the _id field
+// will have only 1 hit. The "1-hit" encoding is used for a term
+// in a field when...
+//
+// - term vector info is disabled for that field;
+// - and, the term appears in only a single doc for that field;
+// - and, the term's freq is exactly 1 in that single doc for that field;
+// - and, the docNum must fit into 31-bits;
+//
+// Otherwise, the "general" encoding is used instead.
+//
+// In the "1-hit" encoding, the field in that single doc may have
+// other terms, which is supported in the "1-hit" encoding by the
+// positive float31 norm.
+
+const FSTValEncodingMask = uint64(0xc000000000000000)
+const FSTValEncodingGeneral = uint64(0x0000000000000000)
+const FSTValEncoding1Hit = uint64(0x8000000000000000)
+
+func FSTValEncode1Hit(docNum uint64, normBits uint64) uint64 {
+ return FSTValEncoding1Hit | ((mask31Bits & normBits) << 31) | (mask31Bits & docNum)
+}
+
+func FSTValDecode1Hit(v uint64) (docNum uint64, normBits uint64) {
+ return (mask31Bits & v), (mask31Bits & (v >> 31))
+}
+
+const mask31Bits = uint64(0x000000007fffffff)
+
+func under32Bits(x uint64) bool {
+ return x <= mask31Bits
+}
+
+const DocNum1HitFinished = math.MaxUint64
+
+var NormBits1Hit = uint64(math.Float32bits(float32(1)))
+
+// PostingsList is an in-memory representation of a postings list
type PostingsList struct {
sb *SegmentBase
postingsOffset uint64
freqOffset uint64
locOffset uint64
- locBitmap *roaring.Bitmap
postings *roaring.Bitmap
except *roaring.Bitmap
+
+ // when normBits1Hit != 0, then this postings list came from a
+ // 1-hit encoding, and only the docNum1Hit & normBits1Hit apply
+ docNum1Hit uint64
+ normBits1Hit uint64
+}
+
+// represents an immutable, empty postings list
+var emptyPostingsList = &PostingsList{}
+
+func (p *PostingsList) Size() int {
+ sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr
+
+ if p.except != nil {
+ sizeInBytes += int(p.except.GetSizeInBytes())
+ }
+
+ return sizeInBytes
+}
+
+func (p *PostingsList) OrInto(receiver *roaring.Bitmap) {
+ if p.normBits1Hit != 0 {
+ receiver.Add(uint32(p.docNum1Hit))
+ return
+ }
+
+ if p.postings != nil {
+ receiver.Or(p.postings)
+ }
}
// Iterator returns an iterator for this postings list
-func (p *PostingsList) Iterator() segment.PostingsIterator {
- return p.iterator(nil)
+func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool,
+ prealloc segment.PostingsIterator) segment.PostingsIterator {
+ if p.normBits1Hit == 0 && p.postings == nil {
+ return emptyPostingsIterator
+ }
+
+ var preallocPI *PostingsIterator
+ pi, ok := prealloc.(*PostingsIterator)
+ if ok && pi != nil {
+ preallocPI = pi
+ }
+ if preallocPI == emptyPostingsIterator {
+ preallocPI = nil
+ }
+
+ return p.iterator(includeFreq, includeNorm, includeLocs, preallocPI)
}
-func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator {
+func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
+ rv *PostingsIterator) *PostingsIterator {
if rv == nil {
rv = &PostingsIterator{}
} else {
+ freqNormReader := rv.freqNormReader
+ if freqNormReader != nil {
+ freqNormReader.Reset([]byte(nil))
+ }
+
+ locReader := rv.locReader
+ if locReader != nil {
+ locReader.Reset([]byte(nil))
+ }
+
+ freqChunkOffsets := rv.freqChunkOffsets[:0]
+ locChunkOffsets := rv.locChunkOffsets[:0]
+
+ nextLocs := rv.nextLocs[:0]
+ nextSegmentLocs := rv.nextSegmentLocs[:0]
+
+ buf := rv.buf
+
*rv = PostingsIterator{} // clear the struct
+
+ rv.freqNormReader = freqNormReader
+ rv.locReader = locReader
+
+ rv.freqChunkOffsets = freqChunkOffsets
+ rv.locChunkOffsets = locChunkOffsets
+
+ rv.nextLocs = nextLocs
+ rv.nextSegmentLocs = nextSegmentLocs
+
+ rv.buf = buf
}
+
rv.postings = p
+ rv.includeFreqNorm = includeFreq || includeNorm
+ rv.includeLocs = includeLocs
- if p.postings != nil {
- // prepare the freq chunk details
- var n uint64
- var read int
+ if p.normBits1Hit != 0 {
+ // "1-hit" encoding
+ rv.docNum1Hit = p.docNum1Hit
+ rv.normBits1Hit = p.normBits1Hit
+
+ if p.except != nil && p.except.Contains(uint32(rv.docNum1Hit)) {
+ rv.docNum1Hit = DocNum1HitFinished
+ }
+
+ return rv
+ }
+
+ // "general" encoding, check if empty
+ if p.postings == nil {
+ return rv
+ }
+
+ var n uint64
+ var read int
+
+ // prepare the freq chunk details
+ if rv.includeFreqNorm {
var numFreqChunks uint64
numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
n += uint64(read)
- rv.freqChunkLens = make([]uint64, int(numFreqChunks))
+ if cap(rv.freqChunkOffsets) >= int(numFreqChunks) {
+ rv.freqChunkOffsets = rv.freqChunkOffsets[:int(numFreqChunks)]
+ } else {
+ rv.freqChunkOffsets = make([]uint64, int(numFreqChunks))
+ }
for i := 0; i < int(numFreqChunks); i++ {
- rv.freqChunkLens[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
+ rv.freqChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
n += uint64(read)
}
rv.freqChunkStart = p.freqOffset + n
+ }
- // prepare the loc chunk details
+ // prepare the loc chunk details
+ if rv.includeLocs {
n = 0
var numLocChunks uint64
numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
n += uint64(read)
- rv.locChunkLens = make([]uint64, int(numLocChunks))
+ if cap(rv.locChunkOffsets) >= int(numLocChunks) {
+ rv.locChunkOffsets = rv.locChunkOffsets[:int(numLocChunks)]
+ } else {
+ rv.locChunkOffsets = make([]uint64, int(numLocChunks))
+ }
for i := 0; i < int(numLocChunks); i++ {
- rv.locChunkLens[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
+ rv.locChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
n += uint64(read)
}
rv.locChunkStart = p.locOffset + n
- rv.locBitmap = p.locBitmap
+ }
- rv.all = p.postings.Iterator()
- if p.except != nil {
- allExcept := roaring.AndNot(p.postings, p.except)
- rv.actual = allExcept.Iterator()
- } else {
- rv.actual = p.postings.Iterator()
- }
+ rv.all = p.postings.Iterator()
+ if p.except != nil {
+ rv.ActualBM = roaring.AndNot(p.postings, p.except)
+ rv.Actual = rv.ActualBM.Iterator()
+ } else {
+ rv.ActualBM = p.postings
+ rv.Actual = rv.all // Optimize to use same iterator for all & Actual.
}
return rv
@@ -90,23 +264,30 @@ func (p *PostingsList) iterator(rv *PostingsIterator) *PostingsIterator {
// Count returns the number of items on this postings list
func (p *PostingsList) Count() uint64 {
- if p.postings != nil {
- n := p.postings.GetCardinality()
- if p.except != nil {
- e := p.except.GetCardinality()
- if e > n {
- e = n
- }
- return n - e
- }
- return n
+ var n uint64
+ if p.normBits1Hit != 0 {
+ n = 1
+ } else if p.postings != nil {
+ n = p.postings.GetCardinality()
+ }
+ var e uint64
+ if p.except != nil {
+ e = p.except.GetCardinality()
}
- return 0
+ if n <= e {
+ return 0
+ }
+ return n - e
}
func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
rv.postingsOffset = postingsOffset
+ // handle "1-hit" encoding special case
+ if rv.postingsOffset&FSTValEncodingMask == FSTValEncoding1Hit {
+ return rv.init1Hit(postingsOffset)
+ }
+
// read the location of the freq/norm details
var n uint64
var read int
@@ -117,29 +298,16 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
rv.locOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
n += uint64(read)
- var locBitmapOffset uint64
- locBitmapOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
- n += uint64(read)
-
- var locBitmapLen uint64
- locBitmapLen, read = binary.Uvarint(d.sb.mem[locBitmapOffset : locBitmapOffset+binary.MaxVarintLen64])
-
- locRoaringBytes := d.sb.mem[locBitmapOffset+uint64(read) : locBitmapOffset+uint64(read)+locBitmapLen]
-
- rv.locBitmap = roaring.NewBitmap()
- _, err := rv.locBitmap.FromBuffer(locRoaringBytes)
- if err != nil {
- return fmt.Errorf("error loading roaring bitmap of locations with hits: %v", err)
- }
-
var postingsLen uint64
postingsLen, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
n += uint64(read)
roaringBytes := d.sb.mem[postingsOffset+n : postingsOffset+n+postingsLen]
- rv.postings = roaring.NewBitmap()
- _, err = rv.postings.FromBuffer(roaringBytes)
+ if rv.postings == nil {
+ rv.postings = roaring.NewBitmap()
+ }
+ _, err := rv.postings.FromBuffer(roaringBytes)
if err != nil {
return fmt.Errorf("error loading roaring bitmap: %v", err)
}
@@ -147,65 +315,137 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
return nil
}
+func (rv *PostingsList) init1Hit(fstVal uint64) error {
+ docNum, normBits := FSTValDecode1Hit(fstVal)
+
+ rv.docNum1Hit = docNum
+ rv.normBits1Hit = normBits
+
+ return nil
+}
+
// PostingsIterator provides a way to iterate through the postings list
type PostingsIterator struct {
- postings *PostingsList
- all roaring.IntIterable
- offset int
- locoffset int
- actual roaring.IntIterable
+ postings *PostingsList
+ all roaring.IntIterable
+ Actual roaring.IntIterable
+ ActualBM *roaring.Bitmap
currChunk uint32
currChunkFreqNorm []byte
currChunkLoc []byte
- freqNormDecoder *govarint.Base128Decoder
- locDecoder *govarint.Base128Decoder
- freqChunkLens []uint64
- freqChunkStart uint64
+ freqNormReader *bytes.Reader
+ locReader *bytes.Reader
+
+ freqChunkOffsets []uint64
+ freqChunkStart uint64
+
+ locChunkOffsets []uint64
+ locChunkStart uint64
- locChunkLens []uint64
- locChunkStart uint64
+ next Posting // reused across Next() calls
+ nextLocs []Location // reused across Next() calls
+ nextSegmentLocs []segment.Location // reused across Next() calls
- locBitmap *roaring.Bitmap
+ docNum1Hit uint64
+ normBits1Hit uint64
- next Posting
+ buf []byte
+
+ includeFreqNorm bool
+ includeLocs bool
}
-func (i *PostingsIterator) loadChunk(chunk int) error {
- if chunk >= len(i.freqChunkLens) || chunk >= len(i.locChunkLens) {
- return fmt.Errorf("tried to load chunk that doesn't exist %d/(%d %d)", chunk, len(i.freqChunkLens), len(i.locChunkLens))
+var emptyPostingsIterator = &PostingsIterator{}
+
+func (i *PostingsIterator) Size() int {
+ sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr +
+ len(i.currChunkFreqNorm) +
+ len(i.currChunkLoc) +
+ len(i.freqChunkOffsets)*size.SizeOfUint64 +
+ len(i.locChunkOffsets)*size.SizeOfUint64 +
+ i.next.Size()
+
+ for _, entry := range i.nextLocs {
+ sizeInBytes += entry.Size()
}
- // load correct chunk bytes
- start := i.freqChunkStart
- for j := 0; j < chunk; j++ {
- start += i.freqChunkLens[j]
+
+ return sizeInBytes
+}
+
+func (i *PostingsIterator) loadChunk(chunk int) error {
+ if i.includeFreqNorm {
+ if chunk >= len(i.freqChunkOffsets) {
+ return fmt.Errorf("tried to load freq chunk that doesn't exist %d/(%d)",
+ chunk, len(i.freqChunkOffsets))
+ }
+
+ end, start := i.freqChunkStart, i.freqChunkStart
+ s, e := readChunkBoundary(chunk, i.freqChunkOffsets)
+ start += s
+ end += e
+ i.currChunkFreqNorm = i.postings.sb.mem[start:end]
+ if i.freqNormReader == nil {
+ i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm)
+ } else {
+ i.freqNormReader.Reset(i.currChunkFreqNorm)
+ }
}
- end := start + i.freqChunkLens[chunk]
- i.currChunkFreqNorm = i.postings.sb.mem[start:end]
- i.freqNormDecoder = govarint.NewU64Base128Decoder(bytes.NewReader(i.currChunkFreqNorm))
- start = i.locChunkStart
- for j := 0; j < chunk; j++ {
- start += i.locChunkLens[j]
+ if i.includeLocs {
+ if chunk >= len(i.locChunkOffsets) {
+ return fmt.Errorf("tried to load loc chunk that doesn't exist %d/(%d)",
+ chunk, len(i.locChunkOffsets))
+ }
+
+ end, start := i.locChunkStart, i.locChunkStart
+ s, e := readChunkBoundary(chunk, i.locChunkOffsets)
+ start += s
+ end += e
+ i.currChunkLoc = i.postings.sb.mem[start:end]
+ if i.locReader == nil {
+ i.locReader = bytes.NewReader(i.currChunkLoc)
+ } else {
+ i.locReader.Reset(i.currChunkLoc)
+ }
}
- end = start + i.locChunkLens[chunk]
- i.currChunkLoc = i.postings.sb.mem[start:end]
- i.locDecoder = govarint.NewU64Base128Decoder(bytes.NewReader(i.currChunkLoc))
+
i.currChunk = uint32(chunk)
return nil
}
-func (i *PostingsIterator) readFreqNorm() (uint64, uint64, error) {
- freq, err := i.freqNormDecoder.GetU64()
+func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) {
+ if i.normBits1Hit != 0 {
+ return 1, i.normBits1Hit, false, nil
+ }
+
+ freqHasLocs, err := binary.ReadUvarint(i.freqNormReader)
if err != nil {
- return 0, 0, fmt.Errorf("error reading frequency: %v", err)
+ return 0, 0, false, fmt.Errorf("error reading frequency: %v", err)
}
- normBits, err := i.freqNormDecoder.GetU64()
+ freq, hasLocs := decodeFreqHasLocs(freqHasLocs)
+
+ normBits, err := binary.ReadUvarint(i.freqNormReader)
if err != nil {
- return 0, 0, fmt.Errorf("error reading norm: %v", err)
+ return 0, 0, false, fmt.Errorf("error reading norm: %v", err)
}
- return freq, normBits, err
+
+ return freq, normBits, hasLocs, err
+}
+
+func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 {
+ rv := freq << 1
+ if hasLocs {
+ rv = rv | 0x01 // 0'th LSB encodes whether there are locations
+ }
+ return rv
+}
+
+func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) {
+ freq := freqHasLocs >> 1
+ hasLocs := freqHasLocs&0x01 != 0
+ return freq, hasLocs
}
// readLocation processes all the integers on the stream representing a single
@@ -214,27 +454,27 @@ func (i *PostingsIterator) readFreqNorm() (uint64, uint64, error) {
// the contents.
func (i *PostingsIterator) readLocation(l *Location) error {
// read off field
- fieldID, err := i.locDecoder.GetU64()
+ fieldID, err := binary.ReadUvarint(i.locReader)
if err != nil {
return fmt.Errorf("error reading location field: %v", err)
}
// read off pos
- pos, err := i.locDecoder.GetU64()
+ pos, err := binary.ReadUvarint(i.locReader)
if err != nil {
return fmt.Errorf("error reading location pos: %v", err)
}
// read off start
- start, err := i.locDecoder.GetU64()
+ start, err := binary.ReadUvarint(i.locReader)
if err != nil {
return fmt.Errorf("error reading location start: %v", err)
}
// read off end
- end, err := i.locDecoder.GetU64()
+ end, err := binary.ReadUvarint(i.locReader)
if err != nil {
return fmt.Errorf("error reading location end: %v", err)
}
// read off num array pos
- numArrayPos, err := i.locDecoder.GetU64()
+ numArrayPos, err := binary.ReadUvarint(i.locReader)
if err != nil {
return fmt.Errorf("error reading location num array pos: %v", err)
}
@@ -245,14 +485,16 @@ func (i *PostingsIterator) readLocation(l *Location) error {
l.pos = pos
l.start = start
l.end = end
- if numArrayPos > 0 {
+ if cap(l.ap) < int(numArrayPos) {
l.ap = make([]uint64, int(numArrayPos))
+ } else {
+ l.ap = l.ap[:int(numArrayPos)]
}
}
// read off array positions
for k := 0; k < int(numArrayPos); k++ {
- ap, err := i.locDecoder.GetU64()
+ ap, err := binary.ReadUvarint(i.locReader)
if err != nil {
return fmt.Errorf("error reading array position: %v", err)
}
@@ -266,97 +508,332 @@ func (i *PostingsIterator) readLocation(l *Location) error {
// Next returns the next posting on the postings list, or nil at the end
func (i *PostingsIterator) Next() (segment.Posting, error) {
- if i.actual == nil || !i.actual.HasNext() {
- return nil, nil
+ return i.nextAtOrAfter(0)
+}
+
+// Advance returns the posting at the specified docNum, or if it is not
+// present, the next posting; if the end is reached, nil
+func (i *PostingsIterator) Advance(docNum uint64) (segment.Posting, error) {
+ return i.nextAtOrAfter(docNum)
+}
+
+// nextAtOrAfter returns the next posting at or after atOrAfter, or nil at the end
+func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, error) {
+ docNum, exists, err := i.nextDocNumAtOrAfter(atOrAfter)
+ if err != nil || !exists {
+ return nil, err
}
- n := i.actual.Next()
- nChunk := n / i.postings.sb.chunkFactor
- allN := i.all.Next()
- allNChunk := allN / i.postings.sb.chunkFactor
- // n is the next actual hit (excluding some postings)
- // allN is the next hit in the full postings
- // if they don't match, adjust offsets to factor in item we're skipping over
- // incr the all iterator, and check again
- for allN != n {
+ i.next = Posting{} // clear the struct
+ rv := &i.next
+ rv.docNum = docNum
+
+ if !i.includeFreqNorm {
+ return rv, nil
+ }
+
+ var normBits uint64
+ var hasLocs bool
+
+ rv.freq, normBits, hasLocs, err = i.readFreqNormHasLocs()
+ if err != nil {
+ return nil, err
+ }
+
+ rv.norm = math.Float32frombits(uint32(normBits))
- // in different chunks, reset offsets
- if allNChunk != nChunk {
- i.locoffset = 0
- i.offset = 0
+ if i.includeLocs && hasLocs {
+ // prepare locations into reused slices, where we assume
+ // rv.freq >= "number of locs", since in a composite field,
+ // some component fields might have their IncludeTermVector
+ // flags disabled while other component fields are enabled
+ if cap(i.nextLocs) >= int(rv.freq) {
+ i.nextLocs = i.nextLocs[0:rv.freq]
} else {
+ i.nextLocs = make([]Location, rv.freq, rv.freq*2)
+ }
+ if cap(i.nextSegmentLocs) < int(rv.freq) {
+ i.nextSegmentLocs = make([]segment.Location, rv.freq, rv.freq*2)
+ }
+ rv.locs = i.nextSegmentLocs[:0]
- if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
- err := i.loadChunk(int(nChunk))
- if err != nil {
- return nil, fmt.Errorf("error loading chunk: %v", err)
- }
- }
+ numLocsBytes, err := binary.ReadUvarint(i.locReader)
+ if err != nil {
+ return nil, fmt.Errorf("error reading location numLocsBytes: %v", err)
+ }
- // read off freq/offsets even though we don't care about them
- freq, _, err := i.readFreqNorm()
+ j := 0
+ startBytesRemaining := i.locReader.Len() // # bytes remaining in the locReader
+ for startBytesRemaining-i.locReader.Len() < int(numLocsBytes) {
+ err := i.readLocation(&i.nextLocs[j])
if err != nil {
return nil, err
}
- if i.locBitmap.Contains(allN) {
- for j := 0; j < int(freq); j++ {
- err := i.readLocation(nil)
- if err != nil {
- return nil, err
- }
- }
- }
+ rv.locs = append(rv.locs, &i.nextLocs[j])
+ j++
+ }
+ }
+
+ return rv, nil
+}
+
+var freqHasLocs1Hit = encodeFreqHasLocs(1, false)
+
+// nextBytes returns the docNum and the encoded freq & loc bytes for
+// the next posting
+func (i *PostingsIterator) nextBytes() (
+ docNumOut uint64, freq uint64, normBits uint64,
+ bytesFreqNorm []byte, bytesLoc []byte, err error) {
+ docNum, exists, err := i.nextDocNumAtOrAfter(0)
+ if err != nil || !exists {
+ return 0, 0, 0, nil, nil, err
+ }
+
+ if i.normBits1Hit != 0 {
+ if i.buf == nil {
+ i.buf = make([]byte, binary.MaxVarintLen64*2)
+ }
+ n := binary.PutUvarint(i.buf, freqHasLocs1Hit)
+ n += binary.PutUvarint(i.buf[n:], i.normBits1Hit)
+ return docNum, uint64(1), i.normBits1Hit, i.buf[:n], nil, nil
+ }
+
+ startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
+
+ var hasLocs bool
+
+ freq, normBits, hasLocs, err = i.readFreqNormHasLocs()
+ if err != nil {
+ return 0, 0, 0, nil, nil, err
+ }
+
+ endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
+ bytesFreqNorm = i.currChunkFreqNorm[startFreqNorm:endFreqNorm]
+
+ if hasLocs {
+ startLoc := len(i.currChunkLoc) - i.locReader.Len()
+
+ numLocsBytes, err := binary.ReadUvarint(i.locReader)
+ if err != nil {
+ return 0, 0, 0, nil, nil,
+ fmt.Errorf("error reading location nextBytes numLocs: %v", err)
+ }
+
+ // skip over all the location bytes
+ _, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent)
+ if err != nil {
+ return 0, 0, 0, nil, nil, err
+ }
+
+ endLoc := len(i.currChunkLoc) - i.locReader.Len()
+ bytesLoc = i.currChunkLoc[startLoc:endLoc]
+ }
+
+ return docNum, freq, normBits, bytesFreqNorm, bytesLoc, nil
+}
+
+// nextDocNumAtOrAfter returns the next docNum on the postings list, and also
+// sets up the currChunk / loc related fields of the iterator.
+func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, error) {
+ if i.normBits1Hit != 0 {
+ if i.docNum1Hit == DocNum1HitFinished {
+ return 0, false, nil
+ }
+ if i.docNum1Hit < atOrAfter {
+ // advanced past our 1-hit
+ i.docNum1Hit = DocNum1HitFinished // consume our 1-hit docNum
+ return 0, false, nil
+ }
+ docNum := i.docNum1Hit
+ i.docNum1Hit = DocNum1HitFinished // consume our 1-hit docNum
+ return docNum, true, nil
+ }
+
+ if i.Actual == nil || !i.Actual.HasNext() {
+ return 0, false, nil
+ }
+
+ if i.postings == nil || i.postings.postings == i.ActualBM {
+ return i.nextDocNumAtOrAfterClean(atOrAfter)
+ }
+
+ n := i.Actual.Next()
+ for uint64(n) < atOrAfter && i.Actual.HasNext() {
+ n = i.Actual.Next()
+ }
+ if uint64(n) < atOrAfter {
+ // couldn't find anything
+ return 0, false, nil
+ }
+ allN := i.all.Next()
+
+ nChunk := n / i.postings.sb.chunkFactor
- // in same chunk, need to account for offsets
- i.offset++
+ // when allN becomes >= to here, then allN is in the same chunk as nChunk.
+ allNReachesNChunk := nChunk * i.postings.sb.chunkFactor
+
+ // n is the next actual hit (excluding some postings), and
+ // allN is the next hit in the full postings, and
+ // if they don't match, move 'all' forwards until they do
+ for allN != n {
+ // we've reached same chunk, so move the freq/norm/loc decoders forward
+ if i.includeFreqNorm && allN >= allNReachesNChunk {
+ err := i.currChunkNext(nChunk)
+ if err != nil {
+ return 0, false, err
+ }
}
allN = i.all.Next()
}
+ if i.includeFreqNorm && (i.currChunk != nChunk || i.currChunkFreqNorm == nil) {
+ err := i.loadChunk(int(nChunk))
+ if err != nil {
+ return 0, false, fmt.Errorf("error loading chunk: %v", err)
+ }
+ }
+
+ return uint64(n), true, nil
+}
+
+// nextDocNumAtOrAfterClean is an optimization for when the postings list is
+// "clean" (e.g., no updates &
+func (i *PostingsIterator) nextDocNumAtOrAfterClean(
+ atOrAfter uint64) (uint64, bool, error) {
+ n := i.Actual.Next()
+
+ if !i.includeFreqNorm {
+ for uint64(n) < atOrAfter && i.Actual.HasNext() {
+ n = i.Actual.Next()
+ }
+
+ if uint64(n) < atOrAfter {
+ return 0, false, nil // couldn't find anything
+ }
+
+ return uint64(n), true, nil
+ }
+
+ // freq-norm's needed, so maintain freq-norm chunk reader
+ sameChunkNexts := 0 // # of times we called Next() in the same chunk
+
+ nChunk := n / i.postings.sb.chunkFactor
+
+ for uint64(n) < atOrAfter && i.Actual.HasNext() {
+ n = i.Actual.Next()
+
+ nChunkPrev := nChunk
+ nChunk = n / i.postings.sb.chunkFactor
+
+ if nChunk != nChunkPrev {
+ sameChunkNexts = 0
+ } else {
+ sameChunkNexts += 1
+ }
+ }
+
+ if uint64(n) < atOrAfter {
+ // couldn't find anything
+ return 0, false, nil
+ }
+
+ for j := 0; j < sameChunkNexts; j++ {
+ err := i.currChunkNext(nChunk)
+ if err != nil {
+ return 0, false, fmt.Errorf("error optimized currChunkNext: %v", err)
+ }
+ }
+
if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
err := i.loadChunk(int(nChunk))
if err != nil {
- return nil, fmt.Errorf("error loading chunk: %v", err)
+ return 0, false, fmt.Errorf("error loading chunk: %v", err)
}
}
- i.next = Posting{} // clear the struct.
- rv := &i.next
- rv.iterator = i
- rv.docNum = uint64(n)
+ return uint64(n), true, nil
+}
- var err error
- var normBits uint64
- rv.freq, normBits, err = i.readFreqNorm()
+func (i *PostingsIterator) currChunkNext(nChunk uint32) error {
+ if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
+ err := i.loadChunk(int(nChunk))
+ if err != nil {
+ return fmt.Errorf("error loading chunk: %v", err)
+ }
+ }
+
+ // read off freq/offsets even though we don't care about them
+ _, _, hasLocs, err := i.readFreqNormHasLocs()
if err != nil {
- return nil, err
+ return err
}
- rv.norm = math.Float32frombits(uint32(normBits))
- if i.locBitmap.Contains(n) {
- // read off 'freq' locations
- rv.locs = make([]segment.Location, rv.freq)
- locs := make([]Location, rv.freq)
- for j := 0; j < int(rv.freq); j++ {
- err := i.readLocation(&locs[j])
- if err != nil {
- return nil, err
- }
- rv.locs[j] = &locs[j]
+
+ if i.includeLocs && hasLocs {
+ numLocsBytes, err := binary.ReadUvarint(i.locReader)
+ if err != nil {
+ return fmt.Errorf("error reading location numLocsBytes: %v", err)
+ }
+
+ // skip over all the location bytes
+ _, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent)
+ if err != nil {
+ return err
}
}
- return rv, nil
+ return nil
+}
+
+// DocNum1Hit returns the docNum and true if this is "1-hit" optimized
+// and the docNum is available.
+func (p *PostingsIterator) DocNum1Hit() (uint64, bool) {
+ if p.normBits1Hit != 0 && p.docNum1Hit != DocNum1HitFinished {
+ return p.docNum1Hit, true
+ }
+ return 0, false
+}
+
+// PostingsIteratorFromBitmap constructs a PostingsIterator given an
+// "actual" bitmap.
+func PostingsIteratorFromBitmap(bm *roaring.Bitmap,
+ includeFreqNorm, includeLocs bool) (*PostingsIterator, error) {
+ return &PostingsIterator{
+ ActualBM: bm,
+ Actual: bm.Iterator(),
+ includeFreqNorm: includeFreqNorm,
+ includeLocs: includeLocs,
+ }, nil
+}
+
+// PostingsIteratorFrom1Hit constructs a PostingsIterator given a
+// 1-hit docNum.
+func PostingsIteratorFrom1Hit(docNum1Hit, normBits1Hit uint64,
+ includeFreqNorm, includeLocs bool) (*PostingsIterator, error) {
+ return &PostingsIterator{
+ docNum1Hit: docNum1Hit,
+ normBits1Hit: normBits1Hit,
+ includeFreqNorm: includeFreqNorm,
+ includeLocs: includeLocs,
+ }, nil
}
// Posting is a single entry in a postings list
type Posting struct {
- iterator *PostingsIterator
- docNum uint64
+ docNum uint64
+ freq uint64
+ norm float32
+ locs []segment.Location
+}
- freq uint64
- norm float32
- locs []segment.Location
+func (p *Posting) Size() int {
+ sizeInBytes := reflectStaticSizePosting
+
+ for _, entry := range p.locs {
+ sizeInBytes += entry.Size()
+ }
+
+ return sizeInBytes
}
// Number returns the document number of this posting in this segment
@@ -364,7 +841,7 @@ func (p *Posting) Number() uint64 {
return p.docNum
}
-// Frequency returns the frequence of occurance of this term in this doc/field
+// Frequency returns the frequency of occurrence of this term in this doc/field
func (p *Posting) Frequency() uint64 {
return p.freq
}
@@ -374,12 +851,12 @@ func (p *Posting) Norm() float64 {
return float64(p.norm)
}
-// Locations returns the location information for each occurance
+// Locations returns the location information for each occurrence
func (p *Posting) Locations() []segment.Location {
return p.locs
}
-// Location represents the location of a single occurance
+// Location represents the location of a single occurrence
type Location struct {
field string
pos uint64
@@ -388,28 +865,34 @@ type Location struct {
ap []uint64
}
+func (l *Location) Size() int {
+ return reflectStaticSizeLocation +
+ len(l.field) +
+ len(l.ap)*size.SizeOfUint64
+}
+
// Field returns the name of the field (useful in composite fields to know
// which original field the value came from)
func (l *Location) Field() string {
return l.field
}
-// Start returns the start byte offset of this occurance
+// Start returns the start byte offset of this occurrence
func (l *Location) Start() uint64 {
return l.start
}
-// End returns the end byte offset of this occurance
+// End returns the end byte offset of this occurrence
func (l *Location) End() uint64 {
return l.end
}
-// Pos returns the 1-based phrase position of this occurance
+// Pos returns the 1-based phrase position of this occurrence
func (l *Location) Pos() uint64 {
return l.pos
}
-// ArrayPositions returns the array position vector associated with this occurance
+// ArrayPositions returns the array position vector associated with this occurrence
func (l *Location) ArrayPositions() []uint64 {
return l.ap
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go
index 40c0af2741..7ba28c2366 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go
@@ -20,16 +20,24 @@ import (
"fmt"
"io"
"os"
+ "reflect"
"sync"
"github.com/RoaringBitmap/roaring"
- "github.com/Smerity/govarint"
"github.com/blevesearch/bleve/index/scorch/segment"
+ "github.com/blevesearch/bleve/size"
"github.com/couchbase/vellum"
mmap "github.com/edsrzf/mmap-go"
"github.com/golang/snappy"
)
+var reflectStaticSizeSegmentBase int
+
+func init() {
+ var sb SegmentBase
+ reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size())
+}
+
// Open returns a zap impl of a segment
func Open(path string) (segment.Segment, error) {
f, err := os.Open(path)
@@ -47,13 +55,14 @@ func Open(path string) (segment.Segment, error) {
SegmentBase: SegmentBase{
mem: mm[0 : len(mm)-FooterSize],
fieldsMap: make(map[string]uint16),
- fieldDvIterMap: make(map[uint16]*docValueIterator),
+ fieldDvReaders: make(map[uint16]*docValueReader),
},
f: f,
mm: mm,
path: path,
refs: 1,
}
+ rv.SegmentBase.updateSize()
err = rv.loadConfig()
if err != nil {
@@ -67,7 +76,7 @@ func Open(path string) (segment.Segment, error) {
return nil, err
}
- err = rv.loadDvIterators()
+ err = rv.loadDvReaders()
if err != nil {
_ = rv.Close()
return nil, err
@@ -89,7 +98,39 @@ type SegmentBase struct {
fieldsIndexOffset uint64
docValueOffset uint64
dictLocs []uint64
- fieldDvIterMap map[uint16]*docValueIterator // naive chunk cache per field
+ fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field
+ fieldDvNames []string // field names cached in fieldDvReaders
+ size uint64
+}
+
+func (sb *SegmentBase) Size() int {
+ return int(sb.size)
+}
+
+func (sb *SegmentBase) updateSize() {
+ sizeInBytes := reflectStaticSizeSegmentBase +
+ cap(sb.mem)
+
+ // fieldsMap
+ for k, _ := range sb.fieldsMap {
+ sizeInBytes += (len(k) + size.SizeOfString) + size.SizeOfUint16
+ }
+
+ // fieldsInv, dictLocs
+ for _, entry := range sb.fieldsInv {
+ sizeInBytes += len(entry) + size.SizeOfString
+ }
+ sizeInBytes += len(sb.dictLocs) * size.SizeOfUint64
+
+ // fieldDvReaders
+ for _, v := range sb.fieldDvReaders {
+ sizeInBytes += size.SizeOfUint16 + size.SizeOfPtr
+ if v != nil {
+ sizeInBytes += v.size()
+ }
+ }
+
+ sb.size = uint64(sizeInBytes)
}
func (sb *SegmentBase) AddRef() {}
@@ -111,56 +152,19 @@ type Segment struct {
refs int64
}
-func (s *Segment) SizeInBytes() uint64 {
+func (s *Segment) Size() int {
// 8 /* size of file pointer */
// 4 /* size of version -> uint32 */
// 4 /* size of crc -> uint32 */
sizeOfUints := 16
- sizeInBytes := (len(s.path) + int(segment.SizeOfString)) + sizeOfUints
+ sizeInBytes := (len(s.path) + size.SizeOfString) + sizeOfUints
// mutex, refs -> int64
sizeInBytes += 16
// do not include the mmap'ed part
- return uint64(sizeInBytes) + s.SegmentBase.SizeInBytes() - uint64(len(s.mem))
-}
-
-func (s *SegmentBase) SizeInBytes() uint64 {
- // 4 /* size of memCRC -> uint32 */
- // 4 /* size of chunkFactor -> uint32 */
- // 8 /* size of numDocs -> uint64 */
- // 8 /* size of storedIndexOffset -> uint64 */
- // 8 /* size of fieldsIndexOffset -> uint64 */
- // 8 /* size of docValueOffset -> uint64 */
- sizeInBytes := 40
-
- sizeInBytes += len(s.mem) + int(segment.SizeOfSlice)
-
- // fieldsMap
- for k, _ := range s.fieldsMap {
- sizeInBytes += (len(k) + int(segment.SizeOfString)) + 2 /* size of uint16 */
- }
- sizeInBytes += int(segment.SizeOfMap) /* overhead from map */
-
- // fieldsInv, dictLocs
- for _, entry := range s.fieldsInv {
- sizeInBytes += (len(entry) + int(segment.SizeOfString))
- }
- sizeInBytes += len(s.dictLocs) * 8 /* size of uint64 */
- sizeInBytes += int(segment.SizeOfSlice) * 3 /* overhead from slices */
-
- // fieldDvIterMap
- sizeInBytes += len(s.fieldDvIterMap) *
- int(segment.SizeOfPointer+2 /* size of uint16 */)
- for _, entry := range s.fieldDvIterMap {
- if entry != nil {
- sizeInBytes += int(entry.sizeInBytes())
- }
- }
- sizeInBytes += int(segment.SizeOfMap)
-
- return uint64(sizeInBytes)
+ return sizeInBytes + s.SegmentBase.Size() - cap(s.mem)
}
func (s *Segment) AddRef() {
@@ -185,7 +189,7 @@ func (s *Segment) loadConfig() error {
verOffset := crcOffset - 4
s.version = binary.BigEndian.Uint32(s.mm[verOffset : verOffset+4])
- if s.version != version {
+ if s.version != Version {
return fmt.Errorf("unsupported version %d", s.version)
}
@@ -207,7 +211,7 @@ func (s *Segment) loadConfig() error {
}
func (s *SegmentBase) loadFields() error {
- // NOTE for now we assume the fields index immediately preceeds
+ // NOTE for now we assume the fields index immediately precedes
// the footer, and if this changes, need to adjust accordingly (or
// store explicit length), where s.mem was sliced from s.mm in Open().
fieldsIndexEnd := uint64(len(s.mem))
@@ -262,6 +266,10 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) {
if err != nil {
return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err)
}
+ rv.fstReader, err = rv.fst.Reader()
+ if err != nil {
+ return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err)
+ }
}
}
}
@@ -269,50 +277,90 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) {
return rv, nil
}
+// visitDocumentCtx holds data structures that are reusable across
+// multiple VisitDocument() calls to avoid memory allocations
+type visitDocumentCtx struct {
+ buf []byte
+ reader bytes.Reader
+ arrayPos []uint64
+}
+
+var visitDocumentCtxPool = sync.Pool{
+ New: func() interface{} {
+ reuse := &visitDocumentCtx{}
+ return reuse
+ },
+}
+
// VisitDocument invokes the DocFieldValueVistor for each stored field
// for the specified doc number
func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error {
+ vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx)
+ defer visitDocumentCtxPool.Put(vdc)
+ return s.visitDocument(vdc, num, visitor)
+}
+
+func (s *SegmentBase) visitDocument(vdc *visitDocumentCtx, num uint64,
+ visitor segment.DocumentFieldValueVisitor) error {
// first make sure this is a valid number in this segment
if num < s.numDocs {
meta, compressed := s.getDocStoredMetaAndCompressed(num)
- uncompressed, err := snappy.Decode(nil, compressed)
+
+ vdc.reader.Reset(meta)
+
+ // handle _id field special case
+ idFieldValLen, err := binary.ReadUvarint(&vdc.reader)
+ if err != nil {
+ return err
+ }
+ idFieldVal := compressed[:idFieldValLen]
+
+ keepGoing := visitor("_id", byte('t'), idFieldVal, nil)
+ if !keepGoing {
+ visitDocumentCtxPool.Put(vdc)
+ return nil
+ }
+
+ // handle non-"_id" fields
+ compressed = compressed[idFieldValLen:]
+
+ uncompressed, err := snappy.Decode(vdc.buf[:cap(vdc.buf)], compressed)
if err != nil {
return err
}
- // now decode meta and process
- reader := bytes.NewReader(meta)
- decoder := govarint.NewU64Base128Decoder(reader)
- keepGoing := true
for keepGoing {
- field, err := decoder.GetU64()
+ field, err := binary.ReadUvarint(&vdc.reader)
if err == io.EOF {
break
}
if err != nil {
return err
}
- typ, err := decoder.GetU64()
+ typ, err := binary.ReadUvarint(&vdc.reader)
if err != nil {
return err
}
- offset, err := decoder.GetU64()
+ offset, err := binary.ReadUvarint(&vdc.reader)
if err != nil {
return err
}
- l, err := decoder.GetU64()
+ l, err := binary.ReadUvarint(&vdc.reader)
if err != nil {
return err
}
- numap, err := decoder.GetU64()
+ numap, err := binary.ReadUvarint(&vdc.reader)
if err != nil {
return err
}
var arrayPos []uint64
if numap > 0 {
- arrayPos = make([]uint64, numap)
+ if cap(vdc.arrayPos) < int(numap) {
+ vdc.arrayPos = make([]uint64, numap)
+ }
+ arrayPos = vdc.arrayPos[:numap]
for i := 0; i < int(numap); i++ {
- ap, err := decoder.GetU64()
+ ap, err := binary.ReadUvarint(&vdc.reader)
if err != nil {
return err
}
@@ -323,10 +371,36 @@ func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldVal
value := uncompressed[offset : offset+l]
keepGoing = visitor(s.fieldsInv[field], byte(typ), value, arrayPos)
}
+
+ vdc.buf = uncompressed
}
return nil
}
+// DocID returns the value of the _id field for the given docNum
+func (s *SegmentBase) DocID(num uint64) ([]byte, error) {
+ if num >= s.numDocs {
+ return nil, nil
+ }
+
+ vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx)
+
+ meta, compressed := s.getDocStoredMetaAndCompressed(num)
+
+ vdc.reader.Reset(meta)
+
+ // handle _id field special case
+ idFieldValLen, err := binary.ReadUvarint(&vdc.reader)
+ if err != nil {
+ return nil, err
+ }
+ idFieldVal := compressed[:idFieldValLen]
+
+ visitDocumentCtxPool.Put(vdc)
+
+ return idFieldVal, nil
+}
+
// Count returns the number of documents in this segment.
func (s *SegmentBase) Count() uint64 {
return s.numDocs
@@ -343,15 +417,26 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) {
return nil, err
}
- var postings *PostingsList
+ postingsList := emptyPostingsList
+
+ sMax, err := idDict.fst.GetMaxKey()
+ if err != nil {
+ return nil, err
+ }
+ sMaxStr := string(sMax)
+ filteredIds := make([]string, 0, len(ids))
for _, id := range ids {
- postings, err = idDict.postingsList([]byte(id), nil, postings)
+ if id <= sMaxStr {
+ filteredIds = append(filteredIds, id)
+ }
+ }
+
+ for _, id := range filteredIds {
+ postingsList, err = idDict.postingsList([]byte(id), nil, postingsList)
if err != nil {
return nil, err
}
- if postings.postings != nil {
- rv.Or(postings.postings)
- }
+ postingsList.OrInto(rv)
}
}
@@ -441,19 +526,32 @@ func (s *Segment) DictAddr(field string) (uint64, error) {
return s.dictLocs[fieldIDPlus1-1], nil
}
-func (s *SegmentBase) loadDvIterators() error {
+func (s *SegmentBase) loadDvReaders() error {
if s.docValueOffset == fieldNotUninverted {
return nil
}
var read uint64
for fieldID, field := range s.fieldsInv {
- fieldLoc, n := binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64])
+ var fieldLocStart, fieldLocEnd uint64
+ var n int
+ fieldLocStart, n = binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64])
if n <= 0 {
- return fmt.Errorf("loadDvIterators: failed to read the docvalue offsets for field %d", fieldID)
+ return fmt.Errorf("loadDvReaders: failed to read the docvalue offset start for field %d", fieldID)
}
- s.fieldDvIterMap[uint16(fieldID)], _ = s.loadFieldDocValueIterator(field, fieldLoc)
read += uint64(n)
+ fieldLocEnd, n = binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64])
+ if n <= 0 {
+ return fmt.Errorf("loadDvReaders: failed to read the docvalue offset end for field %d", fieldID)
+ }
+ read += uint64(n)
+
+ fieldDvReader, _ := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd)
+ if fieldDvReader != nil {
+ s.fieldDvReaders[uint16(fieldID)] = fieldDvReader
+ s.fieldDvNames = append(s.fieldDvNames, field)
+ }
}
+
return nil
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/write.go b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/write.go
index c5316a99f0..cddaedd007 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/write.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/write.go
@@ -15,7 +15,6 @@
package zap
import (
- "bytes"
"encoding/binary"
"io"
@@ -25,28 +24,29 @@ import (
// writes out the length of the roaring bitmap in bytes as varint
// then writes out the roaring bitmap itself
func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer,
- reuseBuf *bytes.Buffer, reuseBufVarint []byte) (int, error) {
- reuseBuf.Reset()
-
- // write out postings list to memory so we know the len
- postingsListLen, err := r.WriteTo(reuseBuf)
+ reuseBufVarint []byte) (int, error) {
+ buf, err := r.ToBytes()
if err != nil {
return 0, err
}
+
var tw int
- // write out the length of this postings list
- n := binary.PutUvarint(reuseBufVarint, uint64(postingsListLen))
+
+ // write out the length
+ n := binary.PutUvarint(reuseBufVarint, uint64(len(buf)))
nw, err := w.Write(reuseBufVarint[:n])
tw += nw
if err != nil {
return tw, err
}
- // write out the postings list itself
- nw, err = w.Write(reuseBuf.Bytes())
+
+ // write out the roaring bytes
+ nw, err = w.Write(buf)
tw += nw
if err != nil {
return tw, err
}
+
return tw, nil
}
@@ -118,7 +118,7 @@ func persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset
return err
}
// write out 32-bit version
- err = binary.Write(w, binary.BigEndian, version)
+ err = binary.Write(w, binary.BigEndian, Version)
if err != nil {
return err
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go
index bb99757687..8babb31fa4 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go
@@ -15,10 +15,10 @@
package scorch
import (
- "bytes"
"container/heap"
"encoding/binary"
"fmt"
+ "reflect"
"sort"
"sync"
"sync/atomic"
@@ -27,8 +27,13 @@ import (
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
+ "github.com/couchbase/vellum"
+ lev2 "github.com/couchbase/vellum/levenshtein2"
)
+// re usable, threadsafe levenshtein builders
+var lb1, lb2 *lev2.LevenshteinAutomatonBuilder
+
type asynchSegmentResult struct {
dictItr segment.DictionaryIterator
@@ -40,15 +45,36 @@ type asynchSegmentResult struct {
err error
}
+var reflectStaticSizeIndexSnapshot int
+
+func init() {
+ var is interface{} = IndexSnapshot{}
+ reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size())
+ var err error
+ lb1, err = lev2.NewLevenshteinAutomatonBuilder(1, true)
+ if err != nil {
+ panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err))
+ }
+ lb2, err = lev2.NewLevenshteinAutomatonBuilder(2, true)
+ if err != nil {
+ panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err))
+ }
+}
+
type IndexSnapshot struct {
parent *Scorch
segment []*SegmentSnapshot
offsets []uint64
internal map[string][]byte
epoch uint64
+ size uint64
+ creator string
m sync.Mutex // Protects the fields that follow.
refs int64
+
+ m2 sync.Mutex // Protects the fields that follow.
+ fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's
}
func (i *IndexSnapshot) Segments() []*SegmentSnapshot {
@@ -85,12 +111,27 @@ func (i *IndexSnapshot) DecRef() (err error) {
return err
}
+func (i *IndexSnapshot) Close() error {
+ return i.DecRef()
+}
+
+func (i *IndexSnapshot) Size() int {
+ return int(i.size)
+}
+
+func (i *IndexSnapshot) updateSize() {
+ i.size += uint64(reflectStaticSizeIndexSnapshot)
+ for _, s := range i.segment {
+ i.size += uint64(s.Size())
+ }
+}
+
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
results := make(chan *asynchSegmentResult)
for index, segment := range i.segment {
go func(index int, segment *SegmentSnapshot) {
- dict, err := segment.Dictionary(field)
+ dict, err := segment.segment.Dictionary(field)
if err != nil {
results <- &asynchSegmentResult{err: err}
} else {
@@ -116,7 +157,7 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
if next != nil {
rv.cursors = append(rv.cursors, &segmentDictCursor{
itr: asr.dictItr,
- curr: next,
+ curr: *next,
})
}
}
@@ -151,6 +192,56 @@ func (i *IndexSnapshot) FieldDictPrefix(field string,
})
}
+func (i *IndexSnapshot) FieldDictRegexp(field string,
+ termRegex string) (index.FieldDict, error) {
+ // TODO: potential optimization where the literal prefix represents the,
+ // entire regexp, allowing us to use PrefixIterator(prefixTerm)?
+
+ a, prefixBeg, prefixEnd, err := segment.ParseRegexp(termRegex)
+ if err != nil {
+ return nil, err
+ }
+
+ return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
+ return i.AutomatonIterator(a, prefixBeg, prefixEnd)
+ })
+}
+
+func (i *IndexSnapshot) getLevAutomaton(term string,
+ fuzziness uint8) (vellum.Automaton, error) {
+ if fuzziness == 1 {
+ return lb1.BuildDfa(term, fuzziness)
+ } else if fuzziness == 2 {
+ return lb2.BuildDfa(term, fuzziness)
+ }
+ return nil, fmt.Errorf("fuzziness exceeds the max limit")
+}
+
+func (i *IndexSnapshot) FieldDictFuzzy(field string,
+ term string, fuzziness int, prefix string) (index.FieldDict, error) {
+ a, err := i.getLevAutomaton(term, uint8(fuzziness))
+ if err != nil {
+ return nil, err
+ }
+
+ var prefixBeg, prefixEnd []byte
+ if prefix != "" {
+ prefixBeg = []byte(prefix)
+ prefixEnd = segment.IncrementBytes(prefixBeg)
+ }
+
+ return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
+ return i.AutomatonIterator(a, prefixBeg, prefixEnd)
+ })
+}
+
+func (i *IndexSnapshot) FieldDictOnly(field string,
+ onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) {
+ return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
+ return i.OnlyIterator(onlyTerms, includeCount)
+ })
+}
+
func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {
results := make(chan *asynchSegmentResult)
for index, segment := range i.segment {
@@ -264,21 +355,26 @@ func (i *IndexSnapshot) Document(id string) (rv *document.Document, err error) {
segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum)
rv = document.NewDocument(id)
- err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, value []byte, pos []uint64) bool {
+ err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, val []byte, pos []uint64) bool {
if name == "_id" {
return true
}
+
+ // copy value, array positions to preserve them beyond the scope of this callback
+ value := append([]byte(nil), val...)
+ arrayPos := append([]uint64(nil), pos...)
+
switch typ {
case 't':
- rv.AddField(document.NewTextField(name, pos, value))
+ rv.AddField(document.NewTextField(name, arrayPos, value))
case 'n':
- rv.AddField(document.NewNumericFieldFromBytes(name, pos, value))
+ rv.AddField(document.NewNumericFieldFromBytes(name, arrayPos, value))
case 'd':
- rv.AddField(document.NewDateTimeFieldFromBytes(name, pos, value))
+ rv.AddField(document.NewDateTimeFieldFromBytes(name, arrayPos, value))
case 'b':
- rv.AddField(document.NewBooleanFieldFromBytes(name, pos, value))
+ rv.AddField(document.NewBooleanFieldFromBytes(name, arrayPos, value))
case 'g':
- rv.AddField(document.NewGeoPointFieldFromBytes(name, pos, value))
+ rv.AddField(document.NewGeoPointFieldFromBytes(name, arrayPos, value))
}
return true
@@ -307,24 +403,15 @@ func (i *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) {
}
segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum)
- var found bool
- var rv string
- err = i.segment[segmentIndex].VisitDocument(localDocNum, func(field string, typ byte, value []byte, pos []uint64) bool {
- if field == "_id" {
- found = true
- rv = string(value)
- return false
- }
- return true
- })
+ v, err := i.segment[segmentIndex].DocID(localDocNum)
if err != nil {
return "", err
}
-
- if found {
- return rv, nil
+ if v == nil {
+ return "", fmt.Errorf("document number %d not found", docNum)
}
- return "", fmt.Errorf("document number %d not found", docNum)
+
+ return string(v), nil
}
func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err error) {
@@ -349,33 +436,81 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err
func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
-
- rv := &IndexSnapshotTermFieldReader{
- term: term,
- field: field,
- snapshot: i,
- postings: make([]segment.PostingsList, len(i.segment)),
- iterators: make([]segment.PostingsIterator, len(i.segment)),
- includeFreq: includeFreq,
- includeNorm: includeNorm,
- includeTermVectors: includeTermVectors,
+ rv := i.allocTermFieldReaderDicts(field)
+
+ rv.term = term
+ rv.field = field
+ rv.snapshot = i
+ if rv.postings == nil {
+ rv.postings = make([]segment.PostingsList, len(i.segment))
+ }
+ if rv.iterators == nil {
+ rv.iterators = make([]segment.PostingsIterator, len(i.segment))
+ }
+ rv.segmentOffset = 0
+ rv.includeFreq = includeFreq
+ rv.includeNorm = includeNorm
+ rv.includeTermVectors = includeTermVectors
+ rv.currPosting = nil
+ rv.currID = rv.currID[:0]
+
+ if rv.dicts == nil {
+ rv.dicts = make([]segment.TermDictionary, len(i.segment))
+ for i, segment := range i.segment {
+ dict, err := segment.segment.Dictionary(field)
+ if err != nil {
+ return nil, err
+ }
+ rv.dicts[i] = dict
+ }
}
+
for i, segment := range i.segment {
- dict, err := segment.Dictionary(field)
- if err != nil {
- return nil, err
- }
- pl, err := dict.PostingsList(string(term), nil)
+ pl, err := rv.dicts[i].PostingsList(term, segment.deleted, rv.postings[i])
if err != nil {
return nil, err
}
rv.postings[i] = pl
- rv.iterators[i] = pl.Iterator()
+ rv.iterators[i] = pl.Iterator(includeFreq, includeNorm, includeTermVectors, rv.iterators[i])
}
- atomic.AddUint64(&i.parent.stats.termSearchersStarted, uint64(1))
+ atomic.AddUint64(&i.parent.stats.TotTermSearchersStarted, uint64(1))
return rv, nil
}
+func (i *IndexSnapshot) allocTermFieldReaderDicts(field string) (tfr *IndexSnapshotTermFieldReader) {
+ i.m2.Lock()
+ if i.fieldTFRs != nil {
+ tfrs := i.fieldTFRs[field]
+ last := len(tfrs) - 1
+ if last >= 0 {
+ tfr = tfrs[last]
+ tfrs[last] = nil
+ i.fieldTFRs[field] = tfrs[:last]
+ i.m2.Unlock()
+ return
+ }
+ }
+ i.m2.Unlock()
+ return &IndexSnapshotTermFieldReader{}
+}
+
+func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) {
+ i.parent.rootLock.RLock()
+ obsolete := i.parent.root != i
+ i.parent.rootLock.RUnlock()
+ if obsolete {
+ // if we're not the current root (mutations happened), don't bother recycling
+ return
+ }
+
+ i.m2.Lock()
+ if i.fieldTFRs == nil {
+ i.fieldTFRs = map[string][]*IndexSnapshotTermFieldReader{}
+ }
+ i.fieldTFRs[tfr.field] = append(i.fieldTFRs[tfr.field], tfr)
+ i.m2.Unlock()
+}
+
func docNumberToBytes(buf []byte, in uint64) []byte {
if len(buf) != 8 {
if cap(buf) >= 8 {
@@ -389,115 +524,172 @@ func docNumberToBytes(buf []byte, in uint64) []byte {
}
func docInternalToNumber(in index.IndexInternalID) (uint64, error) {
- var res uint64
- err := binary.Read(bytes.NewReader(in), binary.BigEndian, &res)
- if err != nil {
- return 0, err
+ if len(in) != 8 {
+ return 0, fmt.Errorf("wrong len for IndexInternalID: %q", in)
}
- return res, nil
+ return binary.BigEndian.Uint64(in), nil
}
func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID,
fields []string, visitor index.DocumentFieldTermVisitor) error {
+ _, err := i.documentVisitFieldTerms(id, fields, visitor, nil)
+ return err
+}
+func (i *IndexSnapshot) documentVisitFieldTerms(id index.IndexInternalID,
+ fields []string, visitor index.DocumentFieldTermVisitor,
+ dvs segment.DocVisitState) (segment.DocVisitState, error) {
docNum, err := docInternalToNumber(id)
if err != nil {
- return err
+ return nil, err
}
+
segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum)
if segmentIndex >= len(i.segment) {
- return nil
+ return nil, nil
}
+ _, dvs, err = i.documentVisitFieldTermsOnSegment(
+ segmentIndex, localDocNum, fields, nil, visitor, dvs)
+
+ return dvs, err
+}
+
+func (i *IndexSnapshot) documentVisitFieldTermsOnSegment(
+ segmentIndex int, localDocNum uint64, fields []string, cFields []string,
+ visitor index.DocumentFieldTermVisitor, dvs segment.DocVisitState) (
+ cFieldsOut []string, dvsOut segment.DocVisitState, err error) {
ss := i.segment[segmentIndex]
- if zaps, ok := ss.segment.(segment.DocumentFieldTermVisitable); ok {
- // get the list of doc value persisted fields
- pFields, err := zaps.VisitableDocValueFields()
+ var vFields []string // fields that are visitable via the segment
+
+ ssv, ssvOk := ss.segment.(segment.DocumentFieldTermVisitable)
+ if ssvOk && ssv != nil {
+ vFields, err = ssv.VisitableDocValueFields()
if err != nil {
- return err
- }
- // assort the fields for which terms look up have to
- // be performed runtime
- dvPendingFields := extractDvPendingFields(fields, pFields)
- if len(dvPendingFields) == 0 {
- // all fields are doc value persisted
- return zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor)
+ return nil, nil, err
}
+ }
- // concurrently trigger the runtime doc value preparations for
- // pending fields as well as the visit of the persisted doc values
- errCh := make(chan error, 1)
+ var errCh chan error
- go func() {
- defer close(errCh)
- err := ss.cachedDocs.prepareFields(fields, ss)
- if err != nil {
- errCh <- err
- }
- }()
+ // cFields represents the fields that we'll need from the
+ // cachedDocs, and might be optionally be provided by the caller,
+ // if the caller happens to know we're on the same segmentIndex
+ // from a previous invocation
+ if cFields == nil {
+ cFields = subtractStrings(fields, vFields)
+
+ if !ss.cachedDocs.hasFields(cFields) {
+ errCh = make(chan error, 1)
+
+ go func() {
+ err := ss.cachedDocs.prepareFields(cFields, ss)
+ if err != nil {
+ errCh <- err
+ }
+ close(errCh)
+ }()
+ }
+ }
- // visit the persisted dv while the cache preparation is in progress
- err = zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor)
+ if ssvOk && ssv != nil && len(vFields) > 0 {
+ dvs, err = ssv.VisitDocumentFieldTerms(localDocNum, fields, visitor, dvs)
if err != nil {
- return err
+ return nil, nil, err
}
+ }
- // err out if fieldCache preparation failed
+ if errCh != nil {
err = <-errCh
if err != nil {
- return err
+ return nil, nil, err
}
+ }
- visitDocumentFieldCacheTerms(localDocNum, dvPendingFields, ss, visitor)
- return nil
+ if len(cFields) > 0 {
+ ss.cachedDocs.visitDoc(localDocNum, cFields, visitor)
}
- return prepareCacheVisitDocumentFieldTerms(localDocNum, fields, ss, visitor)
+ return cFields, dvs, nil
+}
+
+func (i *IndexSnapshot) DocValueReader(fields []string) (
+ index.DocValueReader, error) {
+ return &DocValueReader{i: i, fields: fields, currSegmentIndex: -1}, nil
+}
+
+type DocValueReader struct {
+ i *IndexSnapshot
+ fields []string
+ dvs segment.DocVisitState
+
+ currSegmentIndex int
+ currCachedFields []string
}
-func prepareCacheVisitDocumentFieldTerms(localDocNum uint64, fields []string,
- ss *SegmentSnapshot, visitor index.DocumentFieldTermVisitor) error {
- err := ss.cachedDocs.prepareFields(fields, ss)
+func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID,
+ visitor index.DocumentFieldTermVisitor) (err error) {
+ docNum, err := docInternalToNumber(id)
if err != nil {
return err
}
- visitDocumentFieldCacheTerms(localDocNum, fields, ss, visitor)
- return nil
+ segmentIndex, localDocNum := dvr.i.segmentIndexAndLocalDocNumFromGlobal(docNum)
+ if segmentIndex >= len(dvr.i.segment) {
+ return nil
+ }
+
+ if dvr.currSegmentIndex != segmentIndex {
+ dvr.currSegmentIndex = segmentIndex
+ dvr.currCachedFields = nil
+ }
+
+ dvr.currCachedFields, dvr.dvs, err = dvr.i.documentVisitFieldTermsOnSegment(
+ dvr.currSegmentIndex, localDocNum, dvr.fields, dvr.currCachedFields, visitor, dvr.dvs)
+
+ return err
}
-func visitDocumentFieldCacheTerms(localDocNum uint64, fields []string,
- ss *SegmentSnapshot, visitor index.DocumentFieldTermVisitor) {
+func (i *IndexSnapshot) DumpAll() chan interface{} {
+ rv := make(chan interface{})
+ go func() {
+ close(rv)
+ }()
+ return rv
+}
- for _, field := range fields {
- if cachedFieldDocs, exists := ss.cachedDocs.cache[field]; exists {
- if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists {
- for {
- i := bytes.Index(tlist, TermSeparatorSplitSlice)
- if i < 0 {
- break
- }
- visitor(field, tlist[0:i])
- tlist = tlist[i+1:]
- }
- }
- }
- }
+func (i *IndexSnapshot) DumpDoc(id string) chan interface{} {
+ rv := make(chan interface{})
+ go func() {
+ close(rv)
+ }()
+ return rv
+}
+func (i *IndexSnapshot) DumpFields() chan interface{} {
+ rv := make(chan interface{})
+ go func() {
+ close(rv)
+ }()
+ return rv
}
-func extractDvPendingFields(requestedFields, persistedFields []string) []string {
- removeMap := map[string]struct{}{}
- for _, str := range persistedFields {
- removeMap[str] = struct{}{}
+// subtractStrings returns set a minus elements of set b.
+func subtractStrings(a, b []string) []string {
+ if len(b) == 0 {
+ return a
}
- rv := make([]string, 0, len(requestedFields))
- for _, s := range requestedFields {
- if _, ok := removeMap[s]; !ok {
- rv = append(rv, s)
+ rv := make([]string, 0, len(a))
+OUTER:
+ for _, as := range a {
+ for _, bs := range b {
+ if as == bs {
+ continue OUTER
+ }
}
+ rv = append(rv, as)
}
return rv
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go
index 3c902cad6b..abd3bde8c1 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go
@@ -23,12 +23,13 @@ import (
type segmentDictCursor struct {
itr segment.DictionaryIterator
- curr *index.DictEntry
+ curr index.DictEntry
}
type IndexSnapshotFieldDict struct {
snapshot *IndexSnapshot
cursors []*segmentDictCursor
+ entry index.DictEntry
}
func (i *IndexSnapshotFieldDict) Len() int { return len(i.cursors) }
@@ -51,10 +52,10 @@ func (i *IndexSnapshotFieldDict) Pop() interface{} {
}
func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
- if len(i.cursors) <= 0 {
+ if len(i.cursors) == 0 {
return nil, nil
}
- rv := i.cursors[0].curr
+ i.entry = i.cursors[0].curr
next, err := i.cursors[0].itr.Next()
if err != nil {
return nil, err
@@ -64,12 +65,12 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
heap.Pop(i)
} else {
// modified heap, fix it
- i.cursors[0].curr = next
+ i.cursors[0].curr = *next
heap.Fix(i, 0)
}
// look for any other entries with the exact same term
- for len(i.cursors) > 0 && i.cursors[0].curr.Term == rv.Term {
- rv.Count += i.cursors[0].curr.Count
+ for len(i.cursors) > 0 && i.cursors[0].curr.Term == i.entry.Term {
+ i.entry.Count += i.cursors[0].curr.Count
next, err := i.cursors[0].itr.Next()
if err != nil {
return nil, err
@@ -79,12 +80,12 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
heap.Pop(i)
} else {
// modified heap, fix it
- i.cursors[0].curr = next
+ i.cursors[0].curr = *next
heap.Fix(i, 0)
}
}
- return rv, nil
+ return &i.entry, nil
}
func (i *IndexSnapshotFieldDict) Close() error {
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_doc.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_doc.go
index d1205ff8e8..27da208655 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_doc.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_doc.go
@@ -16,17 +16,30 @@ package scorch
import (
"bytes"
+ "reflect"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeIndexSnapshotDocIDReader int
+
+func init() {
+ var isdr IndexSnapshotDocIDReader
+ reflectStaticSizeIndexSnapshotDocIDReader = int(reflect.TypeOf(isdr).Size())
+}
+
type IndexSnapshotDocIDReader struct {
snapshot *IndexSnapshot
iterators []roaring.IntIterable
segmentOffset int
}
+func (i *IndexSnapshotDocIDReader) Size() int {
+ return reflectStaticSizeIndexSnapshotDocIDReader + size.SizeOfPtr
+}
+
func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) {
for i.segmentOffset < len(i.iterators) {
if !i.iterators[i.segmentOffset].HasNext() {
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_tfr.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_tfr.go
index 87fd0d14f3..5d56f19441 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_tfr.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_tfr.go
@@ -16,16 +16,27 @@ package scorch
import (
"bytes"
+ "fmt"
+ "reflect"
"sync/atomic"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeIndexSnapshotTermFieldReader int
+
+func init() {
+ var istfr IndexSnapshotTermFieldReader
+ reflectStaticSizeIndexSnapshotTermFieldReader = int(reflect.TypeOf(istfr).Size())
+}
+
type IndexSnapshotTermFieldReader struct {
term []byte
field string
snapshot *IndexSnapshot
+ dicts []segment.TermDictionary
postings []segment.PostingsList
iterators []segment.PostingsIterator
segmentOffset int
@@ -36,13 +47,34 @@ type IndexSnapshotTermFieldReader struct {
currID index.IndexInternalID
}
+func (i *IndexSnapshotTermFieldReader) Size() int {
+ sizeInBytes := reflectStaticSizeIndexSnapshotTermFieldReader + size.SizeOfPtr +
+ len(i.term) +
+ len(i.field) +
+ len(i.currID)
+
+ for _, entry := range i.postings {
+ sizeInBytes += entry.Size()
+ }
+
+ for _, entry := range i.iterators {
+ sizeInBytes += entry.Size()
+ }
+
+ if i.currPosting != nil {
+ sizeInBytes += i.currPosting.Size()
+ }
+
+ return sizeInBytes
+}
+
func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
rv := preAlloced
if rv == nil {
rv = &index.TermFieldDoc{}
}
// find the next hit
- for i.segmentOffset < len(i.postings) {
+ for i.segmentOffset < len(i.iterators) {
next, err := i.iterators[i.segmentOffset].Next()
if err != nil {
return nil, err
@@ -72,9 +104,16 @@ func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Postin
}
if i.includeTermVectors {
locs := next.Locations()
- rv.Vectors = make([]*index.TermFieldVector, len(locs))
+ if cap(rv.Vectors) < len(locs) {
+ rv.Vectors = make([]*index.TermFieldVector, len(locs))
+ backing := make([]index.TermFieldVector, len(locs))
+ for i := range backing {
+ rv.Vectors[i] = &backing[i]
+ }
+ }
+ rv.Vectors = rv.Vectors[:len(locs)]
for i, loc := range locs {
- rv.Vectors[i] = &index.TermFieldVector{
+ *rv.Vectors[i] = index.TermFieldVector{
Start: loc.Start(),
End: loc.End(),
Pos: loc.Pos(),
@@ -96,24 +135,37 @@ func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAllo
}
*i = *(i2.(*IndexSnapshotTermFieldReader))
}
- // FIXME do something better
- next, err := i.Next(preAlloced)
+ num, err := docInternalToNumber(ID)
+ if err != nil {
+ return nil, fmt.Errorf("error converting to doc number % x - %v", ID, err)
+ }
+ segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num)
+ if segIndex >= len(i.snapshot.segment) {
+ return nil, fmt.Errorf("computed segment index %d out of bounds %d",
+ segIndex, len(i.snapshot.segment))
+ }
+ // skip directly to the target segment
+ i.segmentOffset = segIndex
+ next, err := i.iterators[i.segmentOffset].Advance(ldocNum)
if err != nil {
return nil, err
}
if next == nil {
- return nil, nil
+ // we jumped directly to the segment that should have contained it
+ // but it wasn't there, so reuse Next() which should correctly
+ // get the next hit after it (we moved i.segmentOffset)
+ return i.Next(preAlloced)
}
- for bytes.Compare(next.ID, ID) < 0 {
- next, err = i.Next(preAlloced)
- if err != nil {
- return nil, err
- }
- if next == nil {
- break
- }
+
+ if preAlloced == nil {
+ preAlloced = &index.TermFieldDoc{}
}
- return next, nil
+ preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+
+ i.snapshot.offsets[segIndex])
+ i.postingToTermFieldDoc(next, preAlloced)
+ i.currID = preAlloced.ID
+ i.currPosting = next
+ return preAlloced, nil
}
func (i *IndexSnapshotTermFieldReader) Count() uint64 {
@@ -126,7 +178,8 @@ func (i *IndexSnapshotTermFieldReader) Count() uint64 {
func (i *IndexSnapshotTermFieldReader) Close() error {
if i.snapshot != nil {
- atomic.AddUint64(&i.snapshot.parent.stats.termSearchersFinished, uint64(1))
+ atomic.AddUint64(&i.snapshot.parent.stats.TotTermSearchersFinished, uint64(1))
+ i.snapshot.recycleTermFieldReader(i)
}
return nil
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go
index 247003311e..470868d0eb 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go
@@ -19,7 +19,7 @@ import (
"log"
"github.com/blevesearch/bleve/index/scorch/segment"
- "github.com/boltdb/bolt"
+ bolt "github.com/etcd-io/bbolt"
)
type RollbackPoint struct {
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go
index 5e64cb1f2f..f3a2c56a98 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go
@@ -15,42 +15,25 @@
package scorch
import (
+ "bytes"
"sync"
+ "sync/atomic"
"github.com/RoaringBitmap/roaring"
+ "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
+ "github.com/blevesearch/bleve/size"
)
var TermSeparator byte = 0xff
var TermSeparatorSplitSlice = []byte{TermSeparator}
-type SegmentDictionarySnapshot struct {
- s *SegmentSnapshot
- d segment.TermDictionary
-}
-
-func (s *SegmentDictionarySnapshot) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) {
- // TODO: if except is non-nil, perhaps need to OR it with s.s.deleted?
- return s.d.PostingsList(term, s.s.deleted)
-}
-
-func (s *SegmentDictionarySnapshot) Iterator() segment.DictionaryIterator {
- return s.d.Iterator()
-}
-
-func (s *SegmentDictionarySnapshot) PrefixIterator(prefix string) segment.DictionaryIterator {
- return s.d.PrefixIterator(prefix)
-}
-
-func (s *SegmentDictionarySnapshot) RangeIterator(start, end string) segment.DictionaryIterator {
- return s.d.RangeIterator(start, end)
-}
-
type SegmentSnapshot struct {
id uint64
segment segment.Segment
deleted *roaring.Bitmap
+ creator string
cachedDocs *cachedDocs
}
@@ -83,8 +66,11 @@ func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.DocumentFiel
return s.segment.VisitDocument(num, visitor)
}
-func (s *SegmentSnapshot) Count() uint64 {
+func (s *SegmentSnapshot) DocID(num uint64) ([]byte, error) {
+ return s.segment.DocID(num)
+}
+func (s *SegmentSnapshot) Count() uint64 {
rv := s.segment.Count()
if s.deleted != nil {
rv -= s.deleted.GetCardinality()
@@ -92,17 +78,6 @@ func (s *SegmentSnapshot) Count() uint64 {
return rv
}
-func (s *SegmentSnapshot) Dictionary(field string) (segment.TermDictionary, error) {
- d, err := s.segment.Dictionary(field)
- if err != nil {
- return nil, err
- }
- return &SegmentDictionarySnapshot{
- s: s,
- d: d,
- }, nil
-}
-
func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) {
rv, err := s.segment.DocNumbers(docIDs)
if err != nil {
@@ -114,7 +89,7 @@ func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) {
return rv, nil
}
-// DocNumbersLive returns bitsit containing doc numbers for all live docs
+// DocNumbersLive returns a bitmap containing doc numbers for all live docs
func (s *SegmentSnapshot) DocNumbersLive() *roaring.Bitmap {
rv := roaring.NewBitmap()
rv.AddRange(0, s.segment.Count())
@@ -128,36 +103,68 @@ func (s *SegmentSnapshot) Fields() []string {
return s.segment.Fields()
}
+func (s *SegmentSnapshot) Size() (rv int) {
+ rv = s.segment.Size()
+ if s.deleted != nil {
+ rv += int(s.deleted.GetSizeInBytes())
+ }
+ rv += s.cachedDocs.Size()
+ return
+}
+
type cachedFieldDocs struct {
+ m sync.Mutex
readyCh chan struct{} // closed when the cachedFieldDocs.docs is ready to be used.
err error // Non-nil if there was an error when preparing this cachedFieldDocs.
docs map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF.
+ size uint64
}
-func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) {
- defer close(cfd.readyCh)
+func (cfd *cachedFieldDocs) Size() int {
+ var rv int
+ cfd.m.Lock()
+ for _, entry := range cfd.docs {
+ rv += 8 /* size of uint64 */ + len(entry)
+ }
+ cfd.m.Unlock()
+ return rv
+}
+
+func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
+ cfd.m.Lock()
+ defer func() {
+ close(cfd.readyCh)
+ cfd.m.Unlock()
+ }()
+ cfd.size += uint64(size.SizeOfUint64) /* size field */
dict, err := ss.segment.Dictionary(field)
if err != nil {
cfd.err = err
return
}
+ var postings segment.PostingsList
+ var postingsItr segment.PostingsIterator
+
dictItr := dict.Iterator()
next, err := dictItr.Next()
for err == nil && next != nil {
- postings, err1 := dict.PostingsList(next.Term, nil)
+ var err1 error
+ postings, err1 = dict.PostingsList([]byte(next.Term), nil, postings)
if err1 != nil {
cfd.err = err1
return
}
- postingsItr := postings.Iterator()
+ cfd.size += uint64(size.SizeOfUint64) /* map key */
+ postingsItr = postings.Iterator(false, false, false, postingsItr)
nextPosting, err2 := postingsItr.Next()
for err2 == nil && nextPosting != nil {
docNum := nextPosting.Number()
cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...)
cfd.docs[docNum] = append(cfd.docs[docNum], TermSeparator)
+ cfd.size += uint64(len(next.Term) + 1) // map value
nextPosting, err2 = postingsItr.Next()
}
@@ -178,10 +185,12 @@ func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) {
type cachedDocs struct {
m sync.Mutex // As the cache is asynchronously prepared, need a lock
cache map[string]*cachedFieldDocs // Keyed by field
+ size uint64
}
func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error {
c.m.Lock()
+
if c.cache == nil {
c.cache = make(map[string]*cachedFieldDocs, len(ss.Fields()))
}
@@ -194,7 +203,7 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e
docs: make(map[uint64][]byte),
}
- go c.cache[field].prepareFields(field, ss)
+ go c.cache[field].prepareField(field, ss)
}
}
@@ -209,21 +218,62 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e
c.m.Lock()
}
+ c.updateSizeLOCKED()
+
c.m.Unlock()
return nil
}
-func (c *cachedDocs) sizeInBytes() uint64 {
- sizeInBytes := 0
+// hasFields returns true if the cache has all the given fields
+func (c *cachedDocs) hasFields(fields []string) bool {
c.m.Lock()
+ for _, field := range fields {
+ if _, exists := c.cache[field]; !exists {
+ c.m.Unlock()
+ return false // found a field not in cache
+ }
+ }
+ c.m.Unlock()
+ return true
+}
+
+func (c *cachedDocs) Size() int {
+ return int(atomic.LoadUint64(&c.size))
+}
+
+func (c *cachedDocs) updateSizeLOCKED() {
+ sizeInBytes := 0
for k, v := range c.cache { // cachedFieldDocs
sizeInBytes += len(k)
if v != nil {
- for _, entry := range v.docs { // docs
- sizeInBytes += 8 /* size of uint64 */ + len(entry)
+ sizeInBytes += v.Size()
+ }
+ }
+ atomic.StoreUint64(&c.size, uint64(sizeInBytes))
+}
+
+func (c *cachedDocs) visitDoc(localDocNum uint64,
+ fields []string, visitor index.DocumentFieldTermVisitor) {
+ c.m.Lock()
+
+ for _, field := range fields {
+ if cachedFieldDocs, exists := c.cache[field]; exists {
+ c.m.Unlock()
+ <-cachedFieldDocs.readyCh
+ c.m.Lock()
+
+ if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists {
+ for {
+ i := bytes.Index(tlist, TermSeparatorSplitSlice)
+ if i < 0 {
+ break
+ }
+ visitor(field, tlist[0:i])
+ tlist = tlist[i+1:]
+ }
}
}
}
+
c.m.Unlock()
- return uint64(sizeInBytes)
}
diff --git a/vendor/github.com/blevesearch/bleve/index/scorch/stats.go b/vendor/github.com/blevesearch/bleve/index/scorch/stats.go
index c44a977bfd..2eb832f2cf 100644
--- a/vendor/github.com/blevesearch/bleve/index/scorch/stats.go
+++ b/vendor/github.com/blevesearch/bleve/index/scorch/stats.go
@@ -16,63 +16,125 @@ package scorch
import (
"encoding/json"
- "io/ioutil"
+ "reflect"
"sync/atomic"
)
-// Stats tracks statistics about the index
+// Stats tracks statistics about the index, fields that are
+// prefixed like CurXxxx are gauges (can go up and down),
+// and fields that are prefixed like TotXxxx are monotonically
+// increasing counters.
type Stats struct {
- updates, deletes, batches, errors uint64
- analysisTime, indexTime uint64
- termSearchersStarted uint64
- termSearchersFinished uint64
- numPlainTextBytesIndexed uint64
- numItemsIntroduced uint64
- numItemsPersisted uint64
- i *Scorch
-}
+ TotUpdates uint64
+ TotDeletes uint64
-func (s *Stats) statsMap() (map[string]interface{}, error) {
- m := map[string]interface{}{}
- m["updates"] = atomic.LoadUint64(&s.updates)
- m["deletes"] = atomic.LoadUint64(&s.deletes)
- m["batches"] = atomic.LoadUint64(&s.batches)
- m["errors"] = atomic.LoadUint64(&s.errors)
- m["analysis_time"] = atomic.LoadUint64(&s.analysisTime)
- m["index_time"] = atomic.LoadUint64(&s.indexTime)
- m["term_searchers_started"] = atomic.LoadUint64(&s.termSearchersStarted)
- m["term_searchers_finished"] = atomic.LoadUint64(&s.termSearchersFinished)
- m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&s.numPlainTextBytesIndexed)
- m["num_items_introduced"] = atomic.LoadUint64(&s.numItemsIntroduced)
- m["num_items_persisted"] = atomic.LoadUint64(&s.numItemsPersisted)
-
- if s.i.path != "" {
- finfos, err := ioutil.ReadDir(s.i.path)
- if err != nil {
- return nil, err
- }
+ TotBatches uint64
+ TotBatchesEmpty uint64
+ TotBatchIntroTime uint64
+ MaxBatchIntroTime uint64
- var numFilesOnDisk, numBytesUsedDisk uint64
+ CurRootEpoch uint64
+ LastPersistedEpoch uint64
+ LastMergedEpoch uint64
- for _, finfo := range finfos {
- if !finfo.IsDir() {
- numBytesUsedDisk += uint64(finfo.Size())
- numFilesOnDisk++
- }
- }
+ TotOnErrors uint64
- m["num_bytes_used_disk"] = numBytesUsedDisk
- m["num_files_on_disk"] = numFilesOnDisk
- }
+ TotAnalysisTime uint64
+ TotIndexTime uint64
+
+ TotIndexedPlainTextBytes uint64
+
+ TotTermSearchersStarted uint64
+ TotTermSearchersFinished uint64
+
+ TotIntroduceLoop uint64
+ TotIntroduceSegmentBeg uint64
+ TotIntroduceSegmentEnd uint64
+ TotIntroducePersistBeg uint64
+ TotIntroducePersistEnd uint64
+ TotIntroduceMergeBeg uint64
+ TotIntroduceMergeEnd uint64
+ TotIntroduceRevertBeg uint64
+ TotIntroduceRevertEnd uint64
+
+ TotIntroducedItems uint64
+ TotIntroducedSegmentsBatch uint64
+ TotIntroducedSegmentsMerge uint64
+
+ TotPersistLoopBeg uint64
+ TotPersistLoopErr uint64
+ TotPersistLoopProgress uint64
+ TotPersistLoopWait uint64
+ TotPersistLoopWaitNotified uint64
+ TotPersistLoopEnd uint64
+
+ TotPersistedItems uint64
+ TotItemsToPersist uint64
+ TotPersistedSegments uint64
+
+ TotPersisterSlowMergerPause uint64
+ TotPersisterSlowMergerResume uint64
+
+ TotPersisterNapPauseCompleted uint64
+ TotPersisterMergerNapBreak uint64
- return m, nil
+ TotFileMergeLoopBeg uint64
+ TotFileMergeLoopErr uint64
+ TotFileMergeLoopEnd uint64
+
+ TotFileMergePlan uint64
+ TotFileMergePlanErr uint64
+ TotFileMergePlanNone uint64
+ TotFileMergePlanOk uint64
+
+ TotFileMergePlanTasks uint64
+ TotFileMergePlanTasksDone uint64
+ TotFileMergePlanTasksErr uint64
+ TotFileMergePlanTasksSegments uint64
+ TotFileMergePlanTasksSegmentsEmpty uint64
+
+ TotFileMergeSegmentsEmpty uint64
+ TotFileMergeSegments uint64
+ TotFileSegmentsAtRoot uint64
+ TotFileMergeWrittenBytes uint64
+
+ TotFileMergeZapBeg uint64
+ TotFileMergeZapEnd uint64
+ TotFileMergeZapTime uint64
+ MaxFileMergeZapTime uint64
+
+ TotFileMergeIntroductions uint64
+ TotFileMergeIntroductionsDone uint64
+ TotFileMergeIntroductionsSkipped uint64
+
+ TotMemMergeBeg uint64
+ TotMemMergeErr uint64
+ TotMemMergeDone uint64
+ TotMemMergeZapBeg uint64
+ TotMemMergeZapEnd uint64
+ TotMemMergeZapTime uint64
+ MaxMemMergeZapTime uint64
+ TotMemMergeSegments uint64
+ TotMemorySegmentsAtRoot uint64
}
-// MarshalJSON implements json.Marshaler
-func (s *Stats) MarshalJSON() ([]byte, error) {
- m, err := s.statsMap()
- if err != nil {
- return nil, err
+// atomically populates the returned map
+func (s *Stats) ToMap() map[string]interface{} {
+ m := map[string]interface{}{}
+ sve := reflect.ValueOf(s).Elem()
+ svet := sve.Type()
+ for i := 0; i < svet.NumField(); i++ {
+ svef := sve.Field(i)
+ if svef.CanAddr() {
+ svefp := svef.Addr().Interface()
+ m[svet.Field(i).Name] = atomic.LoadUint64(svefp.(*uint64))
+ }
}
- return json.Marshal(m)
+ return m
+}
+
+// MarshalJSON implements json.Marshaler, and in contrast to standard
+// json marshaling provides atomic safety
+func (s *Stats) MarshalJSON() ([]byte, error) {
+ return json.Marshal(s.ToMap())
}
diff --git a/vendor/github.com/blevesearch/bleve/index/store/boltdb/iterator.go b/vendor/github.com/blevesearch/bleve/index/store/boltdb/iterator.go
index 82ab946fd7..4b5019f1f2 100644
--- a/vendor/github.com/blevesearch/bleve/index/store/boltdb/iterator.go
+++ b/vendor/github.com/blevesearch/bleve/index/store/boltdb/iterator.go
@@ -17,7 +17,7 @@ package boltdb
import (
"bytes"
- "github.com/boltdb/bolt"
+ bolt "github.com/etcd-io/bbolt"
)
type Iterator struct {
diff --git a/vendor/github.com/blevesearch/bleve/index/store/boltdb/reader.go b/vendor/github.com/blevesearch/bleve/index/store/boltdb/reader.go
index 1d701c982c..4cd94183c6 100644
--- a/vendor/github.com/blevesearch/bleve/index/store/boltdb/reader.go
+++ b/vendor/github.com/blevesearch/bleve/index/store/boltdb/reader.go
@@ -16,7 +16,7 @@ package boltdb
import (
"github.com/blevesearch/bleve/index/store"
- "github.com/boltdb/bolt"
+ bolt "github.com/etcd-io/bbolt"
)
type Reader struct {
diff --git a/vendor/github.com/blevesearch/bleve/index/store/boltdb/store.go b/vendor/github.com/blevesearch/bleve/index/store/boltdb/store.go
index d8de0768f4..56613d5315 100644
--- a/vendor/github.com/blevesearch/bleve/index/store/boltdb/store.go
+++ b/vendor/github.com/blevesearch/bleve/index/store/boltdb/store.go
@@ -30,7 +30,7 @@ import (
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/registry"
- "github.com/boltdb/bolt"
+ bolt "github.com/etcd-io/bbolt"
)
const (
@@ -74,6 +74,12 @@ func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore,
bo.ReadOnly = ro
}
+ if initialMmapSize, ok := config["initialMmapSize"].(int); ok {
+ bo.InitialMmapSize = initialMmapSize
+ } else if initialMmapSize, ok := config["initialMmapSize"].(float64); ok {
+ bo.InitialMmapSize = int(initialMmapSize)
+ }
+
db, err := bolt.Open(path, 0600, bo)
if err != nil {
return nil, err
diff --git a/vendor/github.com/blevesearch/bleve/index/upsidedown/index_reader.go b/vendor/github.com/blevesearch/bleve/index/upsidedown/index_reader.go
index 77d523c302..ea7243eaa6 100644
--- a/vendor/github.com/blevesearch/bleve/index/upsidedown/index_reader.go
+++ b/vendor/github.com/blevesearch/bleve/index/upsidedown/index_reader.go
@@ -15,11 +15,20 @@
package upsidedown
import (
+ "reflect"
+
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
)
+var reflectStaticSizeIndexReader int
+
+func init() {
+ var ir IndexReader
+ reflectStaticSizeIndexReader = int(reflect.TypeOf(ir).Size())
+}
+
type IndexReader struct {
index *UpsideDownCouch
kvreader store.KVReader
@@ -201,3 +210,17 @@ func incrementBytes(in []byte) []byte {
}
return rv
}
+
+func (i *IndexReader) DocValueReader(fields []string) (index.DocValueReader, error) {
+ return &DocValueReader{i: i, fields: fields}, nil
+}
+
+type DocValueReader struct {
+ i *IndexReader
+ fields []string
+}
+
+func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID,
+ visitor index.DocumentFieldTermVisitor) error {
+ return dvr.i.DocumentVisitFieldTerms(id, dvr.fields, visitor)
+}
diff --git a/vendor/github.com/blevesearch/bleve/index/upsidedown/reader.go b/vendor/github.com/blevesearch/bleve/index/upsidedown/reader.go
index 1f40c02ded..bc0fef1199 100644
--- a/vendor/github.com/blevesearch/bleve/index/upsidedown/reader.go
+++ b/vendor/github.com/blevesearch/bleve/index/upsidedown/reader.go
@@ -16,13 +16,27 @@ package upsidedown
import (
"bytes"
+ "reflect"
"sort"
"sync/atomic"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeUpsideDownCouchTermFieldReader int
+var reflectStaticSizeUpsideDownCouchDocIDReader int
+
+func init() {
+ var tfr UpsideDownCouchTermFieldReader
+ reflectStaticSizeUpsideDownCouchTermFieldReader =
+ int(reflect.TypeOf(tfr).Size())
+ var cdr UpsideDownCouchDocIDReader
+ reflectStaticSizeUpsideDownCouchDocIDReader =
+ int(reflect.TypeOf(cdr).Size())
+}
+
type UpsideDownCouchTermFieldReader struct {
count uint64
indexReader *IndexReader
@@ -35,6 +49,19 @@ type UpsideDownCouchTermFieldReader struct {
includeTermVectors bool
}
+func (r *UpsideDownCouchTermFieldReader) Size() int {
+ sizeInBytes := reflectStaticSizeUpsideDownCouchTermFieldReader + size.SizeOfPtr +
+ len(r.term) +
+ r.tfrPrealloc.Size() +
+ len(r.keyBuf)
+
+ if r.tfrNext != nil {
+ sizeInBytes += r.tfrNext.Size()
+ }
+
+ return sizeInBytes
+}
+
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
bufNeeded := termFrequencyRowKeySize(term, nil)
if bufNeeded < dictionaryRowKeySize(term) {
@@ -174,8 +201,18 @@ type UpsideDownCouchDocIDReader struct {
onlyMode bool
}
-func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
+func (r *UpsideDownCouchDocIDReader) Size() int {
+ sizeInBytes := reflectStaticSizeUpsideDownCouchDocIDReader +
+ reflectStaticSizeIndexReader + size.SizeOfPtr
+
+ for _, entry := range r.only {
+ sizeInBytes += size.SizeOfString + len(entry)
+ }
+ return sizeInBytes
+}
+
+func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
startBytes := []byte{0x0}
endBytes := []byte{0xff}
diff --git a/vendor/github.com/blevesearch/bleve/index/upsidedown/row.go b/vendor/github.com/blevesearch/bleve/index/upsidedown/row.go
index 7e503ae05e..531e0a0d33 100644
--- a/vendor/github.com/blevesearch/bleve/index/upsidedown/row.go
+++ b/vendor/github.com/blevesearch/bleve/index/upsidedown/row.go
@@ -20,10 +20,22 @@ import (
"fmt"
"io"
"math"
+ "reflect"
+ "github.com/blevesearch/bleve/size"
"github.com/golang/protobuf/proto"
)
+var reflectStaticSizeTermFrequencyRow int
+var reflectStaticSizeTermVector int
+
+func init() {
+ var tfr TermFrequencyRow
+ reflectStaticSizeTermFrequencyRow = int(reflect.TypeOf(tfr).Size())
+ var tv TermVector
+ reflectStaticSizeTermVector = int(reflect.TypeOf(tv).Size())
+}
+
const ByteSeparator byte = 0xff
type UpsideDownCouchRowStream chan UpsideDownCouchRow
@@ -358,6 +370,11 @@ type TermVector struct {
end uint64
}
+func (tv *TermVector) Size() int {
+ return reflectStaticSizeTermVector + size.SizeOfPtr +
+ len(tv.arrayPositions)*size.SizeOfUint64
+}
+
func (tv *TermVector) String() string {
return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions)
}
@@ -371,6 +388,18 @@ type TermFrequencyRow struct {
field uint16
}
+func (tfr *TermFrequencyRow) Size() int {
+ sizeInBytes := reflectStaticSizeTermFrequencyRow +
+ len(tfr.term) +
+ len(tfr.doc)
+
+ for _, entry := range tfr.vectors {
+ sizeInBytes += entry.Size()
+ }
+
+ return sizeInBytes
+}
+
func (tfr *TermFrequencyRow) Term() []byte {
return tfr.term
}
@@ -555,7 +584,7 @@ func (tfr *TermFrequencyRow) parseK(key []byte) error {
func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error {
tfr.doc = key[3+len(term)+1:]
- if len(tfr.doc) <= 0 {
+ if len(tfr.doc) == 0 {
return fmt.Errorf("invalid term frequency key, empty docid")
}
diff --git a/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go b/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go
index 70e6e457f6..e4bc3d8f02 100644
--- a/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go
+++ b/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go
@@ -775,7 +775,7 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.
}
func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector {
- if len(in) <= 0 {
+ if len(in) == 0 {
return nil
}
@@ -810,15 +810,17 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
}
}
- go func() {
- for _, doc := range batch.IndexOps {
- if doc != nil {
- aw := index.NewAnalysisWork(udc, doc, resultChan)
- // put the work on the queue
- udc.analysisQueue.Queue(aw)
+ if len(batch.IndexOps) > 0 {
+ go func() {
+ for _, doc := range batch.IndexOps {
+ if doc != nil {
+ aw := index.NewAnalysisWork(udc, doc, resultChan)
+ // put the work on the queue
+ udc.analysisQueue.Queue(aw)
+ }
}
- }
- }()
+ }()
+ }
// retrieve back index rows concurrent with analysis
docBackIndexRowErr := error(nil)
@@ -958,6 +960,11 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
} else {
atomic.AddUint64(&udc.stats.errors, 1)
}
+
+ persistedCallback := batch.PersistedCallback()
+ if persistedCallback != nil {
+ persistedCallback(err)
+ }
return
}
diff --git a/vendor/github.com/blevesearch/bleve/index_alias_impl.go b/vendor/github.com/blevesearch/bleve/index_alias_impl.go
index f678a059b7..335fcade2e 100644
--- a/vendor/github.com/blevesearch/bleve/index_alias_impl.go
+++ b/vendor/github.com/blevesearch/bleve/index_alias_impl.go
@@ -433,6 +433,7 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest {
Explain: req.Explain,
Sort: req.Sort.Copy(),
IncludeLocations: req.IncludeLocations,
+ Score: req.Score,
}
return &rv
}
diff --git a/vendor/github.com/blevesearch/bleve/index_impl.go b/vendor/github.com/blevesearch/bleve/index_impl.go
index caea1b8e04..fe61b8064a 100644
--- a/vendor/github.com/blevesearch/bleve/index_impl.go
+++ b/vendor/github.com/blevesearch/bleve/index_impl.go
@@ -50,6 +50,12 @@ const storePath = "store"
var mappingInternalKey = []byte("_mapping")
+const SearchQueryStartCallbackKey = "_search_query_start_callback_key"
+const SearchQueryEndCallbackKey = "_search_query_end_callback_key"
+
+type SearchQueryStartCallbackFn func(size uint64) error
+type SearchQueryEndCallbackFn func(size uint64) error
+
func indexStorePath(path string) string {
return path + string(os.PathSeparator) + storePath
}
@@ -362,8 +368,70 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
return i.SearchInContext(context.Background(), req)
}
+var documentMatchEmptySize int
+var searchContextEmptySize int
+var facetResultEmptySize int
+var documentEmptySize int
+
+func init() {
+ var dm search.DocumentMatch
+ documentMatchEmptySize = dm.Size()
+
+ var sc search.SearchContext
+ searchContextEmptySize = sc.Size()
+
+ var fr search.FacetResult
+ facetResultEmptySize = fr.Size()
+
+ var d document.Document
+ documentEmptySize = d.Size()
+}
+
+// memNeededForSearch is a helper function that returns an estimate of RAM
+// needed to execute a search request.
+func memNeededForSearch(req *SearchRequest,
+ searcher search.Searcher,
+ topnCollector *collector.TopNCollector) uint64 {
+
+ backingSize := req.Size + req.From + 1
+ if req.Size+req.From > collector.PreAllocSizeSkipCap {
+ backingSize = collector.PreAllocSizeSkipCap + 1
+ }
+ numDocMatches := backingSize + searcher.DocumentMatchPoolSize()
+
+ estimate := 0
+
+ // overhead, size in bytes from collector
+ estimate += topnCollector.Size()
+
+ // pre-allocing DocumentMatchPool
+ estimate += searchContextEmptySize + numDocMatches*documentMatchEmptySize
+
+ // searcher overhead
+ estimate += searcher.Size()
+
+ // overhead from results, lowestMatchOutsideResults
+ estimate += (numDocMatches + 1) * documentMatchEmptySize
+
+ // additional overhead from SearchResult
+ estimate += reflectStaticSizeSearchResult + reflectStaticSizeSearchStatus
+
+ // overhead from facet results
+ if req.Facets != nil {
+ estimate += len(req.Facets) * facetResultEmptySize
+ }
+
+ // highlighting, store
+ if len(req.Fields) > 0 || req.Highlight != nil {
+ // Size + From => number of hits
+ estimate += (req.Size + req.From) * documentEmptySize
+ }
+
+ return uint64(estimate)
+}
+
// SearchInContext executes a search request operation within the provided
-// Context. Returns a SearchResult object or an error.
+// Context. Returns a SearchResult object or an error.
func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) {
i.mutex.RLock()
defer i.mutex.RUnlock()
@@ -390,6 +458,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
searcher, err := req.Query.Searcher(indexReader, i.m, search.SearcherOptions{
Explain: req.Explain,
IncludeTermVectors: req.IncludeLocations || req.Highlight != nil,
+ Score: req.Score,
})
if err != nil {
return nil, err
@@ -428,6 +497,24 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
collector.SetFacetsBuilder(facetsBuilder)
}
+ memNeeded := memNeededForSearch(req, searcher, collector)
+ if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil {
+ if cbF, ok := cb.(SearchQueryStartCallbackFn); ok {
+ err = cbF(memNeeded)
+ }
+ }
+ if err != nil {
+ return nil, err
+ }
+
+ if cb := ctx.Value(SearchQueryEndCallbackKey); cb != nil {
+ if cbF, ok := cb.(SearchQueryEndCallbackFn); ok {
+ defer func() {
+ _ = cbF(memNeeded)
+ }()
+ }
+ }
+
err = collector.Collect(ctx, searcher, indexReader)
if err != nil {
return nil, err
@@ -459,7 +546,8 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
doc, err := indexReader.Document(hit.ID)
if err == nil && doc != nil {
if len(req.Fields) > 0 {
- for _, f := range req.Fields {
+ fieldsToLoad := deDuplicate(req.Fields)
+ for _, f := range fieldsToLoad {
for _, docF := range doc.Fields {
if f == "*" || docF.Name() == f {
var value interface{}
@@ -533,9 +621,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
return &SearchResult{
Status: &SearchStatus{
Total: 1,
- Failed: 0,
Successful: 1,
- Errors: make(map[string]error),
},
Request: req,
Hits: hits,
@@ -755,3 +841,16 @@ func (f *indexImplFieldDict) Close() error {
}
return f.indexReader.Close()
}
+
+// helper function to remove duplicate entries from slice of strings
+func deDuplicate(fields []string) []string {
+ entries := make(map[string]struct{})
+ ret := []string{}
+ for _, entry := range fields {
+ if _, exists := entries[entry]; !exists {
+ entries[entry] = struct{}{}
+ ret = append(ret, entry)
+ }
+ }
+ return ret
+}
diff --git a/vendor/github.com/blevesearch/bleve/index_meta.go b/vendor/github.com/blevesearch/bleve/index_meta.go
index 95592a65dc..d814799a89 100644
--- a/vendor/github.com/blevesearch/bleve/index_meta.go
+++ b/vendor/github.com/blevesearch/bleve/index_meta.go
@@ -18,6 +18,7 @@ import (
"encoding/json"
"io/ioutil"
"os"
+ "path/filepath"
"github.com/blevesearch/bleve/index/upsidedown"
)
@@ -92,5 +93,5 @@ func (i *indexMeta) Save(path string) (err error) {
}
func indexMetaPath(path string) string {
- return path + string(os.PathSeparator) + metaFilename
+ return filepath.Join(path, metaFilename)
}
diff --git a/vendor/github.com/blevesearch/bleve/mapping/document.go b/vendor/github.com/blevesearch/bleve/mapping/document.go
index 6ec0c66bb2..f950b59bef 100644
--- a/vendor/github.com/blevesearch/bleve/mapping/document.go
+++ b/vendor/github.com/blevesearch/bleve/mapping/document.go
@@ -42,7 +42,7 @@ type DocumentMapping struct {
Dynamic bool `json:"dynamic"`
Properties map[string]*DocumentMapping `json:"properties,omitempty"`
Fields []*FieldMapping `json:"fields,omitempty"`
- DefaultAnalyzer string `json:"default_analyzer"`
+ DefaultAnalyzer string `json:"default_analyzer,omitempty"`
// StructTagKey overrides "json" when looking for field names in struct tags
StructTagKey string `json:"struct_tag_key,omitempty"`
@@ -324,13 +324,17 @@ func (dm *DocumentMapping) defaultAnalyzerName(path []string) string {
}
func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
- // allow default "json" tag to be overriden
+ // allow default "json" tag to be overridden
structTagKey := dm.StructTagKey
if structTagKey == "" {
structTagKey = "json"
}
val := reflect.ValueOf(data)
+ if !val.IsValid() {
+ return
+ }
+
typ := val.Type()
switch typ.Kind() {
case reflect.Map:
@@ -420,7 +424,11 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
- fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
+ if fieldMapping.Type == "geopoint" {
+ fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
+ } else {
+ fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
+ }
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
diff --git a/vendor/github.com/blevesearch/bleve/mapping/index.go b/vendor/github.com/blevesearch/bleve/mapping/index.go
index fc5d12a73a..602764cbbf 100644
--- a/vendor/github.com/blevesearch/bleve/mapping/index.go
+++ b/vendor/github.com/blevesearch/bleve/mapping/index.go
@@ -320,8 +320,8 @@ func (im *IndexMappingImpl) determineType(data interface{}) string {
func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error {
docType := im.determineType(data)
docMapping := im.mappingForType(docType)
- walkContext := im.newWalkContext(doc, docMapping)
if docMapping.Enabled {
+ walkContext := im.newWalkContext(doc, docMapping)
docMapping.walkDocument(data, []string{}, []uint64{}, walkContext)
// see if the _all field was disabled
diff --git a/vendor/github.com/blevesearch/bleve/mapping/reflect.go b/vendor/github.com/blevesearch/bleve/mapping/reflect.go
index 3068b19065..6500a70592 100644
--- a/vendor/github.com/blevesearch/bleve/mapping/reflect.go
+++ b/vendor/github.com/blevesearch/bleve/mapping/reflect.go
@@ -35,6 +35,9 @@ func lookupPropertyPath(data interface{}, path string) interface{} {
func lookupPropertyPathPart(data interface{}, part string) interface{} {
val := reflect.ValueOf(data)
+ if !val.IsValid() {
+ return nil
+ }
typ := val.Type()
switch typ.Kind() {
case reflect.Map:
diff --git a/vendor/github.com/blevesearch/bleve/numeric/bin.go b/vendor/github.com/blevesearch/bleve/numeric/bin.go
index cd71392dc3..368952a2cb 100644
--- a/vendor/github.com/blevesearch/bleve/numeric/bin.go
+++ b/vendor/github.com/blevesearch/bleve/numeric/bin.go
@@ -14,7 +14,7 @@ var interleaveShift = []uint{1, 2, 4, 8, 16}
// Interleave the first 32 bits of each uint64
// apdated from org.apache.lucene.util.BitUtil
-// whcih was adapted from:
+// which was adapted from:
// http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
func Interleave(v1, v2 uint64) uint64 {
v1 = (v1 | (v1 << interleaveShift[4])) & interleaveMagic[4]
diff --git a/vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go b/vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go
index 4200c23bbd..76ea001ba7 100644
--- a/vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go
+++ b/vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go
@@ -77,6 +77,10 @@ func (p PrefixCoded) Int64() (int64, error) {
}
func ValidPrefixCodedTerm(p string) (bool, int) {
+ return ValidPrefixCodedTermBytes([]byte(p))
+}
+
+func ValidPrefixCodedTermBytes(p []byte) (bool, int) {
if len(p) > 0 {
if p[0] < ShiftStartInt64 || p[0] > ShiftStartInt64+63 {
return false, 0
diff --git a/vendor/github.com/blevesearch/bleve/search.go b/vendor/github.com/blevesearch/bleve/search.go
index 46d849c1b1..ebd69971ef 100644
--- a/vendor/github.com/blevesearch/bleve/search.go
+++ b/vendor/github.com/blevesearch/bleve/search.go
@@ -17,15 +17,29 @@ package bleve
import (
"encoding/json"
"fmt"
+ "reflect"
"time"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/datetime/optional"
+ "github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/registry"
"github.com/blevesearch/bleve/search"
+ "github.com/blevesearch/bleve/search/collector"
"github.com/blevesearch/bleve/search/query"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeSearchResult int
+var reflectStaticSizeSearchStatus int
+
+func init() {
+ var sr SearchResult
+ reflectStaticSizeSearchResult = int(reflect.TypeOf(sr).Size())
+ var ss SearchStatus
+ reflectStaticSizeSearchStatus = int(reflect.TypeOf(ss).Size())
+}
+
var cache = registry.NewCache()
const defaultDateTimeParser = optional.Name
@@ -247,6 +261,7 @@ func (h *HighlightRequest) AddField(field string) {
// Explain triggers inclusion of additional search
// result score explanations.
// Sort describes the desired order for the results to be returned.
+// Score controls the kind of scoring performed
//
// A special field named "*" can be used to return all fields.
type SearchRequest struct {
@@ -259,6 +274,7 @@ type SearchRequest struct {
Explain bool `json:"explain"`
Sort search.SortOrder `json:"sort"`
IncludeLocations bool `json:"includeLocations"`
+ Score string `json:"score,omitempty"`
}
func (r *SearchRequest) Validate() error {
@@ -308,6 +324,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
Explain bool `json:"explain"`
Sort []json.RawMessage `json:"sort"`
IncludeLocations bool `json:"includeLocations"`
+ Score string `json:"score"`
}
err := json.Unmarshal(input, &temp)
@@ -334,6 +351,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
r.Fields = temp.Fields
r.Facets = temp.Facets
r.IncludeLocations = temp.IncludeLocations
+ r.Score = temp.Score
r.Query, err = query.ParseQuery(temp.Q)
if err != nil {
return err
@@ -432,6 +450,24 @@ type SearchResult struct {
Facets search.FacetResults `json:"facets"`
}
+func (sr *SearchResult) Size() int {
+ sizeInBytes := reflectStaticSizeSearchResult + size.SizeOfPtr +
+ reflectStaticSizeSearchStatus
+
+ for _, entry := range sr.Hits {
+ if entry != nil {
+ sizeInBytes += entry.Size()
+ }
+ }
+
+ for k, v := range sr.Facets {
+ sizeInBytes += size.SizeOfString + len(k) +
+ v.Size()
+ }
+
+ return sizeInBytes
+}
+
func (sr *SearchResult) String() string {
rv := ""
if sr.Total > 0 {
@@ -488,3 +524,44 @@ func (sr *SearchResult) Merge(other *SearchResult) {
sr.Facets.Merge(other.Facets)
}
+
+// MemoryNeededForSearchResult is an exported helper function to determine the RAM
+// needed to accommodate the results for a given search request.
+func MemoryNeededForSearchResult(req *SearchRequest) uint64 {
+ if req == nil {
+ return 0
+ }
+
+ numDocMatches := req.Size + req.From
+ if req.Size+req.From > collector.PreAllocSizeSkipCap {
+ numDocMatches = collector.PreAllocSizeSkipCap
+ }
+
+ estimate := 0
+
+ // overhead from the SearchResult structure
+ var sr SearchResult
+ estimate += sr.Size()
+
+ var dm search.DocumentMatch
+ sizeOfDocumentMatch := dm.Size()
+
+ // overhead from results
+ estimate += numDocMatches * sizeOfDocumentMatch
+
+ // overhead from facet results
+ if req.Facets != nil {
+ var fr search.FacetResult
+ estimate += len(req.Facets) * fr.Size()
+ }
+
+ // highlighting, store
+ var d document.Document
+ if len(req.Fields) > 0 || req.Highlight != nil {
+ for i := 0; i < (req.Size + req.From); i++ {
+ estimate += (req.Size + req.From) * d.Size()
+ }
+ }
+
+ return uint64(estimate)
+}
diff --git a/vendor/github.com/blevesearch/bleve/search/collector.go b/vendor/github.com/blevesearch/bleve/search/collector.go
index 0d163a9d9d..df3ff9c5ab 100644
--- a/vendor/github.com/blevesearch/bleve/search/collector.go
+++ b/vendor/github.com/blevesearch/bleve/search/collector.go
@@ -30,3 +30,23 @@ type Collector interface {
SetFacetsBuilder(facetsBuilder *FacetsBuilder)
FacetResults() FacetResults
}
+
+// DocumentMatchHandler is the type of document match callback
+// bleve will invoke during the search.
+// Eventually, bleve will indicate the completion of an ongoing search,
+// by passing a nil value for the document match callback.
+// The application should take a copy of the hit/documentMatch
+// if it wish to own it or need prolonged access to it.
+type DocumentMatchHandler func(hit *DocumentMatch) error
+
+type MakeDocumentMatchHandlerKeyType string
+
+var MakeDocumentMatchHandlerKey = MakeDocumentMatchHandlerKeyType(
+ "MakeDocumentMatchHandlerKey")
+
+// MakeDocumentMatchHandler is an optional DocumentMatchHandler
+// builder function which the applications can pass to bleve.
+// These builder methods gives a DocumentMatchHandler function
+// to bleve, which it will invoke on every document matches.
+type MakeDocumentMatchHandler func(ctx *SearchContext) (
+ callback DocumentMatchHandler, loadID bool, err error)
diff --git a/vendor/github.com/blevesearch/bleve/search/collector/heap.go b/vendor/github.com/blevesearch/bleve/search/collector/heap.go
index bdf72eade3..05502d5dfa 100644
--- a/vendor/github.com/blevesearch/bleve/search/collector/heap.go
+++ b/vendor/github.com/blevesearch/bleve/search/collector/heap.go
@@ -25,9 +25,9 @@ type collectStoreHeap struct {
compare collectorCompare
}
-func newStoreHeap(cap int, compare collectorCompare) *collectStoreHeap {
+func newStoreHeap(capacity int, compare collectorCompare) *collectStoreHeap {
rv := &collectStoreHeap{
- heap: make(search.DocumentMatchCollection, 0, cap),
+ heap: make(search.DocumentMatchCollection, 0, capacity),
compare: compare,
}
heap.Init(rv)
diff --git a/vendor/github.com/blevesearch/bleve/search/collector/list.go b/vendor/github.com/blevesearch/bleve/search/collector/list.go
index ec2f69cb82..f01d205c9c 100644
--- a/vendor/github.com/blevesearch/bleve/search/collector/list.go
+++ b/vendor/github.com/blevesearch/bleve/search/collector/list.go
@@ -25,7 +25,7 @@ type collectStoreList struct {
compare collectorCompare
}
-func newStoreList(cap int, compare collectorCompare) *collectStoreList {
+func newStoreList(capacity int, compare collectorCompare) *collectStoreList {
rv := &collectStoreList{
results: list.New(),
compare: compare,
@@ -34,8 +34,7 @@ func newStoreList(cap int, compare collectorCompare) *collectStoreList {
return rv
}
-func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch,
- size int) *search.DocumentMatch {
+func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch {
c.add(doc)
if c.len() > size {
return c.removeLast()
diff --git a/vendor/github.com/blevesearch/bleve/search/collector/slice.go b/vendor/github.com/blevesearch/bleve/search/collector/slice.go
index 32cb862447..85fe73c408 100644
--- a/vendor/github.com/blevesearch/bleve/search/collector/slice.go
+++ b/vendor/github.com/blevesearch/bleve/search/collector/slice.go
@@ -21,9 +21,9 @@ type collectStoreSlice struct {
compare collectorCompare
}
-func newStoreSlice(cap int, compare collectorCompare) *collectStoreSlice {
+func newStoreSlice(capacity int, compare collectorCompare) *collectStoreSlice {
rv := &collectStoreSlice{
- slice: make(search.DocumentMatchCollection, 0, cap),
+ slice: make(search.DocumentMatchCollection, 0, capacity),
compare: compare,
}
return rv
diff --git a/vendor/github.com/blevesearch/bleve/search/collector/topn.go b/vendor/github.com/blevesearch/bleve/search/collector/topn.go
index 388370e7e7..378a7b114a 100644
--- a/vendor/github.com/blevesearch/bleve/search/collector/topn.go
+++ b/vendor/github.com/blevesearch/bleve/search/collector/topn.go
@@ -16,12 +16,21 @@ package collector
import (
"context"
+ "reflect"
"time"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeTopNCollector int
+
+func init() {
+ var coll TopNCollector
+ reflectStaticSizeTopNCollector = int(reflect.TypeOf(coll).Size())
+}
+
type collectorStore interface {
// Add the document, and if the new store size exceeds the provided size
// the last element is removed and returned. If the size has not been
@@ -58,6 +67,8 @@ type TopNCollector struct {
cachedDesc []bool
lowestMatchOutsideResults *search.DocumentMatch
+ updateFieldVisitor index.DocumentFieldTermVisitor
+ dvReader index.DocValueReader
}
// CheckDoneEvery controls how frequently we check the context deadline
@@ -98,6 +109,22 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector
return hc
}
+func (hc *TopNCollector) Size() int {
+ sizeInBytes := reflectStaticSizeTopNCollector + size.SizeOfPtr
+
+ if hc.facetsBuilder != nil {
+ sizeInBytes += hc.facetsBuilder.Size()
+ }
+
+ for _, entry := range hc.neededFields {
+ sizeInBytes += len(entry) + size.SizeOfString
+ }
+
+ sizeInBytes += len(hc.cachedScoring) + len(hc.cachedDesc)
+
+ return sizeInBytes
+}
+
// Collect goes to the index to find the matching documents
func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
startTime := time.Now()
@@ -113,8 +140,34 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
}
searchContext := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)),
+ Collector: hc,
}
+ hc.dvReader, err = reader.DocValueReader(hc.neededFields)
+ if err != nil {
+ return err
+ }
+
+ hc.updateFieldVisitor = func(field string, term []byte) {
+ if hc.facetsBuilder != nil {
+ hc.facetsBuilder.UpdateVisitor(field, term)
+ }
+ hc.sort.UpdateVisitor(field, term)
+ }
+
+ dmHandlerMaker := MakeTopNDocumentMatchHandler
+ if cv := ctx.Value(search.MakeDocumentMatchHandlerKey); cv != nil {
+ dmHandlerMaker = cv.(search.MakeDocumentMatchHandler)
+ }
+ // use the application given builder for making the custom document match
+ // handler and perform callbacks/invocations on the newly made handler.
+ dmHandler, loadID, err := dmHandlerMaker(searchContext)
+ if err != nil {
+ return err
+ }
+
+ hc.needDocIds = hc.needDocIds || loadID
+
select {
case <-ctx.Done():
return ctx.Err()
@@ -130,13 +183,26 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
}
}
- err = hc.collectSingle(searchContext, reader, next)
+ err = hc.prepareDocumentMatch(searchContext, reader, next)
+ if err != nil {
+ break
+ }
+
+ err = dmHandler(next)
if err != nil {
break
}
next, err = searcher.Next(searchContext)
}
+
+ // help finalize/flush the results in case
+ // of custom document match handlers.
+ err = dmHandler(nil)
+ if err != nil {
+ return err
+ }
+
// compute search duration
hc.took = time.Since(startTime)
if err != nil {
@@ -152,8 +218,8 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
var sortByScoreOpt = []string{"_score"}
-func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error {
- var err error
+func (hc *TopNCollector) prepareDocumentMatch(ctx *search.SearchContext,
+ reader index.IndexReader, d *search.DocumentMatch) (err error) {
// visit field terms for features that require it (sort, facets)
if len(hc.neededFields) > 0 {
@@ -187,33 +253,49 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
hc.sort.Value(d)
}
- // optimization, we track lowest sorting hit already removed from heap
- // with this one comparison, we can avoid all heap operations if
- // this hit would have been added and then immediately removed
- if hc.lowestMatchOutsideResults != nil {
- cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.lowestMatchOutsideResults)
- if cmp >= 0 {
- // this hit can't possibly be in the result set, so avoid heap ops
- ctx.DocumentMatchPool.Put(d)
- return nil
- }
- }
+ return nil
+}
- removed := hc.store.AddNotExceedingSize(d, hc.size+hc.skip)
- if removed != nil {
- if hc.lowestMatchOutsideResults == nil {
- hc.lowestMatchOutsideResults = removed
- } else {
- cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, removed, hc.lowestMatchOutsideResults)
- if cmp < 0 {
- tmp := hc.lowestMatchOutsideResults
- hc.lowestMatchOutsideResults = removed
- ctx.DocumentMatchPool.Put(tmp)
+func MakeTopNDocumentMatchHandler(
+ ctx *search.SearchContext) (search.DocumentMatchHandler, bool, error) {
+ var hc *TopNCollector
+ var ok bool
+ if hc, ok = ctx.Collector.(*TopNCollector); ok {
+ return func(d *search.DocumentMatch) error {
+ if d == nil {
+ return nil
+ }
+ // optimization, we track lowest sorting hit already removed from heap
+ // with this one comparison, we can avoid all heap operations if
+ // this hit would have been added and then immediately removed
+ if hc.lowestMatchOutsideResults != nil {
+ cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d,
+ hc.lowestMatchOutsideResults)
+ if cmp >= 0 {
+ // this hit can't possibly be in the result set, so avoid heap ops
+ ctx.DocumentMatchPool.Put(d)
+ return nil
+ }
}
- }
- }
- return nil
+ removed := hc.store.AddNotExceedingSize(d, hc.size+hc.skip)
+ if removed != nil {
+ if hc.lowestMatchOutsideResults == nil {
+ hc.lowestMatchOutsideResults = removed
+ } else {
+ cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc,
+ removed, hc.lowestMatchOutsideResults)
+ if cmp < 0 {
+ tmp := hc.lowestMatchOutsideResults
+ hc.lowestMatchOutsideResults = removed
+ ctx.DocumentMatchPool.Put(tmp)
+ }
+ }
+ }
+ return nil
+ }, false, nil
+ }
+ return nil, false, nil
}
// visitFieldTerms is responsible for visiting the field terms of the
@@ -223,13 +305,7 @@ func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.Doc
hc.facetsBuilder.StartDoc()
}
- err := reader.DocumentVisitFieldTerms(d.IndexInternalID, hc.neededFields, func(field string, term []byte) {
- if hc.facetsBuilder != nil {
- hc.facetsBuilder.UpdateVisitor(field, term)
- }
- hc.sort.UpdateVisitor(field, term)
- })
-
+ err := hc.dvReader.VisitDocValues(d.IndexInternalID, hc.updateFieldVisitor)
if hc.facetsBuilder != nil {
hc.facetsBuilder.EndDoc()
}
@@ -257,6 +333,7 @@ func (hc *TopNCollector) finalizeResults(r index.IndexReader) error {
return err
}
}
+ doc.Complete(nil)
return nil
})
@@ -288,5 +365,5 @@ func (hc *TopNCollector) FacetResults() search.FacetResults {
if hc.facetsBuilder != nil {
return hc.facetsBuilder.Results()
}
- return search.FacetResults{}
+ return nil
}
diff --git a/vendor/github.com/blevesearch/bleve/search/explanation.go b/vendor/github.com/blevesearch/bleve/search/explanation.go
index 766367d776..3b81737b50 100644
--- a/vendor/github.com/blevesearch/bleve/search/explanation.go
+++ b/vendor/github.com/blevesearch/bleve/search/explanation.go
@@ -17,8 +17,18 @@ package search
import (
"encoding/json"
"fmt"
+ "reflect"
+
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeExplanation int
+
+func init() {
+ var e Explanation
+ reflectStaticSizeExplanation = int(reflect.TypeOf(e).Size())
+}
+
type Explanation struct {
Value float64 `json:"value"`
Message string `json:"message"`
@@ -32,3 +42,14 @@ func (expl *Explanation) String() string {
}
return string(js)
}
+
+func (expl *Explanation) Size() int {
+ sizeInBytes := reflectStaticSizeExplanation + size.SizeOfPtr +
+ len(expl.Message)
+
+ for _, entry := range expl.Children {
+ sizeInBytes += entry.Size()
+ }
+
+ return sizeInBytes
+}
diff --git a/vendor/github.com/blevesearch/bleve/search/facet/facet_builder_datetime.go b/vendor/github.com/blevesearch/bleve/search/facet/facet_builder_datetime.go
index 8657a553a9..c45442e4d8 100644
--- a/vendor/github.com/blevesearch/bleve/search/facet/facet_builder_datetime.go
+++ b/vendor/github.com/blevesearch/bleve/search/facet/facet_builder_datetime.go
@@ -15,13 +15,25 @@
package facet
import (
+ "reflect"
"sort"
"time"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeDateTimeFacetBuilder int
+var reflectStaticSizedateTimeRange int
+
+func init() {
+ var dtfb DateTimeFacetBuilder
+ reflectStaticSizeDateTimeFacetBuilder = int(reflect.TypeOf(dtfb).Size())
+ var dtr dateTimeRange
+ reflectStaticSizedateTimeRange = int(reflect.TypeOf(dtr).Size())
+}
+
type dateTimeRange struct {
start time.Time
end time.Time
@@ -46,6 +58,23 @@ func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
}
}
+func (fb *DateTimeFacetBuilder) Size() int {
+ sizeInBytes := reflectStaticSizeDateTimeFacetBuilder + size.SizeOfPtr +
+ len(fb.field)
+
+ for k, _ := range fb.termsCount {
+ sizeInBytes += size.SizeOfString + len(k) +
+ size.SizeOfInt
+ }
+
+ for k, _ := range fb.ranges {
+ sizeInBytes += size.SizeOfString + len(k) +
+ size.SizeOfPtr + reflectStaticSizedateTimeRange
+ }
+
+ return sizeInBytes
+}
+
func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) {
r := dateTimeRange{
start: start,
diff --git a/vendor/github.com/blevesearch/bleve/search/facet/facet_builder_numeric.go b/vendor/github.com/blevesearch/bleve/search/facet/facet_builder_numeric.go
index 2ab5f27893..c1692b5498 100644
--- a/vendor/github.com/blevesearch/bleve/search/facet/facet_builder_numeric.go
+++ b/vendor/github.com/blevesearch/bleve/search/facet/facet_builder_numeric.go
@@ -15,12 +15,24 @@
package facet
import (
+ "reflect"
"sort"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeNumericFacetBuilder int
+var reflectStaticSizenumericRange int
+
+func init() {
+ var nfb NumericFacetBuilder
+ reflectStaticSizeNumericFacetBuilder = int(reflect.TypeOf(nfb).Size())
+ var nr numericRange
+ reflectStaticSizenumericRange = int(reflect.TypeOf(nr).Size())
+}
+
type numericRange struct {
min *float64
max *float64
@@ -45,6 +57,23 @@ func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
}
}
+func (fb *NumericFacetBuilder) Size() int {
+ sizeInBytes := reflectStaticSizeNumericFacetBuilder + size.SizeOfPtr +
+ len(fb.field)
+
+ for k, _ := range fb.termsCount {
+ sizeInBytes += size.SizeOfString + len(k) +
+ size.SizeOfInt
+ }
+
+ for k, _ := range fb.ranges {
+ sizeInBytes += size.SizeOfString + len(k) +
+ size.SizeOfPtr + reflectStaticSizenumericRange
+ }
+
+ return sizeInBytes
+}
+
func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) {
r := numericRange{
min: min,
diff --git a/vendor/github.com/blevesearch/bleve/search/facet/facet_builder_terms.go b/vendor/github.com/blevesearch/bleve/search/facet/facet_builder_terms.go
index a41e475a91..5b5901e01c 100644
--- a/vendor/github.com/blevesearch/bleve/search/facet/facet_builder_terms.go
+++ b/vendor/github.com/blevesearch/bleve/search/facet/facet_builder_terms.go
@@ -15,11 +15,20 @@
package facet
import (
+ "reflect"
"sort"
"github.com/blevesearch/bleve/search"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeTermsFacetBuilder int
+
+func init() {
+ var tfb TermsFacetBuilder
+ reflectStaticSizeTermsFacetBuilder = int(reflect.TypeOf(tfb).Size())
+}
+
type TermsFacetBuilder struct {
size int
field string
@@ -37,6 +46,18 @@ func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
}
}
+func (fb *TermsFacetBuilder) Size() int {
+ sizeInBytes := reflectStaticSizeTermsFacetBuilder + size.SizeOfPtr +
+ len(fb.field)
+
+ for k, _ := range fb.termsCount {
+ sizeInBytes += size.SizeOfString + len(k) +
+ size.SizeOfInt
+ }
+
+ return sizeInBytes
+}
+
func (fb *TermsFacetBuilder) Field() string {
return fb.field
}
diff --git a/vendor/github.com/blevesearch/bleve/search/facets_builder.go b/vendor/github.com/blevesearch/bleve/search/facets_builder.go
index 05e270413a..7fc0bedf30 100644
--- a/vendor/github.com/blevesearch/bleve/search/facets_builder.go
+++ b/vendor/github.com/blevesearch/bleve/search/facets_builder.go
@@ -15,11 +15,32 @@
package search
import (
+ "reflect"
"sort"
"github.com/blevesearch/bleve/index"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeFacetsBuilder int
+var reflectStaticSizeFacetResult int
+var reflectStaticSizeTermFacet int
+var reflectStaticSizeNumericRangeFacet int
+var reflectStaticSizeDateRangeFacet int
+
+func init() {
+ var fb FacetsBuilder
+ reflectStaticSizeFacetsBuilder = int(reflect.TypeOf(fb).Size())
+ var fr FacetResult
+ reflectStaticSizeFacetResult = int(reflect.TypeOf(fr).Size())
+ var tf TermFacet
+ reflectStaticSizeTermFacet = int(reflect.TypeOf(tf).Size())
+ var nrf NumericRangeFacet
+ reflectStaticSizeNumericRangeFacet = int(reflect.TypeOf(nrf).Size())
+ var drf DateRangeFacet
+ reflectStaticSizeDateRangeFacet = int(reflect.TypeOf(drf).Size())
+}
+
type FacetBuilder interface {
StartDoc()
UpdateVisitor(field string, term []byte)
@@ -27,23 +48,40 @@ type FacetBuilder interface {
Result() *FacetResult
Field() string
+
+ Size() int
}
type FacetsBuilder struct {
indexReader index.IndexReader
- facets map[string]FacetBuilder
+ facetNames []string
+ facets []FacetBuilder
fields []string
}
func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
return &FacetsBuilder{
indexReader: indexReader,
- facets: make(map[string]FacetBuilder, 0),
}
}
+func (fb *FacetsBuilder) Size() int {
+ sizeInBytes := reflectStaticSizeFacetsBuilder + size.SizeOfPtr
+
+ for k, v := range fb.facets {
+ sizeInBytes += size.SizeOfString + v.Size() + len(fb.facetNames[k])
+ }
+
+ for _, entry := range fb.fields {
+ sizeInBytes += size.SizeOfString + len(entry)
+ }
+
+ return sizeInBytes
+}
+
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
- fb.facets[name] = facetBuilder
+ fb.facetNames = append(fb.facetNames, name)
+ fb.facets = append(fb.facets, facetBuilder)
fb.fields = append(fb.fields, facetBuilder.Field())
}
@@ -213,6 +251,14 @@ type FacetResult struct {
DateRanges DateRangeFacets `json:"date_ranges,omitempty"`
}
+func (fr *FacetResult) Size() int {
+ return reflectStaticSizeFacetResult + size.SizeOfPtr +
+ len(fr.Field) +
+ len(fr.Terms)*(reflectStaticSizeTermFacet+size.SizeOfPtr) +
+ len(fr.NumericRanges)*(reflectStaticSizeNumericRangeFacet+size.SizeOfPtr) +
+ len(fr.DateRanges)*(reflectStaticSizeDateRangeFacet+size.SizeOfPtr)
+}
+
func (fr *FacetResult) Merge(other *FacetResult) {
fr.Total += other.Total
fr.Missing += other.Missing
@@ -287,9 +333,9 @@ func (fr FacetResults) Fixup(name string, size int) {
func (fb *FacetsBuilder) Results() FacetResults {
fr := make(FacetResults)
- for facetName, facetBuilder := range fb.facets {
+ for i, facetBuilder := range fb.facets {
facetResult := facetBuilder.Result()
- fr[facetName] = facetResult
+ fr[fb.facetNames[i]] = facetResult
}
return fr
}
diff --git a/vendor/github.com/blevesearch/bleve/search/levenshtein.go b/vendor/github.com/blevesearch/bleve/search/levenshtein.go
index ec033143af..687608d3ff 100644
--- a/vendor/github.com/blevesearch/bleve/search/levenshtein.go
+++ b/vendor/github.com/blevesearch/bleve/search/levenshtein.go
@@ -57,15 +57,24 @@ func LevenshteinDistance(a, b string) int {
// in which case the first return val will be the max
// and the second will be true, indicating max was exceeded
func LevenshteinDistanceMax(a, b string, max int) (int, bool) {
+ v, wasMax, _ := LevenshteinDistanceMaxReuseSlice(a, b, max, nil)
+ return v, wasMax
+}
+
+func LevenshteinDistanceMaxReuseSlice(a, b string, max int, d []int) (int, bool, []int) {
la := len(a)
lb := len(b)
ld := int(math.Abs(float64(la - lb)))
if ld > max {
- return max, true
+ return max, true, d
}
- d := make([]int, la+1)
+ if cap(d) < la+1 {
+ d = make([]int, la+1)
+ }
+ d = d[:la+1]
+
var lastdiag, olddiag, temp int
for i := 1; i <= la; i++ {
@@ -98,8 +107,8 @@ func LevenshteinDistanceMax(a, b string, max int) (int, bool) {
}
// after each row if rowmin isn't less than max stop
if rowmin > max {
- return max, true
+ return max, true, d
}
}
- return d[la], false
+ return d[la], false, d
}
diff --git a/vendor/github.com/blevesearch/bleve/search/pool.go b/vendor/github.com/blevesearch/bleve/search/pool.go
index b9b52a613f..ba8be8fc27 100644
--- a/vendor/github.com/blevesearch/bleve/search/pool.go
+++ b/vendor/github.com/blevesearch/bleve/search/pool.go
@@ -14,6 +14,17 @@
package search
+import (
+ "reflect"
+)
+
+var reflectStaticSizeDocumentMatchPool int
+
+func init() {
+ var dmp DocumentMatchPool
+ reflectStaticSizeDocumentMatchPool = int(reflect.TypeOf(dmp).Size())
+}
+
// DocumentMatchPoolTooSmall is a callback function that can be executed
// when the DocumentMatchPool does not have sufficient capacity
// By default we just perform just-in-time allocation, but you could log
diff --git a/vendor/github.com/blevesearch/bleve/search/query/conjunction.go b/vendor/github.com/blevesearch/bleve/search/query/conjunction.go
index 39cc312de8..1a7ed1bc05 100644
--- a/vendor/github.com/blevesearch/bleve/search/query/conjunction.go
+++ b/vendor/github.com/blevesearch/bleve/search/query/conjunction.go
@@ -70,9 +70,11 @@ func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
}
ss = append(ss, sr)
}
+
if len(ss) < 1 {
return searcher.NewMatchNoneSearcher(i)
}
+
return searcher.NewConjunctionSearcher(i, ss, options)
}
diff --git a/vendor/github.com/blevesearch/bleve/search/query/disjunction.go b/vendor/github.com/blevesearch/bleve/search/query/disjunction.go
index dacc3a75b1..2bc1d70441 100644
--- a/vendor/github.com/blevesearch/bleve/search/query/disjunction.go
+++ b/vendor/github.com/blevesearch/bleve/search/query/disjunction.go
@@ -58,7 +58,8 @@ func (q *DisjunctionQuery) SetMin(m float64) {
q.Min = m
}
-func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
+func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
+ options search.SearcherOptions) (search.Searcher, error) {
ss := make([]search.Searcher, 0, len(q.Disjuncts))
for _, disjunct := range q.Disjuncts {
sr, err := disjunct.Searcher(i, m, options)
@@ -76,9 +77,17 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
}
ss = append(ss, sr)
}
+
if len(ss) < 1 {
return searcher.NewMatchNoneSearcher(i)
+ } else if len(ss) == 1 && int(q.Min) == ss[0].Min() {
+ // apply optimization only if both conditions below are satisfied:
+ // - disjunction searcher has only 1 child searcher
+ // - parent searcher's min setting is equal to child searcher's min
+
+ return ss[0], nil
}
+
return searcher.NewDisjunctionSearcher(i, ss, q.Min, options)
}
diff --git a/vendor/github.com/blevesearch/bleve/search/query/query.go b/vendor/github.com/blevesearch/bleve/search/query/query.go
index 1b0d94c012..c7c1eefb80 100644
--- a/vendor/github.com/blevesearch/bleve/search/query/query.go
+++ b/vendor/github.com/blevesearch/bleve/search/query/query.go
@@ -296,32 +296,28 @@ func expandQuery(m mapping.IndexMapping, query Query) (Query, error) {
}
expand = func(query Query) (Query, error) {
- switch query.(type) {
+ switch q := query.(type) {
case *QueryStringQuery:
- q := query.(*QueryStringQuery)
parsed, err := parseQuerySyntax(q.Query)
if err != nil {
return nil, fmt.Errorf("could not parse '%s': %s", q.Query, err)
}
return expand(parsed)
case *ConjunctionQuery:
- q := *query.(*ConjunctionQuery)
children, err := expandSlice(q.Conjuncts)
if err != nil {
return nil, err
}
q.Conjuncts = children
- return &q, nil
+ return q, nil
case *DisjunctionQuery:
- q := *query.(*DisjunctionQuery)
children, err := expandSlice(q.Disjuncts)
if err != nil {
return nil, err
}
q.Disjuncts = children
- return &q, nil
+ return q, nil
case *BooleanQuery:
- q := *query.(*BooleanQuery)
var err error
q.Must, err = expand(q.Must)
if err != nil {
@@ -335,7 +331,7 @@ func expandQuery(m mapping.IndexMapping, query Query) (Query, error) {
if err != nil {
return nil, err
}
- return &q, nil
+ return q, nil
default:
return query, nil
}
diff --git a/vendor/github.com/blevesearch/bleve/search/query/query_string_lex.go b/vendor/github.com/blevesearch/bleve/search/query/query_string_lex.go
index 9c59ceddea..3a9cf23983 100644
--- a/vendor/github.com/blevesearch/bleve/search/query/query_string_lex.go
+++ b/vendor/github.com/blevesearch/bleve/search/query/query_string_lex.go
@@ -273,6 +273,7 @@ func inNumOrStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
// see where to go
if !l.seenDot && next == '.' {
// stay in this state
+ l.seenDot = true
l.buf += string(next)
return inNumOrStrState, true
} else if unicode.IsDigit(next) {
diff --git a/vendor/github.com/blevesearch/bleve/search/query/regexp.go b/vendor/github.com/blevesearch/bleve/search/query/regexp.go
index 09544fcf1b..0c87a6f92e 100644
--- a/vendor/github.com/blevesearch/bleve/search/query/regexp.go
+++ b/vendor/github.com/blevesearch/bleve/search/query/regexp.go
@@ -15,7 +15,6 @@
package query
import (
- "regexp"
"strings"
"github.com/blevesearch/bleve/index"
@@ -28,7 +27,6 @@ type RegexpQuery struct {
Regexp string `json:"regexp"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
- compiled *regexp.Regexp
}
// NewRegexpQuery creates a new Query which finds
@@ -64,33 +62,20 @@ func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, opti
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
- err := q.compile()
- if err != nil {
- return nil, err
+
+ // require that pattern NOT be anchored to start and end of term.
+ // do not attempt to remove trailing $, its presence is not
+ // known to interfere with LiteralPrefix() the way ^ does
+ // and removing $ introduces possible ambiguities with escaped \$, \\$, etc
+ actualRegexp := q.Regexp
+ if strings.HasPrefix(actualRegexp, "^") {
+ actualRegexp = actualRegexp[1:] // remove leading ^
}
- return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), options)
+ return searcher.NewRegexpStringSearcher(i, actualRegexp, field,
+ q.BoostVal.Value(), options)
}
func (q *RegexpQuery) Validate() error {
- return q.compile()
-}
-
-func (q *RegexpQuery) compile() error {
- if q.compiled == nil {
- // require that pattern NOT be anchored to start and end of term
- actualRegexp := q.Regexp
- if strings.HasPrefix(actualRegexp, "^") {
- actualRegexp = actualRegexp[1:] // remove leading ^
- }
- // do not attempt to remove trailing $, it's presence is not
- // known to interfere with LiteralPrefix() the way ^ does
- // and removing $ introduces possible ambiguities with escaped \$, \\$, etc
- var err error
- q.compiled, err = regexp.Compile(actualRegexp)
- if err != nil {
- return err
- }
- }
- return nil
+ return nil // real validation delayed until searcher constructor
}
diff --git a/vendor/github.com/blevesearch/bleve/search/query/wildcard.go b/vendor/github.com/blevesearch/bleve/search/query/wildcard.go
index 7fd7482c4d..747dfe76ff 100644
--- a/vendor/github.com/blevesearch/bleve/search/query/wildcard.go
+++ b/vendor/github.com/blevesearch/bleve/search/query/wildcard.go
@@ -15,7 +15,6 @@
package query
import (
- "regexp"
"strings"
"github.com/blevesearch/bleve/index"
@@ -47,7 +46,6 @@ type WildcardQuery struct {
Wildcard string `json:"wildcard"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
- compiled *regexp.Regexp
}
// NewWildcardQuery creates a new Query which finds
@@ -83,24 +81,13 @@ func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, op
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
- if q.compiled == nil {
- var err error
- q.compiled, err = q.convertToRegexp()
- if err != nil {
- return nil, err
- }
- }
- return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), options)
-}
+ regexpString := wildcardRegexpReplacer.Replace(q.Wildcard)
-func (q *WildcardQuery) Validate() error {
- var err error
- q.compiled, err = q.convertToRegexp()
- return err
+ return searcher.NewRegexpStringSearcher(i, regexpString, field,
+ q.BoostVal.Value(), options)
}
-func (q *WildcardQuery) convertToRegexp() (*regexp.Regexp, error) {
- regexpString := wildcardRegexpReplacer.Replace(q.Wildcard)
- return regexp.Compile(regexpString)
+func (q *WildcardQuery) Validate() error {
+ return nil // real validation delayed until searcher constructor
}
diff --git a/vendor/github.com/blevesearch/bleve/search/scorer/scorer_conjunction.go b/vendor/github.com/blevesearch/bleve/search/scorer/scorer_conjunction.go
index aad6f9c160..48cdf3ae90 100644
--- a/vendor/github.com/blevesearch/bleve/search/scorer/scorer_conjunction.go
+++ b/vendor/github.com/blevesearch/bleve/search/scorer/scorer_conjunction.go
@@ -15,13 +15,27 @@
package scorer
import (
+ "reflect"
+
"github.com/blevesearch/bleve/search"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeConjunctionQueryScorer int
+
+func init() {
+ var cqs ConjunctionQueryScorer
+ reflectStaticSizeConjunctionQueryScorer = int(reflect.TypeOf(cqs).Size())
+}
+
type ConjunctionQueryScorer struct {
options search.SearcherOptions
}
+func (s *ConjunctionQueryScorer) Size() int {
+ return reflectStaticSizeConjunctionQueryScorer + size.SizeOfPtr
+}
+
func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer {
return &ConjunctionQueryScorer{
options: options,
@@ -35,15 +49,11 @@ func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [
childrenExplanations = make([]*search.Explanation, len(constituents))
}
- locations := []search.FieldTermLocationMap{}
for i, docMatch := range constituents {
sum += docMatch.Score
if s.options.Explain {
childrenExplanations[i] = docMatch.Expl
}
- if docMatch.Locations != nil {
- locations = append(locations, docMatch.Locations)
- }
}
newScore := sum
var newExpl *search.Explanation
@@ -55,11 +65,8 @@ func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [
rv := constituents[0]
rv.Score = newScore
rv.Expl = newExpl
- if len(locations) == 1 {
- rv.Locations = locations[0]
- } else if len(locations) > 1 {
- rv.Locations = search.MergeLocations(locations)
- }
+ rv.FieldTermLocations = search.MergeFieldTermLocations(
+ rv.FieldTermLocations, constituents[1:])
return rv
}
diff --git a/vendor/github.com/blevesearch/bleve/search/scorer/scorer_constant.go b/vendor/github.com/blevesearch/bleve/search/scorer/scorer_constant.go
index a65a826f2d..dc10fdaa4e 100644
--- a/vendor/github.com/blevesearch/bleve/search/scorer/scorer_constant.go
+++ b/vendor/github.com/blevesearch/bleve/search/scorer/scorer_constant.go
@@ -16,11 +16,20 @@ package scorer
import (
"fmt"
+ "reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeConstantScorer int
+
+func init() {
+ var cs ConstantScorer
+ reflectStaticSizeConstantScorer = int(reflect.TypeOf(cs).Size())
+}
+
type ConstantScorer struct {
constant float64
boost float64
@@ -30,6 +39,16 @@ type ConstantScorer struct {
queryWeightExplanation *search.Explanation
}
+func (s *ConstantScorer) Size() int {
+ sizeInBytes := reflectStaticSizeConstantScorer + size.SizeOfPtr
+
+ if s.queryWeightExplanation != nil {
+ sizeInBytes += s.queryWeightExplanation.Size()
+ }
+
+ return sizeInBytes
+}
+
func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer {
rv := ConstantScorer{
options: options,
diff --git a/vendor/github.com/blevesearch/bleve/search/scorer/scorer_disjunction.go b/vendor/github.com/blevesearch/bleve/search/scorer/scorer_disjunction.go
index 184a15d276..7a955e168e 100644
--- a/vendor/github.com/blevesearch/bleve/search/scorer/scorer_disjunction.go
+++ b/vendor/github.com/blevesearch/bleve/search/scorer/scorer_disjunction.go
@@ -16,14 +16,27 @@ package scorer
import (
"fmt"
+ "reflect"
"github.com/blevesearch/bleve/search"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeDisjunctionQueryScorer int
+
+func init() {
+ var dqs DisjunctionQueryScorer
+ reflectStaticSizeDisjunctionQueryScorer = int(reflect.TypeOf(dqs).Size())
+}
+
type DisjunctionQueryScorer struct {
options search.SearcherOptions
}
+func (s *DisjunctionQueryScorer) Size() int {
+ return reflectStaticSizeDisjunctionQueryScorer + size.SizeOfPtr
+}
+
func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer {
return &DisjunctionQueryScorer{
options: options,
@@ -37,15 +50,11 @@ func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [
childrenExplanations = make([]*search.Explanation, len(constituents))
}
- var locations []search.FieldTermLocationMap
for i, docMatch := range constituents {
sum += docMatch.Score
if s.options.Explain {
childrenExplanations[i] = docMatch.Expl
}
- if docMatch.Locations != nil {
- locations = append(locations, docMatch.Locations)
- }
}
var rawExpl *search.Explanation
@@ -67,11 +76,8 @@ func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [
rv := constituents[0]
rv.Score = newScore
rv.Expl = newExpl
- if len(locations) == 1 {
- rv.Locations = locations[0]
- } else if len(locations) > 1 {
- rv.Locations = search.MergeLocations(locations)
- }
+ rv.FieldTermLocations = search.MergeFieldTermLocations(
+ rv.FieldTermLocations, constituents[1:])
return rv
}
diff --git a/vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go b/vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go
index b5f46322ca..5544f2d011 100644
--- a/vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go
+++ b/vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go
@@ -17,13 +17,22 @@ package scorer
import (
"fmt"
"math"
+ "reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeTermQueryScorer int
+
+func init() {
+ var tqs TermQueryScorer
+ reflectStaticSizeTermQueryScorer = int(reflect.TypeOf(tqs).Size())
+}
+
type TermQueryScorer struct {
- queryTerm []byte
+ queryTerm string
queryField string
queryBoost float64
docTerm uint64
@@ -36,9 +45,24 @@ type TermQueryScorer struct {
queryWeightExplanation *search.Explanation
}
+func (s *TermQueryScorer) Size() int {
+ sizeInBytes := reflectStaticSizeTermQueryScorer + size.SizeOfPtr +
+ len(s.queryTerm) + len(s.queryField)
+
+ if s.idfExplanation != nil {
+ sizeInBytes += s.idfExplanation.Size()
+ }
+
+ if s.queryWeightExplanation != nil {
+ sizeInBytes += s.queryWeightExplanation.Size()
+ }
+
+ return sizeInBytes
+}
+
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
rv := TermQueryScorer{
- queryTerm: queryTerm,
+ queryTerm: string(queryTerm),
queryField: queryField,
queryBoost: queryBoost,
docTerm: docTerm,
@@ -82,7 +106,7 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
}
s.queryWeightExplanation = &search.Explanation{
Value: s.queryWeight,
- Message: fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.queryField, string(s.queryTerm), s.queryBoost),
+ Message: fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.queryField, s.queryTerm, s.queryBoost),
Children: childrenExplanations,
}
}
@@ -104,7 +128,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
childrenExplanations := make([]*search.Explanation, 3)
childrenExplanations[0] = &search.Explanation{
Value: tf,
- Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, string(s.queryTerm), termMatch.Freq),
+ Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
}
childrenExplanations[1] = &search.Explanation{
Value: termMatch.Norm,
@@ -113,7 +137,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
childrenExplanations[2] = s.idfExplanation
scoreExplanation = &search.Explanation{
Value: score,
- Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, string(s.queryTerm), termMatch.ID),
+ Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID),
Children: childrenExplanations,
}
}
@@ -127,7 +151,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
childExplanations[1] = scoreExplanation
scoreExplanation = &search.Explanation{
Value: score,
- Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, string(s.queryTerm), s.queryBoost, termMatch.ID),
+ Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID),
Children: childExplanations,
}
}
@@ -140,41 +164,31 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
rv.Expl = scoreExplanation
}
- if termMatch.Vectors != nil && len(termMatch.Vectors) > 0 {
- locs := make([]search.Location, len(termMatch.Vectors))
- locsUsed := 0
-
- totalPositions := 0
- for _, v := range termMatch.Vectors {
- totalPositions += len(v.ArrayPositions)
+ if len(termMatch.Vectors) > 0 {
+ if cap(rv.FieldTermLocations) < len(termMatch.Vectors) {
+ rv.FieldTermLocations = make([]search.FieldTermLocation, 0, len(termMatch.Vectors))
}
- positions := make(search.ArrayPositions, totalPositions)
- positionsUsed := 0
- rv.Locations = make(search.FieldTermLocationMap)
for _, v := range termMatch.Vectors {
- tlm := rv.Locations[v.Field]
- if tlm == nil {
- tlm = make(search.TermLocationMap)
- rv.Locations[v.Field] = tlm
- }
-
- loc := &locs[locsUsed]
- locsUsed++
-
- loc.Pos = v.Pos
- loc.Start = v.Start
- loc.End = v.End
-
+ var ap search.ArrayPositions
if len(v.ArrayPositions) > 0 {
- loc.ArrayPositions = positions[positionsUsed : positionsUsed+len(v.ArrayPositions)]
- for i, ap := range v.ArrayPositions {
- loc.ArrayPositions[i] = ap
+ n := len(rv.FieldTermLocations)
+ if n < cap(rv.FieldTermLocations) { // reuse ap slice if available
+ ap = rv.FieldTermLocations[:n+1][n].Location.ArrayPositions[:0]
}
- positionsUsed += len(v.ArrayPositions)
+ ap = append(ap, v.ArrayPositions...)
}
-
- tlm[string(s.queryTerm)] = append(tlm[string(s.queryTerm)], loc)
+ rv.FieldTermLocations =
+ append(rv.FieldTermLocations, search.FieldTermLocation{
+ Field: v.Field,
+ Term: s.queryTerm,
+ Location: search.Location{
+ Pos: v.Pos,
+ Start: v.Start,
+ End: v.End,
+ ArrayPositions: ap,
+ },
+ })
}
}
diff --git a/vendor/github.com/blevesearch/bleve/search/search.go b/vendor/github.com/blevesearch/bleve/search/search.go
index f9a92783b7..f8a282d165 100644
--- a/vendor/github.com/blevesearch/bleve/search/search.go
+++ b/vendor/github.com/blevesearch/bleve/search/search.go
@@ -16,11 +16,25 @@ package search
import (
"fmt"
+ "reflect"
- "github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeDocumentMatch int
+var reflectStaticSizeSearchContext int
+var reflectStaticSizeLocation int
+
+func init() {
+ var dm DocumentMatch
+ reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size())
+ var sc SearchContext
+ reflectStaticSizeSearchContext = int(reflect.TypeOf(sc).Size())
+ var l Location
+ reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
+}
+
type ArrayPositions []uint64
func (ap ArrayPositions) Equals(other ArrayPositions) bool {
@@ -47,6 +61,11 @@ type Location struct {
ArrayPositions ArrayPositions `json:"array_positions"`
}
+func (l *Location) Size() int {
+ return reflectStaticSizeLocation + size.SizeOfPtr +
+ len(l.ArrayPositions)*size.SizeOfUint64
+}
+
type Locations []*Location
type TermLocationMap map[string]Locations
@@ -57,6 +76,12 @@ func (t TermLocationMap) AddLocation(term string, location *Location) {
type FieldTermLocationMap map[string]TermLocationMap
+type FieldTermLocation struct {
+ Field string
+ Term string
+ Location Location
+}
+
type FieldFragmentMap map[string][]string
type DocumentMatch struct {
@@ -74,11 +99,14 @@ type DocumentMatch struct {
// fields as float64s and date fields as time.RFC3339 formatted strings.
Fields map[string]interface{} `json:"fields,omitempty"`
- // if we load the document for this hit, remember it so we dont load again
- Document *document.Document `json:"-"`
-
// used to maintain natural index order
HitNumber uint64 `json:"-"`
+
+ // used to temporarily hold field term location information during
+ // search processing in an efficient, recycle-friendly manner, to
+ // be later incorporated into the Locations map when search
+ // results are completed
+ FieldTermLocations []FieldTermLocation `json:"-"`
}
func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
@@ -108,15 +136,116 @@ func (dm *DocumentMatch) Reset() *DocumentMatch {
indexInternalID := dm.IndexInternalID
// remember the []interface{} used for sort
sort := dm.Sort
+ // remember the FieldTermLocations backing array
+ ftls := dm.FieldTermLocations
+ for i := range ftls { // recycle the ArrayPositions of each location
+ ftls[i].Location.ArrayPositions = ftls[i].Location.ArrayPositions[:0]
+ }
// idiom to copy over from empty DocumentMatch (0 allocations)
*dm = DocumentMatch{}
// reuse the []byte already allocated (and reset len to 0)
dm.IndexInternalID = indexInternalID[:0]
// reuse the []interface{} already allocated (and reset len to 0)
dm.Sort = sort[:0]
+ // reuse the FieldTermLocations already allocated (and reset len to 0)
+ dm.FieldTermLocations = ftls[:0]
return dm
}
+func (dm *DocumentMatch) Size() int {
+ sizeInBytes := reflectStaticSizeDocumentMatch + size.SizeOfPtr +
+ len(dm.Index) +
+ len(dm.ID) +
+ len(dm.IndexInternalID)
+
+ if dm.Expl != nil {
+ sizeInBytes += dm.Expl.Size()
+ }
+
+ for k, v := range dm.Locations {
+ sizeInBytes += size.SizeOfString + len(k)
+ for k1, v1 := range v {
+ sizeInBytes += size.SizeOfString + len(k1) +
+ size.SizeOfSlice
+ for _, entry := range v1 {
+ sizeInBytes += entry.Size()
+ }
+ }
+ }
+
+ for k, v := range dm.Fragments {
+ sizeInBytes += size.SizeOfString + len(k) +
+ size.SizeOfSlice
+
+ for _, entry := range v {
+ sizeInBytes += size.SizeOfString + len(entry)
+ }
+ }
+
+ for _, entry := range dm.Sort {
+ sizeInBytes += size.SizeOfString + len(entry)
+ }
+
+ for k, _ := range dm.Fields {
+ sizeInBytes += size.SizeOfString + len(k) +
+ size.SizeOfPtr
+ }
+
+ return sizeInBytes
+}
+
+// Complete performs final preparation & transformation of the
+// DocumentMatch at the end of search processing, also allowing the
+// caller to provide an optional preallocated locations slice
+func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
+ // transform the FieldTermLocations slice into the Locations map
+ nlocs := len(dm.FieldTermLocations)
+ if nlocs > 0 {
+ if cap(prealloc) < nlocs {
+ prealloc = make([]Location, nlocs)
+ }
+ prealloc = prealloc[:nlocs]
+
+ var lastField string
+ var tlm TermLocationMap
+
+ for i, ftl := range dm.FieldTermLocations {
+ if lastField != ftl.Field {
+ lastField = ftl.Field
+
+ if dm.Locations == nil {
+ dm.Locations = make(FieldTermLocationMap)
+ }
+
+ tlm = dm.Locations[ftl.Field]
+ if tlm == nil {
+ tlm = make(TermLocationMap)
+ dm.Locations[ftl.Field] = tlm
+ }
+ }
+
+ loc := &prealloc[i]
+ *loc = ftl.Location
+
+ if len(loc.ArrayPositions) > 0 { // copy
+ loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...)
+ }
+
+ tlm[ftl.Term] = append(tlm[ftl.Term], loc)
+
+ dm.FieldTermLocations[i] = FieldTermLocation{ // recycle
+ Location: Location{
+ ArrayPositions: ftl.Location.ArrayPositions[:0],
+ },
+ }
+ }
+ }
+
+ dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle
+
+ return prealloc
+}
+
func (dm *DocumentMatch) String() string {
return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score)
}
@@ -135,6 +264,7 @@ type Searcher interface {
SetQueryNorm(float64)
Count() uint64
Min() int
+ Size() int
DocumentMatchPoolSize() int
}
@@ -142,9 +272,26 @@ type Searcher interface {
type SearcherOptions struct {
Explain bool
IncludeTermVectors bool
+ Score string
}
// SearchContext represents the context around a single search
type SearchContext struct {
DocumentMatchPool *DocumentMatchPool
+ Collector Collector
+}
+
+func (sc *SearchContext) Size() int {
+ sizeInBytes := reflectStaticSizeSearchContext + size.SizeOfPtr +
+ reflectStaticSizeDocumentMatchPool + size.SizeOfPtr
+
+ if sc.DocumentMatchPool != nil {
+ for _, entry := range sc.DocumentMatchPool.avail {
+ if entry != nil {
+ sizeInBytes += entry.Size()
+ }
+ }
+ }
+
+ return sizeInBytes
}
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go
index a905c29e50..bbbced4795 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go
@@ -16,12 +16,21 @@ package searcher
import (
"math"
+ "reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeBooleanSearcher int
+
+func init() {
+ var bs BooleanSearcher
+ reflectStaticSizeBooleanSearcher = int(reflect.TypeOf(bs).Size())
+}
+
type BooleanSearcher struct {
indexReader index.IndexReader
mustSearcher search.Searcher
@@ -52,6 +61,32 @@ func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searc
return &rv, nil
}
+func (s *BooleanSearcher) Size() int {
+ sizeInBytes := reflectStaticSizeBooleanSearcher + size.SizeOfPtr
+
+ if s.mustSearcher != nil {
+ sizeInBytes += s.mustSearcher.Size()
+ }
+
+ if s.shouldSearcher != nil {
+ sizeInBytes += s.shouldSearcher.Size()
+ }
+
+ if s.mustNotSearcher != nil {
+ sizeInBytes += s.mustNotSearcher.Size()
+ }
+
+ sizeInBytes += s.scorer.Size()
+
+ for _, entry := range s.matches {
+ if entry != nil {
+ sizeInBytes += entry.Size()
+ }
+ }
+
+ return sizeInBytes
+}
+
func (s *BooleanSearcher) computeQueryNorm() {
// first calculate sum of squared weights
sumOfSquaredWeights := 0.0
@@ -284,6 +319,7 @@ func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch
return nil, err
}
}
+
return rv, nil
}
@@ -296,41 +332,52 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
}
}
- var err error
- if s.mustSearcher != nil {
- if s.currMust != nil {
- ctx.DocumentMatchPool.Put(s.currMust)
- }
- s.currMust, err = s.mustSearcher.Advance(ctx, ID)
- if err != nil {
- return nil, err
- }
- }
- if s.shouldSearcher != nil {
- if s.currShould != nil {
- ctx.DocumentMatchPool.Put(s.currShould)
- }
- s.currShould, err = s.shouldSearcher.Advance(ctx, ID)
- if err != nil {
- return nil, err
+ // Advance the searchers only if the currentID cursor is trailing the lookup ID,
+ // additionally if the mustNotSearcher has been initialized, ensure that the
+ // cursor used to track the mustNotSearcher (currMustNot, which isn't tracked by
+ // currentID) is trailing the lookup ID as well - for in the case where currentID
+ // is nil and currMustNot is already at or ahead of the lookup ID, we MUST NOT
+ // advance the currentID or the currMustNot cursors.
+ if (s.currentID == nil || s.currentID.Compare(ID) < 0) &&
+ (s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0) {
+ var err error
+ if s.mustSearcher != nil {
+ if s.currMust != nil {
+ ctx.DocumentMatchPool.Put(s.currMust)
+ }
+ s.currMust, err = s.mustSearcher.Advance(ctx, ID)
+ if err != nil {
+ return nil, err
+ }
}
- }
- if s.mustNotSearcher != nil {
- if s.currMustNot != nil {
- ctx.DocumentMatchPool.Put(s.currMustNot)
+
+ if s.shouldSearcher != nil {
+ if s.currShould != nil {
+ ctx.DocumentMatchPool.Put(s.currShould)
+ }
+ s.currShould, err = s.shouldSearcher.Advance(ctx, ID)
+ if err != nil {
+ return nil, err
+ }
}
- s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID)
- if err != nil {
- return nil, err
+
+ if s.mustNotSearcher != nil {
+ if s.currMustNot != nil {
+ ctx.DocumentMatchPool.Put(s.currMustNot)
+ }
+ s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID)
+ if err != nil {
+ return nil, err
+ }
}
- }
- if s.mustSearcher != nil && s.currMust != nil {
- s.currentID = s.currMust.IndexInternalID
- } else if s.mustSearcher == nil && s.currShould != nil {
- s.currentID = s.currShould.IndexInternalID
- } else {
- s.currentID = nil
+ if s.mustSearcher != nil && s.currMust != nil {
+ s.currentID = s.currMust.IndexInternalID
+ } else if s.mustSearcher == nil && s.currShould != nil {
+ s.currentID = s.currShould.IndexInternalID
+ } else {
+ s.currentID = nil
+ }
}
return s.Next(ctx)
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_conjunction.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_conjunction.go
index 73fba19cd0..ac737bccd0 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_conjunction.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_conjunction.go
@@ -16,13 +16,22 @@ package searcher
import (
"math"
+ "reflect"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeConjunctionSearcher int
+
+func init() {
+ var cs ConjunctionSearcher
+ reflectStaticSizeConjunctionSearcher = int(reflect.TypeOf(cs).Size())
+}
+
type ConjunctionSearcher struct {
indexReader index.IndexReader
searchers OrderedSearcherList
@@ -34,14 +43,27 @@ type ConjunctionSearcher struct {
options search.SearcherOptions
}
-func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, options search.SearcherOptions) (*ConjunctionSearcher, error) {
- // build the downstream searchers
+func NewConjunctionSearcher(indexReader index.IndexReader,
+ qsearchers []search.Searcher, options search.SearcherOptions) (
+ search.Searcher, error) {
+ // build the sorted downstream searchers
searchers := make(OrderedSearcherList, len(qsearchers))
for i, searcher := range qsearchers {
searchers[i] = searcher
}
- // sort the searchers
sort.Sort(searchers)
+
+ // attempt the "unadorned" conjunction optimization only when we
+ // do not need extra information like freq-norm's or term vectors
+ if len(searchers) > 1 &&
+ options.Score == "none" && !options.IncludeTermVectors {
+ rv, err := optimizeCompositeSearcher("conjunction:unadorned",
+ indexReader, searchers, options)
+ if err != nil || rv != nil {
+ return rv, err
+ }
+ }
+
// build our searcher
rv := ConjunctionSearcher{
indexReader: indexReader,
@@ -51,9 +73,36 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S
scorer: scorer.NewConjunctionQueryScorer(options),
}
rv.computeQueryNorm()
+
+ // attempt push-down conjunction optimization when there's >1 searchers
+ if len(searchers) > 1 {
+ rv, err := optimizeCompositeSearcher("conjunction",
+ indexReader, searchers, options)
+ if err != nil || rv != nil {
+ return rv, err
+ }
+ }
+
return &rv, nil
}
+func (s *ConjunctionSearcher) Size() int {
+ sizeInBytes := reflectStaticSizeConjunctionSearcher + size.SizeOfPtr +
+ s.scorer.Size()
+
+ for _, entry := range s.searchers {
+ sizeInBytes += entry.Size()
+ }
+
+ for _, entry := range s.currs {
+ if entry != nil {
+ sizeInBytes += entry.Size()
+ }
+ }
+
+ return sizeInBytes
+}
+
func (s *ConjunctionSearcher) computeQueryNorm() {
// first calculate sum of squared weights
sumOfSquaredWeights := 0.0
@@ -108,7 +157,7 @@ func (s *ConjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentM
var rv *search.DocumentMatch
var err error
OUTER:
- for s.currs[s.maxIDIdx] != nil {
+ for s.maxIDIdx < len(s.currs) && s.currs[s.maxIDIdx] != nil {
maxID := s.currs[s.maxIDIdx].IndexInternalID
i := 0
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction.go
index b6910ddb67..6a296b68fa 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction.go
@@ -1,4 +1,4 @@
-// Copyright (c) 2014 Couchbase, Inc.
+// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -16,12 +16,9 @@ package searcher
import (
"fmt"
- "math"
- "sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
- "github.com/blevesearch/bleve/search/scorer"
)
// DisjunctionMaxClauseCount is a compile time setting that applications can
@@ -29,246 +26,84 @@ import (
// error instead of executing searches when the size exceeds this value.
var DisjunctionMaxClauseCount = 0
-type DisjunctionSearcher struct {
- indexReader index.IndexReader
- searchers OrderedSearcherList
- numSearchers int
- queryNorm float64
- currs []*search.DocumentMatch
- scorer *scorer.DisjunctionQueryScorer
- min int
- matching []*search.DocumentMatch
- matchingIdxs []int
- initialized bool
-}
-
-func tooManyClauses(count int) bool {
- if DisjunctionMaxClauseCount != 0 && count > DisjunctionMaxClauseCount {
- return true
- }
- return false
-}
-
-func tooManyClausesErr() error {
- return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]",
- DisjunctionMaxClauseCount)
-}
+// DisjunctionHeapTakeover is a compile time setting that applications can
+// adjust to control when the DisjunctionSearcher will switch from a simple
+// slice implementation to a heap implementation.
+var DisjunctionHeapTakeover = 10
func NewDisjunctionSearcher(indexReader index.IndexReader,
qsearchers []search.Searcher, min float64, options search.SearcherOptions) (
- *DisjunctionSearcher, error) {
- return newDisjunctionSearcher(indexReader, qsearchers, min, options,
- true)
+ search.Searcher, error) {
+ return newDisjunctionSearcher(indexReader, qsearchers, min, options, true)
}
func newDisjunctionSearcher(indexReader index.IndexReader,
qsearchers []search.Searcher, min float64, options search.SearcherOptions,
- limit bool) (
- *DisjunctionSearcher, error) {
- if limit && tooManyClauses(len(qsearchers)) {
- return nil, tooManyClausesErr()
- }
- // build the downstream searchers
- searchers := make(OrderedSearcherList, len(qsearchers))
- for i, searcher := range qsearchers {
- searchers[i] = searcher
- }
- // sort the searchers
- sort.Sort(sort.Reverse(searchers))
- // build our searcher
- rv := DisjunctionSearcher{
- indexReader: indexReader,
- searchers: searchers,
- numSearchers: len(searchers),
- currs: make([]*search.DocumentMatch, len(searchers)),
- scorer: scorer.NewDisjunctionQueryScorer(options),
- min: int(min),
- matching: make([]*search.DocumentMatch, len(searchers)),
- matchingIdxs: make([]int, len(searchers)),
- }
- rv.computeQueryNorm()
- return &rv, nil
-}
-
-func (s *DisjunctionSearcher) computeQueryNorm() {
- // first calculate sum of squared weights
- sumOfSquaredWeights := 0.0
- for _, searcher := range s.searchers {
- sumOfSquaredWeights += searcher.Weight()
- }
- // now compute query norm from this
- s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
- // finally tell all the downstream searchers the norm
- for _, searcher := range s.searchers {
- searcher.SetQueryNorm(s.queryNorm)
- }
-}
-
-func (s *DisjunctionSearcher) initSearchers(ctx *search.SearchContext) error {
- var err error
- // get all searchers pointing at their first match
- for i, searcher := range s.searchers {
- if s.currs[i] != nil {
- ctx.DocumentMatchPool.Put(s.currs[i])
- }
- s.currs[i], err = searcher.Next(ctx)
- if err != nil {
- return err
+ limit bool) (search.Searcher, error) {
+ // attempt the "unadorned" disjunction optimization only when we
+ // do not need extra information like freq-norm's or term vectors
+ // and the requested min is simple
+ if len(qsearchers) > 1 && min <= 1 &&
+ options.Score == "none" && !options.IncludeTermVectors {
+ rv, err := optimizeCompositeSearcher("disjunction:unadorned",
+ indexReader, qsearchers, options)
+ if err != nil || rv != nil {
+ return rv, err
}
}
- err = s.updateMatches()
- if err != nil {
- return err
+ if len(qsearchers) > DisjunctionHeapTakeover {
+ return newDisjunctionHeapSearcher(indexReader, qsearchers, min, options,
+ limit)
}
-
- s.initialized = true
- return nil
+ return newDisjunctionSliceSearcher(indexReader, qsearchers, min, options,
+ limit)
}
-func (s *DisjunctionSearcher) updateMatches() error {
- matching := s.matching[:0]
- matchingIdxs := s.matchingIdxs[:0]
-
- for i := 0; i < len(s.currs); i++ {
- curr := s.currs[i]
- if curr == nil {
- continue
- }
-
- if len(matching) > 0 {
- cmp := curr.IndexInternalID.Compare(matching[0].IndexInternalID)
- if cmp > 0 {
- continue
- }
+func optimizeCompositeSearcher(optimizationKind string,
+ indexReader index.IndexReader, qsearchers []search.Searcher,
+ options search.SearcherOptions) (search.Searcher, error) {
+ var octx index.OptimizableContext
- if cmp < 0 {
- matching = matching[:0]
- matchingIdxs = matchingIdxs[:0]
- }
+ for _, searcher := range qsearchers {
+ o, ok := searcher.(index.Optimizable)
+ if !ok {
+ return nil, nil
}
- matching = append(matching, curr)
- matchingIdxs = append(matchingIdxs, i)
- }
-
- s.matching = matching
- s.matchingIdxs = matchingIdxs
-
- return nil
-}
-
-func (s *DisjunctionSearcher) Weight() float64 {
- var rv float64
- for _, searcher := range s.searchers {
- rv += searcher.Weight()
- }
- return rv
-}
-
-func (s *DisjunctionSearcher) SetQueryNorm(qnorm float64) {
- for _, searcher := range s.searchers {
- searcher.SetQueryNorm(qnorm)
- }
-}
-
-func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (
- *search.DocumentMatch, error) {
- if !s.initialized {
- err := s.initSearchers(ctx)
- if err != nil {
- return nil, err
- }
- }
- var err error
- var rv *search.DocumentMatch
-
- found := false
- for !found && len(s.matching) > 0 {
- if len(s.matching) >= s.min {
- found = true
- // score this match
- rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
- }
-
- // invoke next on all the matching searchers
- for _, i := range s.matchingIdxs {
- searcher := s.searchers[i]
- if s.currs[i] != rv {
- ctx.DocumentMatchPool.Put(s.currs[i])
- }
- s.currs[i], err = searcher.Next(ctx)
- if err != nil {
- return nil, err
- }
- }
-
- err = s.updateMatches()
+ var err error
+ octx, err = o.Optimize(optimizationKind, octx)
if err != nil {
return nil, err
}
- }
- return rv, nil
-}
-func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext,
- ID index.IndexInternalID) (*search.DocumentMatch, error) {
- if !s.initialized {
- err := s.initSearchers(ctx)
- if err != nil {
- return nil, err
- }
- }
- // get all searchers pointing at their first match
- var err error
- for i, searcher := range s.searchers {
- if s.currs[i] != nil {
- if s.currs[i].IndexInternalID.Compare(ID) >= 0 {
- continue
- }
- ctx.DocumentMatchPool.Put(s.currs[i])
- }
- s.currs[i], err = searcher.Advance(ctx, ID)
- if err != nil {
- return nil, err
+ if octx == nil {
+ return nil, nil
}
}
- err = s.updateMatches()
- if err != nil {
+ optimized, err := octx.Finish()
+ if err != nil || optimized == nil {
return nil, err
}
- return s.Next(ctx)
-}
-
-func (s *DisjunctionSearcher) Count() uint64 {
- // for now return a worst case
- var sum uint64
- for _, searcher := range s.searchers {
- sum += searcher.Count()
+ tfr, ok := optimized.(index.TermFieldReader)
+ if !ok {
+ return nil, nil
}
- return sum
-}
-func (s *DisjunctionSearcher) Close() (rv error) {
- for _, searcher := range s.searchers {
- err := searcher.Close()
- if err != nil && rv == nil {
- rv = err
- }
- }
- return rv
+ return newTermSearcherFromReader(indexReader, tfr,
+ []byte(optimizationKind), "*", 1.0, options)
}
-func (s *DisjunctionSearcher) Min() int {
- return s.min
+func tooManyClauses(count int) bool {
+ if DisjunctionMaxClauseCount != 0 && count > DisjunctionMaxClauseCount {
+ return true
+ }
+ return false
}
-func (s *DisjunctionSearcher) DocumentMatchPoolSize() int {
- rv := len(s.currs)
- for _, s := range s.searchers {
- rv += s.DocumentMatchPoolSize()
- }
- return rv
+func tooManyClausesErr(count int) error {
+ return fmt.Errorf("TooManyClauses[%d > maxClauseCount, which is set to %d]",
+ count, DisjunctionMaxClauseCount)
}
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction_heap.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction_heap.go
new file mode 100644
index 0000000000..ec133f1f83
--- /dev/null
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction_heap.go
@@ -0,0 +1,343 @@
+// Copyright (c) 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package searcher
+
+import (
+ "bytes"
+ "container/heap"
+ "math"
+ "reflect"
+
+ "github.com/blevesearch/bleve/index"
+ "github.com/blevesearch/bleve/search"
+ "github.com/blevesearch/bleve/search/scorer"
+ "github.com/blevesearch/bleve/size"
+)
+
+var reflectStaticSizeDisjunctionHeapSearcher int
+var reflectStaticSizeSearcherCurr int
+
+func init() {
+ var dhs DisjunctionHeapSearcher
+ reflectStaticSizeDisjunctionHeapSearcher = int(reflect.TypeOf(dhs).Size())
+
+ var sc SearcherCurr
+ reflectStaticSizeSearcherCurr = int(reflect.TypeOf(sc).Size())
+}
+
+type SearcherCurr struct {
+ searcher search.Searcher
+ curr *search.DocumentMatch
+}
+
+type DisjunctionHeapSearcher struct {
+ indexReader index.IndexReader
+
+ numSearchers int
+ scorer *scorer.DisjunctionQueryScorer
+ min int
+ queryNorm float64
+ initialized bool
+ searchers []search.Searcher
+ heap []*SearcherCurr
+
+ matching []*search.DocumentMatch
+ matchingCurrs []*SearcherCurr
+}
+
+func newDisjunctionHeapSearcher(indexReader index.IndexReader,
+ searchers []search.Searcher, min float64, options search.SearcherOptions,
+ limit bool) (
+ *DisjunctionHeapSearcher, error) {
+ if limit && tooManyClauses(len(searchers)) {
+ return nil, tooManyClausesErr(len(searchers))
+ }
+
+ // build our searcher
+ rv := DisjunctionHeapSearcher{
+ indexReader: indexReader,
+ searchers: searchers,
+ numSearchers: len(searchers),
+ scorer: scorer.NewDisjunctionQueryScorer(options),
+ min: int(min),
+ matching: make([]*search.DocumentMatch, len(searchers)),
+ matchingCurrs: make([]*SearcherCurr, len(searchers)),
+ heap: make([]*SearcherCurr, 0, len(searchers)),
+ }
+ rv.computeQueryNorm()
+ return &rv, nil
+}
+
+func (s *DisjunctionHeapSearcher) Size() int {
+ sizeInBytes := reflectStaticSizeDisjunctionHeapSearcher + size.SizeOfPtr +
+ s.scorer.Size()
+
+ for _, entry := range s.searchers {
+ sizeInBytes += entry.Size()
+ }
+
+ for _, entry := range s.matching {
+ if entry != nil {
+ sizeInBytes += entry.Size()
+ }
+ }
+
+ // for matchingCurrs and heap, just use static size * len
+ // since searchers and document matches already counted above
+ sizeInBytes += len(s.matchingCurrs) * reflectStaticSizeSearcherCurr
+ sizeInBytes += len(s.heap) * reflectStaticSizeSearcherCurr
+
+ return sizeInBytes
+}
+
+func (s *DisjunctionHeapSearcher) computeQueryNorm() {
+ // first calculate sum of squared weights
+ sumOfSquaredWeights := 0.0
+ for _, searcher := range s.searchers {
+ sumOfSquaredWeights += searcher.Weight()
+ }
+ // now compute query norm from this
+ s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
+ // finally tell all the downstream searchers the norm
+ for _, searcher := range s.searchers {
+ searcher.SetQueryNorm(s.queryNorm)
+ }
+}
+
+func (s *DisjunctionHeapSearcher) initSearchers(ctx *search.SearchContext) error {
+ // alloc a single block of SearcherCurrs
+ block := make([]SearcherCurr, len(s.searchers))
+
+ // get all searchers pointing at their first match
+ for i, searcher := range s.searchers {
+ curr, err := searcher.Next(ctx)
+ if err != nil {
+ return err
+ }
+ if curr != nil {
+ block[i].searcher = searcher
+ block[i].curr = curr
+ heap.Push(s, &block[i])
+ }
+ }
+
+ err := s.updateMatches()
+ if err != nil {
+ return err
+ }
+ s.initialized = true
+ return nil
+}
+
+func (s *DisjunctionHeapSearcher) updateMatches() error {
+ matching := s.matching[:0]
+ matchingCurrs := s.matchingCurrs[:0]
+
+ if len(s.heap) > 0 {
+
+ // top of the heap is our next hit
+ next := heap.Pop(s).(*SearcherCurr)
+ matching = append(matching, next.curr)
+ matchingCurrs = append(matchingCurrs, next)
+
+ // now as long as top of heap matches, keep popping
+ for len(s.heap) > 0 && bytes.Compare(next.curr.IndexInternalID, s.heap[0].curr.IndexInternalID) == 0 {
+ next = heap.Pop(s).(*SearcherCurr)
+ matching = append(matching, next.curr)
+ matchingCurrs = append(matchingCurrs, next)
+ }
+ }
+
+ s.matching = matching
+ s.matchingCurrs = matchingCurrs
+
+ return nil
+}
+
+func (s *DisjunctionHeapSearcher) Weight() float64 {
+ var rv float64
+ for _, searcher := range s.searchers {
+ rv += searcher.Weight()
+ }
+ return rv
+}
+
+func (s *DisjunctionHeapSearcher) SetQueryNorm(qnorm float64) {
+ for _, searcher := range s.searchers {
+ searcher.SetQueryNorm(qnorm)
+ }
+}
+
+func (s *DisjunctionHeapSearcher) Next(ctx *search.SearchContext) (
+ *search.DocumentMatch, error) {
+ if !s.initialized {
+ err := s.initSearchers(ctx)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ var rv *search.DocumentMatch
+ found := false
+ for !found && len(s.matching) > 0 {
+ if len(s.matching) >= s.min {
+ found = true
+ // score this match
+ rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
+ }
+
+ // invoke next on all the matching searchers
+ for _, matchingCurr := range s.matchingCurrs {
+ if matchingCurr.curr != rv {
+ ctx.DocumentMatchPool.Put(matchingCurr.curr)
+ }
+ curr, err := matchingCurr.searcher.Next(ctx)
+ if err != nil {
+ return nil, err
+ }
+ if curr != nil {
+ matchingCurr.curr = curr
+ heap.Push(s, matchingCurr)
+ }
+ }
+
+ err := s.updateMatches()
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ return rv, nil
+}
+
+func (s *DisjunctionHeapSearcher) Advance(ctx *search.SearchContext,
+ ID index.IndexInternalID) (*search.DocumentMatch, error) {
+ if !s.initialized {
+ err := s.initSearchers(ctx)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ // if there is anything in matching, toss it back onto the heap
+ for _, matchingCurr := range s.matchingCurrs {
+ heap.Push(s, matchingCurr)
+ }
+ s.matching = s.matching[:0]
+ s.matchingCurrs = s.matchingCurrs[:0]
+
+ // find all searchers that actually need to be advanced
+ // advance them, using s.matchingCurrs as temp storage
+ for len(s.heap) > 0 && bytes.Compare(s.heap[0].curr.IndexInternalID, ID) < 0 {
+ searcherCurr := heap.Pop(s).(*SearcherCurr)
+ ctx.DocumentMatchPool.Put(searcherCurr.curr)
+ curr, err := searcherCurr.searcher.Advance(ctx, ID)
+ if err != nil {
+ return nil, err
+ }
+ if curr != nil {
+ searcherCurr.curr = curr
+ s.matchingCurrs = append(s.matchingCurrs, searcherCurr)
+ }
+ }
+ // now all of the searchers that we advanced have to be pushed back
+ for _, matchingCurr := range s.matchingCurrs {
+ heap.Push(s, matchingCurr)
+ }
+ // reset our temp space
+ s.matchingCurrs = s.matchingCurrs[:0]
+
+ err := s.updateMatches()
+ if err != nil {
+ return nil, err
+ }
+
+ return s.Next(ctx)
+}
+
+func (s *DisjunctionHeapSearcher) Count() uint64 {
+ // for now return a worst case
+ var sum uint64
+ for _, searcher := range s.searchers {
+ sum += searcher.Count()
+ }
+ return sum
+}
+
+func (s *DisjunctionHeapSearcher) Close() (rv error) {
+ for _, searcher := range s.searchers {
+ err := searcher.Close()
+ if err != nil && rv == nil {
+ rv = err
+ }
+ }
+ return rv
+}
+
+func (s *DisjunctionHeapSearcher) Min() int {
+ return s.min
+}
+
+func (s *DisjunctionHeapSearcher) DocumentMatchPoolSize() int {
+ rv := len(s.searchers)
+ for _, s := range s.searchers {
+ rv += s.DocumentMatchPoolSize()
+ }
+ return rv
+}
+
+// a disjunction searcher implements the index.Optimizable interface
+// but only activates on an edge case where the disjunction is a
+// wrapper around a single Optimizable child searcher
+func (s *DisjunctionHeapSearcher) Optimize(kind string, octx index.OptimizableContext) (
+ index.OptimizableContext, error) {
+ if len(s.searchers) == 1 {
+ o, ok := s.searchers[0].(index.Optimizable)
+ if ok {
+ return o.Optimize(kind, octx)
+ }
+ }
+
+ return octx, nil
+}
+
+// heap impl
+
+func (s *DisjunctionHeapSearcher) Len() int { return len(s.heap) }
+
+func (s *DisjunctionHeapSearcher) Less(i, j int) bool {
+ if s.heap[i].curr == nil {
+ return true
+ } else if s.heap[j].curr == nil {
+ return false
+ }
+ return bytes.Compare(s.heap[i].curr.IndexInternalID, s.heap[j].curr.IndexInternalID) < 0
+}
+
+func (s *DisjunctionHeapSearcher) Swap(i, j int) {
+ s.heap[i], s.heap[j] = s.heap[j], s.heap[i]
+}
+
+func (s *DisjunctionHeapSearcher) Push(x interface{}) {
+ s.heap = append(s.heap, x.(*SearcherCurr))
+}
+
+func (s *DisjunctionHeapSearcher) Pop() interface{} {
+ old := s.heap
+ n := len(old)
+ x := old[n-1]
+ s.heap = old[0 : n-1]
+ return x
+}
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction_slice.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction_slice.go
new file mode 100644
index 0000000000..e47f39ad09
--- /dev/null
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction_slice.go
@@ -0,0 +1,298 @@
+// Copyright (c) 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package searcher
+
+import (
+ "math"
+ "reflect"
+ "sort"
+
+ "github.com/blevesearch/bleve/index"
+ "github.com/blevesearch/bleve/search"
+ "github.com/blevesearch/bleve/search/scorer"
+ "github.com/blevesearch/bleve/size"
+)
+
+var reflectStaticSizeDisjunctionSliceSearcher int
+
+func init() {
+ var ds DisjunctionSliceSearcher
+ reflectStaticSizeDisjunctionSliceSearcher = int(reflect.TypeOf(ds).Size())
+}
+
+type DisjunctionSliceSearcher struct {
+ indexReader index.IndexReader
+ searchers OrderedSearcherList
+ numSearchers int
+ queryNorm float64
+ currs []*search.DocumentMatch
+ scorer *scorer.DisjunctionQueryScorer
+ min int
+ matching []*search.DocumentMatch
+ matchingIdxs []int
+ initialized bool
+}
+
+func newDisjunctionSliceSearcher(indexReader index.IndexReader,
+ qsearchers []search.Searcher, min float64, options search.SearcherOptions,
+ limit bool) (
+ *DisjunctionSliceSearcher, error) {
+ if limit && tooManyClauses(len(qsearchers)) {
+ return nil, tooManyClausesErr(len(qsearchers))
+ }
+ // build the downstream searchers
+ searchers := make(OrderedSearcherList, len(qsearchers))
+ for i, searcher := range qsearchers {
+ searchers[i] = searcher
+ }
+ // sort the searchers
+ sort.Sort(sort.Reverse(searchers))
+ // build our searcher
+ rv := DisjunctionSliceSearcher{
+ indexReader: indexReader,
+ searchers: searchers,
+ numSearchers: len(searchers),
+ currs: make([]*search.DocumentMatch, len(searchers)),
+ scorer: scorer.NewDisjunctionQueryScorer(options),
+ min: int(min),
+ matching: make([]*search.DocumentMatch, len(searchers)),
+ matchingIdxs: make([]int, len(searchers)),
+ }
+ rv.computeQueryNorm()
+ return &rv, nil
+}
+
+func (s *DisjunctionSliceSearcher) Size() int {
+ sizeInBytes := reflectStaticSizeDisjunctionSliceSearcher + size.SizeOfPtr +
+ s.scorer.Size()
+
+ for _, entry := range s.searchers {
+ sizeInBytes += entry.Size()
+ }
+
+ for _, entry := range s.currs {
+ if entry != nil {
+ sizeInBytes += entry.Size()
+ }
+ }
+
+ for _, entry := range s.matching {
+ if entry != nil {
+ sizeInBytes += entry.Size()
+ }
+ }
+
+ sizeInBytes += len(s.matchingIdxs) * size.SizeOfInt
+
+ return sizeInBytes
+}
+
+func (s *DisjunctionSliceSearcher) computeQueryNorm() {
+ // first calculate sum of squared weights
+ sumOfSquaredWeights := 0.0
+ for _, searcher := range s.searchers {
+ sumOfSquaredWeights += searcher.Weight()
+ }
+ // now compute query norm from this
+ s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
+ // finally tell all the downstream searchers the norm
+ for _, searcher := range s.searchers {
+ searcher.SetQueryNorm(s.queryNorm)
+ }
+}
+
+func (s *DisjunctionSliceSearcher) initSearchers(ctx *search.SearchContext) error {
+ var err error
+ // get all searchers pointing at their first match
+ for i, searcher := range s.searchers {
+ if s.currs[i] != nil {
+ ctx.DocumentMatchPool.Put(s.currs[i])
+ }
+ s.currs[i], err = searcher.Next(ctx)
+ if err != nil {
+ return err
+ }
+ }
+
+ err = s.updateMatches()
+ if err != nil {
+ return err
+ }
+
+ s.initialized = true
+ return nil
+}
+
+func (s *DisjunctionSliceSearcher) updateMatches() error {
+ matching := s.matching[:0]
+ matchingIdxs := s.matchingIdxs[:0]
+
+ for i := 0; i < len(s.currs); i++ {
+ curr := s.currs[i]
+ if curr == nil {
+ continue
+ }
+
+ if len(matching) > 0 {
+ cmp := curr.IndexInternalID.Compare(matching[0].IndexInternalID)
+ if cmp > 0 {
+ continue
+ }
+
+ if cmp < 0 {
+ matching = matching[:0]
+ matchingIdxs = matchingIdxs[:0]
+ }
+ }
+
+ matching = append(matching, curr)
+ matchingIdxs = append(matchingIdxs, i)
+ }
+
+ s.matching = matching
+ s.matchingIdxs = matchingIdxs
+
+ return nil
+}
+
+func (s *DisjunctionSliceSearcher) Weight() float64 {
+ var rv float64
+ for _, searcher := range s.searchers {
+ rv += searcher.Weight()
+ }
+ return rv
+}
+
+func (s *DisjunctionSliceSearcher) SetQueryNorm(qnorm float64) {
+ for _, searcher := range s.searchers {
+ searcher.SetQueryNorm(qnorm)
+ }
+}
+
+func (s *DisjunctionSliceSearcher) Next(ctx *search.SearchContext) (
+ *search.DocumentMatch, error) {
+ if !s.initialized {
+ err := s.initSearchers(ctx)
+ if err != nil {
+ return nil, err
+ }
+ }
+ var err error
+ var rv *search.DocumentMatch
+
+ found := false
+ for !found && len(s.matching) > 0 {
+ if len(s.matching) >= s.min {
+ found = true
+ // score this match
+ rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
+ }
+
+ // invoke next on all the matching searchers
+ for _, i := range s.matchingIdxs {
+ searcher := s.searchers[i]
+ if s.currs[i] != rv {
+ ctx.DocumentMatchPool.Put(s.currs[i])
+ }
+ s.currs[i], err = searcher.Next(ctx)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ err = s.updateMatches()
+ if err != nil {
+ return nil, err
+ }
+ }
+ return rv, nil
+}
+
+func (s *DisjunctionSliceSearcher) Advance(ctx *search.SearchContext,
+ ID index.IndexInternalID) (*search.DocumentMatch, error) {
+ if !s.initialized {
+ err := s.initSearchers(ctx)
+ if err != nil {
+ return nil, err
+ }
+ }
+ // get all searchers pointing at their first match
+ var err error
+ for i, searcher := range s.searchers {
+ if s.currs[i] != nil {
+ if s.currs[i].IndexInternalID.Compare(ID) >= 0 {
+ continue
+ }
+ ctx.DocumentMatchPool.Put(s.currs[i])
+ }
+ s.currs[i], err = searcher.Advance(ctx, ID)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ err = s.updateMatches()
+ if err != nil {
+ return nil, err
+ }
+
+ return s.Next(ctx)
+}
+
+func (s *DisjunctionSliceSearcher) Count() uint64 {
+ // for now return a worst case
+ var sum uint64
+ for _, searcher := range s.searchers {
+ sum += searcher.Count()
+ }
+ return sum
+}
+
+func (s *DisjunctionSliceSearcher) Close() (rv error) {
+ for _, searcher := range s.searchers {
+ err := searcher.Close()
+ if err != nil && rv == nil {
+ rv = err
+ }
+ }
+ return rv
+}
+
+func (s *DisjunctionSliceSearcher) Min() int {
+ return s.min
+}
+
+func (s *DisjunctionSliceSearcher) DocumentMatchPoolSize() int {
+ rv := len(s.currs)
+ for _, s := range s.searchers {
+ rv += s.DocumentMatchPoolSize()
+ }
+ return rv
+}
+
+// a disjunction searcher implements the index.Optimizable interface
+// but only activates on an edge case where the disjunction is a
+// wrapper around a single Optimizable child searcher
+func (s *DisjunctionSliceSearcher) Optimize(kind string, octx index.OptimizableContext) (
+ index.OptimizableContext, error) {
+ if len(s.searchers) == 1 {
+ o, ok := s.searchers[0].(index.Optimizable)
+ if ok {
+ return o.Optimize(kind, octx)
+ }
+ }
+
+ return octx, nil
+}
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_docid.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_docid.go
index 06351b4a0d..3b258a580a 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_docid.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_docid.go
@@ -15,11 +15,21 @@
package searcher
import (
+ "reflect"
+
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeDocIDSearcher int
+
+func init() {
+ var ds DocIDSearcher
+ reflectStaticSizeDocIDSearcher = int(reflect.TypeOf(ds).Size())
+}
+
// DocIDSearcher returns documents matching a predefined set of identifiers.
type DocIDSearcher struct {
reader index.DocIDReader
@@ -42,6 +52,12 @@ func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64
}, nil
}
+func (s *DocIDSearcher) Size() int {
+ return reflectStaticSizeDocIDSearcher + size.SizeOfPtr +
+ s.reader.Size() +
+ s.scorer.Size()
+}
+
func (s *DocIDSearcher) Count() uint64 {
return uint64(s.count)
}
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_filter.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_filter.go
index 219f2ee7eb..7c95fb41c6 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_filter.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_filter.go
@@ -15,10 +15,20 @@
package searcher
import (
+ "reflect"
+
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeFilteringSearcher int
+
+func init() {
+ var fs FilteringSearcher
+ reflectStaticSizeFilteringSearcher = int(reflect.TypeOf(fs).Size())
+}
+
// FilterFunc defines a function which can filter documents
// returning true means keep the document
// returning false means do not keep the document
@@ -38,6 +48,11 @@ func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearch
}
}
+func (f *FilteringSearcher) Size() int {
+ return reflectStaticSizeFilteringSearcher + size.SizeOfPtr +
+ f.child.Size()
+}
+
func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
next, err := f.child.Next(ctx)
for next != nil && err == nil {
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_fuzzy.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_fuzzy.go
index 90abaa0a85..8176e59b51 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_fuzzy.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_fuzzy.go
@@ -15,13 +15,26 @@
package searcher
import (
+ "fmt"
+
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
+var MaxFuzziness = 2
+
func NewFuzzySearcher(indexReader index.IndexReader, term string,
prefix, fuzziness int, field string, boost float64,
options search.SearcherOptions) (search.Searcher, error) {
+
+ if fuzziness > MaxFuzziness {
+ return nil, fmt.Errorf("fuzziness exceeds max (%d)", MaxFuzziness)
+ }
+
+ if fuzziness < 0 {
+ return nil, fmt.Errorf("invalid fuzziness, negative")
+ }
+
// Note: we don't byte slice the term for a prefix because of runes.
prefixTerm := ""
for i, r := range term {
@@ -31,7 +44,6 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string,
break
}
}
-
candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness,
field, prefixTerm)
if err != nil {
@@ -45,12 +57,40 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string,
func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
fuzziness int, field, prefixTerm string) (rv []string, err error) {
rv = make([]string, 0)
+
+ // in case of advanced reader implementations directly call
+ // the levenshtein automaton based iterator to collect the
+ // candidate terms
+ if ir, ok := indexReader.(index.IndexReaderFuzzy); ok {
+ fieldDict, err := ir.FieldDictFuzzy(field, term, fuzziness, prefixTerm)
+ if err != nil {
+ return nil, err
+ }
+ defer func() {
+ if cerr := fieldDict.Close(); cerr != nil && err == nil {
+ err = cerr
+ }
+ }()
+ tfd, err := fieldDict.Next()
+ for err == nil && tfd != nil {
+ rv = append(rv, tfd.Term)
+ if tooManyClauses(len(rv)) {
+ return nil, tooManyClausesErr(len(rv))
+ }
+ tfd, err = fieldDict.Next()
+ }
+ return rv, err
+ }
+
var fieldDict index.FieldDict
if len(prefixTerm) > 0 {
fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
} else {
fieldDict, err = indexReader.FieldDict(field)
}
+ if err != nil {
+ return nil, err
+ }
defer func() {
if cerr := fieldDict.Close(); cerr != nil && err == nil {
err = cerr
@@ -58,13 +98,16 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
}()
// enumerate terms and check levenshtein distance
+ var reuse []int
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
- ld, exceeded := search.LevenshteinDistanceMax(term, tfd.Term, fuzziness)
+ var ld int
+ var exceeded bool
+ ld, exceeded, reuse = search.LevenshteinDistanceMaxReuseSlice(term, tfd.Term, fuzziness, reuse)
if !exceeded && ld <= fuzziness {
rv = append(rv, tfd.Term)
if tooManyClauses(len(rv)) {
- return rv, tooManyClausesErr()
+ return nil, tooManyClausesErr(len(rv))
}
}
tfd, err = fieldDict.Next()
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go
index f8b1b4cf7a..289e416782 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go
@@ -40,6 +40,11 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
minLon, minLat, maxLon, maxLat, checkBoundaries)
var onBoundarySearcher search.Searcher
+ dvReader, err := indexReader.DocValueReader([]string{field})
+ if err != nil {
+ return nil, err
+ }
+
if len(onBoundaryTerms) > 0 {
rawOnBoundarySearcher, err := NewMultiTermSearcherBytes(indexReader,
onBoundaryTerms, field, boost, options, false)
@@ -48,7 +53,7 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
}
// add filter to check points near the boundary
onBoundarySearcher = NewFilteringSearcher(rawOnBoundarySearcher,
- buildRectFilter(indexReader, field, minLon, minLat, maxLon, maxLat))
+ buildRectFilter(dvReader, field, minLon, minLat, maxLon, maxLat))
openedSearchers = append(openedSearchers, onBoundarySearcher)
}
@@ -144,26 +149,25 @@ func relateAndRecurse(start, end uint64, res uint,
return nil, nil
}
-func buildRectFilter(indexReader index.IndexReader, field string,
+func buildRectFilter(dvReader index.DocValueReader, field string,
minLon, minLat, maxLon, maxLat float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
var lon, lat float64
var found bool
- err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID,
- []string{field}, func(field string, term []byte) {
- // only consider the values which are shifted 0
- prefixCoded := numeric.PrefixCoded(term)
- shift, err := prefixCoded.Shift()
- if err == nil && shift == 0 {
- var i64 int64
- i64, err = prefixCoded.Int64()
- if err == nil {
- lon = geo.MortonUnhashLon(uint64(i64))
- lat = geo.MortonUnhashLat(uint64(i64))
- found = true
- }
+ err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
+ // only consider the values which are shifted 0
+ prefixCoded := numeric.PrefixCoded(term)
+ shift, err := prefixCoded.Shift()
+ if err == nil && shift == 0 {
+ var i64 int64
+ i64, err = prefixCoded.Int64()
+ if err == nil {
+ lon = geo.MortonUnhashLon(uint64(i64))
+ lat = geo.MortonUnhashLat(uint64(i64))
+ found = true
}
- })
+ }
+ })
if err == nil && found {
return geo.BoundingBoxContains(lon, lat,
minLon, minLat, maxLon, maxLat)
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go
index fd559766fd..a15c194e86 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go
@@ -39,9 +39,14 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
return nil, err
}
+ dvReader, err := indexReader.DocValueReader([]string{field})
+ if err != nil {
+ return nil, err
+ }
+
// wrap it in a filtering searcher which checks the actual distance
return NewFilteringSearcher(boxSearcher,
- buildDistFilter(indexReader, field, centerLon, centerLat, dist)), nil
+ buildDistFilter(dvReader, field, centerLon, centerLat, dist)), nil
}
// boxSearcher builds a searcher for the described bounding box
@@ -87,25 +92,25 @@ func boxSearcher(indexReader index.IndexReader,
return boxSearcher, nil
}
-func buildDistFilter(indexReader index.IndexReader, field string,
+func buildDistFilter(dvReader index.DocValueReader, field string,
centerLon, centerLat, maxDist float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
var lon, lat float64
var found bool
- err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID,
- []string{field}, func(field string, term []byte) {
- // only consider the values which are shifted 0
- prefixCoded := numeric.PrefixCoded(term)
- shift, err := prefixCoded.Shift()
- if err == nil && shift == 0 {
- i64, err := prefixCoded.Int64()
- if err == nil {
- lon = geo.MortonUnhashLon(uint64(i64))
- lat = geo.MortonUnhashLat(uint64(i64))
- found = true
- }
+
+ err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
+ // only consider the values which are shifted 0
+ prefixCoded := numeric.PrefixCoded(term)
+ shift, err := prefixCoded.Shift()
+ if err == nil && shift == 0 {
+ i64, err := prefixCoded.Int64()
+ if err == nil {
+ lon = geo.MortonUnhashLon(uint64(i64))
+ lat = geo.MortonUnhashLat(uint64(i64))
+ found = true
}
- })
+ }
+ })
if err == nil && found {
dist := geo.Haversin(lon, lat, centerLon, centerLat)
if dist <= maxDist/1000 {
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_match_all.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_match_all.go
index 822db2ea00..bb66401229 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_match_all.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_match_all.go
@@ -15,11 +15,21 @@
package searcher
import (
+ "reflect"
+
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeMatchAllSearcher int
+
+func init() {
+ var mas MatchAllSearcher
+ reflectStaticSizeMatchAllSearcher = int(reflect.TypeOf(mas).Size())
+}
+
type MatchAllSearcher struct {
indexReader index.IndexReader
reader index.DocIDReader
@@ -46,6 +56,12 @@ func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, options s
}, nil
}
+func (s *MatchAllSearcher) Size() int {
+ return reflectStaticSizeMatchAllSearcher + size.SizeOfPtr +
+ s.reader.Size() +
+ s.scorer.Size()
+}
+
func (s *MatchAllSearcher) Count() uint64 {
return s.count
}
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_match_none.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_match_none.go
index 947596714e..a345e17f77 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_match_none.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_match_none.go
@@ -15,10 +15,20 @@
package searcher
import (
+ "reflect"
+
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeMatchNoneSearcher int
+
+func init() {
+ var mns MatchNoneSearcher
+ reflectStaticSizeMatchNoneSearcher = int(reflect.TypeOf(mns).Size())
+}
+
type MatchNoneSearcher struct {
indexReader index.IndexReader
}
@@ -29,6 +39,10 @@ func NewMatchNoneSearcher(indexReader index.IndexReader) (*MatchNoneSearcher, er
}, nil
}
+func (s *MatchNoneSearcher) Size() int {
+ return reflectStaticSizeMatchNoneSearcher + size.SizeOfPtr
+}
+
func (s *MatchNoneSearcher) Count() uint64 {
return uint64(0)
}
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_multi_term.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_multi_term.go
index b469beadbb..c48366ee27 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_multi_term.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_multi_term.go
@@ -22,6 +22,10 @@ import (
func NewMultiTermSearcher(indexReader index.IndexReader, terms []string,
field string, boost float64, options search.SearcherOptions, limit bool) (
search.Searcher, error) {
+ if limit && tooManyClauses(len(terms)) {
+ return nil, tooManyClausesErr(len(terms))
+ }
+
qsearchers := make([]search.Searcher, len(terms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
@@ -46,6 +50,10 @@ func NewMultiTermSearcher(indexReader index.IndexReader, terms []string,
func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte,
field string, boost float64, options search.SearcherOptions, limit bool) (
search.Searcher, error) {
+ if limit && tooManyClauses(len(terms)) {
+ return nil, tooManyClausesErr(len(terms))
+ }
+
qsearchers := make([]search.Searcher, len(terms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go
index 7f42d72508..e52ef9a825 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go
@@ -68,7 +68,7 @@ func NewNumericRangeSearcher(indexReader index.IndexReader,
return nil, err
}
if tooManyClauses(len(terms)) {
- return nil, tooManyClausesErr()
+ return nil, tooManyClausesErr(len(terms))
}
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
@@ -77,6 +77,25 @@ func NewNumericRangeSearcher(indexReader index.IndexReader,
func filterCandidateTerms(indexReader index.IndexReader,
terms [][]byte, field string) (rv [][]byte, err error) {
+
+ if ir, ok := indexReader.(index.IndexReaderOnly); ok {
+ fieldDict, err := ir.FieldDictOnly(field, terms, false)
+ if err != nil {
+ return nil, err
+ }
+ // enumerate the terms (no need to check them again)
+ tfd, err := fieldDict.Next()
+ for err == nil && tfd != nil {
+ rv = append(rv, []byte(tfd.Term))
+ tfd, err = fieldDict.Next()
+ }
+ if cerr := fieldDict.Close(); cerr != nil && err == nil {
+ err = cerr
+ }
+
+ return rv, err
+ }
+
fieldDict, err := indexReader.FieldDictRange(field, terms[0], terms[len(terms)-1])
if err != nil {
return nil, err
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_phrase.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_phrase.go
index 6237cecfd3..51b7e5bd8a 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_phrase.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_phrase.go
@@ -17,21 +17,52 @@ package searcher
import (
"fmt"
"math"
+ "reflect"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizePhraseSearcher int
+
+func init() {
+ var ps PhraseSearcher
+ reflectStaticSizePhraseSearcher = int(reflect.TypeOf(ps).Size())
+}
+
type PhraseSearcher struct {
- indexReader index.IndexReader
- mustSearcher *ConjunctionSearcher
+ mustSearcher search.Searcher
queryNorm float64
currMust *search.DocumentMatch
- slop int
terms [][]string
+ path phrasePath
+ paths []phrasePath
+ locations []search.Location
initialized bool
}
+func (s *PhraseSearcher) Size() int {
+ sizeInBytes := reflectStaticSizePhraseSearcher + size.SizeOfPtr
+
+ if s.mustSearcher != nil {
+ sizeInBytes += s.mustSearcher.Size()
+ }
+
+ if s.currMust != nil {
+ sizeInBytes += s.currMust.Size()
+ }
+
+ for _, entry := range s.terms {
+ sizeInBytes += size.SizeOfSlice
+ for _, entry1 := range entry {
+ sizeInBytes += size.SizeOfString + len(entry1)
+ }
+ }
+
+ return sizeInBytes
+}
+
func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
// turn flat terms []string into [][]string
mterms := make([][]string, len(terms))
@@ -96,7 +127,6 @@ func NewMultiPhraseSearcher(indexReader index.IndexReader, terms [][]string, fie
// build our searcher
rv := PhraseSearcher{
- indexReader: indexReader,
mustSearcher: mustSearcher,
terms: terms,
}
@@ -133,6 +163,9 @@ func (s *PhraseSearcher) advanceNextMust(ctx *search.SearchContext) error {
var err error
if s.mustSearcher != nil {
+ if s.currMust != nil {
+ ctx.DocumentMatchPool.Put(s.currMust)
+ }
s.currMust, err = s.mustSearcher.Next(ctx)
if err != nil {
return err
@@ -177,48 +210,64 @@ func (s *PhraseSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch,
return nil, nil
}
-// checkCurrMustMatch is soley concerned with determining if the DocumentMatch
+// checkCurrMustMatch is solely concerned with determining if the DocumentMatch
// pointed to by s.currMust (which satisifies the pre-condition searcher)
// also satisfies the phase constraints. if so, it returns a DocumentMatch
// for this document, otherwise nil
func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.DocumentMatch {
- rvftlm := make(search.FieldTermLocationMap, 0)
- freq := 0
+ s.locations = s.currMust.Complete(s.locations)
+
+ locations := s.currMust.Locations
+ s.currMust.Locations = nil
+
+ ftls := s.currMust.FieldTermLocations
+
// typically we would expect there to only actually be results in
// one field, but we allow for this to not be the case
// but, we note that phrase constraints can only be satisfied within
// a single field, so we can check them each independently
- for field, tlm := range s.currMust.Locations {
-
- f, rvtlm := s.checkCurrMustMatchField(ctx, tlm)
- if f > 0 {
- freq += f
- rvftlm[field] = rvtlm
- }
+ for field, tlm := range locations {
+ ftls = s.checkCurrMustMatchField(ctx, field, tlm, ftls)
}
- if freq > 0 {
+ if len(ftls) > 0 {
// return match
rv := s.currMust
- rv.Locations = rvftlm
+ s.currMust = nil
+ rv.FieldTermLocations = ftls
return rv
}
return nil
}
-// checkCurrMustMatchField is soley concerned with determining if one particular
-// field within the currMust DocumentMatch Locations satisfies the phase
-// constraints (possibly more than once). if so, the number of times it was
-// satisfied, and these locations are returned. otherwise 0 and either
-// a nil or empty TermLocationMap
-func (s *PhraseSearcher) checkCurrMustMatchField(ctx *search.SearchContext, tlm search.TermLocationMap) (int, search.TermLocationMap) {
- paths := findPhrasePaths(0, nil, s.terms, tlm, nil, 0)
- rv := make(search.TermLocationMap, len(s.terms))
- for _, p := range paths {
- p.MergeInto(rv)
+// checkCurrMustMatchField is solely concerned with determining if one
+// particular field within the currMust DocumentMatch Locations
+// satisfies the phase constraints (possibly more than once). if so,
+// the matching field term locations are appended to the provided
+// slice
+func (s *PhraseSearcher) checkCurrMustMatchField(ctx *search.SearchContext,
+ field string, tlm search.TermLocationMap,
+ ftls []search.FieldTermLocation) []search.FieldTermLocation {
+ if s.path == nil {
+ s.path = make(phrasePath, 0, len(s.terms))
}
- return len(paths), rv
+ s.paths = findPhrasePaths(0, nil, s.terms, tlm, s.path[:0], 0, s.paths[:0])
+ for _, p := range s.paths {
+ for _, pp := range p {
+ ftls = append(ftls, search.FieldTermLocation{
+ Field: field,
+ Term: pp.term,
+ Location: search.Location{
+ Pos: pp.loc.Pos,
+ Start: pp.loc.Start,
+ End: pp.loc.End,
+ ArrayPositions: pp.loc.ArrayPositions,
+ },
+ })
+ }
+ }
+ return ftls
}
type phrasePart struct {
@@ -230,7 +279,7 @@ func (p *phrasePart) String() string {
return fmt.Sprintf("[%s %v]", p.term, p.loc)
}
-type phrasePath []*phrasePart
+type phrasePath []phrasePart
func (p phrasePath) MergeInto(in search.TermLocationMap) {
for _, pp := range p {
@@ -238,24 +287,51 @@ func (p phrasePath) MergeInto(in search.TermLocationMap) {
}
}
-// findPhrasePaths is a function to identify phase matches from a set of known
-// term locations. the implementation is recursive, so care must be taken
-// with arguments and return values.
+func (p phrasePath) String() string {
+ rv := "["
+ for i, pp := range p {
+ if i > 0 {
+ rv += ", "
+ }
+ rv += pp.String()
+ }
+ rv += "]"
+ return rv
+}
+
+// findPhrasePaths is a function to identify phase matches from a set
+// of known term locations. it recursive so care must be taken with
+// arguments and return values.
//
-// prev - the previous location, nil on first invocation
-// phraseTerms - slice containing the phrase terms themselves
+// prevPos - the previous location, 0 on first invocation
+// ap - array positions of the first candidate phrase part to
+// which further recursive phrase parts must match,
+// nil on initial invocation or when there are no array positions
+// phraseTerms - slice containing the phrase terms,
// may contain empty string as placeholder (don't care)
// tlm - the Term Location Map containing all relevant term locations
-// offset - the offset from the previous that this next term must match
// p - the current path being explored (appended to in recursive calls)
// this is the primary state being built during the traversal
+// remainingSlop - amount of sloppiness that's allowed, which is the
+// sum of the editDistances from each matching phrase part,
+// where 0 means no sloppiness allowed (all editDistances must be 0),
+// decremented during recursion
+// rv - the final result being appended to by all the recursive calls
//
// returns slice of paths, or nil if invocation did not find any successul paths
-func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]string, tlm search.TermLocationMap, p phrasePath, remainingSlop int) []phrasePath {
-
+func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]string,
+ tlm search.TermLocationMap, p phrasePath, remainingSlop int, rv []phrasePath) []phrasePath {
// no more terms
if len(phraseTerms) < 1 {
- return []phrasePath{p}
+ // snapshot or copy the recursively built phrasePath p and
+ // append it to the rv, also optimizing by checking if next
+ // phrasePath item in the rv (which we're about to overwrite)
+ // is available for reuse
+ var pcopy phrasePath
+ if len(rv) < cap(rv) {
+ pcopy = rv[:len(rv)+1][len(rv)][:0]
+ }
+ return append(rv, append(pcopy, p...))
}
car := phraseTerms[0]
@@ -268,13 +344,13 @@ func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]s
// if prevPos was 0, don't set it to 1 (as thats not a real abs pos)
nextPos = 0 // don't advance nextPos if prevPos was 0
}
- return findPhrasePaths(nextPos, ap, cdr, tlm, p, remainingSlop)
+ return findPhrasePaths(nextPos, ap, cdr, tlm, p, remainingSlop, rv)
}
- var rv []phrasePath
// locations for this term
for _, carTerm := range car {
locations := tlm[carTerm]
+ LOCATIONS_LOOP:
for _, loc := range locations {
if prevPos != 0 && !loc.ArrayPositions.Equals(ap) {
// if the array positions are wrong, can't match, try next location
@@ -287,11 +363,18 @@ func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]s
dist = editDistance(prevPos+1, loc.Pos)
}
- // if enough slop reamining, continue recursively
+ // if enough slop remaining, continue recursively
if prevPos == 0 || (remainingSlop-dist) >= 0 {
+ // skip if we've already used this term+loc already
+ for _, ppart := range p {
+ if ppart.term == carTerm && ppart.loc == loc {
+ continue LOCATIONS_LOOP
+ }
+ }
+
// this location works, add it to the path (but not for empty term)
- px := append(p, &phrasePart{term: carTerm, loc: loc})
- rv = append(rv, findPhrasePaths(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist)...)
+ px := append(p, phrasePart{term: carTerm, loc: loc})
+ rv = findPhrasePaths(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist, rv)
}
}
}
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_regexp.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_regexp.go
index b7cf520ac1..4def832c47 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_regexp.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_regexp.go
@@ -21,17 +21,57 @@ import (
"github.com/blevesearch/bleve/search"
)
+// NewRegexpStringSearcher is similar to NewRegexpSearcher, but
+// additionally optimizes for index readers that handle regexp's.
+func NewRegexpStringSearcher(indexReader index.IndexReader, pattern string,
+ field string, boost float64, options search.SearcherOptions) (
+ search.Searcher, error) {
+ ir, ok := indexReader.(index.IndexReaderRegexp)
+ if !ok {
+ r, err := regexp.Compile(pattern)
+ if err != nil {
+ return nil, err
+ }
+
+ return NewRegexpSearcher(indexReader, r, field, boost, options)
+ }
+
+ fieldDict, err := ir.FieldDictRegexp(field, pattern)
+ if err != nil {
+ return nil, err
+ }
+ defer func() {
+ if cerr := fieldDict.Close(); cerr != nil && err == nil {
+ err = cerr
+ }
+ }()
+
+ var candidateTerms []string
+
+ tfd, err := fieldDict.Next()
+ for err == nil && tfd != nil {
+ candidateTerms = append(candidateTerms, tfd.Term)
+ tfd, err = fieldDict.Next()
+ }
+ if err != nil {
+ return nil, err
+ }
+
+ return NewMultiTermSearcher(indexReader, candidateTerms, field, boost,
+ options, true)
+}
+
// NewRegexpSearcher creates a searcher which will match documents that
// contain terms which match the pattern regexp. The match must be EXACT
// matching the entire term. The provided regexp SHOULD NOT start with ^
// or end with $ as this can intefere with the implementation. Separately,
// matches will be checked to ensure they match the entire term.
-func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp,
+func NewRegexpSearcher(indexReader index.IndexReader, pattern index.Regexp,
field string, boost float64, options search.SearcherOptions) (
search.Searcher, error) {
+ var candidateTerms []string
prefixTerm, complete := pattern.LiteralPrefix()
- var candidateTerms []string
if complete {
// there is no pattern
candidateTerms = []string{prefixTerm}
@@ -49,7 +89,7 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp,
}
func findRegexpCandidateTerms(indexReader index.IndexReader,
- pattern *regexp.Regexp, field, prefixTerm string) (rv []string, err error) {
+ pattern index.Regexp, field, prefixTerm string) (rv []string, err error) {
rv = make([]string, 0)
var fieldDict index.FieldDict
if len(prefixTerm) > 0 {
@@ -70,7 +110,7 @@ func findRegexpCandidateTerms(indexReader index.IndexReader,
if matchPos != nil && matchPos[0] == 0 && matchPos[1] == len(tfd.Term) {
rv = append(rv, tfd.Term)
if tooManyClauses(len(rv)) {
- return rv, tooManyClausesErr()
+ return rv, tooManyClausesErr(len(rv))
}
}
tfd, err = fieldDict.Next()
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_term.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_term.go
index 6fae6ae5ae..c1af74c76e 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_term.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_term.go
@@ -15,11 +15,21 @@
package searcher
import (
+ "reflect"
+
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/scorer"
+ "github.com/blevesearch/bleve/size"
)
+var reflectStaticSizeTermSearcher int
+
+func init() {
+ var ts TermSearcher
+ reflectStaticSizeTermSearcher = int(reflect.TypeOf(ts).Size())
+}
+
type TermSearcher struct {
indexReader index.IndexReader
reader index.TermFieldReader
@@ -28,28 +38,20 @@ type TermSearcher struct {
}
func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
- reader, err := indexReader.TermFieldReader([]byte(term), field, true, true, options.IncludeTermVectors)
- if err != nil {
- return nil, err
- }
- count, err := indexReader.DocCount()
- if err != nil {
- _ = reader.Close()
- return nil, err
- }
- scorer := scorer.NewTermQueryScorer([]byte(term), field, boost, count, reader.Count(), options)
- return &TermSearcher{
- indexReader: indexReader,
- reader: reader,
- scorer: scorer,
- }, nil
+ return NewTermSearcherBytes(indexReader, []byte(term), field, boost, options)
}
func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
- reader, err := indexReader.TermFieldReader(term, field, true, true, options.IncludeTermVectors)
+ needFreqNorm := options.Score != "none"
+ reader, err := indexReader.TermFieldReader(term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors)
if err != nil {
return nil, err
}
+ return newTermSearcherFromReader(indexReader, reader, term, field, boost, options)
+}
+
+func newTermSearcherFromReader(indexReader index.IndexReader, reader index.TermFieldReader,
+ term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
count, err := indexReader.DocCount()
if err != nil {
_ = reader.Close()
@@ -63,6 +65,13 @@ func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field stri
}, nil
}
+func (s *TermSearcher) Size() int {
+ return reflectStaticSizeTermSearcher + size.SizeOfPtr +
+ s.reader.Size() +
+ s.tfd.Size() +
+ s.scorer.Size()
+}
+
func (s *TermSearcher) Count() uint64 {
return s.reader.Count()
}
@@ -120,3 +129,13 @@ func (s *TermSearcher) Min() int {
func (s *TermSearcher) DocumentMatchPoolSize() int {
return 1
}
+
+func (s *TermSearcher) Optimize(kind string, octx index.OptimizableContext) (
+ index.OptimizableContext, error) {
+ o, ok := s.reader.(index.Optimizable)
+ if ok {
+ return o.Optimize(kind, octx)
+ }
+
+ return octx, nil
+}
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_term_prefix.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_term_prefix.go
index 05d092249a..b5af4631fe 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_term_prefix.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_term_prefix.go
@@ -27,13 +27,24 @@ func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string,
if err != nil {
return nil, err
}
+ defer func() {
+ if cerr := fieldDict.Close(); cerr != nil && err == nil {
+ err = cerr
+ }
+ }()
var terms []string
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
terms = append(terms, tfd.Term)
+ if tooManyClauses(len(terms)) {
+ return nil, tooManyClausesErr(len(terms))
+ }
tfd, err = fieldDict.Next()
}
+ if err != nil {
+ return nil, err
+ }
return NewMultiTermSearcher(indexReader, terms, field, boost, options, true)
}
diff --git a/vendor/github.com/blevesearch/bleve/search/searcher/search_term_range.go b/vendor/github.com/blevesearch/bleve/search/searcher/search_term_range.go
index 267c681b47..90be1e11a2 100644
--- a/vendor/github.com/blevesearch/bleve/search/searcher/search_term_range.go
+++ b/vendor/github.com/blevesearch/bleve/search/searcher/search_term_range.go
@@ -48,6 +48,12 @@ func NewTermRangeSearcher(indexReader index.IndexReader,
return nil, err
}
+ defer func() {
+ if cerr := fieldDict.Close(); cerr != nil && err == nil {
+ err = cerr
+ }
+ }()
+
var terms []string
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
diff --git a/vendor/github.com/blevesearch/bleve/search/sort.go b/vendor/github.com/blevesearch/bleve/search/sort.go
index 28705d369e..e17f707879 100644
--- a/vendor/github.com/blevesearch/bleve/search/sort.go
+++ b/vendor/github.com/blevesearch/bleve/search/sort.go
@@ -15,6 +15,7 @@
package search
import (
+ "bytes"
"encoding/json"
"fmt"
"math"
@@ -251,23 +252,21 @@ func (so SortOrder) Compare(cachedScoring, cachedDesc []bool, i, j *DocumentMatc
}
func (so SortOrder) RequiresScore() bool {
- rv := false
for _, soi := range so {
if soi.RequiresScoring() {
- rv = true
+ return true
}
}
- return rv
+ return false
}
func (so SortOrder) RequiresDocID() bool {
- rv := false
for _, soi := range so {
if soi.RequiresDocID() {
- rv = true
+ return true
}
}
- return rv
+ return false
}
func (so SortOrder) RequiredFields() []string {
@@ -279,7 +278,7 @@ func (so SortOrder) RequiredFields() []string {
}
func (so SortOrder) CacheIsScore() []bool {
- var rv []bool
+ rv := make([]bool, 0, len(so))
for _, soi := range so {
rv = append(rv, soi.RequiresScoring())
}
@@ -287,7 +286,7 @@ func (so SortOrder) CacheIsScore() []bool {
}
func (so SortOrder) CacheDescending() []bool {
- var rv []bool
+ rv := make([]bool, 0, len(so))
for _, soi := range so {
rv = append(rv, soi.Descending())
}
@@ -344,14 +343,15 @@ type SortField struct {
Type SortFieldType
Mode SortFieldMode
Missing SortFieldMissing
- values []string
+ values [][]byte
+ tmp [][]byte
}
// UpdateVisitor notifies this sort field that in this document
// this field has the specified term
func (s *SortField) UpdateVisitor(field string, term []byte) {
if field == s.Field {
- s.values = append(s.values, string(term))
+ s.values = append(s.values, term)
}
}
@@ -361,7 +361,7 @@ func (s *SortField) UpdateVisitor(field string, term []byte) {
func (s *SortField) Value(i *DocumentMatch) string {
iTerms := s.filterTermsByType(s.values)
iTerm := s.filterTermsByMode(iTerms)
- s.values = nil
+ s.values = s.values[:0]
return iTerm
}
@@ -370,17 +370,17 @@ func (s *SortField) Descending() bool {
return s.Desc
}
-func (s *SortField) filterTermsByMode(terms []string) string {
+func (s *SortField) filterTermsByMode(terms [][]byte) string {
if len(terms) == 1 || (len(terms) > 1 && s.Mode == SortFieldDefault) {
- return terms[0]
+ return string(terms[0])
} else if len(terms) > 1 {
switch s.Mode {
case SortFieldMin:
- sort.Strings(terms)
- return terms[0]
+ sort.Sort(BytesSlice(terms))
+ return string(terms[0])
case SortFieldMax:
- sort.Strings(terms)
- return terms[len(terms)-1]
+ sort.Sort(BytesSlice(terms))
+ return string(terms[len(terms)-1])
}
}
@@ -402,13 +402,13 @@ func (s *SortField) filterTermsByMode(terms []string) string {
// return only the terms which had shift of 0
// if we are in explicit number or date mode, return only valid
// prefix coded numbers with shift of 0
-func (s *SortField) filterTermsByType(terms []string) []string {
+func (s *SortField) filterTermsByType(terms [][]byte) [][]byte {
stype := s.Type
if stype == SortFieldAuto {
allTermsPrefixCoded := true
- var termsWithShiftZero []string
+ termsWithShiftZero := s.tmp[:0]
for _, term := range terms {
- valid, shift := numeric.ValidPrefixCodedTerm(term)
+ valid, shift := numeric.ValidPrefixCodedTermBytes(term)
if valid && shift == 0 {
termsWithShiftZero = append(termsWithShiftZero, term)
} else if !valid {
@@ -417,16 +417,18 @@ func (s *SortField) filterTermsByType(terms []string) []string {
}
if allTermsPrefixCoded {
terms = termsWithShiftZero
+ s.tmp = termsWithShiftZero[:0]
}
} else if stype == SortFieldAsNumber || stype == SortFieldAsDate {
- var termsWithShiftZero []string
+ termsWithShiftZero := s.tmp[:0]
for _, term := range terms {
- valid, shift := numeric.ValidPrefixCodedTerm(term)
+ valid, shift := numeric.ValidPrefixCodedTermBytes(term)
if valid && shift == 0 {
termsWithShiftZero = append(termsWithShiftZero, term)
}
}
terms = termsWithShiftZero
+ s.tmp = termsWithShiftZero[:0]
}
return terms
}
@@ -486,8 +488,7 @@ func (s *SortField) MarshalJSON() ([]byte, error) {
}
func (s *SortField) Copy() SearchSort {
- var rv SortField
- rv = *s
+ rv := *s
return &rv
}
@@ -499,7 +500,6 @@ type SortDocID struct {
// UpdateVisitor is a no-op for SortDocID as it's value
// is not dependent on any field terms
func (s *SortDocID) UpdateVisitor(field string, term []byte) {
-
}
// Value returns the sort value of the DocumentMatch
@@ -529,8 +529,7 @@ func (s *SortDocID) MarshalJSON() ([]byte, error) {
}
func (s *SortDocID) Copy() SearchSort {
- var rv SortDocID
- rv = *s
+ rv := *s
return &rv
}
@@ -542,7 +541,6 @@ type SortScore struct {
// UpdateVisitor is a no-op for SortScore as it's value
// is not dependent on any field terms
func (s *SortScore) UpdateVisitor(field string, term []byte) {
-
}
// Value returns the sort value of the DocumentMatch
@@ -572,8 +570,7 @@ func (s *SortScore) MarshalJSON() ([]byte, error) {
}
func (s *SortScore) Copy() SearchSort {
- var rv SortScore
- rv = *s
+ rv := *s
return &rv
}
@@ -583,7 +580,6 @@ var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0))
// their distance from the specified point.
func NewSortGeoDistance(field, unit string, lon, lat float64, desc bool) (
*SortGeoDistance, error) {
-
rv := &SortGeoDistance{
Field: field,
Desc: desc,
@@ -627,7 +623,7 @@ func (s *SortGeoDistance) UpdateVisitor(field string, term []byte) {
func (s *SortGeoDistance) Value(i *DocumentMatch) string {
iTerms := s.filterTermsByType(s.values)
iTerm := s.filterTermsByMode(iTerms)
- s.values = nil
+ s.values = s.values[:0]
if iTerm == "" {
return maxDistance
@@ -705,7 +701,12 @@ func (s *SortGeoDistance) MarshalJSON() ([]byte, error) {
}
func (s *SortGeoDistance) Copy() SearchSort {
- var rv SortGeoDistance
- rv = *s
+ rv := *s
return &rv
}
+
+type BytesSlice [][]byte
+
+func (p BytesSlice) Len() int { return len(p) }
+func (p BytesSlice) Less(i, j int) bool { return bytes.Compare(p[i], p[j]) < 0 }
+func (p BytesSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
diff --git a/vendor/github.com/blevesearch/bleve/search/util.go b/vendor/github.com/blevesearch/bleve/search/util.go
index 83212af1fa..19dd5d68bd 100644
--- a/vendor/github.com/blevesearch/bleve/search/util.go
+++ b/vendor/github.com/blevesearch/bleve/search/util.go
@@ -40,3 +40,30 @@ func MergeTermLocationMaps(rv, other TermLocationMap) TermLocationMap {
}
return rv
}
+
+func MergeFieldTermLocations(dest []FieldTermLocation, matches []*DocumentMatch) []FieldTermLocation {
+ n := len(dest)
+ for _, dm := range matches {
+ n += len(dm.FieldTermLocations)
+ }
+ if cap(dest) < n {
+ dest = append(make([]FieldTermLocation, 0, n), dest...)
+ }
+
+ for _, dm := range matches {
+ for _, ftl := range dm.FieldTermLocations {
+ dest = append(dest, FieldTermLocation{
+ Field: ftl.Field,
+ Term: ftl.Term,
+ Location: Location{
+ Pos: ftl.Location.Pos,
+ Start: ftl.Location.Start,
+ End: ftl.Location.End,
+ ArrayPositions: append(ArrayPositions(nil), ftl.Location.ArrayPositions...),
+ },
+ })
+ }
+ }
+
+ return dest
+}
diff --git a/vendor/github.com/blevesearch/bleve/size/sizes.go b/vendor/github.com/blevesearch/bleve/size/sizes.go
new file mode 100644
index 0000000000..0990bf86ec
--- /dev/null
+++ b/vendor/github.com/blevesearch/bleve/size/sizes.go
@@ -0,0 +1,59 @@
+// Copyright (c) 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package size
+
+import (
+ "reflect"
+)
+
+func init() {
+ var b bool
+ SizeOfBool = int(reflect.TypeOf(b).Size())
+ var f32 float32
+ SizeOfFloat32 = int(reflect.TypeOf(f32).Size())
+ var f64 float64
+ SizeOfFloat64 = int(reflect.TypeOf(f64).Size())
+ var i int
+ SizeOfInt = int(reflect.TypeOf(i).Size())
+ var m map[int]int
+ SizeOfMap = int(reflect.TypeOf(m).Size())
+ var ptr *int
+ SizeOfPtr = int(reflect.TypeOf(ptr).Size())
+ var slice []int
+ SizeOfSlice = int(reflect.TypeOf(slice).Size())
+ var str string
+ SizeOfString = int(reflect.TypeOf(str).Size())
+ var u8 uint8
+ SizeOfUint8 = int(reflect.TypeOf(u8).Size())
+ var u16 uint16
+ SizeOfUint16 = int(reflect.TypeOf(u16).Size())
+ var u32 uint32
+ SizeOfUint32 = int(reflect.TypeOf(u32).Size())
+ var u64 uint64
+ SizeOfUint64 = int(reflect.TypeOf(u64).Size())
+}
+
+var SizeOfBool int
+var SizeOfFloat32 int
+var SizeOfFloat64 int
+var SizeOfInt int
+var SizeOfMap int
+var SizeOfPtr int
+var SizeOfSlice int
+var SizeOfString int
+var SizeOfUint8 int
+var SizeOfUint16 int
+var SizeOfUint32 int
+var SizeOfUint64 int
diff --git a/vendor/github.com/boltdb/bolt/bolt_mips64le.go b/vendor/github.com/boltdb/bolt/bolt_mips64le.go
deleted file mode 100644
index e8a9c3d149..0000000000
--- a/vendor/github.com/boltdb/bolt/bolt_mips64le.go
+++ /dev/null
@@ -1,11 +0,0 @@
-// +build mips64le
-package bolt
-
-// maxMapSize represents the largest mmap size supported by Bolt.
-const maxMapSize = 0xFFFFFFFFFFFF // 256TB
-
-// maxAllocSize is the size used when creating array pointers.
-const maxAllocSize = 0x7FFFFFFF
-
-// brokenUnaligned Are unaligned load/stores broken on this arch?
-var brokenUnaligned = false
diff --git a/vendor/github.com/boltdb/bolt/bolt_mipsle.go b/vendor/github.com/boltdb/bolt/bolt_mipsle.go
deleted file mode 100644
index d5af4d128e..0000000000
--- a/vendor/github.com/boltdb/bolt/bolt_mipsle.go
+++ /dev/null
@@ -1,11 +0,0 @@
-// +build mipsle
-package bolt
-
-// maxMapSize represents the largest mmap size supported by Bolt.
-const maxMapSize = 0x40000000 // 1GB
-
-// maxAllocSize is the size used when creating array pointers.
-const maxAllocSize = 0xFFFFFFF
-
-// brokenUnaligned Are unaligned load/stores broken on this arch?
-var brokenUnaligned = false
diff --git a/vendor/github.com/boltdb/bolt/freelist.go b/vendor/github.com/boltdb/bolt/freelist.go
deleted file mode 100644
index aba48f58c6..0000000000
--- a/vendor/github.com/boltdb/bolt/freelist.go
+++ /dev/null
@@ -1,252 +0,0 @@
-package bolt
-
-import (
- "fmt"
- "sort"
- "unsafe"
-)
-
-// freelist represents a list of all pages that are available for allocation.
-// It also tracks pages that have been freed but are still in use by open transactions.
-type freelist struct {
- ids []pgid // all free and available free page ids.
- pending map[txid][]pgid // mapping of soon-to-be free page ids by tx.
- cache map[pgid]bool // fast lookup of all free and pending page ids.
-}
-
-// newFreelist returns an empty, initialized freelist.
-func newFreelist() *freelist {
- return &freelist{
- pending: make(map[txid][]pgid),
- cache: make(map[pgid]bool),
- }
-}
-
-// size returns the size of the page after serialization.
-func (f *freelist) size() int {
- n := f.count()
- if n >= 0xFFFF {
- // The first element will be used to store the count. See freelist.write.
- n++
- }
- return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n)
-}
-
-// count returns count of pages on the freelist
-func (f *freelist) count() int {
- return f.free_count() + f.pending_count()
-}
-
-// free_count returns count of free pages
-func (f *freelist) free_count() int {
- return len(f.ids)
-}
-
-// pending_count returns count of pending pages
-func (f *freelist) pending_count() int {
- var count int
- for _, list := range f.pending {
- count += len(list)
- }
- return count
-}
-
-// copyall copies into dst a list of all free ids and all pending ids in one sorted list.
-// f.count returns the minimum length required for dst.
-func (f *freelist) copyall(dst []pgid) {
- m := make(pgids, 0, f.pending_count())
- for _, list := range f.pending {
- m = append(m, list...)
- }
- sort.Sort(m)
- mergepgids(dst, f.ids, m)
-}
-
-// allocate returns the starting page id of a contiguous list of pages of a given size.
-// If a contiguous block cannot be found then 0 is returned.
-func (f *freelist) allocate(n int) pgid {
- if len(f.ids) == 0 {
- return 0
- }
-
- var initial, previd pgid
- for i, id := range f.ids {
- if id <= 1 {
- panic(fmt.Sprintf("invalid page allocation: %d", id))
- }
-
- // Reset initial page if this is not contiguous.
- if previd == 0 || id-previd != 1 {
- initial = id
- }
-
- // If we found a contiguous block then remove it and return it.
- if (id-initial)+1 == pgid(n) {
- // If we're allocating off the beginning then take the fast path
- // and just adjust the existing slice. This will use extra memory
- // temporarily but the append() in free() will realloc the slice
- // as is necessary.
- if (i + 1) == n {
- f.ids = f.ids[i+1:]
- } else {
- copy(f.ids[i-n+1:], f.ids[i+1:])
- f.ids = f.ids[:len(f.ids)-n]
- }
-
- // Remove from the free cache.
- for i := pgid(0); i < pgid(n); i++ {
- delete(f.cache, initial+i)
- }
-
- return initial
- }
-
- previd = id
- }
- return 0
-}
-
-// free releases a page and its overflow for a given transaction id.
-// If the page is already free then a panic will occur.
-func (f *freelist) free(txid txid, p *page) {
- if p.id <= 1 {
- panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
- }
-
- // Free page and all its overflow pages.
- var ids = f.pending[txid]
- for id := p.id; id <= p.id+pgid(p.overflow); id++ {
- // Verify that page is not already free.
- if f.cache[id] {
- panic(fmt.Sprintf("page %d already freed", id))
- }
-
- // Add to the freelist and cache.
- ids = append(ids, id)
- f.cache[id] = true
- }
- f.pending[txid] = ids
-}
-
-// release moves all page ids for a transaction id (or older) to the freelist.
-func (f *freelist) release(txid txid) {
- m := make(pgids, 0)
- for tid, ids := range f.pending {
- if tid <= txid {
- // Move transaction's pending pages to the available freelist.
- // Don't remove from the cache since the page is still free.
- m = append(m, ids...)
- delete(f.pending, tid)
- }
- }
- sort.Sort(m)
- f.ids = pgids(f.ids).merge(m)
-}
-
-// rollback removes the pages from a given pending tx.
-func (f *freelist) rollback(txid txid) {
- // Remove page ids from cache.
- for _, id := range f.pending[txid] {
- delete(f.cache, id)
- }
-
- // Remove pages from pending list.
- delete(f.pending, txid)
-}
-
-// freed returns whether a given page is in the free list.
-func (f *freelist) freed(pgid pgid) bool {
- return f.cache[pgid]
-}
-
-// read initializes the freelist from a freelist page.
-func (f *freelist) read(p *page) {
- // If the page.count is at the max uint16 value (64k) then it's considered
- // an overflow and the size of the freelist is stored as the first element.
- idx, count := 0, int(p.count)
- if count == 0xFFFF {
- idx = 1
- count = int(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0])
- }
-
- // Copy the list of page ids from the freelist.
- if count == 0 {
- f.ids = nil
- } else {
- ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx:count]
- f.ids = make([]pgid, len(ids))
- copy(f.ids, ids)
-
- // Make sure they're sorted.
- sort.Sort(pgids(f.ids))
- }
-
- // Rebuild the page cache.
- f.reindex()
-}
-
-// write writes the page ids onto a freelist page. All free and pending ids are
-// saved to disk since in the event of a program crash, all pending ids will
-// become free.
-func (f *freelist) write(p *page) error {
- // Combine the old free pgids and pgids waiting on an open transaction.
-
- // Update the header flag.
- p.flags |= freelistPageFlag
-
- // The page.count can only hold up to 64k elements so if we overflow that
- // number then we handle it by putting the size in the first element.
- lenids := f.count()
- if lenids == 0 {
- p.count = uint16(lenids)
- } else if lenids < 0xFFFF {
- p.count = uint16(lenids)
- f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:])
- } else {
- p.count = 0xFFFF
- ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids)
- f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:])
- }
-
- return nil
-}
-
-// reload reads the freelist from a page and filters out pending items.
-func (f *freelist) reload(p *page) {
- f.read(p)
-
- // Build a cache of only pending pages.
- pcache := make(map[pgid]bool)
- for _, pendingIDs := range f.pending {
- for _, pendingID := range pendingIDs {
- pcache[pendingID] = true
- }
- }
-
- // Check each page in the freelist and build a new available freelist
- // with any pages not in the pending lists.
- var a []pgid
- for _, id := range f.ids {
- if !pcache[id] {
- a = append(a, id)
- }
- }
- f.ids = a
-
- // Once the available list is rebuilt then rebuild the free cache so that
- // it includes the available and pending free pages.
- f.reindex()
-}
-
-// reindex rebuilds the free cache based on available and pending free lists.
-func (f *freelist) reindex() {
- f.cache = make(map[pgid]bool, len(f.ids))
- for _, id := range f.ids {
- f.cache[id] = true
- }
- for _, pendingIDs := range f.pending {
- for _, pendingID := range pendingIDs {
- f.cache[pendingID] = true
- }
- }
-}
diff --git a/vendor/github.com/couchbase/vellum/automaton.go b/vendor/github.com/couchbase/vellum/automaton.go
index 47526595bc..70398f2d47 100644
--- a/vendor/github.com/couchbase/vellum/automaton.go
+++ b/vendor/github.com/couchbase/vellum/automaton.go
@@ -81,5 +81,5 @@ func (m *AlwaysMatch) Accept(int, byte) int {
return 0
}
-// creating an alwaysMatchAutomaton to avoid unnecesary repeated allocations.
+// creating an alwaysMatchAutomaton to avoid unnecessary repeated allocations.
var alwaysMatchAutomaton = &AlwaysMatch{}
diff --git a/vendor/github.com/couchbase/vellum/builder.go b/vendor/github.com/couchbase/vellum/builder.go
index b21db98072..f793329575 100644
--- a/vendor/github.com/couchbase/vellum/builder.go
+++ b/vendor/github.com/couchbase/vellum/builder.go
@@ -38,8 +38,7 @@ type Builder struct {
encoder encoder
opts *BuilderOpts
- builderNodePool builderNodePool
- transitionPool transitionPool
+ builderNodePool *builderNodePool
}
const noneAddr = 1
@@ -51,12 +50,14 @@ func newBuilder(w io.Writer, opts *BuilderOpts) (*Builder, error) {
if opts == nil {
opts = defaultBuilderOpts
}
+ builderNodePool := &builderNodePool{}
rv := &Builder{
- registry: newRegistry(opts.RegistryTableSize, opts.RegistryMRUSize),
- opts: opts,
- lastAddr: noneAddr,
+ unfinished: newUnfinishedNodes(builderNodePool),
+ registry: newRegistry(builderNodePool, opts.RegistryTableSize, opts.RegistryMRUSize),
+ builderNodePool: builderNodePool,
+ opts: opts,
+ lastAddr: noneAddr,
}
- rv.unfinished = newUnfinishedNodes(&rv.builderNodePool)
var err error
rv.encoder, err = loadEncoder(opts.Encoder, w)
@@ -71,9 +72,7 @@ func newBuilder(w io.Writer, opts *BuilderOpts) (*Builder, error) {
}
func (b *Builder) Reset(w io.Writer) error {
- b.transitionPool.reset()
- b.builderNodePool.reset()
- b.unfinished.Reset(&b.builderNodePool)
+ b.unfinished.Reset()
b.registry.Reset()
b.lastAddr = noneAddr
b.encoder.reset(w)
@@ -107,7 +106,7 @@ func (b *Builder) Insert(key []byte, val uint64) error {
return err
}
b.copyLastKey(key)
- b.unfinished.addSuffix(key[prefixLen:], out, &b.builderNodePool)
+ b.unfinished.addSuffix(key[prefixLen:], out)
return nil
}
@@ -142,7 +141,7 @@ func (b *Builder) compileFrom(iState int) error {
if addr == noneAddr {
node = b.unfinished.popEmpty()
} else {
- node = b.unfinished.popFreeze(addr, &b.transitionPool)
+ node = b.unfinished.popFreeze(addr)
}
var err error
addr, err = b.compile(node)
@@ -150,7 +149,7 @@ func (b *Builder) compileFrom(iState int) error {
return nil
}
}
- b.unfinished.topLastFreeze(addr, &b.transitionPool)
+ b.unfinished.topLastFreeze(addr)
return nil
}
@@ -183,22 +182,25 @@ type unfinishedNodes struct {
// this means calls get() and pushXYZ() must be paired,
// as well as calls put() and popXYZ()
cache []builderNodeUnfinished
+
+ builderNodePool *builderNodePool
}
-func (u *unfinishedNodes) Reset(p *builderNodePool) {
+func (u *unfinishedNodes) Reset() {
u.stack = u.stack[:0]
for i := 0; i < len(u.cache); i++ {
u.cache[i] = builderNodeUnfinished{}
}
- u.pushEmpty(false, p)
+ u.pushEmpty(false)
}
func newUnfinishedNodes(p *builderNodePool) *unfinishedNodes {
rv := &unfinishedNodes{
- stack: make([]*builderNodeUnfinished, 0, 64),
- cache: make([]builderNodeUnfinished, 64),
+ stack: make([]*builderNodeUnfinished, 0, 64),
+ cache: make([]builderNodeUnfinished, 64),
+ builderNodePool: p,
}
- rv.pushEmpty(false, p)
+ rv.pushEmpty(false)
return rv
}
@@ -249,9 +251,9 @@ func (u *unfinishedNodes) findCommonPrefixAndSetOutput(key []byte,
return i, out
}
-func (u *unfinishedNodes) pushEmpty(final bool, p *builderNodePool) {
+func (u *unfinishedNodes) pushEmpty(final bool) {
next := u.get()
- next.node = p.alloc()
+ next.node = u.builderNodePool.Get()
next.node.final = final
u.stack = append(u.stack, next)
}
@@ -265,11 +267,11 @@ func (u *unfinishedNodes) popRoot() *builderNode {
return rv
}
-func (u *unfinishedNodes) popFreeze(addr int, tp *transitionPool) *builderNode {
+func (u *unfinishedNodes) popFreeze(addr int) *builderNode {
l := len(u.stack)
var unfinished *builderNodeUnfinished
u.stack, unfinished = u.stack[:l-1], u.stack[l-1]
- unfinished.lastCompiled(addr, tp)
+ unfinished.lastCompiled(addr)
rv := unfinished.node
u.put()
return rv
@@ -289,12 +291,12 @@ func (u *unfinishedNodes) setRootOutput(out uint64) {
u.stack[0].node.finalOutput = out
}
-func (u *unfinishedNodes) topLastFreeze(addr int, tp *transitionPool) {
+func (u *unfinishedNodes) topLastFreeze(addr int) {
last := len(u.stack) - 1
- u.stack[last].lastCompiled(addr, tp)
+ u.stack[last].lastCompiled(addr)
}
-func (u *unfinishedNodes) addSuffix(bs []byte, out uint64, p *builderNodePool) {
+func (u *unfinishedNodes) addSuffix(bs []byte, out uint64) {
if len(bs) == 0 {
return
}
@@ -304,13 +306,13 @@ func (u *unfinishedNodes) addSuffix(bs []byte, out uint64, p *builderNodePool) {
u.stack[last].lastOut = out
for _, b := range bs[1:] {
next := u.get()
- next.node = p.alloc()
+ next.node = u.builderNodePool.Get()
next.hasLastT = true
next.lastIn = b
next.lastOut = 0
u.stack = append(u.stack, next)
}
- u.pushEmpty(true, p)
+ u.pushEmpty(true)
}
type builderNodeUnfinished struct {
@@ -320,17 +322,17 @@ type builderNodeUnfinished struct {
hasLastT bool
}
-func (b *builderNodeUnfinished) lastCompiled(addr int, tp *transitionPool) {
+func (b *builderNodeUnfinished) lastCompiled(addr int) {
if b.hasLastT {
transIn := b.lastIn
transOut := b.lastOut
b.hasLastT = false
b.lastOut = 0
- trans := tp.alloc()
- trans.in = transIn
- trans.out = transOut
- trans.addr = addr
- b.node.trans = append(b.node.trans, trans)
+ b.node.trans = append(b.node.trans, transition{
+ in: transIn,
+ out: transOut,
+ addr: addr,
+ })
}
}
@@ -338,8 +340,8 @@ func (b *builderNodeUnfinished) addOutputPrefix(prefix uint64) {
if b.node.final {
b.node.finalOutput = outputCat(prefix, b.node.finalOutput)
}
- for _, t := range b.node.trans {
- t.out = outputCat(prefix, t.out)
+ for i := range b.node.trans {
+ b.node.trans[i].out = outputCat(prefix, b.node.trans[i].out)
}
if b.hasLastT {
b.lastOut = outputCat(prefix, b.lastOut)
@@ -348,8 +350,22 @@ func (b *builderNodeUnfinished) addOutputPrefix(prefix uint64) {
type builderNode struct {
finalOutput uint64
- trans []*transition
+ trans []transition
final bool
+
+ // intrusive linked list
+ next *builderNode
+}
+
+// reset resets the receiver builderNode to a re-usable state.
+func (n *builderNode) reset() {
+ n.final = false
+ n.finalOutput = 0
+ for i := range n.trans {
+ n.trans[i] = emptyTransition
+ }
+ n.trans = n.trans[:0]
+ n.next = nil
}
func (n *builderNode) equiv(o *builderNode) bool {
@@ -377,6 +393,8 @@ func (n *builderNode) equiv(o *builderNode) bool {
return true
}
+var emptyTransition = transition{}
+
type transition struct {
out uint64
addr int
@@ -398,56 +416,37 @@ func outputCat(l, r uint64) uint64 {
return l + r
}
-// the next builderNode to alloc() will be all[nextOuter][nextInner]
+// builderNodePool pools builderNodes using a singly linked list.
+//
+// NB: builderNode lifecylce is described by the following interactions -
+// +------------------------+ +----------------------+
+// | Unfinished Nodes | Transfer once | Registry |
+// |(not frozen builderNode)|-----builderNode is ------->| (frozen builderNode) |
+// +------------------------+ marked frozen +----------------------+
+// ^ |
+// | |
+// | Put()
+// | Get() on +-------------------+ when
+// +-new char--------| builderNode Pool |<-----------evicted
+// +-------------------+
type builderNodePool struct {
- all [][]builderNode
- nextOuter int
- nextInner int
-}
-
-func (p *builderNodePool) reset() {
- p.nextOuter = 0
- p.nextInner = 0
+ head *builderNode
}
-func (p *builderNodePool) alloc() *builderNode {
- if p.nextOuter >= len(p.all) {
- p.all = append(p.all, make([]builderNode, 256))
+func (p *builderNodePool) Get() *builderNode {
+ if p.head == nil {
+ return &builderNode{}
}
- rv := &p.all[p.nextOuter][p.nextInner]
- p.nextInner += 1
- if p.nextInner >= len(p.all[p.nextOuter]) {
- p.nextOuter += 1
- p.nextInner = 0
- }
- rv.finalOutput = 0
- rv.trans = rv.trans[:0]
- rv.final = false
- return rv
+ head := p.head
+ p.head = p.head.next
+ return head
}
-// the next transition to alloc() will be all[nextOuter][nextInner]
-type transitionPool struct {
- all [][]transition
- nextOuter int
- nextInner int
-}
-
-func (p *transitionPool) reset() {
- p.nextOuter = 0
- p.nextInner = 0
-}
-
-func (p *transitionPool) alloc() *transition {
- if p.nextOuter >= len(p.all) {
- p.all = append(p.all, make([]transition, 256))
- }
- rv := &p.all[p.nextOuter][p.nextInner]
- p.nextInner += 1
- if p.nextInner >= len(p.all[p.nextOuter]) {
- p.nextOuter += 1
- p.nextInner = 0
+func (p *builderNodePool) Put(v *builderNode) {
+ if v == nil {
+ return
}
- *rv = transition{}
- return rv
+ v.reset()
+ v.next = p.head
+ p.head = v
}
diff --git a/vendor/github.com/couchbase/vellum/decoder_v1.go b/vendor/github.com/couchbase/vellum/decoder_v1.go
index 5a0ea68871..d56e61db58 100644
--- a/vendor/github.com/couchbase/vellum/decoder_v1.go
+++ b/vendor/github.com/couchbase/vellum/decoder_v1.go
@@ -29,8 +29,6 @@ func init() {
type decoderV1 struct {
data []byte
- root uint64
- len uint64
}
func newDecoderV1(data []byte) *decoderV1 {
@@ -219,7 +217,7 @@ func (f *fstStateV1) Final() bool {
}
func (f *fstStateV1) FinalOutput() uint64 {
- if f.numTrans > 0 && f.final && f.outSize > 0 {
+ if f.final && f.outSize > 0 {
return readPackedUint(f.data[f.outFinal : f.outFinal+f.outSize])
}
return 0
diff --git a/vendor/github.com/couchbase/vellum/fst.go b/vendor/github.com/couchbase/vellum/fst.go
index ecc528395c..64ee21a410 100644
--- a/vendor/github.com/couchbase/vellum/fst.go
+++ b/vendor/github.com/couchbase/vellum/fst.go
@@ -74,8 +74,8 @@ func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) {
if err != nil {
return 0, false, err
}
- for i := range input {
- _, curr, output := state.TransitionFor(input[i])
+ for _, c := range input {
+ _, curr, output := state.TransitionFor(c)
if curr == noneAddr {
return 0, false, nil
}
@@ -243,6 +243,52 @@ func (f *FST) Reader() (*Reader, error) {
return &Reader{f: f}, nil
}
+func (f *FST) GetMinKey() ([]byte, error) {
+ var rv []byte
+
+ curr := f.decoder.getRoot()
+ state, err := f.decoder.stateAt(curr, nil)
+ if err != nil {
+ return nil, err
+ }
+
+ for !state.Final() {
+ nextTrans := state.TransitionAt(0)
+ _, curr, _ = state.TransitionFor(nextTrans)
+ state, err = f.decoder.stateAt(curr, state)
+ if err != nil {
+ return nil, err
+ }
+
+ rv = append(rv, nextTrans)
+ }
+
+ return rv, nil
+}
+
+func (f *FST) GetMaxKey() ([]byte, error) {
+ var rv []byte
+
+ curr := f.decoder.getRoot()
+ state, err := f.decoder.stateAt(curr, nil)
+ if err != nil {
+ return nil, err
+ }
+
+ for state.NumTransitions() > 0 {
+ nextTrans := state.TransitionAt(state.NumTransitions() - 1)
+ _, curr, _ = state.TransitionFor(nextTrans)
+ state, err = f.decoder.stateAt(curr, state)
+ if err != nil {
+ return nil, err
+ }
+
+ rv = append(rv, nextTrans)
+ }
+
+ return rv, nil
+}
+
// A Reader is meant for a single threaded use
type Reader struct {
f *FST
diff --git a/vendor/github.com/couchbase/vellum/fst_iterator.go b/vendor/github.com/couchbase/vellum/fst_iterator.go
index 389ac64aab..eb731395b2 100644
--- a/vendor/github.com/couchbase/vellum/fst_iterator.go
+++ b/vendor/github.com/couchbase/vellum/fst_iterator.go
@@ -76,7 +76,8 @@ func newIterator(f *FST, startKeyInclusive, endKeyExclusive []byte,
// Reset resets the Iterator' internal state to allow for iterator
// reuse (e.g. pooling).
-func (i *FSTIterator) Reset(f *FST, startKeyInclusive, endKeyExclusive []byte, aut Automaton) error {
+func (i *FSTIterator) Reset(f *FST,
+ startKeyInclusive, endKeyExclusive []byte, aut Automaton) error {
if aut == nil {
aut = alwaysMatchAutomaton
}
@@ -91,14 +92,14 @@ func (i *FSTIterator) Reset(f *FST, startKeyInclusive, endKeyExclusive []byte, a
// pointTo attempts to point us to the specified location
func (i *FSTIterator) pointTo(key []byte) error {
-
// tried to seek before start
if bytes.Compare(key, i.startKeyInclusive) < 0 {
key = i.startKeyInclusive
}
- // trid to see past end
- if i.endKeyExclusive != nil && bytes.Compare(key, i.endKeyExclusive) > 0 {
+ // tried to seek past end
+ if i.endKeyExclusive != nil &&
+ bytes.Compare(key, i.endKeyExclusive) > 0 {
key = i.endKeyExclusive
}
@@ -121,21 +122,23 @@ func (i *FSTIterator) pointTo(key []byte) error {
i.statesStack = append(i.statesStack, root)
i.autStatesStack = append(i.autStatesStack, autStart)
for j := 0; j < len(key); j++ {
+ keyJ := key[j]
curr := i.statesStack[len(i.statesStack)-1]
autCurr := i.autStatesStack[len(i.autStatesStack)-1]
- pos, nextAddr, nextVal := curr.TransitionFor(key[j])
+ pos, nextAddr, nextVal := curr.TransitionFor(keyJ)
if nextAddr == noneAddr {
// needed transition doesn't exist
// find last trans before the one we needed
- for q := 0; q < curr.NumTransitions(); q++ {
- if curr.TransitionAt(q) < key[j] {
+ for q := curr.NumTransitions() - 1; q >= 0; q-- {
+ if curr.TransitionAt(q) < keyJ {
maxQ = q
+ break
}
}
break
}
- autNext := i.aut.Accept(autCurr, key[j])
+ autNext := i.aut.Accept(autCurr, keyJ)
next, err := i.f.decoder.stateAt(nextAddr, nil)
if err != nil {
@@ -143,14 +146,16 @@ func (i *FSTIterator) pointTo(key []byte) error {
}
i.statesStack = append(i.statesStack, next)
- i.keysStack = append(i.keysStack, key[j])
+ i.keysStack = append(i.keysStack, keyJ)
i.keysPosStack = append(i.keysPosStack, pos)
i.valsStack = append(i.valsStack, nextVal)
i.autStatesStack = append(i.autStatesStack, autNext)
continue
}
- if !i.statesStack[len(i.statesStack)-1].Final() || !i.aut.IsMatch(i.autStatesStack[len(i.autStatesStack)-1]) || bytes.Compare(i.keysStack, key) < 0 {
+ if !i.statesStack[len(i.statesStack)-1].Final() ||
+ !i.aut.IsMatch(i.autStatesStack[len(i.autStatesStack)-1]) ||
+ bytes.Compare(i.keysStack, key) < 0 {
return i.next(maxQ)
}
@@ -181,15 +186,12 @@ func (i *FSTIterator) Next() error {
}
func (i *FSTIterator) next(lastOffset int) error {
-
// remember where we started
- if cap(i.nextStart) < len(i.keysStack) {
- i.nextStart = make([]byte, len(i.keysStack))
- } else {
- i.nextStart = i.nextStart[0:len(i.keysStack)]
- }
- copy(i.nextStart, i.keysStack)
+ i.nextStart = append(i.nextStart[:0], i.keysStack...)
+ nextOffset := lastOffset + 1
+
+OUTER:
for true {
curr := i.statesStack[len(i.statesStack)-1]
autCurr := i.autStatesStack[len(i.autStatesStack)-1]
@@ -200,58 +202,62 @@ func (i *FSTIterator) next(lastOffset int) error {
return nil
}
- nextOffset := lastOffset + 1
- if nextOffset < curr.NumTransitions() {
+ numTrans := curr.NumTransitions()
+
+ INNER:
+ for nextOffset < numTrans {
t := curr.TransitionAt(nextOffset)
autNext := i.aut.Accept(autCurr, t)
- if i.aut.CanMatch(autNext) {
- pos, nextAddr, v := curr.TransitionFor(t)
-
- // the next slot in the statesStack might have an
- // fstState instance that we can reuse
- var nextPrealloc fstState
- if len(i.statesStack) < cap(i.statesStack) {
- nextPrealloc = i.statesStack[0:cap(i.statesStack)][len(i.statesStack)]
- }
+ if !i.aut.CanMatch(autNext) {
+ nextOffset += 1
+ continue INNER
+ }
- // push onto stack
- next, err := i.f.decoder.stateAt(nextAddr, nextPrealloc)
- if err != nil {
- return err
- }
- i.statesStack = append(i.statesStack, next)
- i.keysStack = append(i.keysStack, t)
- i.keysPosStack = append(i.keysPosStack, pos)
- i.valsStack = append(i.valsStack, v)
- i.autStatesStack = append(i.autStatesStack, autNext)
- lastOffset = -1
-
- // check to see if new keystack might have gone too far
- if i.endKeyExclusive != nil && bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 {
- return ErrIteratorDone
- }
- } else {
- lastOffset = nextOffset
+ pos, nextAddr, v := curr.TransitionFor(t)
+
+ // the next slot in the statesStack might have an
+ // fstState instance that we can reuse
+ var nextPrealloc fstState
+ if len(i.statesStack) < cap(i.statesStack) {
+ nextPrealloc = i.statesStack[0:cap(i.statesStack)][len(i.statesStack)]
}
- continue
+ // push onto stack
+ next, err := i.f.decoder.stateAt(nextAddr, nextPrealloc)
+ if err != nil {
+ return err
+ }
+
+ i.statesStack = append(i.statesStack, next)
+ i.keysStack = append(i.keysStack, t)
+ i.keysPosStack = append(i.keysPosStack, pos)
+ i.valsStack = append(i.valsStack, v)
+ i.autStatesStack = append(i.autStatesStack, autNext)
+
+ // check to see if new keystack might have gone too far
+ if i.endKeyExclusive != nil &&
+ bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 {
+ return ErrIteratorDone
+ }
+
+ nextOffset = 0
+ continue OUTER
}
- if len(i.statesStack) > 1 {
- // no transitions, and still room to pop
- i.statesStack = i.statesStack[:len(i.statesStack)-1]
- i.keysStack = i.keysStack[:len(i.keysStack)-1]
- lastOffset = i.keysPosStack[len(i.keysPosStack)-1]
-
- i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-1]
- i.valsStack = i.valsStack[:len(i.valsStack)-1]
- i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1]
- continue
- } else {
+ if len(i.statesStack) <= 1 {
// stack len is 1 (root), can't go back further, we're done
break
}
+ // no transitions, and still room to pop
+ i.statesStack = i.statesStack[:len(i.statesStack)-1]
+ i.keysStack = i.keysStack[:len(i.keysStack)-1]
+
+ nextOffset = i.keysPosStack[len(i.keysPosStack)-1] + 1
+
+ i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-1]
+ i.valsStack = i.valsStack[:len(i.valsStack)-1]
+ i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1]
}
return ErrIteratorDone
@@ -262,15 +268,12 @@ func (i *FSTIterator) next(lastOffset int) error {
// seek operation would go past the last key, or outside the configured
// startKeyInclusive/endKeyExclusive then ErrIteratorDone is returned.
func (i *FSTIterator) Seek(key []byte) error {
- err := i.pointTo(key)
- if err != nil {
- return err
- }
- return nil
+ return i.pointTo(key)
}
// Close will free any resources held by this iterator.
func (i *FSTIterator) Close() error {
- // at the moment we don't do anything, but wanted this for API completeness
+ // at the moment we don't do anything,
+ // but wanted this for API completeness
return nil
}
diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/LICENSE b/vendor/github.com/couchbase/vellum/levenshtein2/LICENSE
new file mode 100644
index 0000000000..6b0b1270ff
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/levenshtein2/LICENSE
@@ -0,0 +1,203 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go b/vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go
new file mode 100644
index 0000000000..4bf64fef2e
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go
@@ -0,0 +1,125 @@
+// Copyright (c) 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package levenshtein2
+
+import (
+ "fmt"
+ "sort"
+ "unicode/utf8"
+)
+
+type FullCharacteristicVector []uint32
+
+func (fcv FullCharacteristicVector) shiftAndMask(offset, mask uint32) uint32 {
+ bucketID := offset / 32
+ align := offset - bucketID*32
+ if align == 0 {
+ return fcv[bucketID] & mask
+ }
+ left := fcv[bucketID] >> align
+ right := fcv[bucketID+1] << (32 - align)
+ return (left | right) & mask
+}
+
+type tuple struct {
+ char rune
+ fcv FullCharacteristicVector
+}
+
+type sortRunes []rune
+
+func (s sortRunes) Less(i, j int) bool {
+ return s[i] < s[j]
+}
+
+func (s sortRunes) Swap(i, j int) {
+ s[i], s[j] = s[j], s[i]
+}
+
+func (s sortRunes) Len() int {
+ return len(s)
+}
+
+func sortRune(r []rune) []rune {
+ sort.Sort(sortRunes(r))
+ return r
+}
+
+type Alphabet struct {
+ charset []tuple
+ index uint32
+}
+
+func (a *Alphabet) resetNext() {
+ a.index = 0
+}
+
+func (a *Alphabet) next() (rune, FullCharacteristicVector, error) {
+ if int(a.index) >= len(a.charset) {
+ return 0, nil, fmt.Errorf("eof")
+ }
+
+ rv := a.charset[a.index]
+ a.index++
+ return rv.char, rv.fcv, nil
+}
+
+func dedupe(in string) string {
+ lookUp := make(map[rune]struct{}, len(in))
+ var rv string
+ for len(in) > 0 {
+ r, size := utf8.DecodeRuneInString(in)
+ in = in[size:]
+ if _, ok := lookUp[r]; !ok {
+ rv += string(r)
+ lookUp[r] = struct{}{}
+ }
+ }
+ return rv
+}
+
+func queryChars(qChars string) Alphabet {
+ chars := dedupe(qChars)
+ inChars := sortRune([]rune(chars))
+ charsets := make([]tuple, 0, len(inChars))
+
+ for _, c := range inChars {
+ tempChars := qChars
+ var bits []uint32
+ for len(tempChars) > 0 {
+ var chunk string
+ if len(tempChars) > 32 {
+ chunk = tempChars[0:32]
+ tempChars = tempChars[32:]
+ } else {
+ chunk = tempChars
+ tempChars = tempChars[:0]
+ }
+
+ chunkBits := uint32(0)
+ bit := uint32(1)
+ for _, chr := range chunk {
+ if chr == c {
+ chunkBits |= bit
+ }
+ bit <<= 1
+ }
+ bits = append(bits, chunkBits)
+ }
+ bits = append(bits, 0)
+ charsets = append(charsets, tuple{char: c, fcv: FullCharacteristicVector(bits)})
+ }
+ return Alphabet{charset: charsets}
+}
diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/dfa.go b/vendor/github.com/couchbase/vellum/levenshtein2/dfa.go
new file mode 100644
index 0000000000..e82a780a52
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/levenshtein2/dfa.go
@@ -0,0 +1,250 @@
+// Copyright (c) 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package levenshtein2
+
+import (
+ "fmt"
+ "math"
+)
+
+const SinkState = uint32(0)
+
+type DFA struct {
+ transitions [][256]uint32
+ distances []Distance
+ initState int
+ ed uint8
+}
+
+/// Returns the initial state
+func (d *DFA) initialState() int {
+ return d.initState
+}
+
+/// Returns the Levenshtein distance associated with the
+/// current state.
+func (d *DFA) distance(stateId int) Distance {
+ return d.distances[stateId]
+}
+
+/// Returns the number of states in the `DFA`.
+func (d *DFA) numStates() int {
+ return len(d.transitions)
+}
+
+/// Returns the destination state reached after consuming a given byte.
+func (d *DFA) transition(fromState int, b uint8) int {
+ return int(d.transitions[fromState][b])
+}
+
+func (d *DFA) eval(bytes []uint8) Distance {
+ state := d.initialState()
+
+ for _, b := range bytes {
+ state = d.transition(state, b)
+ }
+
+ return d.distance(state)
+}
+
+func (d *DFA) Start() int {
+ return int(d.initialState())
+}
+
+func (d *DFA) IsMatch(state int) bool {
+ if _, ok := d.distance(state).(Exact); ok {
+ return true
+ }
+ return false
+}
+
+func (d *DFA) CanMatch(state int) bool {
+ return state > 0 && state < d.numStates()
+}
+
+func (d *DFA) Accept(state int, b byte) int {
+ return int(d.transition(state, b))
+}
+
+// WillAlwaysMatch returns if the specified state will always end in a
+// matching state.
+func (d *DFA) WillAlwaysMatch(state int) bool {
+ return false
+}
+
+func fill(dest []uint32, val uint32) {
+ for i := range dest {
+ dest[i] = val
+ }
+}
+
+func fillTransitions(dest *[256]uint32, val uint32) {
+ for i := range dest {
+ dest[i] = val
+ }
+}
+
+type Utf8DFAStateBuilder struct {
+ dfaBuilder *Utf8DFABuilder
+ stateID uint32
+ defaultSuccessor []uint32
+}
+
+func (sb *Utf8DFAStateBuilder) addTransitionID(fromStateID uint32, b uint8,
+ toStateID uint32) {
+ sb.dfaBuilder.transitions[fromStateID][b] = toStateID
+}
+
+func (sb *Utf8DFAStateBuilder) addTransition(in rune, toStateID uint32) {
+ fromStateID := sb.stateID
+ chars := []byte(string(in))
+ lastByte := chars[len(chars)-1]
+
+ for i, ch := range chars[:len(chars)-1] {
+ remNumBytes := len(chars) - i - 1
+ defaultSuccessor := sb.defaultSuccessor[remNumBytes]
+ intermediateStateID := sb.dfaBuilder.transitions[fromStateID][ch]
+
+ if intermediateStateID == defaultSuccessor {
+ intermediateStateID = sb.dfaBuilder.allocate()
+ fillTransitions(&sb.dfaBuilder.transitions[intermediateStateID],
+ sb.defaultSuccessor[remNumBytes-1])
+ }
+
+ sb.addTransitionID(fromStateID, ch, intermediateStateID)
+ fromStateID = intermediateStateID
+ }
+
+ toStateIDDecoded := sb.dfaBuilder.getOrAllocate(original(toStateID))
+ sb.addTransitionID(fromStateID, lastByte, toStateIDDecoded)
+}
+
+type Utf8StateId uint32
+
+func original(stateId uint32) Utf8StateId {
+ return predecessor(stateId, 0)
+}
+
+func predecessor(stateId uint32, numSteps uint8) Utf8StateId {
+ return Utf8StateId(stateId*4 + uint32(numSteps))
+}
+
+// Utf8DFABuilder makes it possible to define a DFA
+// that takes unicode character, and build a `DFA`
+// that operates on utf-8 encoded
+type Utf8DFABuilder struct {
+ index []uint32
+ distances []Distance
+ transitions [][256]uint32
+ initialState uint32
+ numStates uint32
+ maxNumStates uint32
+}
+
+func withMaxStates(maxStates uint32) *Utf8DFABuilder {
+ rv := &Utf8DFABuilder{
+ index: make([]uint32, maxStates*2+100),
+ distances: make([]Distance, 0, maxStates),
+ transitions: make([][256]uint32, 0, maxStates),
+ maxNumStates: maxStates,
+ }
+
+ for i := range rv.index {
+ rv.index[i] = math.MaxUint32
+ }
+
+ return rv
+}
+
+func (dfab *Utf8DFABuilder) allocate() uint32 {
+ newState := dfab.numStates
+ dfab.numStates++
+
+ dfab.distances = append(dfab.distances, Atleast{d: 255})
+ dfab.transitions = append(dfab.transitions, [256]uint32{})
+
+ return newState
+}
+
+func (dfab *Utf8DFABuilder) getOrAllocate(state Utf8StateId) uint32 {
+ if int(state) >= cap(dfab.index) {
+ cloneIndex := make([]uint32, int(state)*2)
+ copy(cloneIndex, dfab.index)
+ dfab.index = cloneIndex
+ }
+ if dfab.index[state] != math.MaxUint32 {
+ return dfab.index[state]
+ }
+
+ nstate := dfab.allocate()
+ dfab.index[state] = nstate
+
+ return nstate
+}
+
+func (dfab *Utf8DFABuilder) setInitialState(iState uint32) {
+ decodedID := dfab.getOrAllocate(original(iState))
+ dfab.initialState = decodedID
+}
+
+func (dfab *Utf8DFABuilder) build(ed uint8) *DFA {
+ return &DFA{
+ transitions: dfab.transitions,
+ distances: dfab.distances,
+ initState: int(dfab.initialState),
+ ed: ed,
+ }
+}
+
+func (dfab *Utf8DFABuilder) addState(state, default_suc_orig uint32,
+ distance Distance) (*Utf8DFAStateBuilder, error) {
+ if state > dfab.maxNumStates {
+ return nil, fmt.Errorf("State id is larger than maxNumStates")
+ }
+
+ stateID := dfab.getOrAllocate(original(state))
+ dfab.distances[stateID] = distance
+
+ defaultSuccID := dfab.getOrAllocate(original(default_suc_orig))
+ // creates a chain of states of predecessors of `default_suc_orig`.
+ // Accepting k-bytes (whatever the bytes are) from `predecessor_states[k-1]`
+ // leads to the `default_suc_orig` state.
+ predecessorStates := []uint32{defaultSuccID,
+ defaultSuccID,
+ defaultSuccID,
+ defaultSuccID}
+
+ for numBytes := uint8(1); numBytes < 4; numBytes++ {
+ predecessorState := predecessor(default_suc_orig, numBytes)
+ predecessorStateID := dfab.getOrAllocate(predecessorState)
+ predecessorStates[numBytes] = predecessorStateID
+ succ := predecessorStates[numBytes-1]
+ fillTransitions(&dfab.transitions[predecessorStateID], succ)
+ }
+
+ // 1-byte encoded chars.
+ fill(dfab.transitions[stateID][0:192], predecessorStates[0])
+ // 2-bytes encoded chars.
+ fill(dfab.transitions[stateID][192:224], predecessorStates[1])
+ // 3-bytes encoded chars.
+ fill(dfab.transitions[stateID][224:240], predecessorStates[2])
+ // 4-bytes encoded chars.
+ fill(dfab.transitions[stateID][240:256], predecessorStates[3])
+
+ return &Utf8DFAStateBuilder{
+ dfaBuilder: dfab,
+ stateID: stateID,
+ defaultSuccessor: predecessorStates}, nil
+}
diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/levenshtein.go b/vendor/github.com/couchbase/vellum/levenshtein2/levenshtein.go
new file mode 100644
index 0000000000..1ca0aaa65b
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/levenshtein2/levenshtein.go
@@ -0,0 +1,64 @@
+// Copyright (c) 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package levenshtein2
+
+import "fmt"
+
+// StateLimit is the maximum number of states allowed
+const StateLimit = 10000
+
+// ErrTooManyStates is returned if you attempt to build a Levenshtein
+// automaton which requires too many states.
+var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states",
+ StateLimit)
+
+// LevenshteinAutomatonBuilder wraps a precomputed
+// datastructure that allows to produce small (but not minimal) DFA.
+type LevenshteinAutomatonBuilder struct {
+ pDfa *ParametricDFA
+}
+
+// NewLevenshteinAutomatonBuilder creates a
+// reusable, threadsafe Levenshtein automaton builder.
+// `maxDistance` - maximum distance considered by the automaton.
+// `transposition` - assign a distance of 1 for transposition
+//
+// Building this automaton builder is computationally intensive.
+// While it takes only a few milliseconds for `d=2`, it grows
+// exponentially with `d`. It is only reasonable to `d <= 5`.
+func NewLevenshteinAutomatonBuilder(maxDistance uint8,
+ transposition bool) (*LevenshteinAutomatonBuilder, error) {
+ lnfa := newLevenshtein(maxDistance, transposition)
+
+ pdfa, err := fromNfa(lnfa)
+ if err != nil {
+ return nil, err
+ }
+
+ return &LevenshteinAutomatonBuilder{pDfa: pdfa}, nil
+}
+
+// BuildDfa builds the levenshtein automaton for serving
+// queries with a given edit distance.
+func (lab *LevenshteinAutomatonBuilder) BuildDfa(query string,
+ fuzziness uint8) (*DFA, error) {
+ return lab.pDfa.buildDfa(query, fuzziness, false)
+}
+
+// MaxDistance returns the MaxEdit distance supported by the
+// LevenshteinAutomatonBuilder builder.
+func (lab *LevenshteinAutomatonBuilder) MaxDistance() uint8 {
+ return lab.pDfa.maxDistance
+}
diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/levenshtein_nfa.go b/vendor/github.com/couchbase/vellum/levenshtein2/levenshtein_nfa.go
new file mode 100644
index 0000000000..bed9b99d56
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/levenshtein2/levenshtein_nfa.go
@@ -0,0 +1,292 @@
+// Copyright (c) 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package levenshtein2
+
+import (
+ "math"
+ "sort"
+)
+
+/// Levenshtein Distance computed by a Levenshtein Automaton.
+///
+/// Levenshtein automata can only compute the exact Levenshtein distance
+/// up to a given `max_distance`.
+///
+/// Over this distance, the automaton will invariably
+/// return `Distance::AtLeast(max_distance + 1)`.
+type Distance interface {
+ distance() uint8
+}
+
+type Exact struct {
+ d uint8
+}
+
+func (e Exact) distance() uint8 {
+ return e.d
+}
+
+type Atleast struct {
+ d uint8
+}
+
+func (a Atleast) distance() uint8 {
+ return a.d
+}
+
+func characteristicVector(query []rune, c rune) uint64 {
+ chi := uint64(0)
+ for i := 0; i < len(query); i++ {
+ if query[i] == c {
+ chi |= 1 << uint64(i)
+ }
+ }
+ return chi
+}
+
+type NFAState struct {
+ Offset uint32
+ Distance uint8
+ InTranspose bool
+}
+
+type NFAStates []NFAState
+
+func (ns NFAStates) Len() int {
+ return len(ns)
+}
+
+func (ns NFAStates) Less(i, j int) bool {
+ if ns[i].Offset != ns[j].Offset {
+ return ns[i].Offset < ns[j].Offset
+ }
+
+ if ns[i].Distance != ns[j].Distance {
+ return ns[i].Distance < ns[j].Distance
+ }
+
+ return !ns[i].InTranspose && ns[j].InTranspose
+}
+
+func (ns NFAStates) Swap(i, j int) {
+ ns[i], ns[j] = ns[j], ns[i]
+}
+
+func (ns *NFAState) imply(other NFAState) bool {
+ transposeImply := ns.InTranspose
+ if !other.InTranspose {
+ transposeImply = !other.InTranspose
+ }
+
+ deltaOffset := ns.Offset - other.Offset
+ if ns.Offset < other.Offset {
+ deltaOffset = other.Offset - ns.Offset
+ }
+
+ if transposeImply {
+ return uint32(other.Distance) >= (uint32(ns.Distance) + deltaOffset)
+ }
+
+ return uint32(other.Distance) > (uint32(ns.Distance) + deltaOffset)
+}
+
+type MultiState struct {
+ states []NFAState
+}
+
+func (ms *MultiState) States() []NFAState {
+ return ms.states
+}
+
+func (ms *MultiState) Clear() {
+ ms.states = ms.states[:0]
+}
+
+func newMultiState() *MultiState {
+ return &MultiState{states: make([]NFAState, 0)}
+}
+
+func (ms *MultiState) normalize() uint32 {
+ minOffset := uint32(math.MaxUint32)
+
+ for _, s := range ms.states {
+ if s.Offset < minOffset {
+ minOffset = s.Offset
+ }
+ }
+ if minOffset == uint32(math.MaxUint32) {
+ minOffset = 0
+ }
+
+ for i := 0; i < len(ms.states); i++ {
+ ms.states[i].Offset -= minOffset
+ }
+
+ sort.Sort(NFAStates(ms.states))
+
+ return minOffset
+}
+
+func (ms *MultiState) addStates(nState NFAState) {
+
+ for _, s := range ms.states {
+ if s.imply(nState) {
+ return
+ }
+ }
+
+ i := 0
+ for i < len(ms.states) {
+ if nState.imply(ms.states[i]) {
+ ms.states = append(ms.states[:i], ms.states[i+1:]...)
+ } else {
+ i++
+ }
+ }
+ ms.states = append(ms.states, nState)
+
+}
+
+func extractBit(bitset uint64, pos uint8) bool {
+ shift := bitset >> pos
+ bit := shift & 1
+ return bit == uint64(1)
+}
+
+func dist(left, right uint32) uint32 {
+ if left > right {
+ return left - right
+ }
+ return right - left
+}
+
+type LevenshteinNFA struct {
+ mDistance uint8
+ damerau bool
+}
+
+func newLevenshtein(maxD uint8, transposition bool) *LevenshteinNFA {
+ return &LevenshteinNFA{mDistance: maxD,
+ damerau: transposition,
+ }
+}
+
+func (la *LevenshteinNFA) maxDistance() uint8 {
+ return la.mDistance
+}
+
+func (la *LevenshteinNFA) msDiameter() uint8 {
+ return 2*la.mDistance + 1
+}
+
+func (la *LevenshteinNFA) initialStates() *MultiState {
+ ms := MultiState{}
+ nfaState := NFAState{}
+ ms.addStates(nfaState)
+ return &ms
+}
+
+func (la *LevenshteinNFA) multistateDistance(ms *MultiState,
+ queryLen uint32) Distance {
+ minDistance := Atleast{d: la.mDistance + 1}
+ for _, s := range ms.states {
+ t := s.Distance + uint8(dist(queryLen, s.Offset))
+ if t <= uint8(la.mDistance) {
+ if minDistance.distance() > t {
+ minDistance.d = t
+ }
+ }
+ }
+
+ if minDistance.distance() == la.mDistance+1 {
+ return Atleast{d: la.mDistance + 1}
+ }
+
+ return minDistance
+}
+
+func (la *LevenshteinNFA) simpleTransition(state NFAState,
+ symbol uint64, ms *MultiState) {
+
+ if state.Distance < la.mDistance {
+ // insertion
+ ms.addStates(NFAState{Offset: state.Offset,
+ Distance: state.Distance + 1,
+ InTranspose: false})
+
+ // substitution
+ ms.addStates(NFAState{Offset: state.Offset + 1,
+ Distance: state.Distance + 1,
+ InTranspose: false})
+
+ n := la.mDistance + 1 - state.Distance
+ for d := uint8(1); d < n; d++ {
+ if extractBit(symbol, d) {
+ // for d > 0, as many deletion and character match
+ ms.addStates(NFAState{Offset: state.Offset + 1 + uint32(d),
+ Distance: state.Distance + d,
+ InTranspose: false})
+ }
+ }
+
+ if la.damerau && extractBit(symbol, 1) {
+ ms.addStates(NFAState{
+ Offset: state.Offset,
+ Distance: state.Distance + 1,
+ InTranspose: true})
+ }
+
+ }
+
+ if extractBit(symbol, 0) {
+ ms.addStates(NFAState{Offset: state.Offset + 1,
+ Distance: state.Distance,
+ InTranspose: false})
+ }
+
+ if state.InTranspose && extractBit(symbol, 0) {
+ ms.addStates(NFAState{Offset: state.Offset + 2,
+ Distance: state.Distance,
+ InTranspose: false})
+ }
+
+}
+
+func (la *LevenshteinNFA) transition(cState *MultiState,
+ dState *MultiState, scv uint64) {
+ dState.Clear()
+ mask := (uint64(1) << la.msDiameter()) - uint64(1)
+
+ for _, state := range cState.states {
+ cv := (scv >> state.Offset) & mask
+ la.simpleTransition(state, cv, dState)
+ }
+
+ sort.Sort(NFAStates(dState.states))
+}
+
+func (la *LevenshteinNFA) computeDistance(query, other []rune) Distance {
+ cState := la.initialStates()
+ nState := newMultiState()
+
+ for _, i := range other {
+ nState.Clear()
+ chi := characteristicVector(query, i)
+ la.transition(cState, nState, chi)
+ cState, nState = nState, cState
+ }
+
+ return la.multistateDistance(cState, uint32(len(query)))
+}
diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/parametric_dfa.go b/vendor/github.com/couchbase/vellum/levenshtein2/parametric_dfa.go
new file mode 100644
index 0000000000..ebd9311959
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/levenshtein2/parametric_dfa.go
@@ -0,0 +1,349 @@
+// Copyright (c) 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package levenshtein2
+
+import (
+ "crypto/md5"
+ "encoding/json"
+ "fmt"
+ "math"
+)
+
+type ParametricState struct {
+ shapeID uint32
+ offset uint32
+}
+
+func newParametricState() ParametricState {
+ return ParametricState{}
+}
+
+func (ps *ParametricState) isDeadEnd() bool {
+ return ps.shapeID == 0
+}
+
+type Transition struct {
+ destShapeID uint32
+ deltaOffset uint32
+}
+
+func (t *Transition) apply(state ParametricState) ParametricState {
+ ps := ParametricState{
+ shapeID: t.destShapeID}
+ // don't need any offset if we are in the dead state,
+ // this ensures we have only one dead state.
+ if t.destShapeID != 0 {
+ ps.offset = state.offset + t.deltaOffset
+ }
+
+ return ps
+}
+
+type ParametricStateIndex struct {
+ stateIndex []uint32
+ stateQueue []ParametricState
+ numOffsets uint32
+}
+
+func newParametricStateIndex(queryLen,
+ numParamState uint32) ParametricStateIndex {
+ numOffsets := queryLen + 1
+ if numParamState == 0 {
+ numParamState = numOffsets
+ }
+ maxNumStates := numParamState * numOffsets
+ psi := ParametricStateIndex{
+ stateIndex: make([]uint32, maxNumStates),
+ stateQueue: make([]ParametricState, 0, 150),
+ numOffsets: numOffsets,
+ }
+
+ for i := uint32(0); i < maxNumStates; i++ {
+ psi.stateIndex[i] = math.MaxUint32
+ }
+ return psi
+}
+
+func (psi *ParametricStateIndex) numStates() int {
+ return len(psi.stateQueue)
+}
+
+func (psi *ParametricStateIndex) maxNumStates() int {
+ return len(psi.stateIndex)
+}
+
+func (psi *ParametricStateIndex) get(stateID uint32) ParametricState {
+ return psi.stateQueue[stateID]
+}
+
+func (psi *ParametricStateIndex) getOrAllocate(ps ParametricState) uint32 {
+ bucket := ps.shapeID*psi.numOffsets + ps.offset
+ if bucket < uint32(len(psi.stateIndex)) &&
+ psi.stateIndex[bucket] != math.MaxUint32 {
+ return psi.stateIndex[bucket]
+ }
+ nState := uint32(len(psi.stateQueue))
+ psi.stateQueue = append(psi.stateQueue, ps)
+
+ psi.stateIndex[bucket] = nState
+ return nState
+}
+
+type ParametricDFA struct {
+ distance []uint8
+ transitions []Transition
+ maxDistance uint8
+ transitionStride uint32
+ diameter uint32
+}
+
+func (pdfa *ParametricDFA) initialState() ParametricState {
+ return ParametricState{shapeID: 1}
+}
+
+// Returns true iff whatever characters come afterward,
+// we will never reach a shorter distance
+func (pdfa *ParametricDFA) isPrefixSink(state ParametricState, queryLen uint32) bool {
+ if state.isDeadEnd() {
+ return true
+ }
+
+ remOffset := queryLen - state.offset
+ if remOffset < pdfa.diameter {
+ stateDistances := pdfa.distance[pdfa.diameter*state.shapeID:]
+ prefixDistance := stateDistances[remOffset]
+ if prefixDistance > pdfa.maxDistance {
+ return false
+ }
+
+ for _, d := range stateDistances {
+ if d < prefixDistance {
+ return false
+ }
+ }
+ return true
+ }
+ return false
+}
+
+func (pdfa *ParametricDFA) numStates() int {
+ return len(pdfa.transitions) / int(pdfa.transitionStride)
+}
+
+func min(x, y uint32) uint32 {
+ if x < y {
+ return x
+ }
+ return y
+}
+
+func (pdfa *ParametricDFA) transition(state ParametricState,
+ chi uint32) Transition {
+ return pdfa.transitions[pdfa.transitionStride*state.shapeID+chi]
+}
+
+func (pdfa *ParametricDFA) getDistance(state ParametricState,
+ qLen uint32) Distance {
+ remainingOffset := qLen - state.offset
+ if state.isDeadEnd() || remainingOffset >= pdfa.diameter {
+ return Atleast{d: pdfa.maxDistance + 1}
+ }
+ dist := pdfa.distance[int(pdfa.diameter*state.shapeID)+int(remainingOffset)]
+ if dist > pdfa.maxDistance {
+ return Atleast{d: dist}
+ }
+ return Exact{d: dist}
+}
+
+func (pdfa *ParametricDFA) computeDistance(left, right string) Distance {
+ state := pdfa.initialState()
+ leftChars := []rune(left)
+ for _, chr := range []rune(right) {
+ start := state.offset
+ stop := min(start+pdfa.diameter, uint32(len(leftChars)))
+ chi := characteristicVector(leftChars[start:stop], chr)
+ transition := pdfa.transition(state, uint32(chi))
+ state = transition.apply(state)
+ if state.isDeadEnd() {
+ return Atleast{d: pdfa.maxDistance + 1}
+ }
+ }
+ return pdfa.getDistance(state, uint32(len(left)))
+}
+
+func (pdfa *ParametricDFA) buildDfa(query string, distance uint8,
+ prefix bool) (*DFA, error) {
+ qLen := uint32(len([]rune(query)))
+ alphabet := queryChars(query)
+
+ psi := newParametricStateIndex(qLen, uint32(pdfa.numStates()))
+ maxNumStates := psi.maxNumStates()
+ deadEndStateID := psi.getOrAllocate(newParametricState())
+ if deadEndStateID != 0 {
+ return nil, fmt.Errorf("Invalid dead end state")
+ }
+
+ initialStateID := psi.getOrAllocate(pdfa.initialState())
+ dfaBuilder := withMaxStates(uint32(maxNumStates))
+ mask := uint32((1 << pdfa.diameter) - 1)
+
+ var stateID int
+ for stateID = 0; stateID < StateLimit; stateID++ {
+ if stateID == psi.numStates() {
+ break
+ }
+ state := psi.get(uint32(stateID))
+ if prefix && pdfa.isPrefixSink(state, qLen) {
+ distance := pdfa.getDistance(state, qLen)
+ dfaBuilder.addState(uint32(stateID), uint32(stateID), distance)
+ } else {
+ transition := pdfa.transition(state, 0)
+ defSuccessor := transition.apply(state)
+ defSuccessorID := psi.getOrAllocate(defSuccessor)
+ distance := pdfa.getDistance(state, qLen)
+ stateBuilder, err := dfaBuilder.addState(uint32(stateID), defSuccessorID, distance)
+
+ if err != nil {
+ return nil, fmt.Errorf("parametric_dfa: buildDfa, err: %v", err)
+ }
+
+ alphabet.resetNext()
+ chr, cv, err := alphabet.next()
+ for err == nil {
+ chi := cv.shiftAndMask(state.offset, mask)
+
+ transition := pdfa.transition(state, chi)
+
+ destState := transition.apply(state)
+
+ destStateID := psi.getOrAllocate(destState)
+
+ stateBuilder.addTransition(chr, destStateID)
+
+ chr, cv, err = alphabet.next()
+ }
+ }
+ }
+
+ if stateID == StateLimit {
+ return nil, ErrTooManyStates
+ }
+
+ dfaBuilder.setInitialState(initialStateID)
+ return dfaBuilder.build(distance), nil
+}
+
+func fromNfa(nfa *LevenshteinNFA) (*ParametricDFA, error) {
+ lookUp := newHash()
+ lookUp.getOrAllocate(*newMultiState())
+ initialState := nfa.initialStates()
+ lookUp.getOrAllocate(*initialState)
+
+ maxDistance := nfa.maxDistance()
+ msDiameter := nfa.msDiameter()
+
+ numChi := 1 << msDiameter
+ chiValues := make([]uint64, numChi)
+ for i := 0; i < numChi; i++ {
+ chiValues[i] = uint64(i)
+ }
+
+ transitions := make([]Transition, 0, numChi*int(msDiameter))
+ var stateID int
+ for stateID = 0; stateID < StateLimit; stateID++ {
+ if stateID == len(lookUp.items) {
+ break
+ }
+
+ for _, chi := range chiValues {
+ destMs := newMultiState()
+
+ ms := lookUp.getFromID(stateID)
+
+ nfa.transition(ms, destMs, chi)
+
+ translation := destMs.normalize()
+
+ destID := lookUp.getOrAllocate(*destMs)
+
+ transitions = append(transitions, Transition{
+ destShapeID: uint32(destID),
+ deltaOffset: translation,
+ })
+ }
+ }
+
+ if stateID == StateLimit {
+ return nil, ErrTooManyStates
+ }
+
+ ns := len(lookUp.items)
+ diameter := int(msDiameter)
+
+ distances := make([]uint8, 0, diameter*ns)
+ for stateID := 0; stateID < ns; stateID++ {
+ ms := lookUp.getFromID(stateID)
+ for offset := 0; offset < diameter; offset++ {
+ dist := nfa.multistateDistance(ms, uint32(offset))
+ distances = append(distances, dist.distance())
+ }
+ }
+
+ return &ParametricDFA{
+ diameter: uint32(msDiameter),
+ transitions: transitions,
+ maxDistance: maxDistance,
+ transitionStride: uint32(numChi),
+ distance: distances,
+ }, nil
+}
+
+type hash struct {
+ index map[[16]byte]int
+ items []MultiState
+}
+
+func newHash() *hash {
+ return &hash{
+ index: make(map[[16]byte]int, 100),
+ items: make([]MultiState, 0, 100),
+ }
+}
+
+func (h *hash) getOrAllocate(m MultiState) int {
+ size := len(h.items)
+ var exists bool
+ var pos int
+ md5 := getHash(&m)
+ if pos, exists = h.index[md5]; !exists {
+ h.index[md5] = size
+ pos = size
+ h.items = append(h.items, m)
+ }
+ return pos
+}
+
+func (h *hash) getFromID(id int) *MultiState {
+ return &h.items[id]
+}
+
+func getHash(ms *MultiState) [16]byte {
+ msBytes := []byte{}
+ for _, state := range ms.states {
+ jsonBytes, _ := json.Marshal(&state)
+ msBytes = append(msBytes, jsonBytes...)
+ }
+ return md5.Sum(msBytes)
+}
diff --git a/vendor/github.com/couchbase/vellum/regexp/compile.go b/vendor/github.com/couchbase/vellum/regexp/compile.go
index 6922b749db..55280164c7 100644
--- a/vendor/github.com/couchbase/vellum/regexp/compile.go
+++ b/vendor/github.com/couchbase/vellum/regexp/compile.go
@@ -18,17 +18,27 @@ import (
"regexp/syntax"
"unicode"
+ unicode_utf8 "unicode/utf8"
+
"github.com/couchbase/vellum/utf8"
)
type compiler struct {
sizeLimit uint
insts prog
+ instsPool []inst
+
+ sequences utf8.Sequences
+ rangeStack utf8.RangeStack
+ startBytes []byte
+ endBytes []byte
}
func newCompiler(sizeLimit uint) *compiler {
return &compiler{
- sizeLimit: sizeLimit,
+ sizeLimit: sizeLimit,
+ startBytes: make([]byte, unicode_utf8.UTFMax),
+ endBytes: make([]byte, unicode_utf8.UTFMax),
}
}
@@ -37,13 +47,13 @@ func (c *compiler) compile(ast *syntax.Regexp) (prog, error) {
if err != nil {
return nil, err
}
- c.insts = append(c.insts, &inst{
- op: OpMatch,
- })
+ inst := c.allocInst()
+ inst.op = OpMatch
+ c.insts = append(c.insts, inst)
return c.insts, nil
}
-func (c *compiler) c(ast *syntax.Regexp) error {
+func (c *compiler) c(ast *syntax.Regexp) (err error) {
if ast.Flags&syntax.NonGreedy > 1 {
return ErrNoLazy
}
@@ -67,11 +77,12 @@ func (c *compiler) c(ast *syntax.Regexp) error {
next.Rune = next.Rune0[0:2]
return c.c(&next)
}
- seqs, err := utf8.NewSequences(r, r)
+ c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
+ r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
if err != nil {
return err
}
- for _, seq := range seqs {
+ for _, seq := range c.sequences {
c.compileUtf8Ranges(seq)
}
}
@@ -106,8 +117,7 @@ func (c *compiler) c(ast *syntax.Regexp) error {
if len(ast.Sub) == 0 {
return nil
}
- jmpsToEnd := []uint{}
-
+ jmpsToEnd := make([]uint, 0, len(ast.Sub)-1)
// does not handle last entry
for i := 0; i < len(ast.Sub)-1; i++ {
sub := ast.Sub[i]
@@ -188,7 +198,8 @@ func (c *compiler) c(ast *syntax.Regexp) error {
return err
}
}
- var splits, starts []uint
+ splits := make([]uint, 0, ast.Max-ast.Min)
+ starts := make([]uint, 0, ast.Max-ast.Min)
for i := ast.Min; i < ast.Max; i++ {
splits = append(splits, c.emptySplit())
starts = append(starts, uint(len(c.insts)))
@@ -218,8 +229,7 @@ func (c *compiler) compileClass(ast *syntax.Regexp) error {
if len(ast.Rune) == 0 {
return nil
}
- var jmps []uint
-
+ jmps := make([]uint, 0, len(ast.Rune)-2)
// does not do last pair
for i := 0; i < len(ast.Rune)-2; i += 2 {
rstart := ast.Rune[i]
@@ -249,16 +259,16 @@ func (c *compiler) compileClass(ast *syntax.Regexp) error {
return nil
}
-func (c *compiler) compileClassRange(startR, endR rune) error {
- seqs, err := utf8.NewSequences(startR, endR)
+func (c *compiler) compileClassRange(startR, endR rune) (err error) {
+ c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
+ startR, endR, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
if err != nil {
return err
}
- var jmps []uint
-
+ jmps := make([]uint, 0, len(c.sequences)-1)
// does not do last entry
- for i := 0; i < len(seqs)-1; i++ {
- seq := seqs[i]
+ for i := 0; i < len(c.sequences)-1; i++ {
+ seq := c.sequences[i]
split := c.emptySplit()
j1 := c.top()
c.compileUtf8Ranges(seq)
@@ -267,7 +277,7 @@ func (c *compiler) compileClassRange(startR, endR rune) error {
c.setSplit(split, j1, j2)
}
// handle last entry
- c.compileUtf8Ranges(seqs[len(seqs)-1])
+ c.compileUtf8Ranges(c.sequences[len(c.sequences)-1])
end := c.top()
for _, jmp := range jmps {
c.setJump(jmp, end)
@@ -278,25 +288,25 @@ func (c *compiler) compileClassRange(startR, endR rune) error {
func (c *compiler) compileUtf8Ranges(seq utf8.Sequence) {
for _, r := range seq {
- c.insts = append(c.insts, &inst{
- op: OpRange,
- rangeStart: r.Start,
- rangeEnd: r.End,
- })
+ inst := c.allocInst()
+ inst.op = OpRange
+ inst.rangeStart = r.Start
+ inst.rangeEnd = r.End
+ c.insts = append(c.insts, inst)
}
}
func (c *compiler) emptySplit() uint {
- c.insts = append(c.insts, &inst{
- op: OpSplit,
- })
+ inst := c.allocInst()
+ inst.op = OpSplit
+ c.insts = append(c.insts, inst)
return c.top() - 1
}
func (c *compiler) emptyJump() uint {
- c.insts = append(c.insts, &inst{
- op: OpJmp,
- })
+ inst := c.allocInst()
+ inst.op = OpJmp
+ c.insts = append(c.insts, inst)
return c.top() - 1
}
@@ -314,3 +324,12 @@ func (c *compiler) setJump(i, pc uint) {
func (c *compiler) top() uint {
return uint(len(c.insts))
}
+
+func (c *compiler) allocInst() *inst {
+ if len(c.instsPool) <= 0 {
+ c.instsPool = make([]inst, 16)
+ }
+ inst := &c.instsPool[0]
+ c.instsPool = c.instsPool[1:]
+ return inst
+}
diff --git a/vendor/github.com/couchbase/vellum/regexp/dfa.go b/vendor/github.com/couchbase/vellum/regexp/dfa.go
index 9864606b6a..7e6fb29dac 100644
--- a/vendor/github.com/couchbase/vellum/regexp/dfa.go
+++ b/vendor/github.com/couchbase/vellum/regexp/dfa.go
@@ -23,7 +23,7 @@ import (
const StateLimit = 10000
// ErrTooManyStates is returned if you attempt to build a Levenshtein
-// automaton which requries too many states.
+// automaton which requires too many states.
var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states",
StateLimit)
@@ -37,12 +37,12 @@ func newDfaBuilder(insts prog) *dfaBuilder {
d := &dfaBuilder{
dfa: &dfa{
insts: insts,
- states: make([]*state, 0, 16),
+ states: make([]state, 0, 16),
},
cache: make(map[string]int, 1024),
}
// add 0 state that is invalid
- d.dfa.states = append(d.dfa.states, &state{
+ d.dfa.states = append(d.dfa.states, state{
next: make([]int, 256),
match: false,
})
@@ -54,13 +54,15 @@ func (d *dfaBuilder) build() (*dfa, error) {
next := newSparseSet(uint(len(d.dfa.insts)))
d.dfa.add(cur, 0)
- states := intStack{d.cachedState(cur)}
+ ns, instsReuse := d.cachedState(cur, nil)
+ states := intStack{ns}
seen := make(map[int]struct{})
var s int
states, s = states.Pop()
for s != 0 {
for b := 0; b < 256; b++ {
- ns := d.runState(cur, next, s, byte(b))
+ var ns int
+ ns, instsReuse = d.runState(cur, next, s, byte(b), instsReuse)
if ns != 0 {
if _, ok := seen[ns]; !ok {
seen[ns] = struct{}{}
@@ -76,15 +78,17 @@ func (d *dfaBuilder) build() (*dfa, error) {
return d.dfa, nil
}
-func (d *dfaBuilder) runState(cur, next *sparseSet, state int, b byte) int {
+func (d *dfaBuilder) runState(cur, next *sparseSet, state int, b byte, instsReuse []uint) (
+ int, []uint) {
cur.Clear()
for _, ip := range d.dfa.states[state].insts {
cur.Add(ip)
}
d.dfa.run(cur, next, b)
- nextState := d.cachedState(next)
+ var nextState int
+ nextState, instsReuse = d.cachedState(next, instsReuse)
d.dfa.states[state].next[b] = nextState
- return nextState
+ return nextState, instsReuse
}
func instsKey(insts []uint, buf []byte) []byte {
@@ -99,8 +103,12 @@ func instsKey(insts []uint, buf []byte) []byte {
return buf
}
-func (d *dfaBuilder) cachedState(set *sparseSet) int {
- var insts []uint
+func (d *dfaBuilder) cachedState(set *sparseSet,
+ instsReuse []uint) (int, []uint) {
+ insts := instsReuse[:0]
+ if cap(insts) == 0 {
+ insts = make([]uint, 0, set.Len())
+ }
var isMatch bool
for i := uint(0); i < uint(set.Len()); i++ {
ip := set.Get(i)
@@ -113,26 +121,26 @@ func (d *dfaBuilder) cachedState(set *sparseSet) int {
}
}
if len(insts) == 0 {
- return 0
+ return 0, insts
}
d.keyBuf = instsKey(insts, d.keyBuf)
v, ok := d.cache[string(d.keyBuf)]
if ok {
- return v
+ return v, insts
}
- d.dfa.states = append(d.dfa.states, &state{
+ d.dfa.states = append(d.dfa.states, state{
insts: insts,
next: make([]int, 256),
match: isMatch,
})
newV := len(d.dfa.states) - 1
d.cache[string(d.keyBuf)] = newV
- return newV
+ return newV, nil
}
type dfa struct {
insts prog
- states []*state
+ states []state
}
func (d *dfa) add(set *sparseSet, ip uint) {
diff --git a/vendor/github.com/couchbase/vellum/regexp/inst.go b/vendor/github.com/couchbase/vellum/regexp/inst.go
index 61cbf2f333..36f2e602df 100644
--- a/vendor/github.com/couchbase/vellum/regexp/inst.go
+++ b/vendor/github.com/couchbase/vellum/regexp/inst.go
@@ -27,7 +27,7 @@ const (
OpRange
)
-// instSize is the approxmiate size of the an inst struct in bytes
+// instSize is the approximate size of the an inst struct in bytes
const instSize = 40
type inst struct {
diff --git a/vendor/github.com/couchbase/vellum/regexp/regexp.go b/vendor/github.com/couchbase/vellum/regexp/regexp.go
index ed0e7823e1..920ddc3708 100644
--- a/vendor/github.com/couchbase/vellum/regexp/regexp.go
+++ b/vendor/github.com/couchbase/vellum/regexp/regexp.go
@@ -35,6 +35,8 @@ var ErrNoLazy = fmt.Errorf("lazy quantifiers are not allowed")
// too many instructions
var ErrCompiledTooBig = fmt.Errorf("too many instructions")
+var DefaultLimit = uint(10 * (1 << 20))
+
// Regexp implements the vellum.Automaton interface for matcing a user
// specified regular expression.
type Regexp struct {
@@ -47,7 +49,7 @@ type Regexp struct {
// compiled finite state automaton. If this size is exceeded,
// ErrCompiledTooBig will be returned.
func New(expr string) (*Regexp, error) {
- return NewWithLimit(expr, 10*(1<<20))
+ return NewWithLimit(expr, DefaultLimit)
}
// NewRegexpWithLimit creates a new Regular Expression automaton with
@@ -59,6 +61,10 @@ func NewWithLimit(expr string, size uint) (*Regexp, error) {
if err != nil {
return nil, err
}
+ return NewParsedWithLimit(expr, parsed, size)
+}
+
+func NewParsedWithLimit(expr string, parsed *syntax.Regexp, size uint) (*Regexp, error) {
compiler := newCompiler(size)
insts, err := compiler.compile(parsed)
if err != nil {
@@ -103,7 +109,7 @@ func (r *Regexp) WillAlwaysMatch(int) bool {
return false
}
-// Accept returns the new state, resulting from the transite byte b
+// Accept returns the new state, resulting from the transition byte b
// when currently in the state s.
func (r *Regexp) Accept(s int, b byte) int {
if s < len(r.dfa.states) {
diff --git a/vendor/github.com/couchbase/vellum/registry.go b/vendor/github.com/couchbase/vellum/registry.go
index 3721a7c9c3..f5b9b4d59c 100644
--- a/vendor/github.com/couchbase/vellum/registry.go
+++ b/vendor/github.com/couchbase/vellum/registry.go
@@ -14,39 +14,35 @@
package vellum
-import (
- "hash"
- "hash/fnv"
-)
-
type registryCell struct {
addr int
node *builderNode
}
type registry struct {
- table []registryCell
- tableSize uint
- mruSize uint
- hasher hash.Hash64
+ builderNodePool *builderNodePool
+ table []registryCell
+ tableSize uint
+ mruSize uint
}
-func newRegistry(tableSize, mruSize int) *registry {
+func newRegistry(p *builderNodePool, tableSize, mruSize int) *registry {
nsize := tableSize * mruSize
rv := &registry{
- table: make([]registryCell, nsize),
- tableSize: uint(tableSize),
- mruSize: uint(mruSize),
- hasher: fnv.New64a(),
+ builderNodePool: p,
+ table: make([]registryCell, nsize),
+ tableSize: uint(tableSize),
+ mruSize: uint(mruSize),
}
return rv
}
func (r *registry) Reset() {
- for i := 0; i < len(r.table); i++ {
- r.table[i] = registryCell{}
+ var empty registryCell
+ for i := range r.table {
+ r.builderNodePool.Put(r.table[i].node)
+ r.table[i] = empty
}
- r.hasher.Reset()
}
func (r *registry) entry(node *builderNode) (bool, int, *registryCell) {
@@ -57,7 +53,7 @@ func (r *registry) entry(node *builderNode) (bool, int, *registryCell) {
start := r.mruSize * uint(bucket)
end := start + r.mruSize
rc := registryCache(r.table[start:end])
- return rc.entry(node)
+ return rc.entry(node, r.builderNodePool)
}
const fnvPrime = 1099511628211
@@ -81,11 +77,12 @@ func (r *registry) hash(b *builderNode) int {
type registryCache []registryCell
-func (r registryCache) entry(node *builderNode) (bool, int, *registryCell) {
+func (r registryCache) entry(node *builderNode, pool *builderNodePool) (bool, int, *registryCell) {
if len(r) == 1 {
if r[0].node != nil && r[0].node.equiv(node) {
return true, r[0].addr, nil
}
+ pool.Put(r[0].node)
r[0].node = node
return false, 0, &r[0]
}
@@ -98,6 +95,7 @@ func (r registryCache) entry(node *builderNode) (bool, int, *registryCell) {
}
// no match
last := len(r) - 1
+ pool.Put(r[last].node)
r[last].node = node // discard LRU
r.promote(last)
return false, 0, &r[0]
diff --git a/vendor/github.com/couchbase/vellum/utf8/utf8.go b/vendor/github.com/couchbase/vellum/utf8/utf8.go
index 47dbe9d1c5..54e23b937c 100644
--- a/vendor/github.com/couchbase/vellum/utf8/utf8.go
+++ b/vendor/github.com/couchbase/vellum/utf8/utf8.go
@@ -25,19 +25,39 @@ type Sequences []Sequence
// NewSequences constructs a collection of Sequence which describe the
// byte ranges covered between the start and end runes.
func NewSequences(start, end rune) (Sequences, error) {
- var rv Sequences
+ rv, _, err := NewSequencesPrealloc(start, end, nil, nil, nil, nil)
+ return rv, err
+}
+
+func NewSequencesPrealloc(start, end rune,
+ preallocSequences Sequences,
+ preallocRangeStack RangeStack,
+ preallocStartBytes, preallocEndBytes []byte) (Sequences, RangeStack, error) {
+ rv := preallocSequences[:0]
+
+ startBytes := preallocStartBytes
+ if cap(startBytes) < utf8.UTFMax {
+ startBytes = make([]byte, utf8.UTFMax)
+ }
+ startBytes = startBytes[:utf8.UTFMax]
- var rangeStack rangeStack
- rangeStack = rangeStack.Push(&scalarRange{start, end})
+ endBytes := preallocEndBytes
+ if cap(endBytes) < utf8.UTFMax {
+ endBytes = make([]byte, utf8.UTFMax)
+ }
+ endBytes = endBytes[:utf8.UTFMax]
+
+ rangeStack := preallocRangeStack[:0]
+ rangeStack = rangeStack.Push(scalarRange{start, end})
rangeStack, r := rangeStack.Pop()
TOP:
- for r != nil {
+ for r != nilScalarRange {
INNER:
for {
r1, r2 := r.split()
- if r1 != nil {
- rangeStack = rangeStack.Push(&scalarRange{r2.start, r2.end})
+ if r1 != nilScalarRange {
+ rangeStack = rangeStack.Push(scalarRange{r2.start, r2.end})
r.start = r1.start
r.end = r1.end
continue INNER
@@ -49,13 +69,13 @@ TOP:
for i := 1; i < utf8.UTFMax; i++ {
max := maxScalarValue(i)
if r.start <= max && max < r.end {
- rangeStack = rangeStack.Push(&scalarRange{max + 1, r.end})
+ rangeStack = rangeStack.Push(scalarRange{max + 1, r.end})
r.end = max
continue INNER
}
}
asciiRange := r.ascii()
- if asciiRange != nil {
+ if asciiRange != nilRange {
rv = append(rv, Sequence{
asciiRange,
})
@@ -66,23 +86,21 @@ TOP:
m := rune((1 << (6 * i)) - 1)
if (r.start & ^m) != (r.end & ^m) {
if (r.start & m) != 0 {
- rangeStack = rangeStack.Push(&scalarRange{(r.start | m) + 1, r.end})
+ rangeStack = rangeStack.Push(scalarRange{(r.start | m) + 1, r.end})
r.end = r.start | m
continue INNER
}
if (r.end & m) != m {
- rangeStack = rangeStack.Push(&scalarRange{r.end & ^m, r.end})
+ rangeStack = rangeStack.Push(scalarRange{r.end & ^m, r.end})
r.end = (r.end & ^m) - 1
continue INNER
}
}
}
- start := make([]byte, utf8.UTFMax)
- end := make([]byte, utf8.UTFMax)
- n, m := r.encode(start, end)
- seq, err := SequenceFromEncodedRange(start[0:n], end[0:m])
+ n, m := r.encode(startBytes, endBytes)
+ seq, err := SequenceFromEncodedRange(startBytes[0:n], endBytes[0:m])
if err != nil {
- return nil, err
+ return nil, nil, err
}
rv = append(rv, seq)
rangeStack, r = rangeStack.Pop()
@@ -90,11 +108,11 @@ TOP:
}
}
- return rv, nil
+ return rv, rangeStack, nil
}
-// Sequence is a collection of *Range
-type Sequence []*Range
+// Sequence is a collection of Range
+type Sequence []Range
// SequenceFromEncodedRange creates sequence from the encoded bytes
func SequenceFromEncodedRange(start, end []byte) (Sequence, error) {
@@ -104,21 +122,21 @@ func SequenceFromEncodedRange(start, end []byte) (Sequence, error) {
switch len(start) {
case 2:
return Sequence{
- &Range{start[0], end[0]},
- &Range{start[1], end[1]},
+ Range{start[0], end[0]},
+ Range{start[1], end[1]},
}, nil
case 3:
return Sequence{
- &Range{start[0], end[0]},
- &Range{start[1], end[1]},
- &Range{start[2], end[2]},
+ Range{start[0], end[0]},
+ Range{start[1], end[1]},
+ Range{start[2], end[2]},
}, nil
case 4:
return Sequence{
- &Range{start[0], end[0]},
- &Range{start[1], end[1]},
- &Range{start[2], end[2]},
- &Range{start[3], end[3]},
+ Range{start[0], end[0]},
+ Range{start[1], end[1]},
+ Range{start[2], end[2]},
+ Range{start[3], end[3]},
}, nil
}
@@ -159,6 +177,8 @@ type Range struct {
End byte
}
+var nilRange = Range{0xff, 0}
+
func (u Range) matches(b byte) bool {
if u.Start <= b && b <= u.End {
return true
@@ -178,37 +198,39 @@ type scalarRange struct {
end rune
}
+var nilScalarRange = scalarRange{0xffff, 0}
+
func (s *scalarRange) String() string {
return fmt.Sprintf("ScalarRange(%d,%d)", s.start, s.end)
}
// split this scalar range if it overlaps with a surrogate codepoint
-func (s *scalarRange) split() (*scalarRange, *scalarRange) {
+func (s *scalarRange) split() (scalarRange, scalarRange) {
if s.start < 0xe000 && s.end > 0xd7ff {
- return &scalarRange{
+ return scalarRange{
start: s.start,
end: 0xd7ff,
},
- &scalarRange{
+ scalarRange{
start: 0xe000,
end: s.end,
}
}
- return nil, nil
+ return nilScalarRange, nilScalarRange
}
func (s *scalarRange) valid() bool {
return s.start <= s.end
}
-func (s *scalarRange) ascii() *Range {
+func (s *scalarRange) ascii() Range {
if s.valid() && s.end <= 0x7f {
- return &Range{
+ return Range{
Start: byte(s.start),
End: byte(s.end),
}
}
- return nil
+ return nilRange
}
// start and end MUST have capacity for utf8.UTFMax bytes
@@ -218,16 +240,16 @@ func (s *scalarRange) encode(start, end []byte) (int, int) {
return n, m
}
-type rangeStack []*scalarRange
+type RangeStack []scalarRange
-func (s rangeStack) Push(v *scalarRange) rangeStack {
+func (s RangeStack) Push(v scalarRange) RangeStack {
return append(s, v)
}
-func (s rangeStack) Pop() (rangeStack, *scalarRange) {
+func (s RangeStack) Pop() (RangeStack, scalarRange) {
l := len(s)
if l < 1 {
- return s, nil
+ return s, nilScalarRange
}
return s[:l-1], s[l-1]
}
diff --git a/vendor/github.com/boltdb/bolt/LICENSE b/vendor/github.com/etcd-io/bbolt/LICENSE
index 004e77fe5d..004e77fe5d 100644
--- a/vendor/github.com/boltdb/bolt/LICENSE
+++ b/vendor/github.com/etcd-io/bbolt/LICENSE
diff --git a/vendor/github.com/boltdb/bolt/bolt_386.go b/vendor/github.com/etcd-io/bbolt/bolt_386.go
index 820d533c15..4d35ee7cf3 100644
--- a/vendor/github.com/boltdb/bolt/bolt_386.go
+++ b/vendor/github.com/etcd-io/bbolt/bolt_386.go
@@ -1,4 +1,4 @@
-package bolt
+package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0x7FFFFFFF // 2GB
diff --git a/vendor/github.com/boltdb/bolt/bolt_amd64.go b/vendor/github.com/etcd-io/bbolt/bolt_amd64.go
index 98fafdb47d..60a52dad56 100644
--- a/vendor/github.com/boltdb/bolt/bolt_amd64.go
+++ b/vendor/github.com/etcd-io/bbolt/bolt_amd64.go
@@ -1,4 +1,4 @@
-package bolt
+package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
diff --git a/vendor/github.com/boltdb/bolt/bolt_arm.go b/vendor/github.com/etcd-io/bbolt/bolt_arm.go
index 7e5cb4b941..105d27ddb7 100644
--- a/vendor/github.com/boltdb/bolt/bolt_arm.go
+++ b/vendor/github.com/etcd-io/bbolt/bolt_arm.go
@@ -1,4 +1,4 @@
-package bolt
+package bbolt
import "unsafe"
diff --git a/vendor/github.com/boltdb/bolt/bolt_arm64.go b/vendor/github.com/etcd-io/bbolt/bolt_arm64.go
index b26d84f91b..f5aa2a5ee2 100644
--- a/vendor/github.com/boltdb/bolt/bolt_arm64.go
+++ b/vendor/github.com/etcd-io/bbolt/bolt_arm64.go
@@ -1,6 +1,6 @@
// +build arm64
-package bolt
+package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
diff --git a/vendor/github.com/boltdb/bolt/bolt_linux.go b/vendor/github.com/etcd-io/bbolt/bolt_linux.go
index 2b67666140..7707bcacf0 100644
--- a/vendor/github.com/boltdb/bolt/bolt_linux.go
+++ b/vendor/github.com/etcd-io/bbolt/bolt_linux.go
@@ -1,4 +1,4 @@
-package bolt
+package bbolt
import (
"syscall"
diff --git a/vendor/github.com/boltdb/bolt/bolt_mips64.go b/vendor/github.com/etcd-io/bbolt/bolt_mips64x.go
index 9f5060942a..baeb289fd9 100644
--- a/vendor/github.com/boltdb/bolt/bolt_mips64.go
+++ b/vendor/github.com/etcd-io/bbolt/bolt_mips64x.go
@@ -1,11 +1,12 @@
-// +build mips64
-package bolt
+// +build mips64 mips64le
+
+package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
-const maxMapSize = 0xFFFFFFFFFFFF // 256TB
+const maxMapSize = 0x8000000000 // 512GB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF
-// brokenUnaligned Are unaligned load/stores broken on this arch?
+// Are unaligned load/stores broken on this arch?
var brokenUnaligned = false
diff --git a/vendor/github.com/boltdb/bolt/bolt_mips.go b/vendor/github.com/etcd-io/bbolt/bolt_mipsx.go
index 1c06342ea7..2d9b1a91f3 100644
--- a/vendor/github.com/boltdb/bolt/bolt_mips.go
+++ b/vendor/github.com/etcd-io/bbolt/bolt_mipsx.go
@@ -1,5 +1,6 @@
-// +build mips
-package bolt
+// +build mips mipsle
+
+package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0x40000000 // 1GB
@@ -7,5 +8,5 @@ const maxMapSize = 0x40000000 // 1GB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF
-// brokenUnaligned Are unaligned load/stores broken on this arch?
+// Are unaligned load/stores broken on this arch?
var brokenUnaligned = false
diff --git a/vendor/github.com/boltdb/bolt/bolt_openbsd.go b/vendor/github.com/etcd-io/bbolt/bolt_openbsd.go
index 7058c3d734..d7f50358ef 100644
--- a/vendor/github.com/boltdb/bolt/bolt_openbsd.go
+++ b/vendor/github.com/etcd-io/bbolt/bolt_openbsd.go
@@ -1,4 +1,4 @@
-package bolt
+package bbolt
import (
"syscall"
diff --git a/vendor/github.com/boltdb/bolt/bolt_ppc.go b/vendor/github.com/etcd-io/bbolt/bolt_ppc.go
index 645ddc3edc..69804714aa 100644
--- a/vendor/github.com/boltdb/bolt/bolt_ppc.go
+++ b/vendor/github.com/etcd-io/bbolt/bolt_ppc.go
@@ -1,9 +1,12 @@
// +build ppc
-package bolt
+package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0x7FFFFFFF // 2GB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF
+
+// Are unaligned load/stores broken on this arch?
+var brokenUnaligned = false
diff --git a/vendor/github.com/boltdb/bolt/bolt_ppc64.go b/vendor/github.com/etcd-io/bbolt/bolt_ppc64.go
index 9331d9771e..3565908576 100644
--- a/vendor/github.com/boltdb/bolt/bolt_ppc64.go
+++ b/vendor/github.com/etcd-io/bbolt/bolt_ppc64.go
@@ -1,6 +1,6 @@
// +build ppc64
-package bolt
+package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
diff --git a/vendor/github.com/boltdb/bolt/bolt_ppc64le.go b/vendor/github.com/etcd-io/bbolt/bolt_ppc64le.go
index 8c143bc5d1..422c7c69d6 100644
--- a/vendor/github.com/boltdb/bolt/bolt_ppc64le.go
+++ b/vendor/github.com/etcd-io/bbolt/bolt_ppc64le.go
@@ -1,6 +1,6 @@
// +build ppc64le
-package bolt
+package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
diff --git a/vendor/github.com/boltdb/bolt/bolt_s390x.go b/vendor/github.com/etcd-io/bbolt/bolt_s390x.go
index d7c39af925..6d3fcb825d 100644
--- a/vendor/github.com/boltdb/bolt/bolt_s390x.go
+++ b/vendor/github.com/etcd-io/bbolt/bolt_s390x.go
@@ -1,6 +1,6 @@
// +build s390x
-package bolt
+package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
diff --git a/vendor/github.com/boltdb/bolt/bolt_unix.go b/vendor/github.com/etcd-io/bbolt/bolt_unix.go
index cad62dda1e..5f2bb51451 100644
--- a/vendor/github.com/boltdb/bolt/bolt_unix.go
+++ b/vendor/github.com/etcd-io/bbolt/bolt_unix.go
@@ -1,41 +1,43 @@
// +build !windows,!plan9,!solaris
-package bolt
+package bbolt
import (
"fmt"
- "os"
"syscall"
"time"
"unsafe"
)
// flock acquires an advisory lock on a file descriptor.
-func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
+func flock(db *DB, exclusive bool, timeout time.Duration) error {
var t time.Time
+ if timeout != 0 {
+ t = time.Now()
+ }
+ fd := db.file.Fd()
+ flag := syscall.LOCK_NB
+ if exclusive {
+ flag |= syscall.LOCK_EX
+ } else {
+ flag |= syscall.LOCK_SH
+ }
for {
- // If we're beyond our timeout then return an error.
- // This can only occur after we've attempted a flock once.
- if t.IsZero() {
- t = time.Now()
- } else if timeout > 0 && time.Since(t) > timeout {
- return ErrTimeout
- }
- flag := syscall.LOCK_SH
- if exclusive {
- flag = syscall.LOCK_EX
- }
-
- // Otherwise attempt to obtain an exclusive lock.
- err := syscall.Flock(int(db.file.Fd()), flag|syscall.LOCK_NB)
+ // Attempt to obtain an exclusive lock.
+ err := syscall.Flock(int(fd), flag)
if err == nil {
return nil
} else if err != syscall.EWOULDBLOCK {
return err
}
+ // If we timed out then return an error.
+ if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
+ return ErrTimeout
+ }
+
// Wait for a bit and try again.
- time.Sleep(50 * time.Millisecond)
+ time.Sleep(flockRetryTimeout)
}
}
@@ -53,7 +55,9 @@ func mmap(db *DB, sz int) error {
}
// Advise the kernel that the mmap is accessed randomly.
- if err := madvise(b, syscall.MADV_RANDOM); err != nil {
+ err = madvise(b, syscall.MADV_RANDOM)
+ if err != nil && err != syscall.ENOSYS {
+ // Ignore not implemented error in kernel because it still works.
return fmt.Errorf("madvise: %s", err)
}
diff --git a/vendor/github.com/boltdb/bolt/bolt_unix_solaris.go b/vendor/github.com/etcd-io/bbolt/bolt_unix_solaris.go
index 307bf2b3ee..babad65786 100644
--- a/vendor/github.com/boltdb/bolt/bolt_unix_solaris.go
+++ b/vendor/github.com/etcd-io/bbolt/bolt_unix_solaris.go
@@ -1,8 +1,7 @@
-package bolt
+package bbolt
import (
"fmt"
- "os"
"syscall"
"time"
"unsafe"
@@ -11,36 +10,35 @@ import (
)
// flock acquires an advisory lock on a file descriptor.
-func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
+func flock(db *DB, exclusive bool, timeout time.Duration) error {
var t time.Time
+ if timeout != 0 {
+ t = time.Now()
+ }
+ fd := db.file.Fd()
+ var lockType int16
+ if exclusive {
+ lockType = syscall.F_WRLCK
+ } else {
+ lockType = syscall.F_RDLCK
+ }
for {
- // If we're beyond our timeout then return an error.
- // This can only occur after we've attempted a flock once.
- if t.IsZero() {
- t = time.Now()
- } else if timeout > 0 && time.Since(t) > timeout {
- return ErrTimeout
- }
- var lock syscall.Flock_t
- lock.Start = 0
- lock.Len = 0
- lock.Pid = 0
- lock.Whence = 0
- lock.Pid = 0
- if exclusive {
- lock.Type = syscall.F_WRLCK
- } else {
- lock.Type = syscall.F_RDLCK
- }
- err := syscall.FcntlFlock(db.file.Fd(), syscall.F_SETLK, &lock)
+ // Attempt to obtain an exclusive lock.
+ lock := syscall.Flock_t{Type: lockType}
+ err := syscall.FcntlFlock(fd, syscall.F_SETLK, &lock)
if err == nil {
return nil
} else if err != syscall.EAGAIN {
return err
}
+ // If we timed out then return an error.
+ if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
+ return ErrTimeout
+ }
+
// Wait for a bit and try again.
- time.Sleep(50 * time.Millisecond)
+ time.Sleep(flockRetryTimeout)
}
}
diff --git a/vendor/github.com/boltdb/bolt/bolt_windows.go b/vendor/github.com/etcd-io/bbolt/bolt_windows.go
index b00fb0720a..fca178bd29 100644
--- a/vendor/github.com/boltdb/bolt/bolt_windows.go
+++ b/vendor/github.com/etcd-io/bbolt/bolt_windows.go
@@ -1,4 +1,4 @@
-package bolt
+package bbolt
import (
"fmt"
@@ -16,8 +16,6 @@ var (
)
const (
- lockExt = ".lock"
-
// see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx
flagLockExclusive = 2
flagLockFailImmediately = 1
@@ -48,48 +46,47 @@ func fdatasync(db *DB) error {
}
// flock acquires an advisory lock on a file descriptor.
-func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
- // Create a separate lock file on windows because a process
- // cannot share an exclusive lock on the same file. This is
- // needed during Tx.WriteTo().
- f, err := os.OpenFile(db.path+lockExt, os.O_CREATE, mode)
- if err != nil {
- return err
- }
- db.lockfile = f
-
+func flock(db *DB, exclusive bool, timeout time.Duration) error {
var t time.Time
+ if timeout != 0 {
+ t = time.Now()
+ }
+ var flag uint32 = flagLockFailImmediately
+ if exclusive {
+ flag |= flagLockExclusive
+ }
for {
- // If we're beyond our timeout then return an error.
- // This can only occur after we've attempted a flock once.
- if t.IsZero() {
- t = time.Now()
- } else if timeout > 0 && time.Since(t) > timeout {
- return ErrTimeout
- }
-
- var flag uint32 = flagLockFailImmediately
- if exclusive {
- flag |= flagLockExclusive
- }
+ // Fix for https://github.com/etcd-io/bbolt/issues/121. Use byte-range
+ // -1..0 as the lock on the database file.
+ var m1 uint32 = (1 << 32) - 1 // -1 in a uint32
+ err := lockFileEx(syscall.Handle(db.file.Fd()), flag, 0, 1, 0, &syscall.Overlapped{
+ Offset: m1,
+ OffsetHigh: m1,
+ })
- err := lockFileEx(syscall.Handle(db.lockfile.Fd()), flag, 0, 1, 0, &syscall.Overlapped{})
if err == nil {
return nil
} else if err != errLockViolation {
return err
}
+ // If we timed out then return an error.
+ if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
+ return ErrTimeout
+ }
+
// Wait for a bit and try again.
- time.Sleep(50 * time.Millisecond)
+ time.Sleep(flockRetryTimeout)
}
}
// funlock releases an advisory lock on a file descriptor.
func funlock(db *DB) error {
- err := unlockFileEx(syscall.Handle(db.lockfile.Fd()), 0, 1, 0, &syscall.Overlapped{})
- db.lockfile.Close()
- os.Remove(db.path + lockExt)
+ var m1 uint32 = (1 << 32) - 1 // -1 in a uint32
+ err := unlockFileEx(syscall.Handle(db.file.Fd()), 0, 1, 0, &syscall.Overlapped{
+ Offset: m1,
+ OffsetHigh: m1,
+ })
return err
}
diff --git a/vendor/github.com/boltdb/bolt/boltsync_unix.go b/vendor/github.com/etcd-io/bbolt/boltsync_unix.go
index f50442523c..9587afefee 100644
--- a/vendor/github.com/boltdb/bolt/boltsync_unix.go
+++ b/vendor/github.com/etcd-io/bbolt/boltsync_unix.go
@@ -1,6 +1,6 @@
// +build !windows,!plan9,!linux,!openbsd
-package bolt
+package bbolt
// fdatasync flushes written data to a file descriptor.
func fdatasync(db *DB) error {
diff --git a/vendor/github.com/boltdb/bolt/bucket.go b/vendor/github.com/etcd-io/bbolt/bucket.go
index 0c5bf27463..84bfd4d6a2 100644
--- a/vendor/github.com/boltdb/bolt/bucket.go
+++ b/vendor/github.com/etcd-io/bbolt/bucket.go
@@ -1,4 +1,4 @@
-package bolt
+package bbolt
import (
"bytes"
@@ -14,13 +14,6 @@ const (
MaxValueSize = (1 << 31) - 2
)
-const (
- maxUint = ^uint(0)
- minUint = 0
- maxInt = int(^uint(0) >> 1)
- minInt = -maxInt - 1
-)
-
const bucketHeaderSize = int(unsafe.Sizeof(bucket{}))
const (
@@ -323,7 +316,12 @@ func (b *Bucket) Delete(key []byte) error {
// Move cursor to correct position.
c := b.Cursor()
- _, _, flags := c.seek(key)
+ k, _, flags := c.seek(key)
+
+ // Return nil if the key doesn't exist.
+ if !bytes.Equal(key, k) {
+ return nil
+ }
// Return an error if there is already existing bucket value.
if (flags & bucketLeafFlag) != 0 {
diff --git a/vendor/github.com/boltdb/bolt/cursor.go b/vendor/github.com/etcd-io/bbolt/cursor.go
index 1be9f35e3e..3000aced6c 100644
--- a/vendor/github.com/boltdb/bolt/cursor.go
+++ b/vendor/github.com/etcd-io/bbolt/cursor.go
@@ -1,4 +1,4 @@
-package bolt
+package bbolt
import (
"bytes"
@@ -157,12 +157,6 @@ func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) {
// Start from root page/node and traverse to correct page.
c.stack = c.stack[:0]
c.search(seek, c.bucket.root)
- ref := &c.stack[len(c.stack)-1]
-
- // If the cursor is pointing to the end of page/node then return nil.
- if ref.index >= ref.count() {
- return nil, nil, 0
- }
// If this is a bucket then return a nil value.
return c.keyValue()
@@ -339,6 +333,8 @@ func (c *Cursor) nsearch(key []byte) {
// keyValue returns the key and value of the current leaf element.
func (c *Cursor) keyValue() ([]byte, []byte, uint32) {
ref := &c.stack[len(c.stack)-1]
+
+ // If the cursor is pointing to the end of page/node then return nil.
if ref.count() == 0 || ref.index >= ref.count() {
return nil, nil, 0
}
diff --git a/vendor/github.com/boltdb/bolt/db.go b/vendor/github.com/etcd-io/bbolt/db.go
index f352ff14fe..962248c99f 100644
--- a/vendor/github.com/boltdb/bolt/db.go
+++ b/vendor/github.com/etcd-io/bbolt/db.go
@@ -1,4 +1,4 @@
-package bolt
+package bbolt
import (
"errors"
@@ -7,8 +7,7 @@ import (
"log"
"os"
"runtime"
- "runtime/debug"
- "strings"
+ "sort"
"sync"
"time"
"unsafe"
@@ -23,6 +22,8 @@ const version = 2
// Represents a marker value to indicate that a file is a Bolt DB.
const magic uint32 = 0xED0CDAED
+const pgidNoFreelist pgid = 0xffffffffffffffff
+
// IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
// syncing changes to a file. This is required as some operating systems,
// such as OpenBSD, do not have a unified buffer cache (UBC) and writes
@@ -39,6 +40,19 @@ const (
// default page size for db is set to the OS page size.
var defaultPageSize = os.Getpagesize()
+// The time elapsed between consecutive file locking attempts.
+const flockRetryTimeout = 50 * time.Millisecond
+
+// FreelistType is the type of the freelist backend
+type FreelistType string
+
+const (
+ // FreelistArrayType indicates backend freelist type is array
+ FreelistArrayType = FreelistType("array")
+ // FreelistMapType indicates backend freelist type is hashmap
+ FreelistMapType = FreelistType("hashmap")
+)
+
// DB represents a collection of buckets persisted to a file on disk.
// All data access is performed through transactions which can be obtained through the DB.
// All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called.
@@ -61,6 +75,18 @@ type DB struct {
// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
NoSync bool
+ // When true, skips syncing freelist to disk. This improves the database
+ // write performance under normal operation, but requires a full database
+ // re-sync during recovery.
+ NoFreelistSync bool
+
+ // FreelistType sets the backend freelist type. There are two options. Array which is simple but endures
+ // dramatic performance degradation if database is large and fragmentation in freelist is common.
+ // The alternative one is using hashmap, it is faster in almost all circumstances
+ // but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe.
+ // The default type is array
+ FreelistType FreelistType
+
// When true, skips the truncate call when growing the database.
// Setting this to true is only safe on non-ext3/ext4 systems.
// Skipping truncation avoids preallocation of hard drive space and
@@ -96,8 +122,7 @@ type DB struct {
path string
file *os.File
- lockfile *os.File // windows only
- dataref []byte // mmap'ed readonly, write throws SEGV
+ dataref []byte // mmap'ed readonly, write throws SEGV
data *[maxMapSize]byte
datasz int
filesz int // current on disk file size
@@ -107,9 +132,11 @@ type DB struct {
opened bool
rwtx *Tx
txs []*Tx
- freelist *freelist
stats Stats
+ freelist *freelist
+ freelistLoad sync.Once
+
pagePool sync.Pool
batchMu sync.Mutex
@@ -148,14 +175,18 @@ func (db *DB) String() string {
// If the file does not exist then it will be created automatically.
// Passing in nil options will cause Bolt to open the database with the default options.
func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
- var db = &DB{opened: true}
-
+ db := &DB{
+ opened: true,
+ }
// Set default options if no options are provided.
if options == nil {
options = DefaultOptions
}
+ db.NoSync = options.NoSync
db.NoGrowSync = options.NoGrowSync
db.MmapFlags = options.MmapFlags
+ db.NoFreelistSync = options.NoFreelistSync
+ db.FreelistType = options.FreelistType
// Set default values for later DB operations.
db.MaxBatchSize = DefaultMaxBatchSize
@@ -183,7 +214,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
// if !options.ReadOnly.
// The database file is locked using the shared lock (more than one process may
// hold a lock at the same time) otherwise (options.ReadOnly is set).
- if err := flock(db, mode, !db.readOnly, options.Timeout); err != nil {
+ if err := flock(db, !db.readOnly, options.Timeout); err != nil {
_ = db.close()
return nil, err
}
@@ -191,31 +222,41 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
// Default values for test hooks
db.ops.writeAt = db.file.WriteAt
+ if db.pageSize = options.PageSize; db.pageSize == 0 {
+ // Set the default page size to the OS page size.
+ db.pageSize = defaultPageSize
+ }
+
// Initialize the database if it doesn't exist.
if info, err := db.file.Stat(); err != nil {
+ _ = db.close()
return nil, err
} else if info.Size() == 0 {
// Initialize new files with meta pages.
if err := db.init(); err != nil {
+ // clean up file descriptor on initialization fail
+ _ = db.close()
return nil, err
}
} else {
// Read the first meta page to determine the page size.
var buf [0x1000]byte
- if _, err := db.file.ReadAt(buf[:], 0); err == nil {
- m := db.pageInBuffer(buf[:], 0).meta()
- if err := m.validate(); err != nil {
- // If we can't read the page size, we can assume it's the same
- // as the OS -- since that's how the page size was chosen in the
- // first place.
- //
- // If the first page is invalid and this OS uses a different
- // page size than what the database was created with then we
- // are out of luck and cannot access the database.
- db.pageSize = os.Getpagesize()
- } else {
+ // If we can't read the page size, but can read a page, assume
+ // it's the same as the OS or one given -- since that's how the
+ // page size was chosen in the first place.
+ //
+ // If the first page is invalid and this OS uses a different
+ // page size than what the database was created with then we
+ // are out of luck and cannot access the database.
+ //
+ // TODO: scan for next page
+ if bw, err := db.file.ReadAt(buf[:], 0); err == nil && bw == len(buf) {
+ if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil {
db.pageSize = int(m.pageSize)
}
+ } else {
+ _ = db.close()
+ return nil, ErrInvalid
}
}
@@ -232,14 +273,50 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
return nil, err
}
- // Read in the freelist.
- db.freelist = newFreelist()
- db.freelist.read(db.page(db.meta().freelist))
+ if db.readOnly {
+ return db, nil
+ }
+
+ db.loadFreelist()
+
+ // Flush freelist when transitioning from no sync to sync so
+ // NoFreelistSync unaware boltdb can open the db later.
+ if !db.NoFreelistSync && !db.hasSyncedFreelist() {
+ tx, err := db.Begin(true)
+ if tx != nil {
+ err = tx.Commit()
+ }
+ if err != nil {
+ _ = db.close()
+ return nil, err
+ }
+ }
// Mark the database as opened and return.
return db, nil
}
+// loadFreelist reads the freelist if it is synced, or reconstructs it
+// by scanning the DB if it is not synced. It assumes there are no
+// concurrent accesses being made to the freelist.
+func (db *DB) loadFreelist() {
+ db.freelistLoad.Do(func() {
+ db.freelist = newFreelist(db.FreelistType)
+ if !db.hasSyncedFreelist() {
+ // Reconstruct free list by scanning the DB.
+ db.freelist.readIDs(db.freepages())
+ } else {
+ // Read free list from freelist page.
+ db.freelist.read(db.page(db.meta().freelist))
+ }
+ db.stats.FreePageN = db.freelist.free_count()
+ })
+}
+
+func (db *DB) hasSyncedFreelist() bool {
+ return db.meta().freelist != pgidNoFreelist
+}
+
// mmap opens the underlying memory-mapped file and initializes the meta references.
// minsz is the minimum size that the new mmap can be.
func (db *DB) mmap(minsz int) error {
@@ -341,9 +418,6 @@ func (db *DB) mmapSize(size int) (int, error) {
// init creates a new database file and initializes its meta pages.
func (db *DB) init() error {
- // Set the page size to the OS page size.
- db.pageSize = os.Getpagesize()
-
// Create two meta pages on a buffer.
buf := make([]byte, db.pageSize*4)
for i := 0; i < 2; i++ {
@@ -387,7 +461,8 @@ func (db *DB) init() error {
}
// Close releases all database resources.
-// All transactions must be closed before closing the database.
+// It will block waiting for any open transactions to finish
+// before closing the database and returning.
func (db *DB) Close() error {
db.rwlock.Lock()
defer db.rwlock.Unlock()
@@ -395,8 +470,8 @@ func (db *DB) Close() error {
db.metalock.Lock()
defer db.metalock.Unlock()
- db.mmaplock.RLock()
- defer db.mmaplock.RUnlock()
+ db.mmaplock.Lock()
+ defer db.mmaplock.Unlock()
return db.close()
}
@@ -526,21 +601,36 @@ func (db *DB) beginRWTx() (*Tx, error) {
t := &Tx{writable: true}
t.init(db)
db.rwtx = t
+ db.freePages()
+ return t, nil
+}
- // Free any pages associated with closed read-only transactions.
- var minid txid = 0xFFFFFFFFFFFFFFFF
- for _, t := range db.txs {
- if t.meta.txid < minid {
- minid = t.meta.txid
- }
+// freePages releases any pages associated with closed read-only transactions.
+func (db *DB) freePages() {
+ // Free all pending pages prior to earliest open transaction.
+ sort.Sort(txsById(db.txs))
+ minid := txid(0xFFFFFFFFFFFFFFFF)
+ if len(db.txs) > 0 {
+ minid = db.txs[0].meta.txid
}
if minid > 0 {
db.freelist.release(minid - 1)
}
-
- return t, nil
+ // Release unused txid extents.
+ for _, t := range db.txs {
+ db.freelist.releaseRange(minid, t.meta.txid-1)
+ minid = t.meta.txid + 1
+ }
+ db.freelist.releaseRange(minid, txid(0xFFFFFFFFFFFFFFFF))
+ // Any page both allocated and freed in an extent is safe to release.
}
+type txsById []*Tx
+
+func (t txsById) Len() int { return len(t) }
+func (t txsById) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
+func (t txsById) Less(i, j int) bool { return t[i].meta.txid < t[j].meta.txid }
+
// removeTx removes a transaction from the database.
func (db *DB) removeTx(tx *Tx) {
// Release the read lock on the mmap.
@@ -633,11 +723,7 @@ func (db *DB) View(fn func(*Tx) error) error {
return err
}
- if err := t.Rollback(); err != nil {
- return err
- }
-
- return nil
+ return t.Rollback()
}
// Batch calls fn as part of a batch. It behaves similar to Update,
@@ -737,9 +823,7 @@ retry:
// pass success, or bolt internal errors, to all callers
for _, c := range b.calls {
- if c.err != nil {
- c.err <- err
- }
+ c.err <- err
}
break retry
}
@@ -826,7 +910,7 @@ func (db *DB) meta() *meta {
}
// allocate returns a contiguous block of memory starting at a given page.
-func (db *DB) allocate(count int) (*page, error) {
+func (db *DB) allocate(txid txid, count int) (*page, error) {
// Allocate a temporary buffer for the page.
var buf []byte
if count == 1 {
@@ -838,7 +922,7 @@ func (db *DB) allocate(count int) (*page, error) {
p.overflow = uint32(count - 1)
// Use pages from the freelist if they are available.
- if p.id = db.freelist.allocate(count); p.id != 0 {
+ if p.id = db.freelist.allocate(txid, count); p.id != 0 {
return p, nil
}
@@ -893,6 +977,38 @@ func (db *DB) IsReadOnly() bool {
return db.readOnly
}
+func (db *DB) freepages() []pgid {
+ tx, err := db.beginTx()
+ defer func() {
+ err = tx.Rollback()
+ if err != nil {
+ panic("freepages: failed to rollback tx")
+ }
+ }()
+ if err != nil {
+ panic("freepages: failed to open read only tx")
+ }
+
+ reachable := make(map[pgid]*page)
+ nofreed := make(map[pgid]bool)
+ ech := make(chan error)
+ go func() {
+ for e := range ech {
+ panic(fmt.Sprintf("freepages: failed to get all reachable pages (%v)", e))
+ }
+ }()
+ tx.checkBucket(&tx.root, reachable, nofreed, ech)
+ close(ech)
+
+ var fids []pgid
+ for i := pgid(2); i < db.meta().pgid; i++ {
+ if _, ok := reachable[i]; !ok {
+ fids = append(fids, i)
+ }
+ }
+ return fids
+}
+
// Options represents the options that can be set when opening a database.
type Options struct {
// Timeout is the amount of time to wait to obtain a file lock.
@@ -903,6 +1019,17 @@ type Options struct {
// Sets the DB.NoGrowSync flag before memory mapping the file.
NoGrowSync bool
+ // Do not sync freelist to disk. This improves the database write performance
+ // under normal operation, but requires a full database re-sync during recovery.
+ NoFreelistSync bool
+
+ // FreelistType sets the backend freelist type. There are two options. Array which is simple but endures
+ // dramatic performance degradation if database is large and fragmentation in freelist is common.
+ // The alternative one is using hashmap, it is faster in almost all circumstances
+ // but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe.
+ // The default type is array
+ FreelistType FreelistType
+
// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
// grab a shared lock (UNIX).
ReadOnly bool
@@ -919,13 +1046,22 @@ type Options struct {
// If initialMmapSize is smaller than the previous database size,
// it takes no effect.
InitialMmapSize int
+
+ // PageSize overrides the default OS page size.
+ PageSize int
+
+ // NoSync sets the initial value of DB.NoSync. Normally this can just be
+ // set directly on the DB itself when returned from Open(), but this option
+ // is useful in APIs which expose Options but not the underlying DB.
+ NoSync bool
}
// DefaultOptions represent the options used if nil options are passed into Open().
// No timeout is used which will cause Bolt to wait indefinitely for a lock.
var DefaultOptions = &Options{
- Timeout: 0,
- NoGrowSync: false,
+ Timeout: 0,
+ NoGrowSync: false,
+ FreelistType: FreelistArrayType,
}
// Stats represents statistics about the database.
@@ -960,10 +1096,6 @@ func (s *Stats) Sub(other *Stats) Stats {
return diff
}
-func (s *Stats) add(other *Stats) {
- s.TxStats.add(&other.TxStats)
-}
-
type Info struct {
Data uintptr
PageSize int
@@ -1002,7 +1134,8 @@ func (m *meta) copy(dest *meta) {
func (m *meta) write(p *page) {
if m.root.root >= m.pgid {
panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
- } else if m.freelist >= m.pgid {
+ } else if m.freelist >= m.pgid && m.freelist != pgidNoFreelist {
+ // TODO: reject pgidNoFreeList if !NoFreelistSync
panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
}
@@ -1029,11 +1162,3 @@ func _assert(condition bool, msg string, v ...interface{}) {
panic(fmt.Sprintf("assertion failed: "+msg, v...))
}
}
-
-func warn(v ...interface{}) { fmt.Fprintln(os.Stderr, v...) }
-func warnf(msg string, v ...interface{}) { fmt.Fprintf(os.Stderr, msg+"\n", v...) }
-
-func printstack() {
- stack := strings.Join(strings.Split(string(debug.Stack()), "\n")[2:], "\n")
- fmt.Fprintln(os.Stderr, stack)
-}
diff --git a/vendor/github.com/boltdb/bolt/doc.go b/vendor/github.com/etcd-io/bbolt/doc.go
index cc937845db..95f25f01c6 100644
--- a/vendor/github.com/boltdb/bolt/doc.go
+++ b/vendor/github.com/etcd-io/bbolt/doc.go
@@ -1,5 +1,5 @@
/*
-Package bolt implements a low-level key/value store in pure Go. It supports
+package bbolt implements a low-level key/value store in pure Go. It supports
fully serializable transactions, ACID semantics, and lock-free MVCC with
multiple readers and a single writer. Bolt can be used for projects that
want a simple data store without the need to add large dependencies such as
@@ -41,4 +41,4 @@ point to different data or can point to invalid memory which will cause a panic.
*/
-package bolt
+package bbolt
diff --git a/vendor/github.com/boltdb/bolt/errors.go b/vendor/github.com/etcd-io/bbolt/errors.go
index a3620a3ebb..48758ca577 100644
--- a/vendor/github.com/boltdb/bolt/errors.go
+++ b/vendor/github.com/etcd-io/bbolt/errors.go
@@ -1,4 +1,4 @@
-package bolt
+package bbolt
import "errors"
diff --git a/vendor/github.com/etcd-io/bbolt/freelist.go b/vendor/github.com/etcd-io/bbolt/freelist.go
new file mode 100644
index 0000000000..93fd85d504
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/freelist.go
@@ -0,0 +1,370 @@
+package bbolt
+
+import (
+ "fmt"
+ "sort"
+ "unsafe"
+)
+
+// txPending holds a list of pgids and corresponding allocation txns
+// that are pending to be freed.
+type txPending struct {
+ ids []pgid
+ alloctx []txid // txids allocating the ids
+ lastReleaseBegin txid // beginning txid of last matching releaseRange
+}
+
+// pidSet holds the set of starting pgids which have the same span size
+type pidSet map[pgid]struct{}
+
+// freelist represents a list of all pages that are available for allocation.
+// It also tracks pages that have been freed but are still in use by open transactions.
+type freelist struct {
+ freelistType FreelistType // freelist type
+ ids []pgid // all free and available free page ids.
+ allocs map[pgid]txid // mapping of txid that allocated a pgid.
+ pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
+ cache map[pgid]bool // fast lookup of all free and pending page ids.
+ freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size
+ forwardMap map[pgid]uint64 // key is start pgid, value is its span size
+ backwardMap map[pgid]uint64 // key is end pgid, value is its span size
+ allocate func(txid txid, n int) pgid // the freelist allocate func
+ free_count func() int // the function which gives you free page number
+ mergeSpans func(ids pgids) // the mergeSpan func
+ getFreePageIDs func() []pgid // get free pgids func
+ readIDs func(pgids []pgid) // readIDs func reads list of pages and init the freelist
+}
+
+// newFreelist returns an empty, initialized freelist.
+func newFreelist(freelistType FreelistType) *freelist {
+ f := &freelist{
+ freelistType: freelistType,
+ allocs: make(map[pgid]txid),
+ pending: make(map[txid]*txPending),
+ cache: make(map[pgid]bool),
+ freemaps: make(map[uint64]pidSet),
+ forwardMap: make(map[pgid]uint64),
+ backwardMap: make(map[pgid]uint64),
+ }
+
+ if freelistType == FreelistMapType {
+ f.allocate = f.hashmapAllocate
+ f.free_count = f.hashmapFreeCount
+ f.mergeSpans = f.hashmapMergeSpans
+ f.getFreePageIDs = f.hashmapGetFreePageIDs
+ f.readIDs = f.hashmapReadIDs
+ } else {
+ f.allocate = f.arrayAllocate
+ f.free_count = f.arrayFreeCount
+ f.mergeSpans = f.arrayMergeSpans
+ f.getFreePageIDs = f.arrayGetFreePageIDs
+ f.readIDs = f.arrayReadIDs
+ }
+
+ return f
+}
+
+// size returns the size of the page after serialization.
+func (f *freelist) size() int {
+ n := f.count()
+ if n >= 0xFFFF {
+ // The first element will be used to store the count. See freelist.write.
+ n++
+ }
+ return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n)
+}
+
+// count returns count of pages on the freelist
+func (f *freelist) count() int {
+ return f.free_count() + f.pending_count()
+}
+
+// arrayFreeCount returns count of free pages(array version)
+func (f *freelist) arrayFreeCount() int {
+ return len(f.ids)
+}
+
+// pending_count returns count of pending pages
+func (f *freelist) pending_count() int {
+ var count int
+ for _, txp := range f.pending {
+ count += len(txp.ids)
+ }
+ return count
+}
+
+// copyall copies into dst a list of all free ids and all pending ids in one sorted list.
+// f.count returns the minimum length required for dst.
+func (f *freelist) copyall(dst []pgid) {
+ m := make(pgids, 0, f.pending_count())
+ for _, txp := range f.pending {
+ m = append(m, txp.ids...)
+ }
+ sort.Sort(m)
+ mergepgids(dst, f.getFreePageIDs(), m)
+}
+
+// arrayAllocate returns the starting page id of a contiguous list of pages of a given size.
+// If a contiguous block cannot be found then 0 is returned.
+func (f *freelist) arrayAllocate(txid txid, n int) pgid {
+ if len(f.ids) == 0 {
+ return 0
+ }
+
+ var initial, previd pgid
+ for i, id := range f.ids {
+ if id <= 1 {
+ panic(fmt.Sprintf("invalid page allocation: %d", id))
+ }
+
+ // Reset initial page if this is not contiguous.
+ if previd == 0 || id-previd != 1 {
+ initial = id
+ }
+
+ // If we found a contiguous block then remove it and return it.
+ if (id-initial)+1 == pgid(n) {
+ // If we're allocating off the beginning then take the fast path
+ // and just adjust the existing slice. This will use extra memory
+ // temporarily but the append() in free() will realloc the slice
+ // as is necessary.
+ if (i + 1) == n {
+ f.ids = f.ids[i+1:]
+ } else {
+ copy(f.ids[i-n+1:], f.ids[i+1:])
+ f.ids = f.ids[:len(f.ids)-n]
+ }
+
+ // Remove from the free cache.
+ for i := pgid(0); i < pgid(n); i++ {
+ delete(f.cache, initial+i)
+ }
+ f.allocs[initial] = txid
+ return initial
+ }
+
+ previd = id
+ }
+ return 0
+}
+
+// free releases a page and its overflow for a given transaction id.
+// If the page is already free then a panic will occur.
+func (f *freelist) free(txid txid, p *page) {
+ if p.id <= 1 {
+ panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
+ }
+
+ // Free page and all its overflow pages.
+ txp := f.pending[txid]
+ if txp == nil {
+ txp = &txPending{}
+ f.pending[txid] = txp
+ }
+ allocTxid, ok := f.allocs[p.id]
+ if ok {
+ delete(f.allocs, p.id)
+ } else if (p.flags & freelistPageFlag) != 0 {
+ // Freelist is always allocated by prior tx.
+ allocTxid = txid - 1
+ }
+
+ for id := p.id; id <= p.id+pgid(p.overflow); id++ {
+ // Verify that page is not already free.
+ if f.cache[id] {
+ panic(fmt.Sprintf("page %d already freed", id))
+ }
+ // Add to the freelist and cache.
+ txp.ids = append(txp.ids, id)
+ txp.alloctx = append(txp.alloctx, allocTxid)
+ f.cache[id] = true
+ }
+}
+
+// release moves all page ids for a transaction id (or older) to the freelist.
+func (f *freelist) release(txid txid) {
+ m := make(pgids, 0)
+ for tid, txp := range f.pending {
+ if tid <= txid {
+ // Move transaction's pending pages to the available freelist.
+ // Don't remove from the cache since the page is still free.
+ m = append(m, txp.ids...)
+ delete(f.pending, tid)
+ }
+ }
+ f.mergeSpans(m)
+}
+
+// releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
+func (f *freelist) releaseRange(begin, end txid) {
+ if begin > end {
+ return
+ }
+ var m pgids
+ for tid, txp := range f.pending {
+ if tid < begin || tid > end {
+ continue
+ }
+ // Don't recompute freed pages if ranges haven't updated.
+ if txp.lastReleaseBegin == begin {
+ continue
+ }
+ for i := 0; i < len(txp.ids); i++ {
+ if atx := txp.alloctx[i]; atx < begin || atx > end {
+ continue
+ }
+ m = append(m, txp.ids[i])
+ txp.ids[i] = txp.ids[len(txp.ids)-1]
+ txp.ids = txp.ids[:len(txp.ids)-1]
+ txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1]
+ txp.alloctx = txp.alloctx[:len(txp.alloctx)-1]
+ i--
+ }
+ txp.lastReleaseBegin = begin
+ if len(txp.ids) == 0 {
+ delete(f.pending, tid)
+ }
+ }
+ f.mergeSpans(m)
+}
+
+// rollback removes the pages from a given pending tx.
+func (f *freelist) rollback(txid txid) {
+ // Remove page ids from cache.
+ txp := f.pending[txid]
+ if txp == nil {
+ return
+ }
+ var m pgids
+ for i, pgid := range txp.ids {
+ delete(f.cache, pgid)
+ tx := txp.alloctx[i]
+ if tx == 0 {
+ continue
+ }
+ if tx != txid {
+ // Pending free aborted; restore page back to alloc list.
+ f.allocs[pgid] = tx
+ } else {
+ // Freed page was allocated by this txn; OK to throw away.
+ m = append(m, pgid)
+ }
+ }
+ // Remove pages from pending list and mark as free if allocated by txid.
+ delete(f.pending, txid)
+ f.mergeSpans(m)
+}
+
+// freed returns whether a given page is in the free list.
+func (f *freelist) freed(pgid pgid) bool {
+ return f.cache[pgid]
+}
+
+// read initializes the freelist from a freelist page.
+func (f *freelist) read(p *page) {
+ if (p.flags & freelistPageFlag) == 0 {
+ panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ()))
+ }
+ // If the page.count is at the max uint16 value (64k) then it's considered
+ // an overflow and the size of the freelist is stored as the first element.
+ idx, count := 0, int(p.count)
+ if count == 0xFFFF {
+ idx = 1
+ count = int(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0])
+ }
+
+ // Copy the list of page ids from the freelist.
+ if count == 0 {
+ f.ids = nil
+ } else {
+ ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx : idx+count]
+
+ // copy the ids, so we don't modify on the freelist page directly
+ idsCopy := make([]pgid, count)
+ copy(idsCopy, ids)
+ // Make sure they're sorted.
+ sort.Sort(pgids(idsCopy))
+
+ f.readIDs(idsCopy)
+ }
+}
+
+// arrayReadIDs initializes the freelist from a given list of ids.
+func (f *freelist) arrayReadIDs(ids []pgid) {
+ f.ids = ids
+ f.reindex()
+}
+
+func (f *freelist) arrayGetFreePageIDs() []pgid {
+ return f.ids
+}
+
+// write writes the page ids onto a freelist page. All free and pending ids are
+// saved to disk since in the event of a program crash, all pending ids will
+// become free.
+func (f *freelist) write(p *page) error {
+ // Combine the old free pgids and pgids waiting on an open transaction.
+
+ // Update the header flag.
+ p.flags |= freelistPageFlag
+
+ // The page.count can only hold up to 64k elements so if we overflow that
+ // number then we handle it by putting the size in the first element.
+ lenids := f.count()
+ if lenids == 0 {
+ p.count = uint16(lenids)
+ } else if lenids < 0xFFFF {
+ p.count = uint16(lenids)
+ f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:])
+ } else {
+ p.count = 0xFFFF
+ ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids)
+ f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:])
+ }
+
+ return nil
+}
+
+// reload reads the freelist from a page and filters out pending items.
+func (f *freelist) reload(p *page) {
+ f.read(p)
+
+ // Build a cache of only pending pages.
+ pcache := make(map[pgid]bool)
+ for _, txp := range f.pending {
+ for _, pendingID := range txp.ids {
+ pcache[pendingID] = true
+ }
+ }
+
+ // Check each page in the freelist and build a new available freelist
+ // with any pages not in the pending lists.
+ var a []pgid
+ for _, id := range f.getFreePageIDs() {
+ if !pcache[id] {
+ a = append(a, id)
+ }
+ }
+
+ f.readIDs(a)
+}
+
+// reindex rebuilds the free cache based on available and pending free lists.
+func (f *freelist) reindex() {
+ ids := f.getFreePageIDs()
+ f.cache = make(map[pgid]bool, len(ids))
+ for _, id := range ids {
+ f.cache[id] = true
+ }
+ for _, txp := range f.pending {
+ for _, pendingID := range txp.ids {
+ f.cache[pendingID] = true
+ }
+ }
+}
+
+// arrayMergeSpans try to merge list of pages(represented by pgids) with existing spans but using array
+func (f *freelist) arrayMergeSpans(ids pgids) {
+ sort.Sort(ids)
+ f.ids = pgids(f.ids).merge(ids)
+}
diff --git a/vendor/github.com/etcd-io/bbolt/freelist_hmap.go b/vendor/github.com/etcd-io/bbolt/freelist_hmap.go
new file mode 100644
index 0000000000..6a03a6c3c8
--- /dev/null
+++ b/vendor/github.com/etcd-io/bbolt/freelist_hmap.go
@@ -0,0 +1,178 @@
+package bbolt
+
+import "sort"
+
+// hashmapFreeCount returns count of free pages(hashmap version)
+func (f *freelist) hashmapFreeCount() int {
+ // use the forwardmap to get the total count
+ count := 0
+ for _, size := range f.forwardMap {
+ count += int(size)
+ }
+ return count
+}
+
+// hashmapAllocate serves the same purpose as arrayAllocate, but use hashmap as backend
+func (f *freelist) hashmapAllocate(txid txid, n int) pgid {
+ if n == 0 {
+ return 0
+ }
+
+ // if we have a exact size match just return short path
+ if bm, ok := f.freemaps[uint64(n)]; ok {
+ for pid := range bm {
+ // remove the span
+ f.delSpan(pid, uint64(n))
+
+ f.allocs[pid] = txid
+
+ for i := pgid(0); i < pgid(n); i++ {
+ delete(f.cache, pid+pgid(i))
+ }
+ return pid
+ }
+ }
+
+ // lookup the map to find larger span
+ for size, bm := range f.freemaps {
+ if size < uint64(n) {
+ continue
+ }
+
+ for pid := range bm {
+ // remove the initial
+ f.delSpan(pid, uint64(size))
+
+ f.allocs[pid] = txid
+
+ remain := size - uint64(n)
+
+ // add remain span
+ f.addSpan(pid+pgid(n), remain)
+
+ for i := pgid(0); i < pgid(n); i++ {
+ delete(f.cache, pid+pgid(i))
+ }
+ return pid
+ }
+ }
+
+ return 0
+}
+
+// hashmapReadIDs reads pgids as input an initial the freelist(hashmap version)
+func (f *freelist) hashmapReadIDs(pgids []pgid) {
+ f.init(pgids)
+
+ // Rebuild the page cache.
+ f.reindex()
+}
+
+// hashmapGetFreePageIDs returns the sorted free page ids
+func (f *freelist) hashmapGetFreePageIDs() []pgid {
+ count := f.free_count()
+ if count == 0 {
+ return nil
+ }
+
+ m := make([]pgid, 0, count)
+ for start, size := range f.forwardMap {
+ for i := 0; i < int(size); i++ {
+ m = append(m, start+pgid(i))
+ }
+ }
+ sort.Sort(pgids(m))
+
+ return m
+}
+
+// hashmapMergeSpans try to merge list of pages(represented by pgids) with existing spans
+func (f *freelist) hashmapMergeSpans(ids pgids) {
+ for _, id := range ids {
+ // try to see if we can merge and update
+ f.mergeWithExistingSpan(id)
+ }
+}
+
+// mergeWithExistingSpan merges pid to the existing free spans, try to merge it backward and forward
+func (f *freelist) mergeWithExistingSpan(pid pgid) {
+ prev := pid - 1
+ next := pid + 1
+
+ preSize, mergeWithPrev := f.backwardMap[prev]
+ nextSize, mergeWithNext := f.forwardMap[next]
+ newStart := pid
+ newSize := uint64(1)
+
+ if mergeWithPrev {
+ //merge with previous span
+ start := prev + 1 - pgid(preSize)
+ f.delSpan(start, preSize)
+
+ newStart -= pgid(preSize)
+ newSize += preSize
+ }
+
+ if mergeWithNext {
+ // merge with next span
+ f.delSpan(next, nextSize)
+ newSize += nextSize
+ }
+
+ f.addSpan(newStart, newSize)
+}
+
+func (f *freelist) addSpan(start pgid, size uint64) {
+ f.backwardMap[start-1+pgid(size)] = size
+ f.forwardMap[start] = size
+ if _, ok := f.freemaps[size]; !ok {
+ f.freemaps[size] = make(map[pgid]struct{})
+ }
+
+ f.freemaps[size][start] = struct{}{}
+}
+
+func (f *freelist) delSpan(start pgid, size uint64) {
+ delete(f.forwardMap, start)
+ delete(f.backwardMap, start+pgid(size-1))
+ delete(f.freemaps[size], start)
+ if len(f.freemaps[size]) == 0 {
+ delete(f.freemaps, size)
+ }
+}
+
+// initial from pgids using when use hashmap version
+// pgids must be sorted
+func (f *freelist) init(pgids []pgid) {
+ if len(pgids) == 0 {
+ return
+ }
+
+ size := uint64(1)
+ start := pgids[0]
+
+ if !sort.SliceIsSorted([]pgid(pgids), func(i, j int) bool { return pgids[i] < pgids[j] }) {
+ panic("pgids not sorted")
+ }
+
+ f.freemaps = make(map[uint64]pidSet)
+ f.forwardMap = make(map[pgid]uint64)
+ f.backwardMap = make(map[pgid]uint64)
+
+ for i := 1; i < len(pgids); i++ {
+ // continuous page
+ if pgids[i] == pgids[i-1]+1 {
+ size++
+ } else {
+ f.addSpan(start, size)
+
+ size = 1
+ start = pgids[i]
+ }
+ }
+
+ // init the tail
+ if size != 0 && start != 0 {
+ f.addSpan(start, size)
+ }
+}
diff --git a/vendor/github.com/boltdb/bolt/node.go b/vendor/github.com/etcd-io/bbolt/node.go
index 159318b229..6c3fa553ea 100644
--- a/vendor/github.com/boltdb/bolt/node.go
+++ b/vendor/github.com/etcd-io/bbolt/node.go
@@ -1,4 +1,4 @@
-package bolt
+package bbolt
import (
"bytes"
@@ -365,7 +365,7 @@ func (n *node) spill() error {
}
// Allocate contiguous space for the node.
- p, err := tx.allocate((node.size() / tx.db.pageSize) + 1)
+ p, err := tx.allocate((node.size() + tx.db.pageSize - 1) / tx.db.pageSize)
if err != nil {
return err
}
diff --git a/vendor/github.com/boltdb/bolt/page.go b/vendor/github.com/etcd-io/bbolt/page.go
index cde403ae86..bca9615f0f 100644
--- a/vendor/github.com/boltdb/bolt/page.go
+++ b/vendor/github.com/etcd-io/bbolt/page.go
@@ -1,4 +1,4 @@
-package bolt
+package bbolt
import (
"fmt"
diff --git a/vendor/github.com/boltdb/bolt/tx.go b/vendor/github.com/etcd-io/bbolt/tx.go
index 6700308a29..f508641427 100644
--- a/vendor/github.com/boltdb/bolt/tx.go
+++ b/vendor/github.com/etcd-io/bbolt/tx.go
@@ -1,4 +1,4 @@
-package bolt
+package bbolt
import (
"fmt"
@@ -126,10 +126,7 @@ func (tx *Tx) DeleteBucket(name []byte) error {
// the error is returned to the caller.
func (tx *Tx) ForEach(fn func(name []byte, b *Bucket) error) error {
return tx.root.ForEach(func(k, v []byte) error {
- if err := fn(k, tx.root.Bucket(k)); err != nil {
- return err
- }
- return nil
+ return fn(k, tx.root.Bucket(k))
})
}
@@ -169,28 +166,18 @@ func (tx *Tx) Commit() error {
// Free the old root bucket.
tx.meta.root.root = tx.root.root
- opgid := tx.meta.pgid
-
- // Free the freelist and allocate new pages for it. This will overestimate
- // the size of the freelist but not underestimate the size (which would be bad).
- tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
- p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1)
- if err != nil {
- tx.rollback()
- return err
- }
- if err := tx.db.freelist.write(p); err != nil {
- tx.rollback()
- return err
+ // Free the old freelist because commit writes out a fresh freelist.
+ if tx.meta.freelist != pgidNoFreelist {
+ tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
}
- tx.meta.freelist = p.id
- // If the high water mark has moved up then attempt to grow the database.
- if tx.meta.pgid > opgid {
- if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
- tx.rollback()
+ if !tx.db.NoFreelistSync {
+ err := tx.commitFreelist()
+ if err != nil {
return err
}
+ } else {
+ tx.meta.freelist = pgidNoFreelist
}
// Write dirty pages to disk.
@@ -235,6 +222,31 @@ func (tx *Tx) Commit() error {
return nil
}
+func (tx *Tx) commitFreelist() error {
+ // Allocate new pages for the new free list. This will overestimate
+ // the size of the freelist but not underestimate the size (which would be bad).
+ opgid := tx.meta.pgid
+ p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1)
+ if err != nil {
+ tx.rollback()
+ return err
+ }
+ if err := tx.db.freelist.write(p); err != nil {
+ tx.rollback()
+ return err
+ }
+ tx.meta.freelist = p.id
+ // If the high water mark has moved up then attempt to grow the database.
+ if tx.meta.pgid > opgid {
+ if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
+ tx.rollback()
+ return err
+ }
+ }
+
+ return nil
+}
+
// Rollback closes the transaction and ignores all previous updates. Read-only
// transactions must be rolled back and not committed.
func (tx *Tx) Rollback() error {
@@ -291,7 +303,9 @@ func (tx *Tx) close() {
}
// Copy writes the entire database to a writer.
-// This function exists for backwards compatibility. Use WriteTo() instead.
+// This function exists for backwards compatibility.
+//
+// Deprecated; Use WriteTo() instead.
func (tx *Tx) Copy(w io.Writer) error {
_, err := tx.WriteTo(w)
return err
@@ -305,7 +319,11 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
if err != nil {
return 0, err
}
- defer func() { _ = f.Close() }()
+ defer func() {
+ if cerr := f.Close(); err == nil {
+ err = cerr
+ }
+ }()
// Generate a meta page. We use the same page data for both meta pages.
buf := make([]byte, tx.db.pageSize)
@@ -333,7 +351,7 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
}
// Move past the meta pages in the file.
- if _, err := f.Seek(int64(tx.db.pageSize*2), os.SEEK_SET); err != nil {
+ if _, err := f.Seek(int64(tx.db.pageSize*2), io.SeekStart); err != nil {
return n, fmt.Errorf("seek: %s", err)
}
@@ -344,7 +362,7 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
return n, err
}
- return n, f.Close()
+ return n, nil
}
// CopyFile copies the entire database to file at the given path.
@@ -379,6 +397,9 @@ func (tx *Tx) Check() <-chan error {
}
func (tx *Tx) check(ch chan error) {
+ // Force loading free list if opened in ReadOnly mode.
+ tx.db.loadFreelist()
+
// Check if any pages are double freed.
freed := make(map[pgid]bool)
all := make([]pgid, tx.db.freelist.count())
@@ -394,8 +415,10 @@ func (tx *Tx) check(ch chan error) {
reachable := make(map[pgid]*page)
reachable[0] = tx.page(0) // meta0
reachable[1] = tx.page(1) // meta1
- for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ {
- reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist)
+ if tx.meta.freelist != pgidNoFreelist {
+ for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ {
+ reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist)
+ }
}
// Recursively check buckets.
@@ -453,7 +476,7 @@ func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bo
// allocate returns a contiguous block of memory starting at a given page.
func (tx *Tx) allocate(count int) (*page, error) {
- p, err := tx.db.allocate(count)
+ p, err := tx.db.allocate(tx.meta.txid, count)
if err != nil {
return nil, err
}
@@ -462,7 +485,7 @@ func (tx *Tx) allocate(count int) (*page, error) {
tx.pages[p.id] = p
// Update statistics.
- tx.stats.PageCount++
+ tx.stats.PageCount += count
tx.stats.PageAlloc += count * tx.db.pageSize
return p, nil