123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271 |
- // Copyright (c) 2014 Couchbase, Inc.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
-
- package searcher
-
- import (
- "fmt"
- "math"
- "sort"
-
- "github.com/blevesearch/bleve/index"
- "github.com/blevesearch/bleve/search"
- "github.com/blevesearch/bleve/search/scorer"
- )
-
// DisjunctionMaxClauseCount is a package-level setting that applications may
// change to a non-zero value. When it is non-zero, constructing a
// DisjunctionSearcher with more child searchers than this value returns an
// error instead of executing the search. The default of zero disables the
// check.
var DisjunctionMaxClauseCount int
-
// DisjunctionSearcher combines several child searchers and yields a document
// when at least `min` of them match it.
type DisjunctionSearcher struct {
	// indexReader is the reader handed to the constructor.
	indexReader index.IndexReader
	// searchers holds the child searchers, sorted at construction time.
	searchers OrderedSearcherList
	// numSearchers caches len(searchers); it is passed to the scorer.
	numSearchers int
	// queryNorm is 1/sqrt(sum of child weights), set by computeQueryNorm.
	queryNorm float64
	// currs[i] is the match most recently returned by searchers[i]; nil
	// entries are ignored when selecting the current matches.
	currs []*search.DocumentMatch
	// scorer turns the set of matching children into a single DocumentMatch.
	scorer *scorer.DisjunctionQueryScorer
	// min is the number of children that must match for a hit to be returned.
	min int
	// matching is the subset of currs sharing the lowest IndexInternalID.
	matching []*search.DocumentMatch
	// matchingIdxs are the positions in currs/searchers of the entries in
	// matching.
	matchingIdxs []int
	// initialized becomes true once initSearchers has completed successfully.
	initialized bool
}
-
- func tooManyClauses(count int) bool {
- if DisjunctionMaxClauseCount != 0 && count > DisjunctionMaxClauseCount {
- return true
- }
- return false
- }
-
- func tooManyClausesErr() error {
- return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]",
- DisjunctionMaxClauseCount)
- }
-
- func NewDisjunctionSearcher(indexReader index.IndexReader,
- qsearchers []search.Searcher, min float64, options search.SearcherOptions) (
- *DisjunctionSearcher, error) {
- return newDisjunctionSearcher(indexReader, qsearchers, min, options,
- true)
- }
-
- func newDisjunctionSearcher(indexReader index.IndexReader,
- qsearchers []search.Searcher, min float64, options search.SearcherOptions,
- limit bool) (
- *DisjunctionSearcher, error) {
- if limit && tooManyClauses(len(qsearchers)) {
- return nil, tooManyClausesErr()
- }
- // build the downstream searchers
- searchers := make(OrderedSearcherList, len(qsearchers))
- for i, searcher := range qsearchers {
- searchers[i] = searcher
- }
- // sort the searchers
- sort.Sort(sort.Reverse(searchers))
- // build our searcher
- rv := DisjunctionSearcher{
- indexReader: indexReader,
- searchers: searchers,
- numSearchers: len(searchers),
- currs: make([]*search.DocumentMatch, len(searchers)),
- scorer: scorer.NewDisjunctionQueryScorer(options),
- min: int(min),
- matching: make([]*search.DocumentMatch, len(searchers)),
- matchingIdxs: make([]int, len(searchers)),
- }
- rv.computeQueryNorm()
- return &rv, nil
- }
-
- func (s *DisjunctionSearcher) computeQueryNorm() {
- // first calculate sum of squared weights
- sumOfSquaredWeights := 0.0
- for _, termSearcher := range s.searchers {
- sumOfSquaredWeights += termSearcher.Weight()
- }
- // now compute query norm from this
- s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
- // finally tell all the downstream searchers the norm
- for _, termSearcher := range s.searchers {
- termSearcher.SetQueryNorm(s.queryNorm)
- }
- }
-
- func (s *DisjunctionSearcher) initSearchers(ctx *search.SearchContext) error {
- var err error
- // get all searchers pointing at their first match
- for i, termSearcher := range s.searchers {
- if s.currs[i] != nil {
- ctx.DocumentMatchPool.Put(s.currs[i])
- }
- s.currs[i], err = termSearcher.Next(ctx)
- if err != nil {
- return err
- }
- }
-
- err = s.updateMatches()
- if err != nil {
- return err
- }
-
- s.initialized = true
- return nil
- }
-
- func (s *DisjunctionSearcher) updateMatches() error {
- matching := s.matching[:0]
- matchingIdxs := s.matchingIdxs[:0]
-
- for i := 0; i < len(s.currs); i++ {
- curr := s.currs[i]
- if curr == nil {
- continue
- }
-
- if len(matching) > 0 {
- cmp := curr.IndexInternalID.Compare(matching[0].IndexInternalID)
- if cmp > 0 {
- continue
- }
-
- if cmp < 0 {
- matching = matching[:0]
- matchingIdxs = matchingIdxs[:0]
- }
- }
-
- matching = append(matching, curr)
- matchingIdxs = append(matchingIdxs, i)
- }
-
- s.matching = matching
- s.matchingIdxs = matchingIdxs
-
- return nil
- }
-
- func (s *DisjunctionSearcher) Weight() float64 {
- var rv float64
- for _, searcher := range s.searchers {
- rv += searcher.Weight()
- }
- return rv
- }
-
- func (s *DisjunctionSearcher) SetQueryNorm(qnorm float64) {
- for _, searcher := range s.searchers {
- searcher.SetQueryNorm(qnorm)
- }
- }
-
- func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (
- *search.DocumentMatch, error) {
- if !s.initialized {
- err := s.initSearchers(ctx)
- if err != nil {
- return nil, err
- }
- }
- var err error
- var rv *search.DocumentMatch
-
- found := false
- for !found && len(s.matching) > 0 {
- if len(s.matching) >= s.min {
- found = true
- // score this match
- rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
- }
-
- // invoke next on all the matching searchers
- for _, i := range s.matchingIdxs {
- searcher := s.searchers[i]
- if s.currs[i] != rv {
- ctx.DocumentMatchPool.Put(s.currs[i])
- }
- s.currs[i], err = searcher.Next(ctx)
- if err != nil {
- return nil, err
- }
- }
-
- err = s.updateMatches()
- if err != nil {
- return nil, err
- }
- }
- return rv, nil
- }
-
- func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext,
- ID index.IndexInternalID) (*search.DocumentMatch, error) {
- if !s.initialized {
- err := s.initSearchers(ctx)
- if err != nil {
- return nil, err
- }
- }
- // get all searchers pointing at their first match
- var err error
- for i, termSearcher := range s.searchers {
- if s.currs[i] != nil {
- ctx.DocumentMatchPool.Put(s.currs[i])
- }
- s.currs[i], err = termSearcher.Advance(ctx, ID)
- if err != nil {
- return nil, err
- }
- }
-
- err = s.updateMatches()
- if err != nil {
- return nil, err
- }
-
- return s.Next(ctx)
- }
-
- func (s *DisjunctionSearcher) Count() uint64 {
- // for now return a worst case
- var sum uint64
- for _, searcher := range s.searchers {
- sum += searcher.Count()
- }
- return sum
- }
-
- func (s *DisjunctionSearcher) Close() (rv error) {
- for _, searcher := range s.searchers {
- err := searcher.Close()
- if err != nil && rv == nil {
- rv = err
- }
- }
- return rv
- }
-
- func (s *DisjunctionSearcher) Min() int {
- return s.min
- }
-
- func (s *DisjunctionSearcher) DocumentMatchPoolSize() int {
- rv := len(s.currs)
- for _, s := range s.searchers {
- rv += s.DocumentMatchPoolSize()
- }
- return rv
- }
|