You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

search_disjunction.go 6.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package searcher
  15. import (
  16. "fmt"
  17. "math"
  18. "sort"
  19. "github.com/blevesearch/bleve/index"
  20. "github.com/blevesearch/bleve/search"
  21. "github.com/blevesearch/bleve/search/scorer"
  22. )
  23. // DisjunctionMaxClauseCount is a package-level setting that applications can
  24. // adjust to a non-zero value to cause the DisjunctionSearcher to return an
  25. // error instead of executing searches when the clause count exceeds it (0 = no limit).
  26. var DisjunctionMaxClauseCount = 0
  27. type DisjunctionSearcher struct { // DisjunctionSearcher yields documents matched by at least min of its child searchers.
  28. indexReader index.IndexReader // reader handed to the constructor; only stored by the code in this file
  29. searchers OrderedSearcherList // child searchers, sorted with sort.Reverse at construction
  30. numSearchers int // len(searchers), passed to the scorer on every hit
  31. queryNorm float64 // 1/sqrt(sum of child weights), set by computeQueryNorm
  32. currs []*search.DocumentMatch // current match of each child, indexed like searchers; nil entries are skipped by updateMatches
  33. scorer *scorer.DisjunctionQueryScorer // scores the set of matching children in Next
  34. min int // minimum number of children that must match for Next to return a document
  35. matching []*search.DocumentMatch // entries of currs that share the smallest IndexInternalID
  36. matchingIdxs []int // positions in searchers/currs of the entries held in matching
  37. initialized bool // true once initSearchers has completed without error
  38. }
  39. func tooManyClauses(count int) bool {
  40. if DisjunctionMaxClauseCount != 0 && count > DisjunctionMaxClauseCount {
  41. return true
  42. }
  43. return false
  44. }
  45. func tooManyClausesErr() error {
  46. return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]",
  47. DisjunctionMaxClauseCount)
  48. }
  49. func NewDisjunctionSearcher(indexReader index.IndexReader,
  50. qsearchers []search.Searcher, min float64, options search.SearcherOptions) (
  51. *DisjunctionSearcher, error) {
  52. return newDisjunctionSearcher(indexReader, qsearchers, min, options,
  53. true)
  54. }
  55. func newDisjunctionSearcher(indexReader index.IndexReader,
  56. qsearchers []search.Searcher, min float64, options search.SearcherOptions,
  57. limit bool) (
  58. *DisjunctionSearcher, error) {
  59. if limit && tooManyClauses(len(qsearchers)) {
  60. return nil, tooManyClausesErr()
  61. }
  62. // build the downstream searchers
  63. searchers := make(OrderedSearcherList, len(qsearchers))
  64. for i, searcher := range qsearchers {
  65. searchers[i] = searcher
  66. }
  67. // sort the searchers
  68. sort.Sort(sort.Reverse(searchers))
  69. // build our searcher
  70. rv := DisjunctionSearcher{
  71. indexReader: indexReader,
  72. searchers: searchers,
  73. numSearchers: len(searchers),
  74. currs: make([]*search.DocumentMatch, len(searchers)),
  75. scorer: scorer.NewDisjunctionQueryScorer(options),
  76. min: int(min),
  77. matching: make([]*search.DocumentMatch, len(searchers)),
  78. matchingIdxs: make([]int, len(searchers)),
  79. }
  80. rv.computeQueryNorm()
  81. return &rv, nil
  82. }
  83. func (s *DisjunctionSearcher) computeQueryNorm() {
  84. // first calculate sum of squared weights
  85. sumOfSquaredWeights := 0.0
  86. for _, termSearcher := range s.searchers {
  87. sumOfSquaredWeights += termSearcher.Weight()
  88. }
  89. // now compute query norm from this
  90. s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
  91. // finally tell all the downstream searchers the norm
  92. for _, termSearcher := range s.searchers {
  93. termSearcher.SetQueryNorm(s.queryNorm)
  94. }
  95. }
  96. func (s *DisjunctionSearcher) initSearchers(ctx *search.SearchContext) error {
  97. var err error
  98. // get all searchers pointing at their first match
  99. for i, termSearcher := range s.searchers {
  100. if s.currs[i] != nil {
  101. ctx.DocumentMatchPool.Put(s.currs[i])
  102. }
  103. s.currs[i], err = termSearcher.Next(ctx)
  104. if err != nil {
  105. return err
  106. }
  107. }
  108. err = s.updateMatches()
  109. if err != nil {
  110. return err
  111. }
  112. s.initialized = true
  113. return nil
  114. }
  115. func (s *DisjunctionSearcher) updateMatches() error {
  116. matching := s.matching[:0]
  117. matchingIdxs := s.matchingIdxs[:0]
  118. for i := 0; i < len(s.currs); i++ {
  119. curr := s.currs[i]
  120. if curr == nil {
  121. continue
  122. }
  123. if len(matching) > 0 {
  124. cmp := curr.IndexInternalID.Compare(matching[0].IndexInternalID)
  125. if cmp > 0 {
  126. continue
  127. }
  128. if cmp < 0 {
  129. matching = matching[:0]
  130. matchingIdxs = matchingIdxs[:0]
  131. }
  132. }
  133. matching = append(matching, curr)
  134. matchingIdxs = append(matchingIdxs, i)
  135. }
  136. s.matching = matching
  137. s.matchingIdxs = matchingIdxs
  138. return nil
  139. }
  140. func (s *DisjunctionSearcher) Weight() float64 {
  141. var rv float64
  142. for _, searcher := range s.searchers {
  143. rv += searcher.Weight()
  144. }
  145. return rv
  146. }
  147. func (s *DisjunctionSearcher) SetQueryNorm(qnorm float64) {
  148. for _, searcher := range s.searchers {
  149. searcher.SetQueryNorm(qnorm)
  150. }
  151. }
  152. func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (
  153. *search.DocumentMatch, error) {
  154. if !s.initialized {
  155. err := s.initSearchers(ctx)
  156. if err != nil {
  157. return nil, err
  158. }
  159. }
  160. var err error
  161. var rv *search.DocumentMatch
  162. found := false
  163. for !found && len(s.matching) > 0 {
  164. if len(s.matching) >= s.min {
  165. found = true
  166. // score this match
  167. rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
  168. }
  169. // invoke next on all the matching searchers
  170. for _, i := range s.matchingIdxs {
  171. searcher := s.searchers[i]
  172. if s.currs[i] != rv {
  173. ctx.DocumentMatchPool.Put(s.currs[i])
  174. }
  175. s.currs[i], err = searcher.Next(ctx)
  176. if err != nil {
  177. return nil, err
  178. }
  179. }
  180. err = s.updateMatches()
  181. if err != nil {
  182. return nil, err
  183. }
  184. }
  185. return rv, nil
  186. }
  187. func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext,
  188. ID index.IndexInternalID) (*search.DocumentMatch, error) {
  189. if !s.initialized {
  190. err := s.initSearchers(ctx)
  191. if err != nil {
  192. return nil, err
  193. }
  194. }
  195. // get all searchers pointing at their first match
  196. var err error
  197. for i, termSearcher := range s.searchers {
  198. if s.currs[i] != nil {
  199. ctx.DocumentMatchPool.Put(s.currs[i])
  200. }
  201. s.currs[i], err = termSearcher.Advance(ctx, ID)
  202. if err != nil {
  203. return nil, err
  204. }
  205. }
  206. err = s.updateMatches()
  207. if err != nil {
  208. return nil, err
  209. }
  210. return s.Next(ctx)
  211. }
  212. func (s *DisjunctionSearcher) Count() uint64 {
  213. // for now return a worst case
  214. var sum uint64
  215. for _, searcher := range s.searchers {
  216. sum += searcher.Count()
  217. }
  218. return sum
  219. }
  220. func (s *DisjunctionSearcher) Close() (rv error) {
  221. for _, searcher := range s.searchers {
  222. err := searcher.Close()
  223. if err != nil && rv == nil {
  224. rv = err
  225. }
  226. }
  227. return rv
  228. }
  229. func (s *DisjunctionSearcher) Min() int {
  230. return s.min
  231. }
  232. func (s *DisjunctionSearcher) DocumentMatchPoolSize() int {
  233. rv := len(s.currs)
  234. for _, s := range s.searchers {
  235. rv += s.DocumentMatchPoolSize()
  236. }
  237. return rv
  238. }