You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

search_conjunction.go 5.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package searcher
  15. import (
  16. "math"
  17. "sort"
  18. "github.com/blevesearch/bleve/index"
  19. "github.com/blevesearch/bleve/search"
  20. "github.com/blevesearch/bleve/search/scorer"
  21. )
  22. type ConjunctionSearcher struct {
  23. indexReader index.IndexReader
  24. searchers OrderedSearcherList
  25. queryNorm float64
  26. currs []*search.DocumentMatch
  27. maxIDIdx int
  28. scorer *scorer.ConjunctionQueryScorer
  29. initialized bool
  30. options search.SearcherOptions
  31. }
  32. func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, options search.SearcherOptions) (*ConjunctionSearcher, error) {
  33. // build the downstream searchers
  34. searchers := make(OrderedSearcherList, len(qsearchers))
  35. for i, searcher := range qsearchers {
  36. searchers[i] = searcher
  37. }
  38. // sort the searchers
  39. sort.Sort(searchers)
  40. // build our searcher
  41. rv := ConjunctionSearcher{
  42. indexReader: indexReader,
  43. options: options,
  44. searchers: searchers,
  45. currs: make([]*search.DocumentMatch, len(searchers)),
  46. scorer: scorer.NewConjunctionQueryScorer(options),
  47. }
  48. rv.computeQueryNorm()
  49. return &rv, nil
  50. }
  51. func (s *ConjunctionSearcher) computeQueryNorm() {
  52. // first calculate sum of squared weights
  53. sumOfSquaredWeights := 0.0
  54. for _, termSearcher := range s.searchers {
  55. sumOfSquaredWeights += termSearcher.Weight()
  56. }
  57. // now compute query norm from this
  58. s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
  59. // finally tell all the downstream searchers the norm
  60. for _, termSearcher := range s.searchers {
  61. termSearcher.SetQueryNorm(s.queryNorm)
  62. }
  63. }
  64. func (s *ConjunctionSearcher) initSearchers(ctx *search.SearchContext) error {
  65. var err error
  66. // get all searchers pointing at their first match
  67. for i, termSearcher := range s.searchers {
  68. if s.currs[i] != nil {
  69. ctx.DocumentMatchPool.Put(s.currs[i])
  70. }
  71. s.currs[i], err = termSearcher.Next(ctx)
  72. if err != nil {
  73. return err
  74. }
  75. }
  76. s.initialized = true
  77. return nil
  78. }
  79. func (s *ConjunctionSearcher) Weight() float64 {
  80. var rv float64
  81. for _, searcher := range s.searchers {
  82. rv += searcher.Weight()
  83. }
  84. return rv
  85. }
  86. func (s *ConjunctionSearcher) SetQueryNorm(qnorm float64) {
  87. for _, searcher := range s.searchers {
  88. searcher.SetQueryNorm(qnorm)
  89. }
  90. }
  91. func (s *ConjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
  92. if !s.initialized {
  93. err := s.initSearchers(ctx)
  94. if err != nil {
  95. return nil, err
  96. }
  97. }
  98. var rv *search.DocumentMatch
  99. var err error
  100. OUTER:
  101. for s.currs[s.maxIDIdx] != nil {
  102. maxID := s.currs[s.maxIDIdx].IndexInternalID
  103. i := 0
  104. for i < len(s.currs) {
  105. if s.currs[i] == nil {
  106. return nil, nil
  107. }
  108. if i == s.maxIDIdx {
  109. i++
  110. continue
  111. }
  112. cmp := maxID.Compare(s.currs[i].IndexInternalID)
  113. if cmp == 0 {
  114. i++
  115. continue
  116. }
  117. if cmp < 0 {
  118. // maxID < currs[i], so we found a new maxIDIdx
  119. s.maxIDIdx = i
  120. // advance the positions where [0 <= x < i], since we
  121. // know they were equal to the former max entry
  122. maxID = s.currs[s.maxIDIdx].IndexInternalID
  123. for x := 0; x < i; x++ {
  124. err = s.advanceChild(ctx, x, maxID)
  125. if err != nil {
  126. return nil, err
  127. }
  128. }
  129. continue OUTER
  130. }
  131. // maxID > currs[i], so need to advance searchers[i]
  132. err = s.advanceChild(ctx, i, maxID)
  133. if err != nil {
  134. return nil, err
  135. }
  136. // don't bump i, so that we'll examine the just-advanced
  137. // currs[i] again
  138. }
  139. // if we get here, a doc matched all readers, so score and add it
  140. rv = s.scorer.Score(ctx, s.currs)
  141. // we know all the searchers are pointing at the same thing
  142. // so they all need to be bumped
  143. for i, termSearcher := range s.searchers {
  144. if s.currs[i] != rv {
  145. ctx.DocumentMatchPool.Put(s.currs[i])
  146. }
  147. s.currs[i], err = termSearcher.Next(ctx)
  148. if err != nil {
  149. return nil, err
  150. }
  151. }
  152. // don't continue now, wait for the next call to Next()
  153. break
  154. }
  155. return rv, nil
  156. }
  157. func (s *ConjunctionSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
  158. if !s.initialized {
  159. err := s.initSearchers(ctx)
  160. if err != nil {
  161. return nil, err
  162. }
  163. }
  164. for i := range s.searchers {
  165. err := s.advanceChild(ctx, i, ID)
  166. if err != nil {
  167. return nil, err
  168. }
  169. }
  170. return s.Next(ctx)
  171. }
  172. func (s *ConjunctionSearcher) advanceChild(ctx *search.SearchContext, i int, ID index.IndexInternalID) (err error) {
  173. if s.currs[i] != nil {
  174. ctx.DocumentMatchPool.Put(s.currs[i])
  175. }
  176. s.currs[i], err = s.searchers[i].Advance(ctx, ID)
  177. return err
  178. }
  179. func (s *ConjunctionSearcher) Count() uint64 {
  180. // for now return a worst case
  181. var sum uint64
  182. for _, searcher := range s.searchers {
  183. sum += searcher.Count()
  184. }
  185. return sum
  186. }
  187. func (s *ConjunctionSearcher) Close() (rv error) {
  188. for _, searcher := range s.searchers {
  189. err := searcher.Close()
  190. if err != nil && rv == nil {
  191. rv = err
  192. }
  193. }
  194. return rv
  195. }
  196. func (s *ConjunctionSearcher) Min() int {
  197. return 0
  198. }
  199. func (s *ConjunctionSearcher) DocumentMatchPoolSize() int {
  200. rv := len(s.currs)
  201. for _, s := range s.searchers {
  202. rv += s.DocumentMatchPoolSize()
  203. }
  204. return rv
  205. }