Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

search_boolean.go 9.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package searcher
  15. import (
  16. "math"
  17. "github.com/blevesearch/bleve/index"
  18. "github.com/blevesearch/bleve/search"
  19. "github.com/blevesearch/bleve/search/scorer"
  20. )
  21. type BooleanSearcher struct {
  22. indexReader index.IndexReader
  23. mustSearcher search.Searcher
  24. shouldSearcher search.Searcher
  25. mustNotSearcher search.Searcher
  26. queryNorm float64
  27. currMust *search.DocumentMatch
  28. currShould *search.DocumentMatch
  29. currMustNot *search.DocumentMatch
  30. currentID index.IndexInternalID
  31. min uint64
  32. scorer *scorer.ConjunctionQueryScorer
  33. matches []*search.DocumentMatch
  34. initialized bool
  35. }
  36. func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) {
  37. // build our searcher
  38. rv := BooleanSearcher{
  39. indexReader: indexReader,
  40. mustSearcher: mustSearcher,
  41. shouldSearcher: shouldSearcher,
  42. mustNotSearcher: mustNotSearcher,
  43. scorer: scorer.NewConjunctionQueryScorer(options),
  44. matches: make([]*search.DocumentMatch, 2),
  45. }
  46. rv.computeQueryNorm()
  47. return &rv, nil
  48. }
  49. func (s *BooleanSearcher) computeQueryNorm() {
  50. // first calculate sum of squared weights
  51. sumOfSquaredWeights := 0.0
  52. if s.mustSearcher != nil {
  53. sumOfSquaredWeights += s.mustSearcher.Weight()
  54. }
  55. if s.shouldSearcher != nil {
  56. sumOfSquaredWeights += s.shouldSearcher.Weight()
  57. }
  58. // now compute query norm from this
  59. s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
  60. // finally tell all the downstream searchers the norm
  61. if s.mustSearcher != nil {
  62. s.mustSearcher.SetQueryNorm(s.queryNorm)
  63. }
  64. if s.shouldSearcher != nil {
  65. s.shouldSearcher.SetQueryNorm(s.queryNorm)
  66. }
  67. }
  68. func (s *BooleanSearcher) initSearchers(ctx *search.SearchContext) error {
  69. var err error
  70. // get all searchers pointing at their first match
  71. if s.mustSearcher != nil {
  72. if s.currMust != nil {
  73. ctx.DocumentMatchPool.Put(s.currMust)
  74. }
  75. s.currMust, err = s.mustSearcher.Next(ctx)
  76. if err != nil {
  77. return err
  78. }
  79. }
  80. if s.shouldSearcher != nil {
  81. if s.currShould != nil {
  82. ctx.DocumentMatchPool.Put(s.currShould)
  83. }
  84. s.currShould, err = s.shouldSearcher.Next(ctx)
  85. if err != nil {
  86. return err
  87. }
  88. }
  89. if s.mustNotSearcher != nil {
  90. if s.currMustNot != nil {
  91. ctx.DocumentMatchPool.Put(s.currMustNot)
  92. }
  93. s.currMustNot, err = s.mustNotSearcher.Next(ctx)
  94. if err != nil {
  95. return err
  96. }
  97. }
  98. if s.mustSearcher != nil && s.currMust != nil {
  99. s.currentID = s.currMust.IndexInternalID
  100. } else if s.mustSearcher == nil && s.currShould != nil {
  101. s.currentID = s.currShould.IndexInternalID
  102. } else {
  103. s.currentID = nil
  104. }
  105. s.initialized = true
  106. return nil
  107. }
  108. func (s *BooleanSearcher) advanceNextMust(ctx *search.SearchContext, skipReturn *search.DocumentMatch) error {
  109. var err error
  110. if s.mustSearcher != nil {
  111. if s.currMust != skipReturn {
  112. ctx.DocumentMatchPool.Put(s.currMust)
  113. }
  114. s.currMust, err = s.mustSearcher.Next(ctx)
  115. if err != nil {
  116. return err
  117. }
  118. } else {
  119. if s.currShould != skipReturn {
  120. ctx.DocumentMatchPool.Put(s.currShould)
  121. }
  122. s.currShould, err = s.shouldSearcher.Next(ctx)
  123. if err != nil {
  124. return err
  125. }
  126. }
  127. if s.mustSearcher != nil && s.currMust != nil {
  128. s.currentID = s.currMust.IndexInternalID
  129. } else if s.mustSearcher == nil && s.currShould != nil {
  130. s.currentID = s.currShould.IndexInternalID
  131. } else {
  132. s.currentID = nil
  133. }
  134. return nil
  135. }
  136. func (s *BooleanSearcher) Weight() float64 {
  137. var rv float64
  138. if s.mustSearcher != nil {
  139. rv += s.mustSearcher.Weight()
  140. }
  141. if s.shouldSearcher != nil {
  142. rv += s.shouldSearcher.Weight()
  143. }
  144. return rv
  145. }
  146. func (s *BooleanSearcher) SetQueryNorm(qnorm float64) {
  147. if s.mustSearcher != nil {
  148. s.mustSearcher.SetQueryNorm(qnorm)
  149. }
  150. if s.shouldSearcher != nil {
  151. s.shouldSearcher.SetQueryNorm(qnorm)
  152. }
  153. }
  154. func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
  155. if !s.initialized {
  156. err := s.initSearchers(ctx)
  157. if err != nil {
  158. return nil, err
  159. }
  160. }
  161. var err error
  162. var rv *search.DocumentMatch
  163. for s.currentID != nil {
  164. if s.currMustNot != nil {
  165. cmp := s.currMustNot.IndexInternalID.Compare(s.currentID)
  166. if cmp < 0 {
  167. ctx.DocumentMatchPool.Put(s.currMustNot)
  168. // advance must not searcher to our candidate entry
  169. s.currMustNot, err = s.mustNotSearcher.Advance(ctx, s.currentID)
  170. if err != nil {
  171. return nil, err
  172. }
  173. if s.currMustNot != nil && s.currMustNot.IndexInternalID.Equals(s.currentID) {
  174. // the candidate is excluded
  175. err = s.advanceNextMust(ctx, nil)
  176. if err != nil {
  177. return nil, err
  178. }
  179. continue
  180. }
  181. } else if cmp == 0 {
  182. // the candidate is excluded
  183. err = s.advanceNextMust(ctx, nil)
  184. if err != nil {
  185. return nil, err
  186. }
  187. continue
  188. }
  189. }
  190. shouldCmpOrNil := 1 // NOTE: shouldCmp will also be 1 when currShould == nil.
  191. if s.currShould != nil {
  192. shouldCmpOrNil = s.currShould.IndexInternalID.Compare(s.currentID)
  193. }
  194. if shouldCmpOrNil < 0 {
  195. ctx.DocumentMatchPool.Put(s.currShould)
  196. // advance should searcher to our candidate entry
  197. s.currShould, err = s.shouldSearcher.Advance(ctx, s.currentID)
  198. if err != nil {
  199. return nil, err
  200. }
  201. if s.currShould != nil && s.currShould.IndexInternalID.Equals(s.currentID) {
  202. // score bonus matches should
  203. var cons []*search.DocumentMatch
  204. if s.currMust != nil {
  205. cons = s.matches
  206. cons[0] = s.currMust
  207. cons[1] = s.currShould
  208. } else {
  209. cons = s.matches[0:1]
  210. cons[0] = s.currShould
  211. }
  212. rv = s.scorer.Score(ctx, cons)
  213. err = s.advanceNextMust(ctx, rv)
  214. if err != nil {
  215. return nil, err
  216. }
  217. break
  218. } else if s.shouldSearcher.Min() == 0 {
  219. // match is OK anyway
  220. cons := s.matches[0:1]
  221. cons[0] = s.currMust
  222. rv = s.scorer.Score(ctx, cons)
  223. err = s.advanceNextMust(ctx, rv)
  224. if err != nil {
  225. return nil, err
  226. }
  227. break
  228. }
  229. } else if shouldCmpOrNil == 0 {
  230. // score bonus matches should
  231. var cons []*search.DocumentMatch
  232. if s.currMust != nil {
  233. cons = s.matches
  234. cons[0] = s.currMust
  235. cons[1] = s.currShould
  236. } else {
  237. cons = s.matches[0:1]
  238. cons[0] = s.currShould
  239. }
  240. rv = s.scorer.Score(ctx, cons)
  241. err = s.advanceNextMust(ctx, rv)
  242. if err != nil {
  243. return nil, err
  244. }
  245. break
  246. } else if s.shouldSearcher == nil || s.shouldSearcher.Min() == 0 {
  247. // match is OK anyway
  248. cons := s.matches[0:1]
  249. cons[0] = s.currMust
  250. rv = s.scorer.Score(ctx, cons)
  251. err = s.advanceNextMust(ctx, rv)
  252. if err != nil {
  253. return nil, err
  254. }
  255. break
  256. }
  257. err = s.advanceNextMust(ctx, nil)
  258. if err != nil {
  259. return nil, err
  260. }
  261. }
  262. return rv, nil
  263. }
  264. func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
  265. if !s.initialized {
  266. err := s.initSearchers(ctx)
  267. if err != nil {
  268. return nil, err
  269. }
  270. }
  271. var err error
  272. if s.mustSearcher != nil {
  273. if s.currMust != nil {
  274. ctx.DocumentMatchPool.Put(s.currMust)
  275. }
  276. s.currMust, err = s.mustSearcher.Advance(ctx, ID)
  277. if err != nil {
  278. return nil, err
  279. }
  280. }
  281. if s.shouldSearcher != nil {
  282. if s.currShould != nil {
  283. ctx.DocumentMatchPool.Put(s.currShould)
  284. }
  285. s.currShould, err = s.shouldSearcher.Advance(ctx, ID)
  286. if err != nil {
  287. return nil, err
  288. }
  289. }
  290. if s.mustNotSearcher != nil {
  291. if s.currMustNot != nil {
  292. ctx.DocumentMatchPool.Put(s.currMustNot)
  293. }
  294. s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID)
  295. if err != nil {
  296. return nil, err
  297. }
  298. }
  299. if s.mustSearcher != nil && s.currMust != nil {
  300. s.currentID = s.currMust.IndexInternalID
  301. } else if s.mustSearcher == nil && s.currShould != nil {
  302. s.currentID = s.currShould.IndexInternalID
  303. } else {
  304. s.currentID = nil
  305. }
  306. return s.Next(ctx)
  307. }
  308. func (s *BooleanSearcher) Count() uint64 {
  309. // for now return a worst case
  310. var sum uint64
  311. if s.mustSearcher != nil {
  312. sum += s.mustSearcher.Count()
  313. }
  314. if s.shouldSearcher != nil {
  315. sum += s.shouldSearcher.Count()
  316. }
  317. return sum
  318. }
  319. func (s *BooleanSearcher) Close() error {
  320. var err0, err1, err2 error
  321. if s.mustSearcher != nil {
  322. err0 = s.mustSearcher.Close()
  323. }
  324. if s.shouldSearcher != nil {
  325. err1 = s.shouldSearcher.Close()
  326. }
  327. if s.mustNotSearcher != nil {
  328. err2 = s.mustNotSearcher.Close()
  329. }
  330. if err0 != nil {
  331. return err0
  332. }
  333. if err1 != nil {
  334. return err1
  335. }
  336. if err2 != nil {
  337. return err2
  338. }
  339. return nil
  340. }
  341. func (s *BooleanSearcher) Min() int {
  342. return 0
  343. }
  344. func (s *BooleanSearcher) DocumentMatchPoolSize() int {
  345. rv := 3
  346. if s.mustSearcher != nil {
  347. rv += s.mustSearcher.DocumentMatchPoolSize()
  348. }
  349. if s.shouldSearcher != nil {
  350. rv += s.shouldSearcher.DocumentMatchPoolSize()
  351. }
  352. if s.mustNotSearcher != nil {
  353. rv += s.mustNotSearcher.DocumentMatchPoolSize()
  354. }
  355. return rv
  356. }