You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

search.go 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633
  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bleve
  15. import (
  16. "encoding/json"
  17. "fmt"
  18. "reflect"
  19. "sort"
  20. "time"
  21. "github.com/blevesearch/bleve/v2/analysis"
  22. "github.com/blevesearch/bleve/v2/analysis/datetime/optional"
  23. "github.com/blevesearch/bleve/v2/document"
  24. "github.com/blevesearch/bleve/v2/registry"
  25. "github.com/blevesearch/bleve/v2/search"
  26. "github.com/blevesearch/bleve/v2/search/collector"
  27. "github.com/blevesearch/bleve/v2/search/query"
  28. "github.com/blevesearch/bleve/v2/size"
  29. )
  30. var reflectStaticSizeSearchResult int
  31. var reflectStaticSizeSearchStatus int
  32. func init() {
  33. var sr SearchResult
  34. reflectStaticSizeSearchResult = int(reflect.TypeOf(sr).Size())
  35. var ss SearchStatus
  36. reflectStaticSizeSearchStatus = int(reflect.TypeOf(ss).Size())
  37. }
  38. var cache = registry.NewCache()
  39. const defaultDateTimeParser = optional.Name
  40. type numericRange struct {
  41. Name string `json:"name,omitempty"`
  42. Min *float64 `json:"min,omitempty"`
  43. Max *float64 `json:"max,omitempty"`
  44. }
  45. type dateTimeRange struct {
  46. Name string `json:"name,omitempty"`
  47. Start time.Time `json:"start,omitempty"`
  48. End time.Time `json:"end,omitempty"`
  49. startString *string
  50. endString *string
  51. }
  52. func (dr *dateTimeRange) ParseDates(dateTimeParser analysis.DateTimeParser) (start, end time.Time) {
  53. start = dr.Start
  54. if dr.Start.IsZero() && dr.startString != nil {
  55. s, err := dateTimeParser.ParseDateTime(*dr.startString)
  56. if err == nil {
  57. start = s
  58. }
  59. }
  60. end = dr.End
  61. if dr.End.IsZero() && dr.endString != nil {
  62. e, err := dateTimeParser.ParseDateTime(*dr.endString)
  63. if err == nil {
  64. end = e
  65. }
  66. }
  67. return start, end
  68. }
  69. func (dr *dateTimeRange) UnmarshalJSON(input []byte) error {
  70. var temp struct {
  71. Name string `json:"name,omitempty"`
  72. Start *string `json:"start,omitempty"`
  73. End *string `json:"end,omitempty"`
  74. }
  75. err := json.Unmarshal(input, &temp)
  76. if err != nil {
  77. return err
  78. }
  79. dr.Name = temp.Name
  80. if temp.Start != nil {
  81. dr.startString = temp.Start
  82. }
  83. if temp.End != nil {
  84. dr.endString = temp.End
  85. }
  86. return nil
  87. }
  88. func (dr *dateTimeRange) MarshalJSON() ([]byte, error) {
  89. rv := map[string]interface{}{
  90. "name": dr.Name,
  91. "start": dr.Start,
  92. "end": dr.End,
  93. }
  94. if dr.Start.IsZero() && dr.startString != nil {
  95. rv["start"] = dr.startString
  96. }
  97. if dr.End.IsZero() && dr.endString != nil {
  98. rv["end"] = dr.endString
  99. }
  100. return json.Marshal(rv)
  101. }
  102. // A FacetRequest describes a facet or aggregation
  103. // of the result document set you would like to be
  104. // built.
  105. type FacetRequest struct {
  106. Size int `json:"size"`
  107. Field string `json:"field"`
  108. NumericRanges []*numericRange `json:"numeric_ranges,omitempty"`
  109. DateTimeRanges []*dateTimeRange `json:"date_ranges,omitempty"`
  110. }
  111. func (fr *FacetRequest) Validate() error {
  112. nrCount := len(fr.NumericRanges)
  113. drCount := len(fr.DateTimeRanges)
  114. if nrCount > 0 && drCount > 0 {
  115. return fmt.Errorf("facet can only conain numeric ranges or date ranges, not both")
  116. }
  117. if nrCount > 0 {
  118. nrNames := map[string]interface{}{}
  119. for _, nr := range fr.NumericRanges {
  120. if _, ok := nrNames[nr.Name]; ok {
  121. return fmt.Errorf("numeric ranges contains duplicate name '%s'", nr.Name)
  122. }
  123. nrNames[nr.Name] = struct{}{}
  124. if nr.Min == nil && nr.Max == nil {
  125. return fmt.Errorf("numeric range query must specify either min, max or both for range name '%s'", nr.Name)
  126. }
  127. }
  128. } else {
  129. dateTimeParser, err := cache.DateTimeParserNamed(defaultDateTimeParser)
  130. if err != nil {
  131. return err
  132. }
  133. drNames := map[string]interface{}{}
  134. for _, dr := range fr.DateTimeRanges {
  135. if _, ok := drNames[dr.Name]; ok {
  136. return fmt.Errorf("date ranges contains duplicate name '%s'", dr.Name)
  137. }
  138. drNames[dr.Name] = struct{}{}
  139. start, end := dr.ParseDates(dateTimeParser)
  140. if start.IsZero() && end.IsZero() {
  141. return fmt.Errorf("date range query must specify either start, end or both for range name '%s'", dr.Name)
  142. }
  143. }
  144. }
  145. return nil
  146. }
  147. // NewFacetRequest creates a facet on the specified
  148. // field that limits the number of entries to the
  149. // specified size.
  150. func NewFacetRequest(field string, size int) *FacetRequest {
  151. return &FacetRequest{
  152. Field: field,
  153. Size: size,
  154. }
  155. }
  156. // AddDateTimeRange adds a bucket to a field
  157. // containing date values. Documents with a
  158. // date value falling into this range are tabulated
  159. // as part of this bucket/range.
  160. func (fr *FacetRequest) AddDateTimeRange(name string, start, end time.Time) {
  161. if fr.DateTimeRanges == nil {
  162. fr.DateTimeRanges = make([]*dateTimeRange, 0, 1)
  163. }
  164. fr.DateTimeRanges = append(fr.DateTimeRanges, &dateTimeRange{Name: name, Start: start, End: end})
  165. }
  166. // AddDateTimeRangeString adds a bucket to a field
  167. // containing date values.
  168. func (fr *FacetRequest) AddDateTimeRangeString(name string, start, end *string) {
  169. if fr.DateTimeRanges == nil {
  170. fr.DateTimeRanges = make([]*dateTimeRange, 0, 1)
  171. }
  172. fr.DateTimeRanges = append(fr.DateTimeRanges,
  173. &dateTimeRange{Name: name, startString: start, endString: end})
  174. }
  175. // AddNumericRange adds a bucket to a field
  176. // containing numeric values. Documents with a
  177. // numeric value falling into this range are
  178. // tabulated as part of this bucket/range.
  179. func (fr *FacetRequest) AddNumericRange(name string, min, max *float64) {
  180. if fr.NumericRanges == nil {
  181. fr.NumericRanges = make([]*numericRange, 0, 1)
  182. }
  183. fr.NumericRanges = append(fr.NumericRanges, &numericRange{Name: name, Min: min, Max: max})
  184. }
  185. // FacetsRequest groups together all the
  186. // FacetRequest objects for a single query.
  187. type FacetsRequest map[string]*FacetRequest
  188. func (fr FacetsRequest) Validate() error {
  189. for _, v := range fr {
  190. err := v.Validate()
  191. if err != nil {
  192. return err
  193. }
  194. }
  195. return nil
  196. }
  197. // HighlightRequest describes how field matches
  198. // should be highlighted.
  199. type HighlightRequest struct {
  200. Style *string `json:"style"`
  201. Fields []string `json:"fields"`
  202. }
  203. // NewHighlight creates a default
  204. // HighlightRequest.
  205. func NewHighlight() *HighlightRequest {
  206. return &HighlightRequest{}
  207. }
  208. // NewHighlightWithStyle creates a HighlightRequest
  209. // with an alternate style.
  210. func NewHighlightWithStyle(style string) *HighlightRequest {
  211. return &HighlightRequest{
  212. Style: &style,
  213. }
  214. }
  215. func (h *HighlightRequest) AddField(field string) {
  216. if h.Fields == nil {
  217. h.Fields = make([]string, 0, 1)
  218. }
  219. h.Fields = append(h.Fields, field)
  220. }
  221. // A SearchRequest describes all the parameters
  222. // needed to search the index.
  223. // Query is required.
  224. // Size/From describe how much and which part of the
  225. // result set to return.
  226. // Highlight describes optional search result
  227. // highlighting.
  228. // Fields describes a list of field values which
  229. // should be retrieved for result documents, provided they
  230. // were stored while indexing.
  231. // Facets describe the set of facets to be computed.
  232. // Explain triggers inclusion of additional search
  233. // result score explanations.
  234. // Sort describes the desired order for the results to be returned.
  235. // Score controls the kind of scoring performed
  236. // SearchAfter supports deep paging by providing a minimum sort key
  237. // SearchBefore supports deep paging by providing a maximum sort key
  238. // sortFunc specifies the sort implementation to use for sorting results.
  239. //
  240. // A special field named "*" can be used to return all fields.
  241. type SearchRequest struct {
  242. Query query.Query `json:"query"`
  243. Size int `json:"size"`
  244. From int `json:"from"`
  245. Highlight *HighlightRequest `json:"highlight"`
  246. Fields []string `json:"fields"`
  247. Facets FacetsRequest `json:"facets"`
  248. Explain bool `json:"explain"`
  249. Sort search.SortOrder `json:"sort"`
  250. IncludeLocations bool `json:"includeLocations"`
  251. Score string `json:"score,omitempty"`
  252. SearchAfter []string `json:"search_after"`
  253. SearchBefore []string `json:"search_before"`
  254. sortFunc func(sort.Interface)
  255. }
  256. func (r *SearchRequest) Validate() error {
  257. if srq, ok := r.Query.(query.ValidatableQuery); ok {
  258. err := srq.Validate()
  259. if err != nil {
  260. return err
  261. }
  262. }
  263. if r.SearchAfter != nil && r.SearchBefore != nil {
  264. return fmt.Errorf("cannot use search after and search before together")
  265. }
  266. if r.SearchAfter != nil {
  267. if r.From != 0 {
  268. return fmt.Errorf("cannot use search after with from !=0")
  269. }
  270. if len(r.SearchAfter) != len(r.Sort) {
  271. return fmt.Errorf("search after must have same size as sort order")
  272. }
  273. }
  274. if r.SearchBefore != nil {
  275. if r.From != 0 {
  276. return fmt.Errorf("cannot use search before with from !=0")
  277. }
  278. if len(r.SearchBefore) != len(r.Sort) {
  279. return fmt.Errorf("search before must have same size as sort order")
  280. }
  281. }
  282. return r.Facets.Validate()
  283. }
  284. // AddFacet adds a FacetRequest to this SearchRequest
  285. func (r *SearchRequest) AddFacet(facetName string, f *FacetRequest) {
  286. if r.Facets == nil {
  287. r.Facets = make(FacetsRequest, 1)
  288. }
  289. r.Facets[facetName] = f
  290. }
  291. // SortBy changes the request to use the requested sort order
  292. // this form uses the simplified syntax with an array of strings
  293. // each string can either be a field name
  294. // or the magic value _id and _score which refer to the doc id and search score
  295. // any of these values can optionally be prefixed with - to reverse the order
  296. func (r *SearchRequest) SortBy(order []string) {
  297. so := search.ParseSortOrderStrings(order)
  298. r.Sort = so
  299. }
  300. // SortByCustom changes the request to use the requested sort order
  301. func (r *SearchRequest) SortByCustom(order search.SortOrder) {
  302. r.Sort = order
  303. }
  304. // SetSearchAfter sets the request to skip over hits with a sort
  305. // value less than the provided sort after key
  306. func (r *SearchRequest) SetSearchAfter(after []string) {
  307. r.SearchAfter = after
  308. }
  309. // SetSearchBefore sets the request to skip over hits with a sort
  310. // value greater than the provided sort before key
  311. func (r *SearchRequest) SetSearchBefore(before []string) {
  312. r.SearchBefore = before
  313. }
  314. // UnmarshalJSON deserializes a JSON representation of
  315. // a SearchRequest
  316. func (r *SearchRequest) UnmarshalJSON(input []byte) error {
  317. var temp struct {
  318. Q json.RawMessage `json:"query"`
  319. Size *int `json:"size"`
  320. From int `json:"from"`
  321. Highlight *HighlightRequest `json:"highlight"`
  322. Fields []string `json:"fields"`
  323. Facets FacetsRequest `json:"facets"`
  324. Explain bool `json:"explain"`
  325. Sort []json.RawMessage `json:"sort"`
  326. IncludeLocations bool `json:"includeLocations"`
  327. Score string `json:"score"`
  328. SearchAfter []string `json:"search_after"`
  329. SearchBefore []string `json:"search_before"`
  330. }
  331. err := json.Unmarshal(input, &temp)
  332. if err != nil {
  333. return err
  334. }
  335. if temp.Size == nil {
  336. r.Size = 10
  337. } else {
  338. r.Size = *temp.Size
  339. }
  340. if temp.Sort == nil {
  341. r.Sort = search.SortOrder{&search.SortScore{Desc: true}}
  342. } else {
  343. r.Sort, err = search.ParseSortOrderJSON(temp.Sort)
  344. if err != nil {
  345. return err
  346. }
  347. }
  348. r.From = temp.From
  349. r.Explain = temp.Explain
  350. r.Highlight = temp.Highlight
  351. r.Fields = temp.Fields
  352. r.Facets = temp.Facets
  353. r.IncludeLocations = temp.IncludeLocations
  354. r.Score = temp.Score
  355. r.SearchAfter = temp.SearchAfter
  356. r.SearchBefore = temp.SearchBefore
  357. r.Query, err = query.ParseQuery(temp.Q)
  358. if err != nil {
  359. return err
  360. }
  361. if r.Size < 0 {
  362. r.Size = 10
  363. }
  364. if r.From < 0 {
  365. r.From = 0
  366. }
  367. return nil
  368. }
  369. // NewSearchRequest creates a new SearchRequest
  370. // for the Query, using default values for all
  371. // other search parameters.
  372. func NewSearchRequest(q query.Query) *SearchRequest {
  373. return NewSearchRequestOptions(q, 10, 0, false)
  374. }
  375. // NewSearchRequestOptions creates a new SearchRequest
  376. // for the Query, with the requested size, from
  377. // and explanation search parameters.
  378. // By default results are ordered by score, descending.
  379. func NewSearchRequestOptions(q query.Query, size, from int, explain bool) *SearchRequest {
  380. return &SearchRequest{
  381. Query: q,
  382. Size: size,
  383. From: from,
  384. Explain: explain,
  385. Sort: search.SortOrder{&search.SortScore{Desc: true}},
  386. }
  387. }
  388. // IndexErrMap tracks errors with the name of the index where it occurred
  389. type IndexErrMap map[string]error
  390. // MarshalJSON seralizes the error into a string for JSON consumption
  391. func (iem IndexErrMap) MarshalJSON() ([]byte, error) {
  392. tmp := make(map[string]string, len(iem))
  393. for k, v := range iem {
  394. tmp[k] = v.Error()
  395. }
  396. return json.Marshal(tmp)
  397. }
  398. func (iem IndexErrMap) UnmarshalJSON(data []byte) error {
  399. var tmp map[string]string
  400. err := json.Unmarshal(data, &tmp)
  401. if err != nil {
  402. return err
  403. }
  404. for k, v := range tmp {
  405. iem[k] = fmt.Errorf("%s", v)
  406. }
  407. return nil
  408. }
  409. // SearchStatus is a secion in the SearchResult reporting how many
  410. // underlying indexes were queried, how many were successful/failed
  411. // and a map of any errors that were encountered
  412. type SearchStatus struct {
  413. Total int `json:"total"`
  414. Failed int `json:"failed"`
  415. Successful int `json:"successful"`
  416. Errors IndexErrMap `json:"errors,omitempty"`
  417. }
  418. // Merge will merge together multiple SearchStatuses during a MultiSearch
  419. func (ss *SearchStatus) Merge(other *SearchStatus) {
  420. ss.Total += other.Total
  421. ss.Failed += other.Failed
  422. ss.Successful += other.Successful
  423. if len(other.Errors) > 0 {
  424. if ss.Errors == nil {
  425. ss.Errors = make(map[string]error)
  426. }
  427. for otherIndex, otherError := range other.Errors {
  428. ss.Errors[otherIndex] = otherError
  429. }
  430. }
  431. }
  432. // A SearchResult describes the results of executing
  433. // a SearchRequest.
  434. type SearchResult struct {
  435. Status *SearchStatus `json:"status"`
  436. Request *SearchRequest `json:"request"`
  437. Hits search.DocumentMatchCollection `json:"hits"`
  438. Total uint64 `json:"total_hits"`
  439. MaxScore float64 `json:"max_score"`
  440. Took time.Duration `json:"took"`
  441. Facets search.FacetResults `json:"facets"`
  442. }
  443. func (sr *SearchResult) Size() int {
  444. sizeInBytes := reflectStaticSizeSearchResult + size.SizeOfPtr +
  445. reflectStaticSizeSearchStatus
  446. for _, entry := range sr.Hits {
  447. if entry != nil {
  448. sizeInBytes += entry.Size()
  449. }
  450. }
  451. for k, v := range sr.Facets {
  452. sizeInBytes += size.SizeOfString + len(k) +
  453. v.Size()
  454. }
  455. return sizeInBytes
  456. }
  457. func (sr *SearchResult) String() string {
  458. rv := ""
  459. if sr.Total > 0 {
  460. if sr.Request.Size > 0 {
  461. rv = fmt.Sprintf("%d matches, showing %d through %d, took %s\n", sr.Total, sr.Request.From+1, sr.Request.From+len(sr.Hits), sr.Took)
  462. for i, hit := range sr.Hits {
  463. rv += fmt.Sprintf("%5d. %s (%f)\n", i+sr.Request.From+1, hit.ID, hit.Score)
  464. for fragmentField, fragments := range hit.Fragments {
  465. rv += fmt.Sprintf("\t%s\n", fragmentField)
  466. for _, fragment := range fragments {
  467. rv += fmt.Sprintf("\t\t%s\n", fragment)
  468. }
  469. }
  470. for otherFieldName, otherFieldValue := range hit.Fields {
  471. if _, ok := hit.Fragments[otherFieldName]; !ok {
  472. rv += fmt.Sprintf("\t%s\n", otherFieldName)
  473. rv += fmt.Sprintf("\t\t%v\n", otherFieldValue)
  474. }
  475. }
  476. }
  477. } else {
  478. rv = fmt.Sprintf("%d matches, took %s\n", sr.Total, sr.Took)
  479. }
  480. } else {
  481. rv = "No matches"
  482. }
  483. if len(sr.Facets) > 0 {
  484. rv += fmt.Sprintf("Facets:\n")
  485. for fn, f := range sr.Facets {
  486. rv += fmt.Sprintf("%s(%d)\n", fn, f.Total)
  487. for _, t := range f.Terms {
  488. rv += fmt.Sprintf("\t%s(%d)\n", t.Term, t.Count)
  489. }
  490. if f.Other != 0 {
  491. rv += fmt.Sprintf("\tOther(%d)\n", f.Other)
  492. }
  493. }
  494. }
  495. return rv
  496. }
  497. // Merge will merge together multiple SearchResults during a MultiSearch
  498. func (sr *SearchResult) Merge(other *SearchResult) {
  499. sr.Status.Merge(other.Status)
  500. sr.Hits = append(sr.Hits, other.Hits...)
  501. sr.Total += other.Total
  502. if other.MaxScore > sr.MaxScore {
  503. sr.MaxScore = other.MaxScore
  504. }
  505. if sr.Facets == nil && len(other.Facets) != 0 {
  506. sr.Facets = other.Facets
  507. return
  508. }
  509. sr.Facets.Merge(other.Facets)
  510. }
  511. // MemoryNeededForSearchResult is an exported helper function to determine the RAM
  512. // needed to accommodate the results for a given search request.
  513. func MemoryNeededForSearchResult(req *SearchRequest) uint64 {
  514. if req == nil {
  515. return 0
  516. }
  517. numDocMatches := req.Size + req.From
  518. if req.Size+req.From > collector.PreAllocSizeSkipCap {
  519. numDocMatches = collector.PreAllocSizeSkipCap
  520. }
  521. estimate := 0
  522. // overhead from the SearchResult structure
  523. var sr SearchResult
  524. estimate += sr.Size()
  525. var dm search.DocumentMatch
  526. sizeOfDocumentMatch := dm.Size()
  527. // overhead from results
  528. estimate += numDocMatches * sizeOfDocumentMatch
  529. // overhead from facet results
  530. if req.Facets != nil {
  531. var fr search.FacetResult
  532. estimate += len(req.Facets) * fr.Size()
  533. }
  534. // overhead from fields, highlighting
  535. var d document.Document
  536. if len(req.Fields) > 0 || req.Highlight != nil {
  537. numDocsApplicable := req.Size
  538. if numDocsApplicable > collector.PreAllocSizeSkipCap {
  539. numDocsApplicable = collector.PreAllocSizeSkipCap
  540. }
  541. estimate += numDocsApplicable * d.Size()
  542. }
  543. return uint64(estimate)
  544. }
  545. // SetSortFunc sets the sort implementation to use when sorting hits.
  546. //
  547. // SearchRequests can specify a custom sort implementation to meet
  548. // their needs. For instance, by specifying a parallel sort
  549. // that uses all available cores.
  550. func (r *SearchRequest) SetSortFunc(s func(sort.Interface)) {
  551. r.sortFunc = s
  552. }
  553. // SortFunc returns the sort implementation to use when sorting hits.
  554. // Defaults to sort.Sort.
  555. func (r *SearchRequest) SortFunc() func(data sort.Interface) {
  556. if r.sortFunc != nil {
  557. return r.sortFunc
  558. }
  559. return sort.Sort
  560. }