Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

search.go 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bleve
  15. import (
  16. "encoding/json"
  17. "fmt"
  18. "time"
  19. "github.com/blevesearch/bleve/analysis"
  20. "github.com/blevesearch/bleve/analysis/datetime/optional"
  21. "github.com/blevesearch/bleve/registry"
  22. "github.com/blevesearch/bleve/search"
  23. "github.com/blevesearch/bleve/search/query"
  24. )
  25. var cache = registry.NewCache()
  26. const defaultDateTimeParser = optional.Name
  27. type numericRange struct {
  28. Name string `json:"name,omitempty"`
  29. Min *float64 `json:"min,omitempty"`
  30. Max *float64 `json:"max,omitempty"`
  31. }
  32. type dateTimeRange struct {
  33. Name string `json:"name,omitempty"`
  34. Start time.Time `json:"start,omitempty"`
  35. End time.Time `json:"end,omitempty"`
  36. startString *string
  37. endString *string
  38. }
  39. func (dr *dateTimeRange) ParseDates(dateTimeParser analysis.DateTimeParser) (start, end time.Time) {
  40. start = dr.Start
  41. if dr.Start.IsZero() && dr.startString != nil {
  42. s, err := dateTimeParser.ParseDateTime(*dr.startString)
  43. if err == nil {
  44. start = s
  45. }
  46. }
  47. end = dr.End
  48. if dr.End.IsZero() && dr.endString != nil {
  49. e, err := dateTimeParser.ParseDateTime(*dr.endString)
  50. if err == nil {
  51. end = e
  52. }
  53. }
  54. return start, end
  55. }
  56. func (dr *dateTimeRange) UnmarshalJSON(input []byte) error {
  57. var temp struct {
  58. Name string `json:"name,omitempty"`
  59. Start *string `json:"start,omitempty"`
  60. End *string `json:"end,omitempty"`
  61. }
  62. err := json.Unmarshal(input, &temp)
  63. if err != nil {
  64. return err
  65. }
  66. dr.Name = temp.Name
  67. if temp.Start != nil {
  68. dr.startString = temp.Start
  69. }
  70. if temp.End != nil {
  71. dr.endString = temp.End
  72. }
  73. return nil
  74. }
  75. func (dr *dateTimeRange) MarshalJSON() ([]byte, error) {
  76. rv := map[string]interface{}{
  77. "name": dr.Name,
  78. "start": dr.Start,
  79. "end": dr.End,
  80. }
  81. if dr.Start.IsZero() && dr.startString != nil {
  82. rv["start"] = dr.startString
  83. }
  84. if dr.End.IsZero() && dr.endString != nil {
  85. rv["end"] = dr.endString
  86. }
  87. return json.Marshal(rv)
  88. }
  89. // A FacetRequest describes a facet or aggregation
  90. // of the result document set you would like to be
  91. // built.
  92. type FacetRequest struct {
  93. Size int `json:"size"`
  94. Field string `json:"field"`
  95. NumericRanges []*numericRange `json:"numeric_ranges,omitempty"`
  96. DateTimeRanges []*dateTimeRange `json:"date_ranges,omitempty"`
  97. }
  98. func (fr *FacetRequest) Validate() error {
  99. nrCount := len(fr.NumericRanges)
  100. drCount := len(fr.DateTimeRanges)
  101. if nrCount > 0 && drCount > 0 {
  102. return fmt.Errorf("facet can only conain numeric ranges or date ranges, not both")
  103. }
  104. if nrCount > 0 {
  105. nrNames := map[string]interface{}{}
  106. for _, nr := range fr.NumericRanges {
  107. if _, ok := nrNames[nr.Name]; ok {
  108. return fmt.Errorf("numeric ranges contains duplicate name '%s'", nr.Name)
  109. }
  110. nrNames[nr.Name] = struct{}{}
  111. if nr.Min == nil && nr.Max == nil {
  112. return fmt.Errorf("numeric range query must specify either min, max or both for range name '%s'", nr.Name)
  113. }
  114. }
  115. } else {
  116. dateTimeParser, err := cache.DateTimeParserNamed(defaultDateTimeParser)
  117. if err != nil {
  118. return err
  119. }
  120. drNames := map[string]interface{}{}
  121. for _, dr := range fr.DateTimeRanges {
  122. if _, ok := drNames[dr.Name]; ok {
  123. return fmt.Errorf("date ranges contains duplicate name '%s'", dr.Name)
  124. }
  125. drNames[dr.Name] = struct{}{}
  126. start, end := dr.ParseDates(dateTimeParser)
  127. if start.IsZero() && end.IsZero() {
  128. return fmt.Errorf("date range query must specify either start, end or both for range name '%s'", dr.Name)
  129. }
  130. }
  131. }
  132. return nil
  133. }
  134. // NewFacetRequest creates a facet on the specified
  135. // field that limits the number of entries to the
  136. // specified size.
  137. func NewFacetRequest(field string, size int) *FacetRequest {
  138. return &FacetRequest{
  139. Field: field,
  140. Size: size,
  141. }
  142. }
  143. // AddDateTimeRange adds a bucket to a field
  144. // containing date values. Documents with a
  145. // date value falling into this range are tabulated
  146. // as part of this bucket/range.
  147. func (fr *FacetRequest) AddDateTimeRange(name string, start, end time.Time) {
  148. if fr.DateTimeRanges == nil {
  149. fr.DateTimeRanges = make([]*dateTimeRange, 0, 1)
  150. }
  151. fr.DateTimeRanges = append(fr.DateTimeRanges, &dateTimeRange{Name: name, Start: start, End: end})
  152. }
  153. // AddDateTimeRangeString adds a bucket to a field
  154. // containing date values.
  155. func (fr *FacetRequest) AddDateTimeRangeString(name string, start, end *string) {
  156. if fr.DateTimeRanges == nil {
  157. fr.DateTimeRanges = make([]*dateTimeRange, 0, 1)
  158. }
  159. fr.DateTimeRanges = append(fr.DateTimeRanges,
  160. &dateTimeRange{Name: name, startString: start, endString: end})
  161. }
  162. // AddNumericRange adds a bucket to a field
  163. // containing numeric values. Documents with a
  164. // numeric value falling into this range are
  165. // tabulated as part of this bucket/range.
  166. func (fr *FacetRequest) AddNumericRange(name string, min, max *float64) {
  167. if fr.NumericRanges == nil {
  168. fr.NumericRanges = make([]*numericRange, 0, 1)
  169. }
  170. fr.NumericRanges = append(fr.NumericRanges, &numericRange{Name: name, Min: min, Max: max})
  171. }
  172. // FacetsRequest groups together all the
  173. // FacetRequest objects for a single query.
  174. type FacetsRequest map[string]*FacetRequest
  175. func (fr FacetsRequest) Validate() error {
  176. for _, v := range fr {
  177. err := v.Validate()
  178. if err != nil {
  179. return err
  180. }
  181. }
  182. return nil
  183. }
  184. // HighlightRequest describes how field matches
  185. // should be highlighted.
  186. type HighlightRequest struct {
  187. Style *string `json:"style"`
  188. Fields []string `json:"fields"`
  189. }
  190. // NewHighlight creates a default
  191. // HighlightRequest.
  192. func NewHighlight() *HighlightRequest {
  193. return &HighlightRequest{}
  194. }
  195. // NewHighlightWithStyle creates a HighlightRequest
  196. // with an alternate style.
  197. func NewHighlightWithStyle(style string) *HighlightRequest {
  198. return &HighlightRequest{
  199. Style: &style,
  200. }
  201. }
  202. func (h *HighlightRequest) AddField(field string) {
  203. if h.Fields == nil {
  204. h.Fields = make([]string, 0, 1)
  205. }
  206. h.Fields = append(h.Fields, field)
  207. }
  208. // A SearchRequest describes all the parameters
  209. // needed to search the index.
  210. // Query is required.
  211. // Size/From describe how much and which part of the
  212. // result set to return.
  213. // Highlight describes optional search result
  214. // highlighting.
  215. // Fields describes a list of field values which
  216. // should be retrieved for result documents, provided they
  217. // were stored while indexing.
  218. // Facets describe the set of facets to be computed.
  219. // Explain triggers inclusion of additional search
  220. // result score explanations.
  221. // Sort describes the desired order for the results to be returned.
  222. //
  223. // A special field named "*" can be used to return all fields.
  224. type SearchRequest struct {
  225. Query query.Query `json:"query"`
  226. Size int `json:"size"`
  227. From int `json:"from"`
  228. Highlight *HighlightRequest `json:"highlight"`
  229. Fields []string `json:"fields"`
  230. Facets FacetsRequest `json:"facets"`
  231. Explain bool `json:"explain"`
  232. Sort search.SortOrder `json:"sort"`
  233. IncludeLocations bool `json:"includeLocations"`
  234. }
  235. func (r *SearchRequest) Validate() error {
  236. if srq, ok := r.Query.(query.ValidatableQuery); ok {
  237. err := srq.Validate()
  238. if err != nil {
  239. return err
  240. }
  241. }
  242. return r.Facets.Validate()
  243. }
  244. // AddFacet adds a FacetRequest to this SearchRequest
  245. func (r *SearchRequest) AddFacet(facetName string, f *FacetRequest) {
  246. if r.Facets == nil {
  247. r.Facets = make(FacetsRequest, 1)
  248. }
  249. r.Facets[facetName] = f
  250. }
  251. // SortBy changes the request to use the requested sort order
  252. // this form uses the simplified syntax with an array of strings
  253. // each string can either be a field name
  254. // or the magic value _id and _score which refer to the doc id and search score
  255. // any of these values can optionally be prefixed with - to reverse the order
  256. func (r *SearchRequest) SortBy(order []string) {
  257. so := search.ParseSortOrderStrings(order)
  258. r.Sort = so
  259. }
  260. // SortByCustom changes the request to use the requested sort order
  261. func (r *SearchRequest) SortByCustom(order search.SortOrder) {
  262. r.Sort = order
  263. }
  264. // UnmarshalJSON deserializes a JSON representation of
  265. // a SearchRequest
  266. func (r *SearchRequest) UnmarshalJSON(input []byte) error {
  267. var temp struct {
  268. Q json.RawMessage `json:"query"`
  269. Size *int `json:"size"`
  270. From int `json:"from"`
  271. Highlight *HighlightRequest `json:"highlight"`
  272. Fields []string `json:"fields"`
  273. Facets FacetsRequest `json:"facets"`
  274. Explain bool `json:"explain"`
  275. Sort []json.RawMessage `json:"sort"`
  276. IncludeLocations bool `json:"includeLocations"`
  277. }
  278. err := json.Unmarshal(input, &temp)
  279. if err != nil {
  280. return err
  281. }
  282. if temp.Size == nil {
  283. r.Size = 10
  284. } else {
  285. r.Size = *temp.Size
  286. }
  287. if temp.Sort == nil {
  288. r.Sort = search.SortOrder{&search.SortScore{Desc: true}}
  289. } else {
  290. r.Sort, err = search.ParseSortOrderJSON(temp.Sort)
  291. if err != nil {
  292. return err
  293. }
  294. }
  295. r.From = temp.From
  296. r.Explain = temp.Explain
  297. r.Highlight = temp.Highlight
  298. r.Fields = temp.Fields
  299. r.Facets = temp.Facets
  300. r.IncludeLocations = temp.IncludeLocations
  301. r.Query, err = query.ParseQuery(temp.Q)
  302. if err != nil {
  303. return err
  304. }
  305. if r.Size < 0 {
  306. r.Size = 10
  307. }
  308. if r.From < 0 {
  309. r.From = 0
  310. }
  311. return nil
  312. }
  313. // NewSearchRequest creates a new SearchRequest
  314. // for the Query, using default values for all
  315. // other search parameters.
  316. func NewSearchRequest(q query.Query) *SearchRequest {
  317. return NewSearchRequestOptions(q, 10, 0, false)
  318. }
  319. // NewSearchRequestOptions creates a new SearchRequest
  320. // for the Query, with the requested size, from
  321. // and explanation search parameters.
  322. // By default results are ordered by score, descending.
  323. func NewSearchRequestOptions(q query.Query, size, from int, explain bool) *SearchRequest {
  324. return &SearchRequest{
  325. Query: q,
  326. Size: size,
  327. From: from,
  328. Explain: explain,
  329. Sort: search.SortOrder{&search.SortScore{Desc: true}},
  330. }
  331. }
  332. // IndexErrMap tracks errors with the name of the index where it occurred
  333. type IndexErrMap map[string]error
  334. // MarshalJSON seralizes the error into a string for JSON consumption
  335. func (iem IndexErrMap) MarshalJSON() ([]byte, error) {
  336. tmp := make(map[string]string, len(iem))
  337. for k, v := range iem {
  338. tmp[k] = v.Error()
  339. }
  340. return json.Marshal(tmp)
  341. }
  342. func (iem IndexErrMap) UnmarshalJSON(data []byte) error {
  343. var tmp map[string]string
  344. err := json.Unmarshal(data, &tmp)
  345. if err != nil {
  346. return err
  347. }
  348. for k, v := range tmp {
  349. iem[k] = fmt.Errorf("%s", v)
  350. }
  351. return nil
  352. }
  353. // SearchStatus is a secion in the SearchResult reporting how many
  354. // underlying indexes were queried, how many were successful/failed
  355. // and a map of any errors that were encountered
  356. type SearchStatus struct {
  357. Total int `json:"total"`
  358. Failed int `json:"failed"`
  359. Successful int `json:"successful"`
  360. Errors IndexErrMap `json:"errors,omitempty"`
  361. }
  362. // Merge will merge together multiple SearchStatuses during a MultiSearch
  363. func (ss *SearchStatus) Merge(other *SearchStatus) {
  364. ss.Total += other.Total
  365. ss.Failed += other.Failed
  366. ss.Successful += other.Successful
  367. if len(other.Errors) > 0 {
  368. if ss.Errors == nil {
  369. ss.Errors = make(map[string]error)
  370. }
  371. for otherIndex, otherError := range other.Errors {
  372. ss.Errors[otherIndex] = otherError
  373. }
  374. }
  375. }
  376. // A SearchResult describes the results of executing
  377. // a SearchRequest.
  378. type SearchResult struct {
  379. Status *SearchStatus `json:"status"`
  380. Request *SearchRequest `json:"request"`
  381. Hits search.DocumentMatchCollection `json:"hits"`
  382. Total uint64 `json:"total_hits"`
  383. MaxScore float64 `json:"max_score"`
  384. Took time.Duration `json:"took"`
  385. Facets search.FacetResults `json:"facets"`
  386. }
  387. func (sr *SearchResult) String() string {
  388. rv := ""
  389. if sr.Total > 0 {
  390. if sr.Request.Size > 0 {
  391. rv = fmt.Sprintf("%d matches, showing %d through %d, took %s\n", sr.Total, sr.Request.From+1, sr.Request.From+len(sr.Hits), sr.Took)
  392. for i, hit := range sr.Hits {
  393. rv += fmt.Sprintf("%5d. %s (%f)\n", i+sr.Request.From+1, hit.ID, hit.Score)
  394. for fragmentField, fragments := range hit.Fragments {
  395. rv += fmt.Sprintf("\t%s\n", fragmentField)
  396. for _, fragment := range fragments {
  397. rv += fmt.Sprintf("\t\t%s\n", fragment)
  398. }
  399. }
  400. for otherFieldName, otherFieldValue := range hit.Fields {
  401. if _, ok := hit.Fragments[otherFieldName]; !ok {
  402. rv += fmt.Sprintf("\t%s\n", otherFieldName)
  403. rv += fmt.Sprintf("\t\t%v\n", otherFieldValue)
  404. }
  405. }
  406. }
  407. } else {
  408. rv = fmt.Sprintf("%d matches, took %s\n", sr.Total, sr.Took)
  409. }
  410. } else {
  411. rv = "No matches"
  412. }
  413. if len(sr.Facets) > 0 {
  414. rv += fmt.Sprintf("Facets:\n")
  415. for fn, f := range sr.Facets {
  416. rv += fmt.Sprintf("%s(%d)\n", fn, f.Total)
  417. for _, t := range f.Terms {
  418. rv += fmt.Sprintf("\t%s(%d)\n", t.Term, t.Count)
  419. }
  420. if f.Other != 0 {
  421. rv += fmt.Sprintf("\tOther(%d)\n", f.Other)
  422. }
  423. }
  424. }
  425. return rv
  426. }
  427. // Merge will merge together multiple SearchResults during a MultiSearch
  428. func (sr *SearchResult) Merge(other *SearchResult) {
  429. sr.Status.Merge(other.Status)
  430. sr.Hits = append(sr.Hits, other.Hits...)
  431. sr.Total += other.Total
  432. if other.MaxScore > sr.MaxScore {
  433. sr.MaxScore = other.MaxScore
  434. }
  435. sr.Facets.Merge(other.Facets)
  436. }