Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

elasticsearch.go 8.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. // Copyright 2019 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package elasticsearch
  4. import (
  5. "context"
  6. "fmt"
  7. "strconv"
  8. "strings"
  9. "code.gitea.io/gitea/modules/graceful"
  10. indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
  11. inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
  12. "code.gitea.io/gitea/modules/indexer/issues/internal"
  13. "github.com/olivere/elastic/v7"
  14. )
  const (
  	// issueIndexerLatestVersion is the index version that NewIndexer hands to
  	// the inner Elasticsearch indexer.
  	issueIndexerLatestVersion = 1

  	// Multi-match query types; currently only these two are used.
  	// Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
  	esMultiMatchTypeBestFields   = "best_fields"
  	esMultiMatchTypePhrasePrefix = "phrase_prefix"
  )
  22. var _ internal.Indexer = &Indexer{}
  23. // Indexer implements Indexer interface
  24. type Indexer struct {
  25. inner *inner_elasticsearch.Indexer
  26. indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much
  27. }
  28. // NewIndexer creates a new elasticsearch indexer
  29. func NewIndexer(url, indexerName string) *Indexer {
  30. inner := inner_elasticsearch.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping)
  31. indexer := &Indexer{
  32. inner: inner,
  33. Indexer: inner,
  34. }
  35. return indexer
  36. }
  const (
  	// defaultMapping is the index mapping that NewIndexer passes to the inner
  	// Elasticsearch indexer. It declares one indexed property per issue field:
  	// text for title/content/comments, boolean for the flags, and integer for
  	// IDs, timestamps and counters.
  	defaultMapping = `
  {
  	"mappings": {
  		"properties": {
  			"id": { "type": "integer", "index": true },
  			"repo_id": { "type": "integer", "index": true },
  			"is_public": { "type": "boolean", "index": true },
  			"title": { "type": "text", "index": true },
  			"content": { "type": "text", "index": true },
  			"comments": { "type" : "text", "index": true },
  			"is_pull": { "type": "boolean", "index": true },
  			"is_closed": { "type": "boolean", "index": true },
  			"label_ids": { "type": "integer", "index": true },
  			"no_label": { "type": "boolean", "index": true },
  			"milestone_id": { "type": "integer", "index": true },
  			"project_id": { "type": "integer", "index": true },
  			"project_board_id": { "type": "integer", "index": true },
  			"poster_id": { "type": "integer", "index": true },
  			"assignee_id": { "type": "integer", "index": true },
  			"mention_ids": { "type": "integer", "index": true },
  			"reviewed_ids": { "type": "integer", "index": true },
  			"review_requested_ids": { "type": "integer", "index": true },
  			"subscriber_ids": { "type": "integer", "index": true },
  			"updated_unix": { "type": "integer", "index": true },
  			"created_unix": { "type": "integer", "index": true },
  			"deadline_unix": { "type": "integer", "index": true },
  			"comment_count": { "type": "integer", "index": true }
  		}
  	}
  }
  `
  )
  70. // Index will save the index data
  71. func (b *Indexer) Index(ctx context.Context, issues ...*internal.IndexerData) error {
  72. if len(issues) == 0 {
  73. return nil
  74. } else if len(issues) == 1 {
  75. issue := issues[0]
  76. _, err := b.inner.Client.Index().
  77. Index(b.inner.VersionedIndexName()).
  78. Id(fmt.Sprintf("%d", issue.ID)).
  79. BodyJson(issue).
  80. Do(ctx)
  81. return err
  82. }
  83. reqs := make([]elastic.BulkableRequest, 0)
  84. for _, issue := range issues {
  85. reqs = append(reqs,
  86. elastic.NewBulkIndexRequest().
  87. Index(b.inner.VersionedIndexName()).
  88. Id(fmt.Sprintf("%d", issue.ID)).
  89. Doc(issue),
  90. )
  91. }
  92. _, err := b.inner.Client.Bulk().
  93. Index(b.inner.VersionedIndexName()).
  94. Add(reqs...).
  95. Do(graceful.GetManager().HammerContext())
  96. return err
  97. }
  98. // Delete deletes indexes by ids
  99. func (b *Indexer) Delete(ctx context.Context, ids ...int64) error {
  100. if len(ids) == 0 {
  101. return nil
  102. } else if len(ids) == 1 {
  103. _, err := b.inner.Client.Delete().
  104. Index(b.inner.VersionedIndexName()).
  105. Id(fmt.Sprintf("%d", ids[0])).
  106. Do(ctx)
  107. return err
  108. }
  109. reqs := make([]elastic.BulkableRequest, 0)
  110. for _, id := range ids {
  111. reqs = append(reqs,
  112. elastic.NewBulkDeleteRequest().
  113. Index(b.inner.VersionedIndexName()).
  114. Id(fmt.Sprintf("%d", id)),
  115. )
  116. }
  117. _, err := b.inner.Client.Bulk().
  118. Index(b.inner.VersionedIndexName()).
  119. Add(reqs...).
  120. Do(graceful.GetManager().HammerContext())
  121. return err
  122. }
  123. // Search searches for issues by given conditions.
  124. // Returns the matching issue IDs
  125. func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
  126. query := elastic.NewBoolQuery()
  127. if options.Keyword != "" {
  128. searchType := esMultiMatchTypePhrasePrefix
  129. if options.IsFuzzyKeyword {
  130. searchType = esMultiMatchTypeBestFields
  131. }
  132. query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(searchType))
  133. }
  134. if len(options.RepoIDs) > 0 {
  135. q := elastic.NewBoolQuery()
  136. q.Should(elastic.NewTermsQuery("repo_id", toAnySlice(options.RepoIDs)...))
  137. if options.AllPublic {
  138. q.Should(elastic.NewTermQuery("is_public", true))
  139. }
  140. query.Must(q)
  141. }
  142. if options.IsPull.Has() {
  143. query.Must(elastic.NewTermQuery("is_pull", options.IsPull.Value()))
  144. }
  145. if options.IsClosed.Has() {
  146. query.Must(elastic.NewTermQuery("is_closed", options.IsClosed.Value()))
  147. }
  148. if options.NoLabelOnly {
  149. query.Must(elastic.NewTermQuery("no_label", true))
  150. } else {
  151. if len(options.IncludedLabelIDs) > 0 {
  152. q := elastic.NewBoolQuery()
  153. for _, labelID := range options.IncludedLabelIDs {
  154. q.Must(elastic.NewTermQuery("label_ids", labelID))
  155. }
  156. query.Must(q)
  157. } else if len(options.IncludedAnyLabelIDs) > 0 {
  158. query.Must(elastic.NewTermsQuery("label_ids", toAnySlice(options.IncludedAnyLabelIDs)...))
  159. }
  160. if len(options.ExcludedLabelIDs) > 0 {
  161. q := elastic.NewBoolQuery()
  162. for _, labelID := range options.ExcludedLabelIDs {
  163. q.MustNot(elastic.NewTermQuery("label_ids", labelID))
  164. }
  165. query.Must(q)
  166. }
  167. }
  168. if len(options.MilestoneIDs) > 0 {
  169. query.Must(elastic.NewTermsQuery("milestone_id", toAnySlice(options.MilestoneIDs)...))
  170. }
  171. if options.ProjectID.Has() {
  172. query.Must(elastic.NewTermQuery("project_id", options.ProjectID.Value()))
  173. }
  174. if options.ProjectBoardID.Has() {
  175. query.Must(elastic.NewTermQuery("project_board_id", options.ProjectBoardID.Value()))
  176. }
  177. if options.PosterID.Has() {
  178. query.Must(elastic.NewTermQuery("poster_id", options.PosterID.Value()))
  179. }
  180. if options.AssigneeID.Has() {
  181. query.Must(elastic.NewTermQuery("assignee_id", options.AssigneeID.Value()))
  182. }
  183. if options.MentionID.Has() {
  184. query.Must(elastic.NewTermQuery("mention_ids", options.MentionID.Value()))
  185. }
  186. if options.ReviewedID.Has() {
  187. query.Must(elastic.NewTermQuery("reviewed_ids", options.ReviewedID.Value()))
  188. }
  189. if options.ReviewRequestedID.Has() {
  190. query.Must(elastic.NewTermQuery("review_requested_ids", options.ReviewRequestedID.Value()))
  191. }
  192. if options.SubscriberID.Has() {
  193. query.Must(elastic.NewTermQuery("subscriber_ids", options.SubscriberID.Value()))
  194. }
  195. if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() {
  196. q := elastic.NewRangeQuery("updated_unix")
  197. if options.UpdatedAfterUnix.Has() {
  198. q.Gte(options.UpdatedAfterUnix.Value())
  199. }
  200. if options.UpdatedBeforeUnix.Has() {
  201. q.Lte(options.UpdatedBeforeUnix.Value())
  202. }
  203. query.Must(q)
  204. }
  205. if options.SortBy == "" {
  206. options.SortBy = internal.SortByCreatedAsc
  207. }
  208. sortBy := []elastic.Sorter{
  209. parseSortBy(options.SortBy),
  210. elastic.NewFieldSort("id").Desc(),
  211. }
  212. // See https://stackoverflow.com/questions/35206409/elasticsearch-2-1-result-window-is-too-large-index-max-result-window/35221900
  213. // TODO: make it configurable since it's configurable in elasticsearch
  214. const maxPageSize = 10000
  215. skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxPageSize)
  216. searchResult, err := b.inner.Client.Search().
  217. Index(b.inner.VersionedIndexName()).
  218. Query(query).
  219. SortBy(sortBy...).
  220. From(skip).Size(limit).
  221. Do(ctx)
  222. if err != nil {
  223. return nil, err
  224. }
  225. hits := make([]internal.Match, 0, limit)
  226. for _, hit := range searchResult.Hits.Hits {
  227. id, _ := strconv.ParseInt(hit.Id, 10, 64)
  228. hits = append(hits, internal.Match{
  229. ID: id,
  230. })
  231. }
  232. return &internal.SearchResult{
  233. Total: searchResult.TotalHits(),
  234. Hits: hits,
  235. }, nil
  236. }
  // toAnySlice copies s into a new []any of the same length, keeping element
  // order. The result is always non-nil, even when s is nil or empty.
  func toAnySlice[T any](s []T) []any {
  	ret := make([]any, len(s))
  	for i := range s {
  		ret[i] = s[i]
  	}
  	return ret
  }
  244. func parseSortBy(sortBy internal.SortBy) elastic.Sorter {
  245. field := strings.TrimPrefix(string(sortBy), "-")
  246. ret := elastic.NewFieldSort(field)
  247. if strings.HasPrefix(string(sortBy), "-") {
  248. ret.Desc()
  249. }
  250. return ret
  251. }