您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符


  1. // Copyright 2018 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package bleve
  4. import (
  5. "context"
  6. indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
  7. inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve"
  8. "code.gitea.io/gitea/modules/indexer/issues/internal"
  9. "github.com/blevesearch/bleve/v2"
  10. "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
  11. "github.com/blevesearch/bleve/v2/analysis/token/camelcase"
  12. "github.com/blevesearch/bleve/v2/analysis/token/lowercase"
  13. "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
  14. "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
  15. "github.com/blevesearch/bleve/v2/mapping"
  16. "github.com/blevesearch/bleve/v2/search/query"
  17. )
  // Names and schema version used by the bleve issue index.
  const (
  	// issueIndexerAnalyzer is the name under which the custom analyzer is
  	// registered on the index mapping; it is also set as the default analyzer
  	// and referenced by the keyword phrase queries in Search.
  	issueIndexerAnalyzer = "issueIndexer"

  	// issueIndexerDocType is the document type reported by IndexerData.Type
  	// and the key under which the issue document mapping is registered.
  	issueIndexerDocType = "issueIndexerDocType"

  	// issueIndexerLatestVersion is the index version handed to the inner
  	// bleve indexer by NewIndexer.
  	issueIndexerLatestVersion = 4
  )
  23. const unicodeNormalizeName = "unicodeNormalize"
  24. func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
  25. return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
  26. "type": unicodenorm.Name,
  27. "form": unicodenorm.NFC,
  28. })
  29. }
  // maxBatchSize is the size argument given to every flushing batch that Index
  // and Delete create.
  const maxBatchSize = 16
  31. // IndexerData an update to the issue indexer
  32. type IndexerData internal.IndexerData
  33. // Type returns the document type, for bleve's mapping.Classifier interface.
  34. func (i *IndexerData) Type() string {
  35. return issueIndexerDocType
  36. }
  37. // generateIssueIndexMapping generates the bleve index mapping for issues
  38. func generateIssueIndexMapping() (mapping.IndexMapping, error) {
  39. mapping := bleve.NewIndexMapping()
  40. docMapping := bleve.NewDocumentMapping()
  41. numericFieldMapping := bleve.NewNumericFieldMapping()
  42. numericFieldMapping.Store = false
  43. numericFieldMapping.IncludeInAll = false
  44. docMapping.AddFieldMappingsAt("repo_id", numericFieldMapping)
  45. textFieldMapping := bleve.NewTextFieldMapping()
  46. textFieldMapping.Store = false
  47. textFieldMapping.IncludeInAll = false
  48. boolFieldMapping := bleve.NewBooleanFieldMapping()
  49. boolFieldMapping.Store = false
  50. boolFieldMapping.IncludeInAll = false
  51. numberFieldMapping := bleve.NewNumericFieldMapping()
  52. numberFieldMapping.Store = false
  53. numberFieldMapping.IncludeInAll = false
  54. docMapping.AddFieldMappingsAt("is_public", boolFieldMapping)
  55. docMapping.AddFieldMappingsAt("title", textFieldMapping)
  56. docMapping.AddFieldMappingsAt("content", textFieldMapping)
  57. docMapping.AddFieldMappingsAt("comments", textFieldMapping)
  58. docMapping.AddFieldMappingsAt("is_pull", boolFieldMapping)
  59. docMapping.AddFieldMappingsAt("is_closed", boolFieldMapping)
  60. docMapping.AddFieldMappingsAt("label_ids", numberFieldMapping)
  61. docMapping.AddFieldMappingsAt("no_label", boolFieldMapping)
  62. docMapping.AddFieldMappingsAt("milestone_id", numberFieldMapping)
  63. docMapping.AddFieldMappingsAt("project_id", numberFieldMapping)
  64. docMapping.AddFieldMappingsAt("project_board_id", numberFieldMapping)
  65. docMapping.AddFieldMappingsAt("poster_id", numberFieldMapping)
  66. docMapping.AddFieldMappingsAt("assignee_id", numberFieldMapping)
  67. docMapping.AddFieldMappingsAt("mention_ids", numberFieldMapping)
  68. docMapping.AddFieldMappingsAt("reviewed_ids", numberFieldMapping)
  69. docMapping.AddFieldMappingsAt("review_requested_ids", numberFieldMapping)
  70. docMapping.AddFieldMappingsAt("subscriber_ids", numberFieldMapping)
  71. docMapping.AddFieldMappingsAt("updated_unix", numberFieldMapping)
  72. docMapping.AddFieldMappingsAt("created_unix", numberFieldMapping)
  73. docMapping.AddFieldMappingsAt("deadline_unix", numberFieldMapping)
  74. docMapping.AddFieldMappingsAt("comment_count", numberFieldMapping)
  75. if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
  76. return nil, err
  77. } else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{
  78. "type": custom.Name,
  79. "char_filters": []string{},
  80. "tokenizer": unicode.Name,
  81. "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
  82. }); err != nil {
  83. return nil, err
  84. }
  85. mapping.DefaultAnalyzer = issueIndexerAnalyzer
  86. mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
  87. mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
  88. mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() // disable default mapping, avoid indexing unexpected structs
  89. return mapping, nil
  90. }
  91. var _ internal.Indexer = &Indexer{}
  92. // Indexer implements Indexer interface
  93. type Indexer struct {
  94. inner *inner_bleve.Indexer
  95. indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much
  96. }
  97. // NewIndexer creates a new bleve local indexer
  98. func NewIndexer(indexDir string) *Indexer {
  99. inner := inner_bleve.NewIndexer(indexDir, issueIndexerLatestVersion, generateIssueIndexMapping)
  100. return &Indexer{
  101. Indexer: inner,
  102. inner: inner,
  103. }
  104. }
  105. // Index will save the index data
  106. func (b *Indexer) Index(_ context.Context, issues ...*internal.IndexerData) error {
  107. batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
  108. for _, issue := range issues {
  109. if err := batch.Index(indexer_internal.Base36(issue.ID), (*IndexerData)(issue)); err != nil {
  110. return err
  111. }
  112. }
  113. return batch.Flush()
  114. }
  115. // Delete deletes indexes by ids
  116. func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
  117. batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
  118. for _, id := range ids {
  119. if err := batch.Delete(indexer_internal.Base36(id)); err != nil {
  120. return err
  121. }
  122. }
  123. return batch.Flush()
  124. }
  125. // Search searches for issues by given conditions.
  126. // Returns the matching issue IDs
  127. func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
  128. var queries []query.Query
  129. if options.Keyword != "" {
  130. if options.IsFuzzyKeyword {
  131. queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
  132. inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer),
  133. inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer),
  134. inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer),
  135. }...))
  136. } else {
  137. queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
  138. inner_bleve.PrefixQuery(options.Keyword, "title"),
  139. inner_bleve.PrefixQuery(options.Keyword, "content"),
  140. inner_bleve.PrefixQuery(options.Keyword, "comments"),
  141. }...))
  142. }
  143. }
  144. if len(options.RepoIDs) > 0 || options.AllPublic {
  145. var repoQueries []query.Query
  146. for _, repoID := range options.RepoIDs {
  147. repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "repo_id"))
  148. }
  149. if options.AllPublic {
  150. repoQueries = append(repoQueries, inner_bleve.BoolFieldQuery(true, "is_public"))
  151. }
  152. queries = append(queries, bleve.NewDisjunctionQuery(repoQueries...))
  153. }
  154. if options.IsPull.Has() {
  155. queries = append(queries, inner_bleve.BoolFieldQuery(options.IsPull.Value(), "is_pull"))
  156. }
  157. if options.IsClosed.Has() {
  158. queries = append(queries, inner_bleve.BoolFieldQuery(options.IsClosed.Value(), "is_closed"))
  159. }
  160. if options.NoLabelOnly {
  161. queries = append(queries, inner_bleve.BoolFieldQuery(true, "no_label"))
  162. } else {
  163. if len(options.IncludedLabelIDs) > 0 {
  164. var includeQueries []query.Query
  165. for _, labelID := range options.IncludedLabelIDs {
  166. includeQueries = append(includeQueries, inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
  167. }
  168. queries = append(queries, bleve.NewConjunctionQuery(includeQueries...))
  169. } else if len(options.IncludedAnyLabelIDs) > 0 {
  170. var includeQueries []query.Query
  171. for _, labelID := range options.IncludedAnyLabelIDs {
  172. includeQueries = append(includeQueries, inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
  173. }
  174. queries = append(queries, bleve.NewDisjunctionQuery(includeQueries...))
  175. }
  176. if len(options.ExcludedLabelIDs) > 0 {
  177. var excludeQueries []query.Query
  178. for _, labelID := range options.ExcludedLabelIDs {
  179. q := bleve.NewBooleanQuery()
  180. q.AddMustNot(inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
  181. excludeQueries = append(excludeQueries, q)
  182. }
  183. queries = append(queries, bleve.NewConjunctionQuery(excludeQueries...))
  184. }
  185. }
  186. if len(options.MilestoneIDs) > 0 {
  187. var milestoneQueries []query.Query
  188. for _, milestoneID := range options.MilestoneIDs {
  189. milestoneQueries = append(milestoneQueries, inner_bleve.NumericEqualityQuery(milestoneID, "milestone_id"))
  190. }
  191. queries = append(queries, bleve.NewDisjunctionQuery(milestoneQueries...))
  192. }
  193. if options.ProjectID.Has() {
  194. queries = append(queries, inner_bleve.NumericEqualityQuery(options.ProjectID.Value(), "project_id"))
  195. }
  196. if options.ProjectBoardID.Has() {
  197. queries = append(queries, inner_bleve.NumericEqualityQuery(options.ProjectBoardID.Value(), "project_board_id"))
  198. }
  199. if options.PosterID.Has() {
  200. queries = append(queries, inner_bleve.NumericEqualityQuery(options.PosterID.Value(), "poster_id"))
  201. }
  202. if options.AssigneeID.Has() {
  203. queries = append(queries, inner_bleve.NumericEqualityQuery(options.AssigneeID.Value(), "assignee_id"))
  204. }
  205. if options.MentionID.Has() {
  206. queries = append(queries, inner_bleve.NumericEqualityQuery(options.MentionID.Value(), "mention_ids"))
  207. }
  208. if options.ReviewedID.Has() {
  209. queries = append(queries, inner_bleve.NumericEqualityQuery(options.ReviewedID.Value(), "reviewed_ids"))
  210. }
  211. if options.ReviewRequestedID.Has() {
  212. queries = append(queries, inner_bleve.NumericEqualityQuery(options.ReviewRequestedID.Value(), "review_requested_ids"))
  213. }
  214. if options.SubscriberID.Has() {
  215. queries = append(queries, inner_bleve.NumericEqualityQuery(options.SubscriberID.Value(), "subscriber_ids"))
  216. }
  217. if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() {
  218. queries = append(queries, inner_bleve.NumericRangeInclusiveQuery(
  219. options.UpdatedAfterUnix,
  220. options.UpdatedBeforeUnix,
  221. "updated_unix"))
  222. }
  223. var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...)
  224. if len(queries) == 0 {
  225. indexerQuery = bleve.NewMatchAllQuery()
  226. }
  227. skip, limit := indexer_internal.ParsePaginator(options.Paginator)
  228. search := bleve.NewSearchRequestOptions(indexerQuery, limit, skip, false)
  229. if options.SortBy == "" {
  230. options.SortBy = internal.SortByCreatedAsc
  231. }
  232. search.SortBy([]string{string(options.SortBy), "-_id"})
  233. result, err := b.inner.Indexer.SearchInContext(ctx, search)
  234. if err != nil {
  235. return nil, err
  236. }
  237. ret := &internal.SearchResult{
  238. Total: int64(result.Total),
  239. Hits: make([]internal.Match, 0, len(result.Hits)),
  240. }
  241. for _, hit := range result.Hits {
  242. id, err := indexer_internal.ParseBase36(hit.ID)
  243. if err != nil {
  244. return nil, err
  245. }
  246. ret.Hits = append(ret.Hits, internal.Match{
  247. ID: id,
  248. })
  249. }
  250. return ret, nil
  251. }