path: root/modules/indexer
author     Jason Song <i@wolfogre.com>   2023-07-31 14:28:53 +0800
committer  GitHub <noreply@github.com>   2023-07-31 06:28:53 +0000
commit     1e76a824bcd71acd59cdfb2c4547806bc34b3d86 (patch)
tree       e92523a9c82bbb8d91035a44a36bab7ef981e162 /modules/indexer
parent     aba9096999798efef448bee2f289917069f9b599 (diff)
download   gitea-1e76a824bcd71acd59cdfb2c4547806bc34b3d86.tar.gz
           gitea-1e76a824bcd71acd59cdfb2c4547806bc34b3d86.zip
Refactor and enhance issue indexer to support both searching, filtering and paging (#26012)
Fix #24662. Replace #24822 and #25708 (although it has been merged).

## Background

In the past, Gitea supported issue searching with a keyword and conditions in a less efficient way. It worked by searching for issues with the keyword and obtaining a limited set of IDs (since it is too heavy to fetch them all) from the indexer (bleve/elasticsearch/meilisearch), and then querying the database with the conditions to find a subset of the found IDs. This is why the results could be incomplete.

To solve this issue, we need to store all fields that could be used as conditions in the indexer and support both the keyword and the additional conditions when searching with the indexer.

## Major changes

- Redefine `IndexerData` to include all fields that could be used as filter conditions.
- Refactor `Search(ctx context.Context, kw string, repoIDs []int64, limit, start int, state string)` to `Search(ctx context.Context, options *SearchOptions)`, so it supports more conditions now.
- Change the data type stored in `issueIndexerQueue`. Use `IndexerMetadata` instead of `IndexerData`, in case the data has been updated while it is in the queue. This also reduces the storage size of the queue.
- Enhance searching with Bleve/Elasticsearch/Meilisearch, making them fully support `SearchOptions`. Also, update the data versions.
- Keep most logic of the database indexer, but remove `issues.SearchIssueIDsByKeyword` in `models` to avoid confusion about where the entry point for searching issues is.
- Start a Meilisearch instance to test it in unit tests.
- Add unit tests with almost full coverage for the Bleve/Elasticsearch/Meilisearch indexers.

---------

Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
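To illustrate the new entry point, here is a hedged sketch of how a caller might use the refactored API. The function `SearchIssues`, the `SearchOptions` type, the field names, and the `SortBy*` constants appear in this diff; the package alias, the helper function, and the `util.OptionalBoolFalse` value are assumptions for illustration only.

```go
// Hedged sketch of the new call path; not part of the commit.
package example

import (
	"context"

	issue_indexer "code.gitea.io/gitea/modules/indexer/issues"
	"code.gitea.io/gitea/modules/util"
)

func searchOpenIssues(ctx context.Context, repoID int64, keyword string) ([]int64, int64, error) {
	// One SearchOptions value now carries the keyword and all filter conditions,
	// so the indexer can answer the whole query instead of post-filtering in the DB.
	return issue_indexer.SearchIssues(ctx, &issue_indexer.SearchOptions{
		Keyword:  keyword,
		RepoIDs:  []int64{repoID},
		IsClosed: util.OptionalBoolFalse, // assumed optional-bool value for "open only"
		SortBy:   issue_indexer.SortByUpdatedDesc,
	})
}
```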
Diffstat (limited to 'modules/indexer')
-rw-r--r--  modules/indexer/code/bleve/bleve.go | 13
-rw-r--r--  modules/indexer/internal/bleve/query.go | 53
-rw-r--r--  modules/indexer/internal/elasticsearch/indexer.go | 3
-rw-r--r--  modules/indexer/internal/meilisearch/filter.go | 119
-rw-r--r--  modules/indexer/internal/meilisearch/indexer.go | 6
-rw-r--r--  modules/indexer/internal/paginator.go | 41
-rw-r--r--  modules/indexer/issues/bleve/bleve.go | 188
-rw-r--r--  modules/indexer/issues/bleve/bleve_test.go | 77
-rw-r--r--  modules/indexer/issues/db/db.go | 60
-rw-r--r--  modules/indexer/issues/db/options.go | 114
-rw-r--r--  modules/indexer/issues/dboptions.go | 93
-rw-r--r--  modules/indexer/issues/elasticsearch/elasticsearch.go | 208
-rw-r--r--  modules/indexer/issues/elasticsearch/elasticsearch_test.go | 48
-rw-r--r--  modules/indexer/issues/indexer.go | 222
-rw-r--r--  modules/indexer/issues/indexer_test.go | 88
-rw-r--r--  modules/indexer/issues/internal/indexer.go | 10
-rw-r--r--  modules/indexer/issues/internal/model.go | 110
-rw-r--r--  modules/indexer/issues/internal/tests/tests.go | 804
-rw-r--r--  modules/indexer/issues/meilisearch/meilisearch.go | 173
-rw-r--r--  modules/indexer/issues/meilisearch/meilisearch_test.go | 50
-rw-r--r--  modules/indexer/issues/util.go | 173
21 files changed, 2301 insertions, 352 deletions
diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go
index 1e34226e8d..0bfd85cb3f 100644
--- a/modules/indexer/code/bleve/bleve.go
+++ b/modules/indexer/code/bleve/bleve.go
@@ -41,15 +41,6 @@ const (
maxBatchSize = 16
)
-// numericEqualityQuery a numeric equality query for the given value and field
-func numericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
- f := float64(value)
- tru := true
- q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru)
- q.SetField(field)
- return q
-}
-
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
"type": unicodenorm.Name,
@@ -225,7 +216,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st
// Delete deletes indexes by ids
func (b *Indexer) Delete(_ context.Context, repoID int64) error {
- query := numericEqualityQuery(repoID, "RepoID")
+ query := inner_bleve.NumericEqualityQuery(repoID, "RepoID")
searchRequest := bleve.NewSearchRequestOptions(query, 2147483647, 0, false)
result, err := b.inner.Indexer.Search(searchRequest)
if err != nil {
@@ -262,7 +253,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
if len(repoIDs) > 0 {
repoQueries := make([]query.Query, 0, len(repoIDs))
for _, repoID := range repoIDs {
- repoQueries = append(repoQueries, numericEqualityQuery(repoID, "RepoID"))
+ repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "RepoID"))
}
indexerQuery = bleve.NewConjunctionQuery(
diff --git a/modules/indexer/internal/bleve/query.go b/modules/indexer/internal/bleve/query.go
new file mode 100644
index 0000000000..c7d66538c1
--- /dev/null
+++ b/modules/indexer/internal/bleve/query.go
@@ -0,0 +1,53 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package bleve
+
+import (
+ "github.com/blevesearch/bleve/v2"
+ "github.com/blevesearch/bleve/v2/search/query"
+)
+
+// NumericEqualityQuery generates a numeric equality query for the given value and field
+func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
+ f := float64(value)
+ tru := true
+ q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru)
+ q.SetField(field)
+ return q
+}
+
+// MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer
+func MatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery {
+ q := bleve.NewMatchPhraseQuery(matchPhrase)
+ q.FieldVal = field
+ q.Analyzer = analyzer
+ return q
+}
+
+// BoolFieldQuery generates a bool field query for the given value and field
+func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery {
+ q := bleve.NewBoolFieldQuery(value)
+ q.SetField(field)
+ return q
+}
+
+func NumericRangeInclusiveQuery(min, max *int64, field string) *query.NumericRangeQuery {
+ var minF, maxF *float64
+ var minI, maxI *bool
+ if min != nil {
+ minF = new(float64)
+ *minF = float64(*min)
+ minI = new(bool)
+ *minI = true
+ }
+ if max != nil {
+ maxF = new(float64)
+ *maxF = float64(*max)
+ maxI = new(bool)
+ *maxI = true
+ }
+ q := bleve.NewNumericRangeInclusiveQuery(minF, maxF, minI, maxI)
+ q.SetField(field)
+ return q
+}
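For orientation, the shared helpers above are combined into field-level conditions by the indexers later in this diff. The following is an illustrative sketch (not part of the commit) of that pattern; the field names match the issue document mapping, while the function and its parameters are assumed for the example.

```go
// Illustrative only: build a conjunction of per-field queries, as the
// issue indexer's Search does further down in this diff.
package example

import (
	"github.com/blevesearch/bleve/v2"

	inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve"
)

func openIssuesInRepoRequest(repoID int64) *bleve.SearchRequest {
	q := bleve.NewConjunctionQuery(
		inner_bleve.NumericEqualityQuery(repoID, "repo_id"), // exact repo match
		inner_bleve.BoolFieldQuery(false, "is_closed"),      // open issues only
	)
	return bleve.NewSearchRequestOptions(q, 50, 0, false) // limit 50, skip 0
}
```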
diff --git a/modules/indexer/internal/elasticsearch/indexer.go b/modules/indexer/internal/elasticsearch/indexer.go
index 2c60efad56..395eea3bce 100644
--- a/modules/indexer/internal/elasticsearch/indexer.go
+++ b/modules/indexer/internal/elasticsearch/indexer.go
@@ -76,7 +76,8 @@ func (i *Indexer) Ping(ctx context.Context) error {
if err != nil {
return err
}
- if resp.Status != "green" {
+ if resp.Status != "green" && resp.Status != "yellow" {
+ // It's healthy if the status is green, and it's available if the status is yellow,
// see https://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-health.html
return fmt.Errorf("status of elasticsearch cluster is %s", resp.Status)
}
diff --git a/modules/indexer/internal/meilisearch/filter.go b/modules/indexer/internal/meilisearch/filter.go
new file mode 100644
index 0000000000..593177f163
--- /dev/null
+++ b/modules/indexer/internal/meilisearch/filter.go
@@ -0,0 +1,119 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package meilisearch
+
+import (
+ "fmt"
+ "strings"
+)
+
+// Filter represents a filter for meilisearch queries.
+// It's just a simple wrapper around a string.
+// DO NOT assume that it is a complete implementation.
+type Filter interface {
+ Statement() string
+}
+
+type FilterAnd struct {
+ filters []Filter
+}
+
+func (f *FilterAnd) Statement() string {
+ var statements []string
+ for _, filter := range f.filters {
+ if s := filter.Statement(); s != "" {
+ statements = append(statements, fmt.Sprintf("(%s)", s))
+ }
+ }
+ return strings.Join(statements, " AND ")
+}
+
+func (f *FilterAnd) And(filter Filter) *FilterAnd {
+ f.filters = append(f.filters, filter)
+ return f
+}
+
+type FilterOr struct {
+ filters []Filter
+}
+
+func (f *FilterOr) Statement() string {
+ var statements []string
+ for _, filter := range f.filters {
+ if s := filter.Statement(); s != "" {
+ statements = append(statements, fmt.Sprintf("(%s)", s))
+ }
+ }
+ return strings.Join(statements, " OR ")
+}
+
+func (f *FilterOr) Or(filter Filter) *FilterOr {
+ f.filters = append(f.filters, filter)
+ return f
+}
+
+type FilterIn string
+
+// NewFilterIn creates a new FilterIn.
+// It supports int64 only, to avoid extra work to handle strings with special characters.
+func NewFilterIn[T int64](field string, values ...T) FilterIn {
+ if len(values) == 0 {
+ return ""
+ }
+ vs := make([]string, len(values))
+ for i, v := range values {
+ vs[i] = fmt.Sprintf("%v", v)
+ }
+ return FilterIn(fmt.Sprintf("%s IN [%v]", field, strings.Join(vs, ", ")))
+}
+
+func (f FilterIn) Statement() string {
+ return string(f)
+}
+
+type FilterEq string
+
+// NewFilterEq creates a new FilterEq.
+// It supports int64 and bool only, to avoid extra work to handle strings with special characters.
+func NewFilterEq[T bool | int64](field string, value T) FilterEq {
+ return FilterEq(fmt.Sprintf("%s = %v", field, value))
+}
+
+func (f FilterEq) Statement() string {
+ return string(f)
+}
+
+type FilterNot string
+
+func NewFilterNot(filter Filter) FilterNot {
+ return FilterNot(fmt.Sprintf("NOT (%s)", filter.Statement()))
+}
+
+func (f FilterNot) Statement() string {
+ return string(f)
+}
+
+type FilterGte string
+
+// NewFilterGte creates a new FilterGte.
+// It supports int64 only, to avoid extra work to handle strings with special characters.
+func NewFilterGte[T int64](field string, value T) FilterGte {
+ return FilterGte(fmt.Sprintf("%s >= %v", field, value))
+}
+
+func (f FilterGte) Statement() string {
+ return string(f)
+}
+
+type FilterLte string
+
+// NewFilterLte creates a new FilterLte.
+// It supports int64 only, to avoid extra work to handle strings with special characters.
+func NewFilterLte[T int64](field string, value T) FilterLte {
+ return FilterLte(fmt.Sprintf("%s <= %v", field, value))
+}
+
+func (f FilterLte) Statement() string {
+ return string(f)
+}
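To make the composition concrete, here is a hedged sketch (not part of the commit) of how these filter types combine into a Meilisearch filter string. The types, constructors, and `Statement()` output format come from the file above; the wrapping function is assumed for illustration.

```go
// Illustrative only: compose a filter statement from the types defined above.
package example

import (
	inner_meilisearch "code.gitea.io/gitea/modules/indexer/internal/meilisearch"
)

func exampleFilter() string {
	f := (&inner_meilisearch.FilterAnd{}).
		And(inner_meilisearch.NewFilterEq("is_closed", false)).
		And((&inner_meilisearch.FilterOr{}).
			Or(inner_meilisearch.NewFilterIn("repo_id", []int64{1, 2}...)).
			Or(inner_meilisearch.NewFilterEq("is_public", true)))

	// Yields: (is_closed = false) AND ((repo_id IN [1, 2]) OR (is_public = true))
	return f.Statement()
}
```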
diff --git a/modules/indexer/internal/meilisearch/indexer.go b/modules/indexer/internal/meilisearch/indexer.go
index 06747ff7e0..b037249d43 100644
--- a/modules/indexer/internal/meilisearch/indexer.go
+++ b/modules/indexer/internal/meilisearch/indexer.go
@@ -17,14 +17,16 @@ type Indexer struct {
url, apiKey string
indexName string
version int
+ settings *meilisearch.Settings
}
-func NewIndexer(url, apiKey, indexName string, version int) *Indexer {
+func NewIndexer(url, apiKey, indexName string, version int, settings *meilisearch.Settings) *Indexer {
return &Indexer{
url: url,
apiKey: apiKey,
indexName: indexName,
version: version,
+ settings: settings,
}
}
@@ -57,7 +59,7 @@ func (i *Indexer) Init(_ context.Context) (bool, error) {
i.checkOldIndexes()
- _, err = i.Client.Index(i.VersionedIndexName()).UpdateFilterableAttributes(&[]string{"repo_id"})
+ _, err = i.Client.Index(i.VersionedIndexName()).UpdateSettings(i.settings)
return false, err
}
diff --git a/modules/indexer/internal/paginator.go b/modules/indexer/internal/paginator.go
new file mode 100644
index 0000000000..de0a33c06f
--- /dev/null
+++ b/modules/indexer/internal/paginator.go
@@ -0,0 +1,41 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package internal
+
+import (
+ "math"
+
+ "code.gitea.io/gitea/models/db"
+)
+
+// ParsePaginator parses a db.Paginator into a skip and limit
+func ParsePaginator(paginator db.Paginator, max ...int) (int, int) {
+ // Use a very large number to indicate no limit
+ unlimited := math.MaxInt32
+ if len(max) > 0 {
+ // Some indexer engines have a limit on the page size, respect that
+ unlimited = max[0]
+ }
+
+ if paginator == nil || paginator.IsListAll() {
+ return 0, unlimited
+ }
+
+ // Warning: Do not use GetSkipTake() for *db.ListOptions
+ // Its implementation could reset the page size with setting.API.MaxResponseItems
+ if listOptions, ok := paginator.(*db.ListOptions); ok {
+ if listOptions.Page >= 0 && listOptions.PageSize > 0 {
+ var start int
+ if listOptions.Page == 0 {
+ start = 0
+ } else {
+ start = (listOptions.Page - 1) * listOptions.PageSize
+ }
+ return start, listOptions.PageSize
+ }
+ return 0, unlimited
+ }
+
+ return paginator.GetSkipTake()
+}
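As a quick illustration (not part of the commit), the sketch below shows the skip/limit values `ParsePaginator` produces for the common cases, based on the logic above; the example function itself is assumed.

```go
// Illustrative only: skip/limit values produced by ParsePaginator.
package example

import (
	"code.gitea.io/gitea/models/db"
	"code.gitea.io/gitea/modules/indexer/internal"
)

func examplePagination() {
	// Page 3 with 20 items per page -> skip 40, limit 20.
	skip, limit := internal.ParsePaginator(&db.ListOptions{Page: 3, PageSize: 20})
	_ = skip  // 40
	_ = limit // 20

	// No paginator means "everything", capped by the engine's max page size.
	skip, limit = internal.ParsePaginator(nil, 10000)
	_ = skip  // 0
	_ = limit // 10000
}
```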
diff --git a/modules/indexer/issues/bleve/bleve.go b/modules/indexer/issues/bleve/bleve.go
index c368a67ab5..7c82cfbb79 100644
--- a/modules/indexer/issues/bleve/bleve.go
+++ b/modules/indexer/issues/bleve/bleve.go
@@ -23,25 +23,9 @@ import (
const (
issueIndexerAnalyzer = "issueIndexer"
issueIndexerDocType = "issueIndexerDocType"
- issueIndexerLatestVersion = 3
+ issueIndexerLatestVersion = 4
)
-// numericEqualityQuery a numeric equality query for the given value and field
-func numericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
- f := float64(value)
- tru := true
- q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru)
- q.SetField(field)
- return q
-}
-
-func newMatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery {
- q := bleve.NewMatchPhraseQuery(matchPhrase)
- q.FieldVal = field
- q.Analyzer = analyzer
- return q
-}
-
const unicodeNormalizeName = "unicodeNormalize"
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
@@ -74,10 +58,40 @@ func generateIssueIndexMapping() (mapping.IndexMapping, error) {
textFieldMapping := bleve.NewTextFieldMapping()
textFieldMapping.Store = false
textFieldMapping.IncludeInAll = false
+
+ boolFieldMapping := bleve.NewBooleanFieldMapping()
+ boolFieldMapping.Store = false
+ boolFieldMapping.IncludeInAll = false
+
+ numberFieldMapping := bleve.NewNumericFieldMapping()
+ numberFieldMapping.Store = false
+ numberFieldMapping.IncludeInAll = false
+
+ docMapping.AddFieldMappingsAt("is_public", boolFieldMapping)
+
docMapping.AddFieldMappingsAt("title", textFieldMapping)
docMapping.AddFieldMappingsAt("content", textFieldMapping)
docMapping.AddFieldMappingsAt("comments", textFieldMapping)
+ docMapping.AddFieldMappingsAt("is_pull", boolFieldMapping)
+ docMapping.AddFieldMappingsAt("is_closed", boolFieldMapping)
+ docMapping.AddFieldMappingsAt("label_ids", numberFieldMapping)
+ docMapping.AddFieldMappingsAt("no_label", boolFieldMapping)
+ docMapping.AddFieldMappingsAt("milestone_id", numberFieldMapping)
+ docMapping.AddFieldMappingsAt("project_id", numberFieldMapping)
+ docMapping.AddFieldMappingsAt("project_board_id", numberFieldMapping)
+ docMapping.AddFieldMappingsAt("poster_id", numberFieldMapping)
+ docMapping.AddFieldMappingsAt("assignee_id", numberFieldMapping)
+ docMapping.AddFieldMappingsAt("mention_ids", numberFieldMapping)
+ docMapping.AddFieldMappingsAt("reviewed_ids", numberFieldMapping)
+ docMapping.AddFieldMappingsAt("review_requested_ids", numberFieldMapping)
+ docMapping.AddFieldMappingsAt("subscriber_ids", numberFieldMapping)
+ docMapping.AddFieldMappingsAt("updated_unix", numberFieldMapping)
+
+ docMapping.AddFieldMappingsAt("created_unix", numberFieldMapping)
+ docMapping.AddFieldMappingsAt("deadline_unix", numberFieldMapping)
+ docMapping.AddFieldMappingsAt("comment_count", numberFieldMapping)
+
if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
return nil, err
} else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{
@@ -115,7 +129,7 @@ func NewIndexer(indexDir string) *Indexer {
}
// Index will save the index data
-func (b *Indexer) Index(_ context.Context, issues []*internal.IndexerData) error {
+func (b *Indexer) Index(_ context.Context, issues ...*internal.IndexerData) error {
batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
for _, issue := range issues {
if err := batch.Index(indexer_internal.Base36(issue.ID), (*IndexerData)(issue)); err != nil {
@@ -138,33 +152,127 @@ func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
// Search searches for issues by given conditions.
// Returns the matching issue IDs
-func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int, state string) (*internal.SearchResult, error) {
- var repoQueriesP []*query.NumericRangeQuery
- for _, repoID := range repoIDs {
- repoQueriesP = append(repoQueriesP, numericEqualityQuery(repoID, "repo_id"))
- }
- repoQueries := make([]query.Query, len(repoQueriesP))
- for i, v := range repoQueriesP {
- repoQueries[i] = query.Query(v)
- }
-
- indexerQuery := bleve.NewConjunctionQuery(
- bleve.NewDisjunctionQuery(repoQueries...),
- bleve.NewDisjunctionQuery(
- newMatchPhraseQuery(keyword, "title", issueIndexerAnalyzer),
- newMatchPhraseQuery(keyword, "content", issueIndexerAnalyzer),
- newMatchPhraseQuery(keyword, "comments", issueIndexerAnalyzer),
- ))
- search := bleve.NewSearchRequestOptions(indexerQuery, limit, start, false)
- search.SortBy([]string{"-_score"})
+func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
+ var queries []query.Query
+
+ if options.Keyword != "" {
+ keywordQueries := []query.Query{
+ inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer),
+ inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer),
+ inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer),
+ }
+ queries = append(queries, bleve.NewDisjunctionQuery(keywordQueries...))
+ }
+
+ if len(options.RepoIDs) > 0 || options.AllPublic {
+ var repoQueries []query.Query
+ for _, repoID := range options.RepoIDs {
+ repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "repo_id"))
+ }
+ if options.AllPublic {
+ repoQueries = append(repoQueries, inner_bleve.BoolFieldQuery(true, "is_public"))
+ }
+ queries = append(queries, bleve.NewDisjunctionQuery(repoQueries...))
+ }
+
+ if !options.IsPull.IsNone() {
+ queries = append(queries, inner_bleve.BoolFieldQuery(options.IsPull.IsTrue(), "is_pull"))
+ }
+ if !options.IsClosed.IsNone() {
+ queries = append(queries, inner_bleve.BoolFieldQuery(options.IsClosed.IsTrue(), "is_closed"))
+ }
+
+ if options.NoLabelOnly {
+ queries = append(queries, inner_bleve.BoolFieldQuery(true, "no_label"))
+ } else {
+ if len(options.IncludedLabelIDs) > 0 {
+ var includeQueries []query.Query
+ for _, labelID := range options.IncludedLabelIDs {
+ includeQueries = append(includeQueries, inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
+ }
+ queries = append(queries, bleve.NewConjunctionQuery(includeQueries...))
+ } else if len(options.IncludedAnyLabelIDs) > 0 {
+ var includeQueries []query.Query
+ for _, labelID := range options.IncludedAnyLabelIDs {
+ includeQueries = append(includeQueries, inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
+ }
+ queries = append(queries, bleve.NewDisjunctionQuery(includeQueries...))
+ }
+ if len(options.ExcludedLabelIDs) > 0 {
+ var excludeQueries []query.Query
+ for _, labelID := range options.ExcludedLabelIDs {
+ q := bleve.NewBooleanQuery()
+ q.AddMustNot(inner_bleve.NumericEqualityQuery(labelID, "label_ids"))
+ excludeQueries = append(excludeQueries, q)
+ }
+ queries = append(queries, bleve.NewConjunctionQuery(excludeQueries...))
+ }
+ }
+
+ if len(options.MilestoneIDs) > 0 {
+ var milestoneQueries []query.Query
+ for _, milestoneID := range options.MilestoneIDs {
+ milestoneQueries = append(milestoneQueries, inner_bleve.NumericEqualityQuery(milestoneID, "milestone_id"))
+ }
+ queries = append(queries, bleve.NewDisjunctionQuery(milestoneQueries...))
+ }
+
+ if options.ProjectID != nil {
+ queries = append(queries, inner_bleve.NumericEqualityQuery(*options.ProjectID, "project_id"))
+ }
+ if options.ProjectBoardID != nil {
+ queries = append(queries, inner_bleve.NumericEqualityQuery(*options.ProjectBoardID, "project_board_id"))
+ }
+
+ if options.PosterID != nil {
+ queries = append(queries, inner_bleve.NumericEqualityQuery(*options.PosterID, "poster_id"))
+ }
+
+ if options.AssigneeID != nil {
+ queries = append(queries, inner_bleve.NumericEqualityQuery(*options.AssigneeID, "assignee_id"))
+ }
+
+ if options.MentionID != nil {
+ queries = append(queries, inner_bleve.NumericEqualityQuery(*options.MentionID, "mention_ids"))
+ }
+
+ if options.ReviewedID != nil {
+ queries = append(queries, inner_bleve.NumericEqualityQuery(*options.ReviewedID, "reviewed_ids"))
+ }
+ if options.ReviewRequestedID != nil {
+ queries = append(queries, inner_bleve.NumericEqualityQuery(*options.ReviewRequestedID, "review_requested_ids"))
+ }
+
+ if options.SubscriberID != nil {
+ queries = append(queries, inner_bleve.NumericEqualityQuery(*options.SubscriberID, "subscriber_ids"))
+ }
+
+ if options.UpdatedAfterUnix != nil || options.UpdatedBeforeUnix != nil {
+ queries = append(queries, inner_bleve.NumericRangeInclusiveQuery(options.UpdatedAfterUnix, options.UpdatedBeforeUnix, "updated_unix"))
+ }
+
+ var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...)
+ if len(queries) == 0 {
+ indexerQuery = bleve.NewMatchAllQuery()
+ }
+
+ skip, limit := indexer_internal.ParsePaginator(options.Paginator)
+ search := bleve.NewSearchRequestOptions(indexerQuery, limit, skip, false)
+
+ if options.SortBy == "" {
+ options.SortBy = internal.SortByCreatedAsc
+ }
+
+ search.SortBy([]string{string(options.SortBy), "-_id"})
result, err := b.inner.Indexer.SearchInContext(ctx, search)
if err != nil {
return nil, err
}
- ret := internal.SearchResult{
- Hits: make([]internal.Match, 0, len(result.Hits)),
+ ret := &internal.SearchResult{
+ Total: int64(result.Total),
+ Hits: make([]internal.Match, 0, len(result.Hits)),
}
for _, hit := range result.Hits {
id, err := indexer_internal.ParseBase36(hit.ID)
@@ -175,5 +283,5 @@ func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, l
ID: id,
})
}
- return &ret, nil
+ return ret, nil
}
diff --git a/modules/indexer/issues/bleve/bleve_test.go b/modules/indexer/issues/bleve/bleve_test.go
index 0eb136d22b..908514a01a 100644
--- a/modules/indexer/issues/bleve/bleve_test.go
+++ b/modules/indexer/issues/bleve/bleve_test.go
@@ -4,86 +4,15 @@
package bleve
import (
- "context"
"testing"
- "code.gitea.io/gitea/modules/indexer/issues/internal"
-
- "github.com/stretchr/testify/assert"
+ "code.gitea.io/gitea/modules/indexer/issues/internal/tests"
)
-func TestBleveIndexAndSearch(t *testing.T) {
+func TestBleveIndexer(t *testing.T) {
dir := t.TempDir()
indexer := NewIndexer(dir)
defer indexer.Close()
- if _, err := indexer.Init(context.Background()); err != nil {
- assert.Fail(t, "Unable to initialize bleve indexer: %v", err)
- return
- }
-
- err := indexer.Index(context.Background(), []*internal.IndexerData{
- {
- ID: 1,
- RepoID: 2,
- Title: "Issue search should support Chinese",
- Content: "As title",
- Comments: []string{
- "test1",
- "test2",
- },
- },
- {
- ID: 2,
- RepoID: 2,
- Title: "CJK support could be optional",
- Content: "Chinese Korean and Japanese should be supported but I would like it's not enabled by default",
- Comments: []string{
- "LGTM",
- "Good idea",
- },
- },
- })
- assert.NoError(t, err)
-
- keywords := []struct {
- Keyword string
- IDs []int64
- }{
- {
- Keyword: "search",
- IDs: []int64{1},
- },
- {
- Keyword: "test1",
- IDs: []int64{1},
- },
- {
- Keyword: "test2",
- IDs: []int64{1},
- },
- {
- Keyword: "support",
- IDs: []int64{1, 2},
- },
- {
- Keyword: "chinese",
- IDs: []int64{1, 2},
- },
- {
- Keyword: "help",
- IDs: []int64{},
- },
- }
-
- for _, kw := range keywords {
- res, err := indexer.Search(context.TODO(), kw.Keyword, []int64{2}, 10, 0, "")
- assert.NoError(t, err)
-
- ids := make([]int64, 0, len(res.Hits))
- for _, hit := range res.Hits {
- ids = append(ids, hit.ID)
- }
- assert.ElementsMatch(t, kw.IDs, ids)
- }
+ tests.TestIndexer(t, indexer)
}
diff --git a/modules/indexer/issues/db/db.go b/modules/indexer/issues/db/db.go
index b054b9d800..1016523b72 100644
--- a/modules/indexer/issues/db/db.go
+++ b/modules/indexer/issues/db/db.go
@@ -6,10 +6,13 @@ package db
import (
"context"
- issues_model "code.gitea.io/gitea/models/issues"
+ "code.gitea.io/gitea/models/db"
+ issue_model "code.gitea.io/gitea/models/issues"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
inner_db "code.gitea.io/gitea/modules/indexer/internal/db"
"code.gitea.io/gitea/modules/indexer/issues/internal"
+
+ "xorm.io/builder"
)
var _ internal.Indexer = &Indexer{}
@@ -26,7 +29,7 @@ func NewIndexer() *Indexer {
}
// Index dummy function
-func (i *Indexer) Index(_ context.Context, _ []*internal.IndexerData) error {
+func (i *Indexer) Index(_ context.Context, _ ...*internal.IndexerData) error {
return nil
}
@@ -36,19 +39,58 @@ func (i *Indexer) Delete(_ context.Context, _ ...int64) error {
}
// Search searches for issues
-func (i *Indexer) Search(ctx context.Context, kw string, repoIDs []int64, limit, start int, state string) (*internal.SearchResult, error) {
- total, ids, err := issues_model.SearchIssueIDsByKeyword(ctx, kw, repoIDs, limit, start)
+func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
+ // FIXME: I tried to avoid importing models here, but it seems to be impossible.
+ // We can provide a function to register the search function, so models/issues can register it.
+ // So models/issues will import modules/indexer/issues, it's OK because it's by design.
+ // But modules/indexer/issues has already imported models/issues to do UpdateRepoIndexer and UpdateIssueIndexer.
+ // And to avoid circular import, we have to move the functions to another package.
+ // I believe it should be services/indexer, sounds great!
+ // But the two functions are used in modules/notification/indexer, which means we will import services/indexer in modules/notification/indexer.
+ // So that's the root problem:
+ // The notification is defined in modules, but it's using lots of things that should be in services.
+
+ cond := builder.NewCond()
+
+ if options.Keyword != "" {
+ repoCond := builder.In("repo_id", options.RepoIDs)
+ if len(options.RepoIDs) == 1 {
+ repoCond = builder.Eq{"repo_id": options.RepoIDs[0]}
+ }
+ subQuery := builder.Select("id").From("issue").Where(repoCond)
+
+ cond = builder.Or(
+ db.BuildCaseInsensitiveLike("issue.name", options.Keyword),
+ db.BuildCaseInsensitiveLike("issue.content", options.Keyword),
+ builder.In("issue.id", builder.Select("issue_id").
+ From("comment").
+ Where(builder.And(
+ builder.Eq{"type": issue_model.CommentTypeComment},
+ builder.In("issue_id", subQuery),
+ db.BuildCaseInsensitiveLike("content", options.Keyword),
+ )),
+ ),
+ )
+ }
+
+ opt, err := ToDBOptions(ctx, options)
if err != nil {
return nil, err
}
- result := internal.SearchResult{
- Total: total,
- Hits: make([]internal.Match, 0, limit),
+
+ ids, total, err := issue_model.IssueIDs(ctx, opt, cond)
+ if err != nil {
+ return nil, err
}
+
+ hits := make([]internal.Match, 0, len(ids))
for _, id := range ids {
- result.Hits = append(result.Hits, internal.Match{
+ hits = append(hits, internal.Match{
ID: id,
})
}
- return &result, nil
+ return &internal.SearchResult{
+ Total: total,
+ Hits: hits,
+ }, nil
}
diff --git a/modules/indexer/issues/db/options.go b/modules/indexer/issues/db/options.go
new file mode 100644
index 0000000000..ebd672a695
--- /dev/null
+++ b/modules/indexer/issues/db/options.go
@@ -0,0 +1,114 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package db
+
+import (
+ "context"
+ "fmt"
+
+ "code.gitea.io/gitea/models/db"
+ issue_model "code.gitea.io/gitea/models/issues"
+ "code.gitea.io/gitea/modules/container"
+ "code.gitea.io/gitea/modules/indexer/issues/internal"
+)
+
+func ToDBOptions(ctx context.Context, options *internal.SearchOptions) (*issue_model.IssuesOptions, error) {
+ convertID := func(id *int64) int64 {
+ if id == nil {
+ return 0
+ }
+ if *id == 0 {
+ return db.NoConditionID
+ }
+ return *id
+ }
+ convertInt64 := func(i *int64) int64 {
+ if i == nil {
+ return 0
+ }
+ return *i
+ }
+ var sortType string
+ switch options.SortBy {
+ case internal.SortByCreatedAsc:
+ sortType = "oldest"
+ case internal.SortByUpdatedAsc:
+ sortType = "leastupdate"
+ case internal.SortByCommentsAsc:
+ sortType = "leastcomment"
+ case internal.SortByDeadlineAsc:
+ sortType = "farduedate"
+ case internal.SortByCreatedDesc:
+ sortType = "newest"
+ case internal.SortByUpdatedDesc:
+ sortType = "recentupdate"
+ case internal.SortByCommentsDesc:
+ sortType = "mostcomment"
+ case internal.SortByDeadlineDesc:
+ sortType = "nearduedate"
+ default:
+ sortType = "newest"
+ }
+
+ opts := &issue_model.IssuesOptions{
+ Paginator: options.Paginator,
+ RepoIDs: options.RepoIDs,
+ RepoCond: nil,
+ AssigneeID: convertID(options.AssigneeID),
+ PosterID: convertID(options.PosterID),
+ MentionedID: convertID(options.MentionID),
+ ReviewRequestedID: convertID(options.ReviewRequestedID),
+ ReviewedID: convertID(options.ReviewedID),
+ SubscriberID: convertID(options.SubscriberID),
+ ProjectID: convertID(options.ProjectID),
+ ProjectBoardID: convertID(options.ProjectBoardID),
+ IsClosed: options.IsClosed,
+ IsPull: options.IsPull,
+ IncludedLabelNames: nil,
+ ExcludedLabelNames: nil,
+ IncludeMilestones: nil,
+ SortType: sortType,
+ IssueIDs: nil,
+ UpdatedAfterUnix: convertInt64(options.UpdatedAfterUnix),
+ UpdatedBeforeUnix: convertInt64(options.UpdatedBeforeUnix),
+ PriorityRepoID: 0,
+ IsArchived: 0,
+ Org: nil,
+ Team: nil,
+ User: nil,
+ }
+
+ if len(options.MilestoneIDs) == 1 && options.MilestoneIDs[0] == 0 {
+ opts.MilestoneIDs = []int64{db.NoConditionID}
+ } else {
+ opts.MilestoneIDs = options.MilestoneIDs
+ }
+
+ if options.NoLabelOnly {
+ opts.LabelIDs = []int64{0} // Be careful, it's zero, not db.NoConditionID
+ } else {
+ opts.LabelIDs = make([]int64, 0, len(options.IncludedLabelIDs)+len(options.ExcludedLabelIDs))
+ opts.LabelIDs = append(opts.LabelIDs, options.IncludedLabelIDs...)
+ for _, id := range options.ExcludedLabelIDs {
+ opts.LabelIDs = append(opts.LabelIDs, -id)
+ }
+
+ if len(options.IncludedLabelIDs) == 0 && len(options.IncludedAnyLabelIDs) > 0 {
+ _ = ctx // issue_model.GetLabelsByIDs should be called with ctx, this line can be removed when it's done.
+ labels, err := issue_model.GetLabelsByIDs(options.IncludedAnyLabelIDs, "name")
+ if err != nil {
+ return nil, fmt.Errorf("GetLabelsByIDs: %v", err)
+ }
+ set := container.Set[string]{}
+ for _, label := range labels {
+ if !set.Contains(label.Name) {
+ set.Add(label.Name)
+ opts.IncludedLabelNames = append(opts.IncludedLabelNames, label.Name)
+ }
+ }
+ }
+ }
+
+ return opts, nil
+}
diff --git a/modules/indexer/issues/dboptions.go b/modules/indexer/issues/dboptions.go
new file mode 100644
index 0000000000..6a41afadd7
--- /dev/null
+++ b/modules/indexer/issues/dboptions.go
@@ -0,0 +1,93 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package issues
+
+import (
+ "code.gitea.io/gitea/models/db"
+ issues_model "code.gitea.io/gitea/models/issues"
+)
+
+func ToSearchOptions(keyword string, opts *issues_model.IssuesOptions) *SearchOptions {
+ searchOpt := &SearchOptions{
+ Keyword: keyword,
+ RepoIDs: opts.RepoIDs,
+ AllPublic: false,
+ IsPull: opts.IsPull,
+ IsClosed: opts.IsClosed,
+ }
+
+ if len(opts.LabelIDs) == 1 && opts.LabelIDs[0] == 0 {
+ searchOpt.NoLabelOnly = true
+ } else {
+ for _, labelID := range opts.LabelIDs {
+ if labelID > 0 {
+ searchOpt.IncludedLabelIDs = append(searchOpt.IncludedLabelIDs, labelID)
+ } else {
+ searchOpt.ExcludedLabelIDs = append(searchOpt.ExcludedLabelIDs, -labelID)
+ }
+ }
+ // opts.IncludedLabelNames and opts.ExcludedLabelNames are not supported here.
+ // It's not a TO DO, it's just unnecessary.
+ }
+
+ if len(opts.MilestoneIDs) == 1 && opts.MilestoneIDs[0] == db.NoConditionID {
+ searchOpt.MilestoneIDs = []int64{0}
+ } else {
+ searchOpt.MilestoneIDs = opts.MilestoneIDs
+ }
+
+ if opts.AssigneeID > 0 {
+ searchOpt.AssigneeID = &opts.AssigneeID
+ }
+ if opts.PosterID > 0 {
+ searchOpt.PosterID = &opts.PosterID
+ }
+ if opts.MentionedID > 0 {
+ searchOpt.MentionID = &opts.MentionedID
+ }
+ if opts.ReviewedID > 0 {
+ searchOpt.ReviewedID = &opts.ReviewedID
+ }
+ if opts.ReviewRequestedID > 0 {
+ searchOpt.ReviewRequestedID = &opts.ReviewRequestedID
+ }
+ if opts.SubscriberID > 0 {
+ searchOpt.SubscriberID = &opts.SubscriberID
+ }
+
+ if opts.UpdatedAfterUnix > 0 {
+ searchOpt.UpdatedAfterUnix = &opts.UpdatedAfterUnix
+ }
+ if opts.UpdatedBeforeUnix > 0 {
+ searchOpt.UpdatedBeforeUnix = &opts.UpdatedBeforeUnix
+ }
+
+ searchOpt.Paginator = opts.Paginator
+
+ switch opts.SortType {
+ case "":
+ searchOpt.SortBy = SortByCreatedDesc
+ case "oldest":
+ searchOpt.SortBy = SortByCreatedAsc
+ case "recentupdate":
+ searchOpt.SortBy = SortByUpdatedDesc
+ case "leastupdate":
+ searchOpt.SortBy = SortByUpdatedAsc
+ case "mostcomment":
+ searchOpt.SortBy = SortByCommentsDesc
+ case "leastcomment":
+ searchOpt.SortBy = SortByCommentsAsc
+ case "nearduedate":
+ searchOpt.SortBy = SortByDeadlineAsc
+ case "farduedate":
+ searchOpt.SortBy = SortByDeadlineDesc
+ case "priority", "priorityrepo", "project-column-sorting":
+ // Unsupported sort type for search
+ searchOpt.SortBy = SortByUpdatedDesc
+ default:
+ searchOpt.SortBy = SortByUpdatedDesc
+ }
+
+ return searchOpt
+}
diff --git a/modules/indexer/issues/elasticsearch/elasticsearch.go b/modules/indexer/issues/elasticsearch/elasticsearch.go
index cfd3628c18..d059f76b32 100644
--- a/modules/indexer/issues/elasticsearch/elasticsearch.go
+++ b/modules/indexer/issues/elasticsearch/elasticsearch.go
@@ -7,6 +7,7 @@ import (
"context"
"fmt"
"strconv"
+ "strings"
"code.gitea.io/gitea/modules/graceful"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
@@ -17,7 +18,7 @@ import (
)
const (
- issueIndexerLatestVersion = 0
+ issueIndexerLatestVersion = 1
)
var _ internal.Indexer = &Indexer{}
@@ -39,36 +40,44 @@ func NewIndexer(url, indexerName string) *Indexer {
}
const (
- defaultMapping = `{
- "mappings": {
- "properties": {
- "id": {
- "type": "integer",
- "index": true
- },
- "repo_id": {
- "type": "integer",
- "index": true
- },
- "title": {
- "type": "text",
- "index": true
- },
- "content": {
- "type": "text",
- "index": true
- },
- "comments": {
- "type" : "text",
- "index": true
- }
- }
+ defaultMapping = `
+{
+ "mappings": {
+ "properties": {
+ "id": { "type": "integer", "index": true },
+ "repo_id": { "type": "integer", "index": true },
+ "is_public": { "type": "boolean", "index": true },
+
+ "title": { "type": "text", "index": true },
+ "content": { "type": "text", "index": true },
+ "comments": { "type" : "text", "index": true },
+
+ "is_pull": { "type": "boolean", "index": true },
+ "is_closed": { "type": "boolean", "index": true },
+ "label_ids": { "type": "integer", "index": true },
+ "no_label": { "type": "boolean", "index": true },
+ "milestone_id": { "type": "integer", "index": true },
+ "project_id": { "type": "integer", "index": true },
+ "project_board_id": { "type": "integer", "index": true },
+ "poster_id": { "type": "integer", "index": true },
+ "assignee_id": { "type": "integer", "index": true },
+ "mention_ids": { "type": "integer", "index": true },
+ "reviewed_ids": { "type": "integer", "index": true },
+ "review_requested_ids": { "type": "integer", "index": true },
+ "subscriber_ids": { "type": "integer", "index": true },
+ "updated_unix": { "type": "integer", "index": true },
+
+ "created_unix": { "type": "integer", "index": true },
+ "deadline_unix": { "type": "integer", "index": true },
+ "comment_count": { "type": "integer", "index": true }
}
- }`
+ }
+}
+`
)
// Index will save the index data
-func (b *Indexer) Index(ctx context.Context, issues []*internal.IndexerData) error {
+func (b *Indexer) Index(ctx context.Context, issues ...*internal.IndexerData) error {
if len(issues) == 0 {
return nil
} else if len(issues) == 1 {
@@ -76,13 +85,7 @@ func (b *Indexer) Index(ctx context.Context, issues []*internal.IndexerData) err
_, err := b.inner.Client.Index().
Index(b.inner.VersionedIndexName()).
Id(fmt.Sprintf("%d", issue.ID)).
- BodyJson(map[string]any{
- "id": issue.ID,
- "repo_id": issue.RepoID,
- "title": issue.Title,
- "content": issue.Content,
- "comments": issue.Comments,
- }).
+ BodyJson(issue).
Do(ctx)
return err
}
@@ -93,13 +96,7 @@ func (b *Indexer) Index(ctx context.Context, issues []*internal.IndexerData) err
elastic.NewBulkIndexRequest().
Index(b.inner.VersionedIndexName()).
Id(fmt.Sprintf("%d", issue.ID)).
- Doc(map[string]any{
- "id": issue.ID,
- "repo_id": issue.RepoID,
- "title": issue.Title,
- "content": issue.Content,
- "comments": issue.Comments,
- }),
+ Doc(issue),
)
}
@@ -140,23 +137,113 @@ func (b *Indexer) Delete(ctx context.Context, ids ...int64) error {
// Search searches for issues by given conditions.
// Returns the matching issue IDs
-func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int, state string) (*internal.SearchResult, error) {
- kwQuery := elastic.NewMultiMatchQuery(keyword, "title", "content", "comments")
+func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
query := elastic.NewBoolQuery()
- query = query.Must(kwQuery)
- if len(repoIDs) > 0 {
- repoStrs := make([]any, 0, len(repoIDs))
- for _, repoID := range repoIDs {
- repoStrs = append(repoStrs, repoID)
+
+ if options.Keyword != "" {
+ query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments"))
+ }
+
+ if len(options.RepoIDs) > 0 {
+ q := elastic.NewBoolQuery()
+ q.Should(elastic.NewTermsQuery("repo_id", toAnySlice(options.RepoIDs)...))
+ if options.AllPublic {
+ q.Should(elastic.NewTermQuery("is_public", true))
}
- repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...)
- query = query.Must(repoQuery)
+ query.Must(q)
+ }
+
+ if !options.IsPull.IsNone() {
+ query.Must(elastic.NewTermQuery("is_pull", options.IsPull.IsTrue()))
+ }
+ if !options.IsClosed.IsNone() {
+ query.Must(elastic.NewTermQuery("is_closed", options.IsClosed.IsTrue()))
+ }
+
+ if options.NoLabelOnly {
+ query.Must(elastic.NewTermQuery("no_label", true))
+ } else {
+ if len(options.IncludedLabelIDs) > 0 {
+ q := elastic.NewBoolQuery()
+ for _, labelID := range options.IncludedLabelIDs {
+ q.Must(elastic.NewTermQuery("label_ids", labelID))
+ }
+ query.Must(q)
+ } else if len(options.IncludedAnyLabelIDs) > 0 {
+ query.Must(elastic.NewTermsQuery("label_ids", toAnySlice(options.IncludedAnyLabelIDs)...))
+ }
+ if len(options.ExcludedLabelIDs) > 0 {
+ q := elastic.NewBoolQuery()
+ for _, labelID := range options.ExcludedLabelIDs {
+ q.MustNot(elastic.NewTermQuery("label_ids", labelID))
+ }
+ query.Must(q)
+ }
+ }
+
+ if len(options.MilestoneIDs) > 0 {
+ query.Must(elastic.NewTermsQuery("milestone_id", toAnySlice(options.MilestoneIDs)...))
+ }
+
+ if options.ProjectID != nil {
+ query.Must(elastic.NewTermQuery("project_id", *options.ProjectID))
+ }
+ if options.ProjectBoardID != nil {
+ query.Must(elastic.NewTermQuery("project_board_id", *options.ProjectBoardID))
+ }
+
+ if options.PosterID != nil {
+ query.Must(elastic.NewTermQuery("poster_id", *options.PosterID))
}
+
+ if options.AssigneeID != nil {
+ query.Must(elastic.NewTermQuery("assignee_id", *options.AssigneeID))
+ }
+
+ if options.MentionID != nil {
+ query.Must(elastic.NewTermQuery("mention_ids", *options.MentionID))
+ }
+
+ if options.ReviewedID != nil {
+ query.Must(elastic.NewTermQuery("reviewed_ids", *options.ReviewedID))
+ }
+ if options.ReviewRequestedID != nil {
+ query.Must(elastic.NewTermQuery("review_requested_ids", *options.ReviewRequestedID))
+ }
+
+ if options.SubscriberID != nil {
+ query.Must(elastic.NewTermQuery("subscriber_ids", *options.SubscriberID))
+ }
+
+ if options.UpdatedAfterUnix != nil || options.UpdatedBeforeUnix != nil {
+ q := elastic.NewRangeQuery("updated_unix")
+ if options.UpdatedAfterUnix != nil {
+ q.Gte(*options.UpdatedAfterUnix)
+ }
+ if options.UpdatedBeforeUnix != nil {
+ q.Lte(*options.UpdatedBeforeUnix)
+ }
+ query.Must(q)
+ }
+
+ if options.SortBy == "" {
+ options.SortBy = internal.SortByCreatedAsc
+ }
+ sortBy := []elastic.Sorter{
+ parseSortBy(options.SortBy),
+ elastic.NewFieldSort("id").Desc(),
+ }
+
+ // See https://stackoverflow.com/questions/35206409/elasticsearch-2-1-result-window-is-too-large-index-max-result-window/35221900
+ // TODO: make it configurable since it's configurable in elasticsearch
+ const maxPageSize = 10000
+
+ skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxPageSize)
searchResult, err := b.inner.Client.Search().
Index(b.inner.VersionedIndexName()).
Query(query).
- Sort("_score", false).
- From(start).Size(limit).
+ SortBy(sortBy...).
+ From(skip).Size(limit).
Do(ctx)
if err != nil {
return nil, err
@@ -175,3 +262,20 @@ func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, l
Hits: hits,
}, nil
}
+
+func toAnySlice[T any](s []T) []any {
+ ret := make([]any, 0, len(s))
+ for _, item := range s {
+ ret = append(ret, item)
+ }
+ return ret
+}
+
+func parseSortBy(sortBy internal.SortBy) elastic.Sorter {
+ field := strings.TrimPrefix(string(sortBy), "-")
+ ret := elastic.NewFieldSort(field)
+ if strings.HasPrefix(string(sortBy), "-") {
+ ret.Desc()
+ }
+ return ret
+}
diff --git a/modules/indexer/issues/elasticsearch/elasticsearch_test.go b/modules/indexer/issues/elasticsearch/elasticsearch_test.go
new file mode 100644
index 0000000000..ffd85b1aa1
--- /dev/null
+++ b/modules/indexer/issues/elasticsearch/elasticsearch_test.go
@@ -0,0 +1,48 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package elasticsearch
+
+import (
+ "fmt"
+ "net/http"
+ "os"
+ "testing"
+ "time"
+
+ "code.gitea.io/gitea/modules/indexer/issues/internal/tests"
+)
+
+func TestElasticsearchIndexer(t *testing.T) {
+ // The elasticsearch instance started by pull-db-tests.yml > test-unit > services > elasticsearch
+ url := "http://elastic:changeme@elasticsearch:9200"
+
+ if os.Getenv("CI") == "" {
+ // Make it possible to run tests against a local elasticsearch instance
+ url = os.Getenv("TEST_ELASTICSEARCH_URL")
+ if url == "" {
+ t.Skip("TEST_ELASTICSEARCH_URL not set and not running in CI")
+ return
+ }
+ }
+
+ ok := false
+ for i := 0; i < 60; i++ {
+ resp, err := http.Get(url)
+ if err == nil && resp.StatusCode == http.StatusOK {
+ ok = true
+ break
+ }
+ t.Logf("Waiting for elasticsearch to be up: %v", err)
+ time.Sleep(time.Second)
+ }
+ if !ok {
+ t.Fatalf("Failed to wait for elasticsearch to be up")
+ return
+ }
+
+ indexer := NewIndexer(url, fmt.Sprintf("test_elasticsearch_indexer_%d", time.Now().Unix()))
+ defer indexer.Close()
+
+ tests.TestIndexer(t, indexer)
+}
diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go
index fe5c5d8f26..42279cbddb 100644
--- a/modules/indexer/issues/indexer.go
+++ b/modules/indexer/issues/indexer.go
@@ -11,7 +11,6 @@ import (
"time"
db_model "code.gitea.io/gitea/models/db"
- issues_model "code.gitea.io/gitea/models/issues"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/indexer/issues/bleve"
@@ -26,9 +25,24 @@ import (
"code.gitea.io/gitea/modules/util"
)
+// IndexerMetadata is used to send data to the queue, so it contains only the ids.
+// It may look weird, because it has to be compatible with the old queue data format.
+// If the IsDelete flag is true, the IDs specify the issues to delete from the index without querying the database.
+// If the IsDelete flag is false, the ID specifies the issue to index, so the indexer will query the database to get the issue data.
+// It should be noted that if the ID does not exist in the database, its index will be deleted too, even if IsDelete is false.
+// Valid values:
+// - IsDelete = true, IDs = [1, 2, 3], and ID will be ignored
+// - IsDelete = false, ID = 1, and IDs will be ignored
+type IndexerMetadata struct {
+ ID int64 `json:"id"`
+
+ IsDelete bool `json:"is_delete"`
+ IDs []int64 `json:"ids"`
+}
+
var (
// issueIndexerQueue queue of issue ids to be updated
- issueIndexerQueue *queue.WorkerPoolQueue[*internal.IndexerData]
+ issueIndexerQueue *queue.WorkerPoolQueue[*IndexerMetadata]
// globalIndexer is the global indexer, it cannot be nil.
// When the real indexer is not ready, it will be a dummy indexer which will return error to explain it's not ready.
// So it's always safe use it as *globalIndexer.Load() and call its methods.
@@ -50,37 +64,7 @@ func InitIssueIndexer(syncReindex bool) {
indexerInitWaitChannel := make(chan time.Duration, 1)
// Create the Queue
- switch setting.Indexer.IssueType {
- case "bleve", "elasticsearch", "meilisearch":
- handler := func(items ...*internal.IndexerData) (unhandled []*internal.IndexerData) {
- indexer := *globalIndexer.Load()
- toIndex := make([]*internal.IndexerData, 0, len(items))
- for _, indexerData := range items {
- log.Trace("IndexerData Process: %d %v %t", indexerData.ID, indexerData.IDs, indexerData.IsDelete)
- if indexerData.IsDelete {
- if err := indexer.Delete(ctx, indexerData.IDs...); err != nil {
- log.Error("Issue indexer handler: failed to from index: %v Error: %v", indexerData.IDs, err)
- unhandled = append(unhandled, indexerData)
- }
- continue
- }
- toIndex = append(toIndex, indexerData)
- }
- if err := indexer.Index(ctx, toIndex); err != nil {
- log.Error("Error whilst indexing: %v Error: %v", toIndex, err)
- unhandled = append(unhandled, toIndex...)
- }
- return unhandled
- }
-
- issueIndexerQueue = queue.CreateSimpleQueue(ctx, "issue_indexer", handler)
-
- if issueIndexerQueue == nil {
- log.Fatal("Unable to create issue indexer queue")
- }
- default:
- issueIndexerQueue = queue.CreateSimpleQueue[*internal.IndexerData](ctx, "issue_indexer", nil)
- }
+ issueIndexerQueue = queue.CreateUniqueQueue(ctx, "issue_indexer", getIssueIndexerQueueHandler(ctx))
graceful.GetManager().RunAtTerminate(finished)
@@ -176,6 +160,44 @@ func InitIssueIndexer(syncReindex bool) {
}
}
+func getIssueIndexerQueueHandler(ctx context.Context) func(items ...*IndexerMetadata) []*IndexerMetadata {
+ return func(items ...*IndexerMetadata) []*IndexerMetadata {
+ var unhandled []*IndexerMetadata
+
+ indexer := *globalIndexer.Load()
+ for _, item := range items {
+ log.Trace("IndexerMetadata Process: %d %v %t", item.ID, item.IDs, item.IsDelete)
+ if item.IsDelete {
+ if err := indexer.Delete(ctx, item.IDs...); err != nil {
+ log.Error("Issue indexer handler: failed to from index: %v Error: %v", item.IDs, err)
+ unhandled = append(unhandled, item)
+ }
+ continue
+ }
+ data, existed, err := getIssueIndexerData(ctx, item.ID)
+ if err != nil {
+ log.Error("Issue indexer handler: failed to get issue data of %d: %v", item.ID, err)
+ unhandled = append(unhandled, item)
+ continue
+ }
+ if !existed {
+ if err := indexer.Delete(ctx, item.ID); err != nil {
+ log.Error("Issue indexer handler: failed to delete issue %d from index: %v", item.ID, err)
+ unhandled = append(unhandled, item)
+ }
+ continue
+ }
+ if err := indexer.Index(ctx, data); err != nil {
+ log.Error("Issue indexer handler: failed to index issue %d: %v", item.ID, err)
+ unhandled = append(unhandled, item)
+ continue
+ }
+ }
+
+ return unhandled
+ }
+}
+
// populateIssueIndexer populate the issue indexer with issue data
func populateIssueIndexer(ctx context.Context) {
ctx, _, finished := process.GetManager().AddTypedContext(ctx, "Service: PopulateIssueIndexer", process.SystemProcessType, true)
@@ -203,101 +225,87 @@ func populateIssueIndexer(ctx context.Context) {
}
for _, repo := range repos {
- select {
- case <-ctx.Done():
- log.Info("Issue Indexer population shutdown before completion")
- return
- default:
+ for {
+ select {
+ case <-ctx.Done():
+ log.Info("Issue Indexer population shutdown before completion")
+ return
+ default:
+ }
+ if err := updateRepoIndexer(ctx, repo.ID); err != nil {
+ log.Warn("Retry to populate issue indexer for repo %d: %v", repo.ID, err)
+ continue
+ }
+ break
}
- UpdateRepoIndexer(ctx, repo)
}
}
}
// UpdateRepoIndexer add/update all issues of the repositories
-func UpdateRepoIndexer(ctx context.Context, repo *repo_model.Repository) {
- is, err := issues_model.Issues(ctx, &issues_model.IssuesOptions{
- RepoIDs: []int64{repo.ID},
- IsClosed: util.OptionalBoolNone,
- IsPull: util.OptionalBoolNone,
- })
- if err != nil {
- log.Error("Issues: %v", err)
- return
- }
- if err = issues_model.IssueList(is).LoadDiscussComments(ctx); err != nil {
- log.Error("LoadDiscussComments: %v", err)
- return
- }
- for _, issue := range is {
- UpdateIssueIndexer(issue)
+func UpdateRepoIndexer(ctx context.Context, repoID int64) {
+ if err := updateRepoIndexer(ctx, repoID); err != nil {
+ log.Error("Unable to push repo %d to issue indexer: %v", repoID, err)
}
}
// UpdateIssueIndexer add/update an issue to the issue indexer
-func UpdateIssueIndexer(issue *issues_model.Issue) {
- var comments []string
- for _, comment := range issue.Comments {
- if comment.Type == issues_model.CommentTypeComment {
- comments = append(comments, comment.Content)
- }
- }
- issueType := "issue"
- if issue.IsPull {
- issueType = "pull"
- }
- indexerData := &internal.IndexerData{
- ID: issue.ID,
- RepoID: issue.RepoID,
- State: string(issue.State()),
- IssueType: issueType,
- Title: issue.Title,
- Content: issue.Content,
- Comments: comments,
- }
- log.Debug("Adding to channel: %v", indexerData)
- if err := issueIndexerQueue.Push(indexerData); err != nil {
- log.Error("Unable to push to issue indexer: %v: Error: %v", indexerData, err)
+func UpdateIssueIndexer(issueID int64) {
+ if err := updateIssueIndexer(issueID); err != nil {
+ log.Error("Unable to push issue %d to issue indexer: %v", issueID, err)
}
}
// DeleteRepoIssueIndexer deletes repo's all issues indexes
-func DeleteRepoIssueIndexer(ctx context.Context, repo *repo_model.Repository) {
- var ids []int64
- ids, err := issues_model.GetIssueIDsByRepoID(ctx, repo.ID)
- if err != nil {
- log.Error("GetIssueIDsByRepoID failed: %v", err)
- return
+func DeleteRepoIssueIndexer(ctx context.Context, repoID int64) {
+ if err := deleteRepoIssueIndexer(ctx, repoID); err != nil {
+ log.Error("Unable to push deleted repo %d to issue indexer: %v", repoID, err)
}
+}
- if len(ids) == 0 {
- return
- }
- indexerData := &internal.IndexerData{
- IDs: ids,
- IsDelete: true,
- }
- if err := issueIndexerQueue.Push(indexerData); err != nil {
- log.Error("Unable to push to issue indexer: %v: Error: %v", indexerData, err)
- }
+// IsAvailable checks if issue indexer is available
+func IsAvailable(ctx context.Context) bool {
+ return (*globalIndexer.Load()).Ping(ctx) == nil
}
-// SearchIssuesByKeyword search issue ids by keywords and repo id
-// WARNNING: You have to ensure user have permission to visit repoIDs' issues
-func SearchIssuesByKeyword(ctx context.Context, repoIDs []int64, keyword, state string) ([]int64, error) {
- var issueIDs []int64
+// SearchOptions indicates the options for searching issues
+type SearchOptions internal.SearchOptions
+
+const (
+ SortByCreatedDesc = internal.SortByCreatedDesc
+ SortByUpdatedDesc = internal.SortByUpdatedDesc
+ SortByCommentsDesc = internal.SortByCommentsDesc
+ SortByDeadlineDesc = internal.SortByDeadlineDesc
+ SortByCreatedAsc = internal.SortByCreatedAsc
+ SortByUpdatedAsc = internal.SortByUpdatedAsc
+ SortByCommentsAsc = internal.SortByCommentsAsc
+ SortByDeadlineAsc = internal.SortByDeadlineAsc
+)
+
+// SearchIssues searches issues by the given options.
+// It returns the issue IDs and the total number of matches.
+func SearchIssues(ctx context.Context, opts *SearchOptions) ([]int64, int64, error) {
indexer := *globalIndexer.Load()
- res, err := indexer.Search(ctx, keyword, repoIDs, 50, 0, state)
+
+ if opts.Keyword == "" {
+ // This is a conservative shortcut.
+ // If the keyword is empty, the database has better (or at least not worse) performance for filtering issues.
+ // When the keyword is empty, the request tends to be a listing rather than a search of issues.
+ // So if the user creates an issue and lists issues immediately, the issue may not be listed because the indexer needs time to index it.
+ // Even worse, an external indexer like Elasticsearch may not be available for a while,
+ // and the user may not be able to list issues completely until it is available again.
+ indexer = db.NewIndexer()
+ }
+
+ result, err := indexer.Search(ctx, (*internal.SearchOptions)(opts))
if err != nil {
- return nil, err
+ return nil, 0, err
}
- for _, r := range res.Hits {
- issueIDs = append(issueIDs, r.ID)
+
+ ret := make([]int64, 0, len(result.Hits))
+ for _, hit := range result.Hits {
+ ret = append(ret, hit.ID)
}
- return issueIDs, nil
-}
-// IsAvailable checks if issue indexer is available
-func IsAvailable(ctx context.Context) bool {
- return (*globalIndexer.Load()).Ping(ctx) == nil
+ return ret, result.Total, nil
}
diff --git a/modules/indexer/issues/indexer_test.go b/modules/indexer/issues/indexer_test.go
index 757eb2f3d9..d6812f714e 100644
--- a/modules/indexer/issues/indexer_test.go
+++ b/modules/indexer/issues/indexer_test.go
@@ -50,21 +50,41 @@ func TestBleveSearchIssues(t *testing.T) {
time.Sleep(5 * time.Second)
- ids, err := SearchIssuesByKeyword(context.TODO(), []int64{1}, "issue2", "")
- assert.NoError(t, err)
- assert.EqualValues(t, []int64{2}, ids)
+ t.Run("issue2", func(t *testing.T) {
+ ids, _, err := SearchIssues(context.TODO(), &SearchOptions{
+ Keyword: "issue2",
+ RepoIDs: []int64{1},
+ })
+ assert.NoError(t, err)
+ assert.EqualValues(t, []int64{2}, ids)
+ })
- ids, err = SearchIssuesByKeyword(context.TODO(), []int64{1}, "first", "")
- assert.NoError(t, err)
- assert.EqualValues(t, []int64{1}, ids)
+ t.Run("first", func(t *testing.T) {
+ ids, _, err := SearchIssues(context.TODO(), &SearchOptions{
+ Keyword: "first",
+ RepoIDs: []int64{1},
+ })
+ assert.NoError(t, err)
+ assert.EqualValues(t, []int64{1}, ids)
+ })
- ids, err = SearchIssuesByKeyword(context.TODO(), []int64{1}, "for", "")
- assert.NoError(t, err)
- assert.ElementsMatch(t, []int64{1, 2, 3, 5, 11}, ids)
+ t.Run("for", func(t *testing.T) {
+ ids, _, err := SearchIssues(context.TODO(), &SearchOptions{
+ Keyword: "for",
+ RepoIDs: []int64{1},
+ })
+ assert.NoError(t, err)
+ assert.ElementsMatch(t, []int64{1, 2, 3, 5, 11}, ids)
+ })
- ids, err = SearchIssuesByKeyword(context.TODO(), []int64{1}, "good", "")
- assert.NoError(t, err)
- assert.EqualValues(t, []int64{1}, ids)
+ t.Run("good", func(t *testing.T) {
+ ids, _, err := SearchIssues(context.TODO(), &SearchOptions{
+ Keyword: "good",
+ RepoIDs: []int64{1},
+ })
+ assert.NoError(t, err)
+ assert.EqualValues(t, []int64{1}, ids)
+ })
}
func TestDBSearchIssues(t *testing.T) {
@@ -73,19 +93,39 @@ func TestDBSearchIssues(t *testing.T) {
setting.Indexer.IssueType = "db"
InitIssueIndexer(true)
- ids, err := SearchIssuesByKeyword(context.TODO(), []int64{1}, "issue2", "")
- assert.NoError(t, err)
- assert.EqualValues(t, []int64{2}, ids)
+ t.Run("issue2", func(t *testing.T) {
+ ids, _, err := SearchIssues(context.TODO(), &SearchOptions{
+ Keyword: "issue2",
+ RepoIDs: []int64{1},
+ })
+ assert.NoError(t, err)
+ assert.EqualValues(t, []int64{2}, ids)
+ })
- ids, err = SearchIssuesByKeyword(context.TODO(), []int64{1}, "first", "")
- assert.NoError(t, err)
- assert.EqualValues(t, []int64{1}, ids)
+ t.Run("first", func(t *testing.T) {
+ ids, _, err := SearchIssues(context.TODO(), &SearchOptions{
+ Keyword: "first",
+ RepoIDs: []int64{1},
+ })
+ assert.NoError(t, err)
+ assert.EqualValues(t, []int64{1}, ids)
+ })
- ids, err = SearchIssuesByKeyword(context.TODO(), []int64{1}, "for", "")
- assert.NoError(t, err)
- assert.ElementsMatch(t, []int64{1, 2, 3, 5, 11}, ids)
+ t.Run("for", func(t *testing.T) {
+ ids, _, err := SearchIssues(context.TODO(), &SearchOptions{
+ Keyword: "for",
+ RepoIDs: []int64{1},
+ })
+ assert.NoError(t, err)
+ assert.ElementsMatch(t, []int64{1, 2, 3, 5, 11}, ids)
+ })
- ids, err = SearchIssuesByKeyword(context.TODO(), []int64{1}, "good", "")
- assert.NoError(t, err)
- assert.EqualValues(t, []int64{1}, ids)
+ t.Run("good", func(t *testing.T) {
+ ids, _, err := SearchIssues(context.TODO(), &SearchOptions{
+ Keyword: "good",
+ RepoIDs: []int64{1},
+ })
+ assert.NoError(t, err)
+ assert.EqualValues(t, []int64{1}, ids)
+ })
}
diff --git a/modules/indexer/issues/internal/indexer.go b/modules/indexer/issues/internal/indexer.go
index b96517bb80..95740bc598 100644
--- a/modules/indexer/issues/internal/indexer.go
+++ b/modules/indexer/issues/internal/indexer.go
@@ -13,9 +13,9 @@ import (
// Indexer defines an interface to indexer issues contents
type Indexer interface {
internal.Indexer
- Index(ctx context.Context, issue []*IndexerData) error
+ Index(ctx context.Context, issue ...*IndexerData) error
Delete(ctx context.Context, ids ...int64) error
- Search(ctx context.Context, kw string, repoIDs []int64, limit, start int, state string) (*SearchResult, error)
+ Search(ctx context.Context, options *SearchOptions) (*SearchResult, error)
}
// NewDummyIndexer returns a dummy indexer
@@ -29,14 +29,14 @@ type dummyIndexer struct {
internal.Indexer
}
-func (d *dummyIndexer) Index(ctx context.Context, issue []*IndexerData) error {
+func (d *dummyIndexer) Index(_ context.Context, _ ...*IndexerData) error {
return fmt.Errorf("indexer is not ready")
}
-func (d *dummyIndexer) Delete(ctx context.Context, ids ...int64) error {
+func (d *dummyIndexer) Delete(_ context.Context, _ ...int64) error {
return fmt.Errorf("indexer is not ready")
}
-func (d *dummyIndexer) Search(ctx context.Context, kw string, repoIDs []int64, limit, start int, state string) (*SearchResult, error) {
+func (d *dummyIndexer) Search(_ context.Context, _ *SearchOptions) (*SearchResult, error) {
return nil, fmt.Errorf("indexer is not ready")
}
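Aside: the change from a slice parameter to a variadic one lets call sites pass either individual items or an expanded slice. A minimal sketch of both call shapes (the function name, idx and the data values are hypothetical; it assumes the usual "context" and internal imports):

	func indexExamples(ctx context.Context, idx internal.Indexer, a, b *internal.IndexerData, batch []*internal.IndexerData) error {
		if err := idx.Index(ctx, a, b); err != nil { // individual items
			return err
		}
		return idx.Index(ctx, batch...) // expand an existing slice
	}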
diff --git a/modules/indexer/issues/internal/model.go b/modules/indexer/issues/internal/model.go
index 2b52d32302..31acd16bd4 100644
--- a/modules/indexer/issues/internal/model.go
+++ b/modules/indexer/issues/internal/model.go
@@ -3,17 +3,45 @@
package internal
+import (
+ "code.gitea.io/gitea/models/db"
+ "code.gitea.io/gitea/modules/timeutil"
+ "code.gitea.io/gitea/modules/util"
+)
+
// IndexerData data stored in the issue indexer
type IndexerData struct {
- ID int64 `json:"id"`
- RepoID int64 `json:"repo_id"`
- State string `json:"state"` // open, closed, all
- IssueType string `json:"type"` // issue or pull
- Title string `json:"title"`
- Content string `json:"content"`
- Comments []string `json:"comments"`
- IsDelete bool `json:"is_delete"`
- IDs []int64 `json:"ids"`
+ ID int64 `json:"id"`
+ RepoID int64 `json:"repo_id"`
+ IsPublic bool `json:"is_public"` // If the repo is public
+
+ // Fields used for keyword searching
+ Title string `json:"title"`
+ Content string `json:"content"`
+ Comments []string `json:"comments"`
+
+ // Fields used for filtering
+ IsPull bool `json:"is_pull"`
+ IsClosed bool `json:"is_closed"`
+ LabelIDs []int64 `json:"label_ids"`
+ NoLabel bool `json:"no_label"` // True if LabelIDs is empty
+ MilestoneID int64 `json:"milestone_id"`
+ ProjectID int64 `json:"project_id"`
+ ProjectBoardID int64 `json:"project_board_id"`
+ PosterID int64 `json:"poster_id"`
+ AssigneeID int64 `json:"assignee_id"`
+ MentionIDs []int64 `json:"mention_ids"`
+ ReviewedIDs []int64 `json:"reviewed_ids"`
+ ReviewRequestedIDs []int64 `json:"review_requested_ids"`
+ SubscriberIDs []int64 `json:"subscriber_ids"`
+ UpdatedUnix timeutil.TimeStamp `json:"updated_unix"`
+
+ // Fields used for sorting
+ // UpdatedUnix is used for both filtering and sorting.
+ // ID is used for sorting too, to make the sorting stable.
+ CreatedUnix timeutil.TimeStamp `json:"created_unix"`
+ DeadlineUnix timeutil.TimeStamp `json:"deadline_unix"`
+ CommentCount int64 `json:"comment_count"`
}
// Match represents on search result
@@ -27,3 +55,67 @@ type SearchResult struct {
Total int64
Hits []Match
}
+
+// SearchOptions represents search options
+type SearchOptions struct {
+ Keyword string // keyword to search
+
+ RepoIDs []int64 // repository IDs which the issues belong to
+ AllPublic bool // whether to include all public repositories
+
+ IsPull util.OptionalBool // if the issue is a pull request
+ IsClosed util.OptionalBool // if the issue is closed
+
+ IncludedLabelIDs []int64 // labels the issues have
+ ExcludedLabelIDs []int64 // labels the issues don't have
+ IncludedAnyLabelIDs []int64 // labels the issues have at least one of; ignored if IncludedLabelIDs is not empty. It's an uncommon filter, but it has been supported accidentally by issues.IssuesOptions.IncludedLabelNames.
+ NoLabelOnly bool // if true, only issues without any label are returned; IncludedLabelIDs, ExcludedLabelIDs and IncludedAnyLabelIDs are ignored
+
+ MilestoneIDs []int64 // milestones the issues have
+
+ ProjectID *int64 // project the issues belong to
+ ProjectBoardID *int64 // project board the issues belong to
+
+ PosterID *int64 // poster of the issues
+
+ AssigneeID *int64 // assignee of the issues, zero means no assignee
+
+ MentionID *int64 // mentioned user of the issues
+
+ ReviewedID *int64 // reviewer of the issues
+ ReviewRequestedID *int64 // requested reviewer of the issues
+
+ SubscriberID *int64 // subscriber of the issues
+
+ UpdatedAfterUnix *int64
+ UpdatedBeforeUnix *int64
+
+ db.Paginator
+
+ SortBy SortBy // sort by field
+}
+
+type SortBy string
+
+const (
+ SortByCreatedDesc SortBy = "-created_unix"
+ SortByUpdatedDesc SortBy = "-updated_unix"
+ SortByCommentsDesc SortBy = "-comment_count"
+ SortByDeadlineDesc SortBy = "-deadline_unix"
+ SortByCreatedAsc SortBy = "created_unix"
+ SortByUpdatedAsc SortBy = "updated_unix"
+ SortByCommentsAsc SortBy = "comment_count"
+ SortByDeadlineAsc SortBy = "deadline_unix"
+ // Sort types that are supported by issues.IssuesOptions.SortType but not by the indexer:
+ //
+ // - "priorityrepo":
+ // It's impossible to support it in the indexer.
+ // It depends on the repository specified in the request, so we cannot add a static field to the indexer.
+ // If we queried the issues of the specified repository first and then appended the other issues,
+ // it would break the pagination.
+ //
+ // - "project-column-sorting":
+ // It could be supported by adding project.ProjectIssue.Sorting to the indexer,
+ // but what if the issue belongs to multiple projects?
+ // Since searching issues by keyword is not supported on the project page, we don't need to support it.
+)
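Aside: a rough sketch of how a caller might fill the new SearchOptions (the function and idx are hypothetical; the types come from this file, models/db and modules/util). The old state parameter maps onto IsClosed: "open" becomes util.OptionalBoolFalse and "closed" becomes util.OptionalBoolTrue.

	// sketch only; assumes imports of "context", models/db, modules/util and this internal package
	func searchOpenExample(ctx context.Context, idx internal.Indexer) (*internal.SearchResult, error) {
		return idx.Search(ctx, &internal.SearchOptions{
			Keyword:   "panic",                // matched against title, content and comments
			RepoIDs:   []int64{1, 4},          // restrict to these repositories
			IsClosed:  util.OptionalBoolFalse, // equivalent of the old state="open"
			SortBy:    internal.SortByUpdatedDesc,
			Paginator: &db.ListOptions{Page: 2, PageSize: 20},
		})
	}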
diff --git a/modules/indexer/issues/internal/tests/tests.go b/modules/indexer/issues/internal/tests/tests.go
new file mode 100644
index 0000000000..93d38a0b37
--- /dev/null
+++ b/modules/indexer/issues/internal/tests/tests.go
@@ -0,0 +1,804 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+// This package contains tests for issues indexer modules.
+// All the code in this package is only used for testing.
+// Do not put any production code in this package to avoid it being included in the final binary.
+
+package tests
+
+import (
+ "context"
+ "fmt"
+ "testing"
+ "time"
+
+ "code.gitea.io/gitea/models/db"
+ "code.gitea.io/gitea/modules/indexer/issues/internal"
+ "code.gitea.io/gitea/modules/timeutil"
+ "code.gitea.io/gitea/modules/util"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestIndexer(t *testing.T, indexer internal.Indexer) {
+ _, err := indexer.Init(context.Background())
+ require.NoError(t, err)
+
+ require.NoError(t, indexer.Ping(context.Background()))
+
+ var (
+ ids []int64
+ data = map[int64]*internal.IndexerData{}
+ )
+ {
+ d := generateDefaultIndexerData()
+ for _, v := range d {
+ ids = append(ids, v.ID)
+ data[v.ID] = v
+ }
+ require.NoError(t, indexer.Index(context.Background(), d...))
+ require.NoError(t, waitData(indexer, int64(len(data))))
+ }
+
+ defer func() {
+ require.NoError(t, indexer.Delete(context.Background(), ids...))
+ }()
+
+ for _, c := range cases {
+ t.Run(c.Name, func(t *testing.T) {
+ if len(c.ExtraData) > 0 {
+ require.NoError(t, indexer.Index(context.Background(), c.ExtraData...))
+ for _, v := range c.ExtraData {
+ data[v.ID] = v
+ }
+ require.NoError(t, waitData(indexer, int64(len(data))))
+ defer func() {
+ for _, v := range c.ExtraData {
+ require.NoError(t, indexer.Delete(context.Background(), v.ID))
+ delete(data, v.ID)
+ }
+ require.NoError(t, waitData(indexer, int64(len(data))))
+ }()
+ }
+
+ result, err := indexer.Search(context.Background(), c.SearchOptions)
+ require.NoError(t, err)
+
+ if c.Expected != nil {
+ c.Expected(t, data, result)
+ } else {
+ ids := make([]int64, 0, len(result.Hits))
+ for _, hit := range result.Hits {
+ ids = append(ids, hit.ID)
+ }
+ assert.Equal(t, c.ExpectedIDs, ids)
+ assert.Equal(t, c.ExpectedTotal, result.Total)
+ }
+ })
+ }
+}
+
+var cases = []*testIndexerCase{
+ {
+ Name: "default",
+ SearchOptions: &internal.SearchOptions{},
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, len(data), len(result.Hits))
+ assert.Equal(t, len(data), int(result.Total))
+ },
+ },
+ {
+ Name: "empty",
+ SearchOptions: &internal.SearchOptions{
+ Keyword: "f1dfac73-fda6-4a6b-b8a4-2408fcb8ef69",
+ },
+ ExpectedIDs: []int64{},
+ ExpectedTotal: 0,
+ },
+ {
+ Name: "with limit",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ assert.Equal(t, len(data), int(result.Total))
+ },
+ },
+ {
+ Name: "Keyword",
+ ExtraData: []*internal.IndexerData{
+ {ID: 1000, Title: "hi hello world"},
+ {ID: 1001, Content: "hi hello world"},
+ {ID: 1002, Comments: []string{"hi", "hello world"}},
+ },
+ SearchOptions: &internal.SearchOptions{
+ Keyword: "hello",
+ },
+ ExpectedIDs: []int64{1002, 1001, 1000},
+ ExpectedTotal: 3,
+ },
+ {
+ Name: "RepoIDs",
+ ExtraData: []*internal.IndexerData{
+ {ID: 1001, Title: "hello world", RepoID: 1, IsPublic: false},
+ {ID: 1002, Title: "hello world", RepoID: 1, IsPublic: false},
+ {ID: 1003, Title: "hello world", RepoID: 2, IsPublic: true},
+ {ID: 1004, Title: "hello world", RepoID: 2, IsPublic: true},
+ {ID: 1005, Title: "hello world", RepoID: 3, IsPublic: true},
+ {ID: 1006, Title: "hello world", RepoID: 4, IsPublic: false},
+ {ID: 1007, Title: "hello world", RepoID: 5, IsPublic: false},
+ },
+ SearchOptions: &internal.SearchOptions{
+ Keyword: "hello",
+ RepoIDs: []int64{1, 4},
+ },
+ ExpectedIDs: []int64{1006, 1002, 1001},
+ ExpectedTotal: 3,
+ },
+ {
+ Name: "RepoIDs and AllPublic",
+ ExtraData: []*internal.IndexerData{
+ {ID: 1001, Title: "hello world", RepoID: 1, IsPublic: false},
+ {ID: 1002, Title: "hello world", RepoID: 1, IsPublic: false},
+ {ID: 1003, Title: "hello world", RepoID: 2, IsPublic: true},
+ {ID: 1004, Title: "hello world", RepoID: 2, IsPublic: true},
+ {ID: 1005, Title: "hello world", RepoID: 3, IsPublic: true},
+ {ID: 1006, Title: "hello world", RepoID: 4, IsPublic: false},
+ {ID: 1007, Title: "hello world", RepoID: 5, IsPublic: false},
+ },
+ SearchOptions: &internal.SearchOptions{
+ Keyword: "hello",
+ RepoIDs: []int64{1, 4},
+ AllPublic: true,
+ },
+ ExpectedIDs: []int64{1006, 1005, 1004, 1003, 1002, 1001},
+ ExpectedTotal: 6,
+ },
+ {
+ Name: "issue only",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ IsPull: util.OptionalBoolFalse,
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.False(t, data[v.ID].IsPull)
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { return !v.IsPull }), result.Total)
+ },
+ },
+ {
+ Name: "pull only",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ IsPull: util.OptionalBoolTrue,
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.True(t, data[v.ID].IsPull)
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { return v.IsPull }), result.Total)
+ },
+ },
+ {
+ Name: "opened only",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ IsClosed: util.OptionalBoolFalse,
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.False(t, data[v.ID].IsClosed)
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { return !v.IsClosed }), result.Total)
+ },
+ },
+ {
+ Name: "closed only",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ IsClosed: util.OptionalBoolTrue,
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.True(t, data[v.ID].IsClosed)
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { return v.IsClosed }), result.Total)
+ },
+ },
+ {
+ Name: "labels",
+ ExtraData: []*internal.IndexerData{
+ {ID: 1000, Title: "hello a", LabelIDs: []int64{2000, 2001, 2002}},
+ {ID: 1001, Title: "hello b", LabelIDs: []int64{2000, 2001}},
+ {ID: 1002, Title: "hello c", LabelIDs: []int64{2000, 2001, 2003}},
+ {ID: 1003, Title: "hello d", LabelIDs: []int64{2000}},
+ {ID: 1004, Title: "hello e", LabelIDs: []int64{}},
+ },
+ SearchOptions: &internal.SearchOptions{
+ Keyword: "hello",
+ IncludedLabelIDs: []int64{2000, 2001},
+ ExcludedLabelIDs: []int64{2003},
+ },
+ ExpectedIDs: []int64{1001, 1000},
+ ExpectedTotal: 2,
+ },
+ {
+ Name: "include any labels",
+ ExtraData: []*internal.IndexerData{
+ {ID: 1000, Title: "hello a", LabelIDs: []int64{2000, 2001, 2002}},
+ {ID: 1001, Title: "hello b", LabelIDs: []int64{2001}},
+ {ID: 1002, Title: "hello c", LabelIDs: []int64{2000, 2001, 2003}},
+ {ID: 1003, Title: "hello d", LabelIDs: []int64{2002}},
+ {ID: 1004, Title: "hello e", LabelIDs: []int64{}},
+ },
+ SearchOptions: &internal.SearchOptions{
+ Keyword: "hello",
+ IncludedAnyLabelIDs: []int64{2001, 2002},
+ ExcludedLabelIDs: []int64{2003},
+ },
+ ExpectedIDs: []int64{1003, 1001, 1000},
+ ExpectedTotal: 3,
+ },
+ {
+ Name: "MilestoneIDs",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ MilestoneIDs: []int64{1, 2, 6},
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.Contains(t, []int64{1, 2, 6}, data[v.ID].MilestoneID)
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
+ return v.MilestoneID == 1 || v.MilestoneID == 2 || v.MilestoneID == 6
+ }), result.Total)
+ },
+ },
+ {
+ Name: "no MilestoneIDs",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ MilestoneIDs: []int64{0},
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.Equal(t, int64(0), data[v.ID].MilestoneID)
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
+ return v.MilestoneID == 0
+ }), result.Total)
+ },
+ },
+ {
+ Name: "ProjectID",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ ProjectID: func() *int64 {
+ id := int64(1)
+ return &id
+ }(),
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.Equal(t, int64(1), data[v.ID].ProjectID)
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
+ return v.ProjectID == 1
+ }), result.Total)
+ },
+ },
+ {
+ Name: "no ProjectID",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ ProjectID: func() *int64 {
+ id := int64(0)
+ return &id
+ }(),
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.Equal(t, int64(0), data[v.ID].ProjectID)
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
+ return v.ProjectID == 0
+ }), result.Total)
+ },
+ },
+ {
+ Name: "ProjectBoardID",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ ProjectBoardID: func() *int64 {
+ id := int64(1)
+ return &id
+ }(),
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.Equal(t, int64(1), data[v.ID].ProjectBoardID)
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
+ return v.ProjectBoardID == 1
+ }), result.Total)
+ },
+ },
+ {
+ Name: "no ProjectBoardID",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ ProjectBoardID: func() *int64 {
+ id := int64(0)
+ return &id
+ }(),
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.Equal(t, int64(0), data[v.ID].ProjectBoardID)
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
+ return v.ProjectBoardID == 0
+ }), result.Total)
+ },
+ },
+ {
+ Name: "PosterID",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ PosterID: func() *int64 {
+ id := int64(1)
+ return &id
+ }(),
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.Equal(t, int64(1), data[v.ID].PosterID)
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
+ return v.PosterID == 1
+ }), result.Total)
+ },
+ },
+ {
+ Name: "AssigneeID",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ AssigneeID: func() *int64 {
+ id := int64(1)
+ return &id
+ }(),
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.Equal(t, int64(1), data[v.ID].AssigneeID)
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
+ return v.AssigneeID == 1
+ }), result.Total)
+ },
+ },
+ {
+ Name: "no AssigneeID",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ AssigneeID: func() *int64 {
+ id := int64(0)
+ return &id
+ }(),
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.Equal(t, int64(0), data[v.ID].AssigneeID)
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
+ return v.AssigneeID == 0
+ }), result.Total)
+ },
+ },
+ {
+ Name: "MentionID",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ MentionID: func() *int64 {
+ id := int64(1)
+ return &id
+ }(),
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.Contains(t, data[v.ID].MentionIDs, int64(1))
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
+ return util.SliceContains(v.MentionIDs, 1)
+ }), result.Total)
+ },
+ },
+ {
+ Name: "ReviewedID",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ ReviewedID: func() *int64 {
+ id := int64(1)
+ return &id
+ }(),
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.Contains(t, data[v.ID].ReviewedIDs, int64(1))
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
+ return util.SliceContains(v.ReviewedIDs, 1)
+ }), result.Total)
+ },
+ },
+ {
+ Name: "ReviewRequestedID",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ ReviewRequestedID: func() *int64 {
+ id := int64(1)
+ return &id
+ }(),
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.Contains(t, data[v.ID].ReviewRequestedIDs, int64(1))
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
+ return util.SliceContains(v.ReviewRequestedIDs, 1)
+ }), result.Total)
+ },
+ },
+ {
+ Name: "SubscriberID",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ SubscriberID: func() *int64 {
+ id := int64(1)
+ return &id
+ }(),
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.Contains(t, data[v.ID].SubscriberIDs, int64(1))
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
+ return util.SliceContains(v.SubscriberIDs, 1)
+ }), result.Total)
+ },
+ },
+ {
+ Name: "updated",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 5,
+ },
+ UpdatedAfterUnix: func() *int64 {
+ var t int64 = 20
+ return &t
+ }(),
+ UpdatedBeforeUnix: func() *int64 {
+ var t int64 = 30
+ return &t
+ }(),
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, 5, len(result.Hits))
+ for _, v := range result.Hits {
+ assert.GreaterOrEqual(t, data[v.ID].UpdatedUnix, int64(20))
+ assert.LessOrEqual(t, data[v.ID].UpdatedUnix, int64(30))
+ }
+ assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool {
+ return data[v.ID].UpdatedUnix >= 20 && data[v.ID].UpdatedUnix <= 30
+ }), result.Total)
+ },
+ },
+ {
+ Name: "SortByCreatedDesc",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ ListAll: true,
+ },
+ SortBy: internal.SortByCreatedDesc,
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, len(data), len(result.Hits))
+ assert.Equal(t, len(data), int(result.Total))
+ for i, v := range result.Hits {
+ if i < len(result.Hits)-1 {
+ assert.GreaterOrEqual(t, data[v.ID].CreatedUnix, data[result.Hits[i+1].ID].CreatedUnix)
+ }
+ }
+ },
+ },
+ {
+ Name: "SortByUpdatedDesc",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ ListAll: true,
+ },
+ SortBy: internal.SortByUpdatedDesc,
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, len(data), len(result.Hits))
+ assert.Equal(t, len(data), int(result.Total))
+ for i, v := range result.Hits {
+ if i < len(result.Hits)-1 {
+ assert.GreaterOrEqual(t, data[v.ID].UpdatedUnix, data[result.Hits[i+1].ID].UpdatedUnix)
+ }
+ }
+ },
+ },
+ {
+ Name: "SortByCommentsDesc",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ ListAll: true,
+ },
+ SortBy: internal.SortByCommentsDesc,
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, len(data), len(result.Hits))
+ assert.Equal(t, len(data), int(result.Total))
+ for i, v := range result.Hits {
+ if i < len(result.Hits)-1 {
+ assert.GreaterOrEqual(t, data[v.ID].CommentCount, data[result.Hits[i+1].ID].CommentCount)
+ }
+ }
+ },
+ },
+ {
+ Name: "SortByDeadlineDesc",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ ListAll: true,
+ },
+ SortBy: internal.SortByDeadlineDesc,
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, len(data), len(result.Hits))
+ assert.Equal(t, len(data), int(result.Total))
+ for i, v := range result.Hits {
+ if i < len(result.Hits)-1 {
+ assert.GreaterOrEqual(t, data[v.ID].DeadlineUnix, data[result.Hits[i+1].ID].DeadlineUnix)
+ }
+ }
+ },
+ },
+ {
+ Name: "SortByCreatedAsc",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ ListAll: true,
+ },
+ SortBy: internal.SortByCreatedAsc,
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, len(data), len(result.Hits))
+ assert.Equal(t, len(data), int(result.Total))
+ for i, v := range result.Hits {
+ if i < len(result.Hits)-1 {
+ assert.LessOrEqual(t, data[v.ID].CreatedUnix, data[result.Hits[i+1].ID].CreatedUnix)
+ }
+ }
+ },
+ },
+ {
+ Name: "SortByUpdatedAsc",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ ListAll: true,
+ },
+ SortBy: internal.SortByUpdatedAsc,
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, len(data), len(result.Hits))
+ assert.Equal(t, len(data), int(result.Total))
+ for i, v := range result.Hits {
+ if i < len(result.Hits)-1 {
+ assert.LessOrEqual(t, data[v.ID].UpdatedUnix, data[result.Hits[i+1].ID].UpdatedUnix)
+ }
+ }
+ },
+ },
+ {
+ Name: "SortByCommentsAsc",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ ListAll: true,
+ },
+ SortBy: internal.SortByCommentsAsc,
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, len(data), len(result.Hits))
+ assert.Equal(t, len(data), int(result.Total))
+ for i, v := range result.Hits {
+ if i < len(result.Hits)-1 {
+ assert.LessOrEqual(t, data[v.ID].CommentCount, data[result.Hits[i+1].ID].CommentCount)
+ }
+ }
+ },
+ },
+ {
+ Name: "SortByDeadlineAsc",
+ SearchOptions: &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ ListAll: true,
+ },
+ SortBy: internal.SortByDeadlineAsc,
+ },
+ Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
+ assert.Equal(t, len(data), len(result.Hits))
+ assert.Equal(t, len(data), int(result.Total))
+ for i, v := range result.Hits {
+ if i < len(result.Hits)-1 {
+ assert.LessOrEqual(t, data[v.ID].DeadlineUnix, data[result.Hits[i+1].ID].DeadlineUnix)
+ }
+ }
+ },
+ },
+}
+
+type testIndexerCase struct {
+ Name string
+ ExtraData []*internal.IndexerData
+
+ SearchOptions *internal.SearchOptions
+
+ Expected func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) // if nil, use ExpectedIDs, ExpectedTotal
+ ExpectedIDs []int64
+ ExpectedTotal int64
+}
+
+func generateDefaultIndexerData() []*internal.IndexerData {
+ var id int64
+ var data []*internal.IndexerData
+ for repoID := int64(1); repoID <= 10; repoID++ {
+ for issueIndex := int64(1); issueIndex <= 20; issueIndex++ {
+ id++
+
+ comments := make([]string, id%4)
+ for i := range comments {
+ comments[i] = fmt.Sprintf("comment%d", i)
+ }
+
+ labelIDs := make([]int64, id%5)
+ for i := range labelIDs {
+ labelIDs[i] = int64(i) + 1 // LabelID should not be 0
+ }
+ mentionIDs := make([]int64, id%6)
+ for i := range mentionIDs {
+ mentionIDs[i] = int64(i) + 1 // MentionID should not be 0
+ }
+ reviewedIDs := make([]int64, id%7)
+ for i := range reviewedIDs {
+ reviewedIDs[i] = int64(i) + 1 // ReviewID should not be 0
+ }
+ reviewRequestedIDs := make([]int64, id%8)
+ for i := range reviewRequestedIDs {
+ reviewRequestedIDs[i] = int64(i) + 1 // ReviewRequestedID should not be 0
+ }
+ subscriberIDs := make([]int64, id%9)
+ for i := range subscriberIDs {
+ subscriberIDs[i] = int64(i) + 1 // SubscriberID should not be 0
+ }
+
+ data = append(data, &internal.IndexerData{
+ ID: id,
+ RepoID: repoID,
+ IsPublic: repoID%2 == 0,
+ Title: fmt.Sprintf("issue%d of repo%d", issueIndex, repoID),
+ Content: fmt.Sprintf("content%d", issueIndex),
+ Comments: comments,
+ IsPull: issueIndex%2 == 0,
+ IsClosed: issueIndex%3 == 0,
+ LabelIDs: labelIDs,
+ NoLabel: len(labelIDs) == 0,
+ MilestoneID: issueIndex % 4,
+ ProjectID: issueIndex % 5,
+ ProjectBoardID: issueIndex % 6,
+ PosterID: id%10 + 1, // PosterID should not be 0
+ AssigneeID: issueIndex % 10,
+ MentionIDs: mentionIDs,
+ ReviewedIDs: reviewedIDs,
+ ReviewRequestedIDs: reviewRequestedIDs,
+ SubscriberIDs: subscriberIDs,
+ UpdatedUnix: timeutil.TimeStamp(id + issueIndex),
+ CreatedUnix: timeutil.TimeStamp(id),
+ DeadlineUnix: timeutil.TimeStamp(id + issueIndex + repoID),
+ CommentCount: int64(len(comments)),
+ })
+ }
+ }
+
+ return data
+}
+
+func countIndexerData(data map[int64]*internal.IndexerData, f func(v *internal.IndexerData) bool) int64 {
+ var count int64
+ for _, v := range data {
+ if f(v) {
+ count++
+ }
+ }
+ return count
+}
+
+// waitData waits for the indexer to index all data.
+// Some engines like Elasticsearch index data asynchronously, so we need to wait for a while.
+func waitData(indexer internal.Indexer, total int64) error {
+ var actual int64
+ for i := 0; i < 100; i++ {
+ result, err := indexer.Search(context.Background(), &internal.SearchOptions{
+ Paginator: &db.ListOptions{
+ PageSize: 0,
+ },
+ })
+ if err != nil {
+ return err
+ }
+ actual = result.Total
+ if actual == total {
+ return nil
+ }
+ time.Sleep(100 * time.Millisecond)
+ }
+ return fmt.Errorf("waitData: expected %d, actual %d", total, actual)
+}
diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go
index 2ea06b576c..335395f2f6 100644
--- a/modules/indexer/issues/meilisearch/meilisearch.go
+++ b/modules/indexer/issues/meilisearch/meilisearch.go
@@ -16,7 +16,10 @@ import (
)
const (
- issueIndexerLatestVersion = 1
+ issueIndexerLatestVersion = 2
+
+ // TODO: make this configurable if necessary
+ maxTotalHits = 10000
)
var _ internal.Indexer = &Indexer{}
@@ -29,7 +32,53 @@ type Indexer struct {
// NewIndexer creates a new meilisearch indexer
func NewIndexer(url, apiKey, indexerName string) *Indexer {
- inner := inner_meilisearch.NewIndexer(url, apiKey, indexerName, issueIndexerLatestVersion)
+ settings := &meilisearch.Settings{
+ // The default ranking rules of meilisearch are: ["words", "typo", "proximity", "attribute", "sort", "exactness"]
+ // So even if we specify a sort order, it may not be respected because the priority of "sort" is too low.
+ // So we need to override the ranking rules to make sure the sort order is respected.
+ // See https://www.meilisearch.com/docs/learn/core_concepts/relevancy
+ RankingRules: []string{"sort", // make sure "sort" has the highest priority
+ "words", "typo", "proximity", "attribute", "exactness"},
+
+ SearchableAttributes: []string{
+ "title",
+ "content",
+ "comments",
+ },
+ DisplayedAttributes: []string{
+ "id",
+ },
+ FilterableAttributes: []string{
+ "repo_id",
+ "is_public",
+ "is_pull",
+ "is_closed",
+ "label_ids",
+ "no_label",
+ "milestone_id",
+ "project_id",
+ "project_board_id",
+ "poster_id",
+ "assignee_id",
+ "mention_ids",
+ "reviewed_ids",
+ "review_requested_ids",
+ "subscriber_ids",
+ "updated_unix",
+ },
+ SortableAttributes: []string{
+ "updated_unix",
+ "created_unix",
+ "deadline_unix",
+ "comment_count",
+ "id",
+ },
+ Pagination: &meilisearch.Pagination{
+ MaxTotalHits: maxTotalHits,
+ },
+ }
+
+ inner := inner_meilisearch.NewIndexer(url, apiKey, indexerName, issueIndexerLatestVersion, settings)
indexer := &Indexer{
inner: inner,
Indexer: inner,
@@ -38,7 +87,7 @@ func NewIndexer(url, apiKey, indexerName string) *Indexer {
}
// Index will save the index data
-func (b *Indexer) Index(_ context.Context, issues []*internal.IndexerData) error {
+func (b *Indexer) Index(_ context.Context, issues ...*internal.IndexerData) error {
if len(issues) == 0 {
return nil
}
@@ -70,23 +119,102 @@ func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
// Search searches for issues by given conditions.
// Returns the matching issue IDs
-func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int, state string) (*internal.SearchResult, error) {
- repoFilters := make([]string, 0, len(repoIDs))
- for _, repoID := range repoIDs {
- repoFilters = append(repoFilters, "repo_id = "+strconv.FormatInt(repoID, 10))
- }
- filter := strings.Join(repoFilters, " OR ")
- if state == "open" || state == "closed" {
- if filter != "" {
- filter = "(" + filter + ") AND state = " + state
- } else {
- filter = "state = " + state
+func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
+ query := inner_meilisearch.FilterAnd{}
+
+ if len(options.RepoIDs) > 0 {
+ q := &inner_meilisearch.FilterOr{}
+ q.Or(inner_meilisearch.NewFilterIn("repo_id", options.RepoIDs...))
+ if options.AllPublic {
+ q.Or(inner_meilisearch.NewFilterEq("is_public", true))
+ }
+ query.And(q)
+ }
+
+ if !options.IsPull.IsNone() {
+ query.And(inner_meilisearch.NewFilterEq("is_pull", options.IsPull.IsTrue()))
+ }
+ if !options.IsClosed.IsNone() {
+ query.And(inner_meilisearch.NewFilterEq("is_closed", options.IsClosed.IsTrue()))
+ }
+
+ if options.NoLabelOnly {
+ query.And(inner_meilisearch.NewFilterEq("no_label", true))
+ } else {
+ if len(options.IncludedLabelIDs) > 0 {
+ q := &inner_meilisearch.FilterAnd{}
+ for _, labelID := range options.IncludedLabelIDs {
+ q.And(inner_meilisearch.NewFilterEq("label_ids", labelID))
+ }
+ query.And(q)
+ } else if len(options.IncludedAnyLabelIDs) > 0 {
+ query.And(inner_meilisearch.NewFilterIn("label_ids", options.IncludedAnyLabelIDs...))
+ }
+ if len(options.ExcludedLabelIDs) > 0 {
+ q := &inner_meilisearch.FilterAnd{}
+ for _, labelID := range options.ExcludedLabelIDs {
+ q.And(inner_meilisearch.NewFilterNot(inner_meilisearch.NewFilterEq("label_ids", labelID)))
+ }
+ query.And(q)
}
}
- searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(keyword, &meilisearch.SearchRequest{
- Filter: filter,
+
+ if len(options.MilestoneIDs) > 0 {
+ query.And(inner_meilisearch.NewFilterIn("milestone_id", options.MilestoneIDs...))
+ }
+
+ if options.ProjectID != nil {
+ query.And(inner_meilisearch.NewFilterEq("project_id", *options.ProjectID))
+ }
+ if options.ProjectBoardID != nil {
+ query.And(inner_meilisearch.NewFilterEq("project_board_id", *options.ProjectBoardID))
+ }
+
+ if options.PosterID != nil {
+ query.And(inner_meilisearch.NewFilterEq("poster_id", *options.PosterID))
+ }
+
+ if options.AssigneeID != nil {
+ query.And(inner_meilisearch.NewFilterEq("assignee_id", *options.AssigneeID))
+ }
+
+ if options.MentionID != nil {
+ query.And(inner_meilisearch.NewFilterEq("mention_ids", *options.MentionID))
+ }
+
+ if options.ReviewedID != nil {
+ query.And(inner_meilisearch.NewFilterEq("reviewed_ids", *options.ReviewedID))
+ }
+ if options.ReviewRequestedID != nil {
+ query.And(inner_meilisearch.NewFilterEq("review_requested_ids", *options.ReviewRequestedID))
+ }
+
+ if options.SubscriberID != nil {
+ query.And(inner_meilisearch.NewFilterEq("subscriber_ids", *options.SubscriberID))
+ }
+
+ if options.UpdatedAfterUnix != nil {
+ query.And(inner_meilisearch.NewFilterGte("updated_unix", *options.UpdatedAfterUnix))
+ }
+ if options.UpdatedBeforeUnix != nil {
+ query.And(inner_meilisearch.NewFilterLte("updated_unix", *options.UpdatedBeforeUnix))
+ }
+
+ if options.SortBy == "" {
+ options.SortBy = internal.SortByCreatedAsc
+ }
+ sortBy := []string{
+ parseSortBy(options.SortBy),
+ "id:desc",
+ }
+
+ skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxTotalHits)
+
+ searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(options.Keyword, &meilisearch.SearchRequest{
+ Filter: query.Statement(),
Limit: int64(limit),
- Offset: int64(start),
+ Offset: int64(skip),
+ Sort: sortBy,
})
if err != nil {
return nil, err
@@ -98,8 +226,17 @@ func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, l
ID: int64(hit.(map[string]any)["id"].(float64)),
})
}
+
return &internal.SearchResult{
- Total: searchRes.TotalHits,
+ Total: searchRes.EstimatedTotalHits,
Hits: hits,
}, nil
}
+
+func parseSortBy(sortBy internal.SortBy) string {
+ field := strings.TrimPrefix(string(sortBy), "-")
+ if strings.HasPrefix(string(sortBy), "-") {
+ return field + ":desc"
+ }
+ return field + ":asc"
+}
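Aside: a rough sketch of how the filter helpers above compose for a typical query (only the inner_meilisearch calls already used in this file are real; everything else, including the variable names, is hypothetical, and the exact filter string is produced by modules/indexer/internal/meilisearch/filter.go):

	// "open issues in repo 1 or 4, or in any public repo"
	query := inner_meilisearch.FilterAnd{}

	repoFilter := &inner_meilisearch.FilterOr{}
	repoFilter.Or(inner_meilisearch.NewFilterIn("repo_id", []int64{1, 4}...))
	repoFilter.Or(inner_meilisearch.NewFilterEq("is_public", true)) // the AllPublic case
	query.And(repoFilter)

	query.And(inner_meilisearch.NewFilterEq("is_closed", false))

	req := &meilisearch.SearchRequest{
		Filter: query.Statement(), // handed to meilisearch as-is
		Sort:   []string{parseSortBy(internal.SortByUpdatedDesc), "id:desc"}, // "updated_unix:desc" plus a stable tiebreak
	}
	_ = req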
diff --git a/modules/indexer/issues/meilisearch/meilisearch_test.go b/modules/indexer/issues/meilisearch/meilisearch_test.go
new file mode 100644
index 0000000000..3d7237268e
--- /dev/null
+++ b/modules/indexer/issues/meilisearch/meilisearch_test.go
@@ -0,0 +1,50 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package meilisearch
+
+import (
+ "fmt"
+ "net/http"
+ "os"
+ "testing"
+ "time"
+
+ "code.gitea.io/gitea/modules/indexer/issues/internal/tests"
+)
+
+func TestMeilisearchIndexer(t *testing.T) {
+ // The meilisearch instance started by pull-db-tests.yml > test-unit > services > meilisearch
+ url := "http://meilisearch:7700"
+ key := "" // auth has been disabled in test environment
+
+ if os.Getenv("CI") == "" {
+ // Make it possible to run tests against a local meilisearch instance
+ url = os.Getenv("TEST_MEILISEARCH_URL")
+ if url == "" {
+ t.Skip("TEST_MEILISEARCH_URL not set and not running in CI")
+ return
+ }
+ key = os.Getenv("TEST_MEILISEARCH_KEY")
+ }
+
+ ok := false
+ for i := 0; i < 60; i++ {
+ resp, err := http.Get(url)
+ if err == nil && resp.StatusCode == http.StatusOK {
+ ok = true
+ break
+ }
+ t.Logf("Waiting for meilisearch to be up: %v", err)
+ time.Sleep(time.Second)
+ }
+ if !ok {
+ t.Fatalf("Failed to wait for meilisearch to be up")
+ return
+ }
+
+ indexer := NewIndexer(url, key, fmt.Sprintf("test_meilisearch_indexer_%d", time.Now().Unix()))
+ defer indexer.Close()
+
+ tests.TestIndexer(t, indexer)
+}
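To run this test outside CI against a local instance, only the two environment variables above are needed, e.g. TEST_MEILISEARCH_URL=http://localhost:7700 go test ./modules/indexer/issues/meilisearch/... (assuming a local meilisearch with auth disabled; otherwise set TEST_MEILISEARCH_KEY as well).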
diff --git a/modules/indexer/issues/util.go b/modules/indexer/issues/util.go
new file mode 100644
index 0000000000..2dec3b71db
--- /dev/null
+++ b/modules/indexer/issues/util.go
@@ -0,0 +1,173 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package issues
+
+import (
+ "context"
+ "errors"
+ "fmt"
+
+ "code.gitea.io/gitea/models/db"
+ issue_model "code.gitea.io/gitea/models/issues"
+ "code.gitea.io/gitea/modules/container"
+ "code.gitea.io/gitea/modules/indexer/issues/internal"
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/queue"
+)
+
+// getIssueIndexerData returns the indexer data of an issue and a bool value indicating whether the issue exists.
+func getIssueIndexerData(ctx context.Context, issueID int64) (*internal.IndexerData, bool, error) {
+ issue, err := issue_model.GetIssueByID(ctx, issueID)
+ if err != nil {
+ if issue_model.IsErrIssueNotExist(err) {
+ return nil, false, nil
+ }
+ return nil, false, err
+ }
+
+ // FIXME: what if users want to search for a review comment of a pull request?
+ // The comment type is CommentTypeCode or CommentTypeReview.
+ // But LoadDiscussComments only loads CommentTypeComment.
+ if err := issue.LoadDiscussComments(ctx); err != nil {
+ return nil, false, err
+ }
+
+ comments := make([]string, 0, len(issue.Comments))
+ for _, comment := range issue.Comments {
+ if comment.Content != "" {
+ // Whatever the comment type is, index the content if it is not empty.
+ comments = append(comments, comment.Content)
+ }
+ }
+
+ if err := issue.LoadAttributes(ctx); err != nil {
+ return nil, false, err
+ }
+
+ labels := make([]int64, 0, len(issue.Labels))
+ for _, label := range issue.Labels {
+ labels = append(labels, label.ID)
+ }
+
+ mentionIDs, err := issue_model.GetIssueMentionIDs(ctx, issueID)
+ if err != nil {
+ return nil, false, err
+ }
+
+ var (
+ reviewedIDs []int64
+ reviewRequestedIDs []int64
+ )
+ {
+ reviews, err := issue_model.FindReviews(ctx, issue_model.FindReviewOptions{
+ ListOptions: db.ListOptions{
+ ListAll: true,
+ },
+ IssueID: issueID,
+ OfficialOnly: false,
+ })
+ if err != nil {
+ return nil, false, err
+ }
+
+ reviewedIDsSet := make(container.Set[int64], len(reviews))
+ reviewRequestedIDsSet := make(container.Set[int64], len(reviews))
+ for _, review := range reviews {
+ if review.Type == issue_model.ReviewTypeRequest {
+ reviewRequestedIDsSet.Add(review.ReviewerID)
+ } else {
+ reviewedIDsSet.Add(review.ReviewerID)
+ }
+ }
+ reviewedIDs = reviewedIDsSet.Values()
+ reviewRequestedIDs = reviewRequestedIDsSet.Values()
+ }
+
+ subscriberIDs, err := issue_model.GetIssueWatchersIDs(ctx, issue.ID, true)
+ if err != nil {
+ return nil, false, err
+ }
+
+ var projectID int64
+ if issue.Project != nil {
+ projectID = issue.Project.ID
+ }
+
+ return &internal.IndexerData{
+ ID: issue.ID,
+ RepoID: issue.RepoID,
+ IsPublic: !issue.Repo.IsPrivate,
+ Title: issue.Title,
+ Content: issue.Content,
+ Comments: comments,
+ IsPull: issue.IsPull,
+ IsClosed: issue.IsClosed,
+ LabelIDs: labels,
+ NoLabel: len(labels) == 0,
+ MilestoneID: issue.MilestoneID,
+ ProjectID: projectID,
+ ProjectBoardID: issue.ProjectBoardID(),
+ PosterID: issue.PosterID,
+ AssigneeID: issue.AssigneeID,
+ MentionIDs: mentionIDs,
+ ReviewedIDs: reviewedIDs,
+ ReviewRequestedIDs: reviewRequestedIDs,
+ SubscriberIDs: subscriberIDs,
+ UpdatedUnix: issue.UpdatedUnix,
+ CreatedUnix: issue.CreatedUnix,
+ DeadlineUnix: issue.DeadlineUnix,
+ CommentCount: int64(len(issue.Comments)),
+ }, true, nil
+}
+
+func updateRepoIndexer(ctx context.Context, repoID int64) error {
+ ids, err := issue_model.GetIssueIDsByRepoID(ctx, repoID)
+ if err != nil {
+ return fmt.Errorf("issue_model.GetIssueIDsByRepoID: %w", err)
+ }
+ for _, id := range ids {
+ if err := updateIssueIndexer(id); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func updateIssueIndexer(issueID int64) error {
+ return pushIssueIndexerQueue(&IndexerMetadata{ID: issueID})
+}
+
+func deleteRepoIssueIndexer(ctx context.Context, repoID int64) error {
+ var ids []int64
+ ids, err := issue_model.GetIssueIDsByRepoID(ctx, repoID)
+ if err != nil {
+ return fmt.Errorf("issue_model.GetIssueIDsByRepoID: %w", err)
+ }
+
+ if len(ids) == 0 {
+ return nil
+ }
+ return pushIssueIndexerQueue(&IndexerMetadata{
+ IDs: ids,
+ IsDelete: true,
+ })
+}
+
+func pushIssueIndexerQueue(data *IndexerMetadata) error {
+ if issueIndexerQueue == nil {
+ // Some unit tests will trigger indexing, but the queue is not initialized.
+ // It's OK to ignore it, but log a warning message in case it's not a unit test.
+ log.Warn("Trying to push %+v to issue indexer queue, but the queue is not initialized, it's OK if it's a unit test", data)
+ return nil
+ }
+
+ err := issueIndexerQueue.Push(data)
+ if errors.Is(err, queue.ErrAlreadyInQueue) {
+ return nil
+ }
+ if errors.Is(err, context.DeadlineExceeded) {
+ log.Warn("It seems that issue indexer is slow and the queue is full. Please check the issue indexer or increase the queue size.")
+ }
+ return err
+}
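Aside: a rough sketch of how these helpers are typically driven (the two caller functions are hypothetical; only updateIssueIndexer, updateRepoIndexer and the log package come from this file). Pushing the same issue ID twice is harmless, because ErrAlreadyInQueue is treated as success above.

	// hypothetical callers, e.g. notification handlers inside this package
	func onIssueChanged(issueID int64) {
		if err := updateIssueIndexer(issueID); err != nil {
			log.Error("updateIssueIndexer: %v", err)
		}
	}

	func onRepoVisibilityChanged(ctx context.Context, repoID int64) {
		// re-queue every issue of the repository, e.g. because is_public changed
		if err := updateRepoIndexer(ctx, repoID); err != nil {
			log.Error("updateRepoIndexer: %v", err)
		}
	}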