diff options
Diffstat (limited to 'modules/indexer')
34 files changed, 577 insertions, 251 deletions
diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 772317fa59..70f0995a01 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -16,7 +16,7 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/gitrepo" + "code.gitea.io/gitea/modules/indexer" path_filter "code.gitea.io/gitea/modules/indexer/code/bleve/token/path" "code.gitea.io/gitea/modules/indexer/code/internal" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" @@ -24,11 +24,11 @@ import ( "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/typesniffer" + "code.gitea.io/gitea/modules/util" "github.com/blevesearch/bleve/v2" analyzer_custom "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" analyzer_keyword "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword" - "github.com/blevesearch/bleve/v2/analysis/token/camelcase" "github.com/blevesearch/bleve/v2/analysis/token/lowercase" "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" "github.com/blevesearch/bleve/v2/analysis/tokenizer/letter" @@ -70,7 +70,7 @@ const ( filenameIndexerAnalyzer = "filenameIndexerAnalyzer" filenameIndexerTokenizer = "filenameIndexerTokenizer" repoIndexerDocType = "repoIndexerDocType" - repoIndexerLatestVersion = 8 + repoIndexerLatestVersion = 9 ) // generateBleveIndexMapping generates a bleve index mapping for the repo indexer @@ -107,7 +107,7 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) { "type": analyzer_custom.Name, "char_filters": []string{}, "tokenizer": letter.Name, - "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, + "token_filters": []string{unicodeNormalizeName, lowercase.Name}, }); err != nil { return nil, err } @@ -136,6 +136,10 @@ type Indexer struct { indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much } +func (b *Indexer) SupportedSearchModes() []indexer.SearchMode { + return indexer.SearchModesExactWords() +} + // NewIndexer creates a new bleve local indexer func NewIndexer(indexDir string) *Indexer { inner := inner_bleve.NewIndexer(indexDir, repoIndexerLatestVersion, generateBleveIndexMapping) @@ -158,7 +162,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro var err error if !update.Sized { var stdout string - stdout, _, err = git.NewCommand(ctx, "cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) + stdout, _, err = git.NewCommand("cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(ctx, &git.RunOpts{Dir: repo.RepoPath()}) if err != nil { return err } @@ -185,7 +189,8 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro return err } else if !typesniffer.DetectContentType(fileContents).IsText() { // FIXME: UTF-16 files will probably fail here - return nil + // Even if the file is not recognized as a "text file", we could still put its name into the indexers to make the filename become searchable, while leave the content to empty. + fileContents = nil } if _, err = batchReader.Discard(1); err != nil { @@ -211,12 +216,7 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, batch func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) if len(changes.Updates) > 0 { - r, err := gitrepo.OpenRepository(ctx, repo) - if err != nil { - return err - } - defer r.Close() - gitBatch, err := r.NewBatch(ctx) + gitBatch, err := git.NewBatch(ctx, repo.RepoPath()) if err != nil { return err } @@ -260,17 +260,31 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int var ( indexerQuery query.Query keywordQuery query.Query + contentQuery query.Query ) pathQuery := bleve.NewPrefixQuery(strings.ToLower(opts.Keyword)) pathQuery.FieldVal = "Filename" pathQuery.SetBoost(10) - contentQuery := bleve.NewMatchQuery(opts.Keyword) - contentQuery.FieldVal = "Content" - - if opts.IsKeywordFuzzy { - contentQuery.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword) + searchMode := util.IfZero(opts.SearchMode, b.SupportedSearchModes()[0].ModeValue) + if searchMode == indexer.SearchModeExact { + // 1.21 used NewPrefixQuery, but it seems not working well, and later releases changed to NewMatchPhraseQuery + q := bleve.NewMatchPhraseQuery(opts.Keyword) + q.Analyzer = repoIndexerAnalyzer + q.FieldVal = "Content" + contentQuery = q + } else /* words */ { + q := bleve.NewMatchQuery(opts.Keyword) + q.FieldVal = "Content" + q.Analyzer = repoIndexerAnalyzer + if searchMode == indexer.SearchModeFuzzy { + // this logic doesn't seem right, it is only used to pass the test-case `Keyword: "dESCRIPTION"`, which doesn't seem to be a real-life use-case. + q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword) + } else { + q.Operator = query.MatchQueryOperatorAnd + } + contentQuery = q } keywordQuery = bleve.NewDisjunctionQuery(contentQuery, pathQuery) diff --git a/modules/indexer/code/bleve/token/path/path.go b/modules/indexer/code/bleve/token/path/path.go index 107e0da109..6dfc12f146 100644 --- a/modules/indexer/code/bleve/token/path/path.go +++ b/modules/indexer/code/bleve/token/path/path.go @@ -51,13 +51,13 @@ func generatePathTokens(input analysis.TokenStream, reversed bool) analysis.Toke slices.Reverse(input) } - for i := 0; i < len(input); i++ { + for i := range input { var sb strings.Builder - sb.WriteString(string(input[0].Term)) + sb.Write(input[0].Term) for j := 1; j < i; j++ { sb.WriteString("/") - sb.WriteString(string(input[j].Term)) + sb.Write(input[j].Term) } term := sb.String() @@ -97,5 +97,9 @@ func generatePathTokens(input analysis.TokenStream, reversed bool) analysis.Toke } func init() { - registry.RegisterTokenFilter(Name, TokenFilterConstructor) + // FIXME: move it to the bleve's init function, but do not call it in global init + err := registry.RegisterTokenFilter(Name, TokenFilterConstructor) + if err != nil { + panic(err) + } } diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 1c4dd39eff..f925ce396a 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -15,7 +15,7 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/gitrepo" + "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/code/internal" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" @@ -24,6 +24,7 @@ import ( "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/typesniffer" + "code.gitea.io/gitea/modules/util" "github.com/go-enry/go-enry/v2" "github.com/olivere/elastic/v7" @@ -45,6 +46,10 @@ type Indexer struct { indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much } +func (b *Indexer) SupportedSearchModes() []indexer.SearchMode { + return indexer.SearchModesExactWords() +} + // NewIndexer creates a new elasticsearch indexer func NewIndexer(url, indexerName string) *Indexer { inner := inner_elasticsearch.NewIndexer(url, indexerName, esRepoIndexerLatestVersion, defaultMapping) @@ -142,7 +147,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro var err error if !update.Sized { var stdout string - stdout, _, err = git.NewCommand(ctx, "cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) + stdout, _, err = git.NewCommand("cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(ctx, &git.RunOpts{Dir: repo.RepoPath()}) if err != nil { return nil, err } @@ -203,12 +208,7 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elasti func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { reqs := make([]elastic.BulkableRequest, 0) if len(changes.Updates) > 0 { - r, err := gitrepo.OpenRepository(ctx, repo) - if err != nil { - return err - } - defer r.Close() - batch, err := r.NewBatch(ctx) + batch, err := git.NewBatch(ctx, repo.RepoPath()) if err != nil { return err } @@ -359,13 +359,16 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan // Search searches for codes and language stats by given conditions. func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { - searchType := esMultiMatchTypePhrasePrefix - if opts.IsKeywordFuzzy { - searchType = esMultiMatchTypeBestFields + var contentQuery elastic.Query + searchMode := util.IfZero(opts.SearchMode, b.SupportedSearchModes()[0].ModeValue) + if searchMode == indexer.SearchModeExact { + // 1.21 used NewMultiMatchQuery().Type(esMultiMatchTypePhrasePrefix), but later releases changed to NewMatchPhraseQuery + contentQuery = elastic.NewMatchPhraseQuery("content", opts.Keyword) + } else /* words */ { + contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).Type(esMultiMatchTypeBestFields).Operator("and") } - kwQuery := elastic.NewBoolQuery().Should( - elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType), + contentQuery, elastic.NewMultiMatchQuery(opts.Keyword, "filename^10").Type(esMultiMatchTypePhrasePrefix), ) query := elastic.NewBoolQuery() diff --git a/modules/indexer/code/elasticsearch/elasticsearch_test.go b/modules/indexer/code/elasticsearch/elasticsearch_test.go index a6d2af92b2..e8f1f202ce 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch_test.go +++ b/modules/indexer/code/elasticsearch/elasticsearch_test.go @@ -11,6 +11,6 @@ import ( func TestIndexPos(t *testing.T) { startIdx, endIdx := contentMatchIndexPos("test index start and end", "start", "end") - assert.EqualValues(t, 11, startIdx) - assert.EqualValues(t, 15, endIdx) + assert.Equal(t, 11, startIdx) + assert.Equal(t, 15, endIdx) } diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index df9783288b..41bc74e6ec 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -16,7 +16,7 @@ import ( ) func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository) (string, error) { - stdout, _, err := git.NewCommand(ctx, "show-ref", "-s").AddDynamicArguments(git.BranchPrefix + repo.DefaultBranch).RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) + stdout, _, err := git.NewCommand("show-ref", "-s").AddDynamicArguments(git.BranchPrefix+repo.DefaultBranch).RunStdString(ctx, &git.RunOpts{Dir: repo.RepoPath()}) if err != nil { return "", err } @@ -32,8 +32,8 @@ func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision s needGenesis := len(status.CommitSha) == 0 if !needGenesis { - hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(status.CommitSha, revision) - stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) + hasAncestorCmd := git.NewCommand("merge-base").AddDynamicArguments(status.CommitSha, revision) + stdout, _, _ := hasAncestorCmd.RunStdString(ctx, &git.RunOpts{Dir: repo.RepoPath()}) needGenesis = len(stdout) == 0 } @@ -86,7 +86,7 @@ func parseGitLsTreeOutput(stdout []byte) ([]internal.FileUpdate, error) { // genesisChanges get changes to add repo to the indexer for the first time func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { var changes internal.RepoChanges - stdout, _, runErr := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision).RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) + stdout, _, runErr := git.NewCommand("ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision).RunStdBytes(ctx, &git.RunOpts{Dir: repo.RepoPath()}) if runErr != nil { return nil, runErr } @@ -98,8 +98,8 @@ func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision s // nonGenesisChanges get changes since the previous indexer update func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { - diffCmd := git.NewCommand(ctx, "diff", "--name-status").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision) - stdout, _, runErr := diffCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) + diffCmd := git.NewCommand("diff", "--name-status").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision) + stdout, _, runErr := diffCmd.RunStdString(ctx, &git.RunOpts{Dir: repo.RepoPath()}) if runErr != nil { // previous commit sha may have been removed by a force push, so // try rebuilding from scratch @@ -115,9 +115,9 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio updatedFilenames := make([]string, 0, 10) updateChanges := func() error { - cmd := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l").AddDynamicArguments(revision). + cmd := git.NewCommand("ls-tree", "--full-tree", "-l").AddDynamicArguments(revision). AddDashesAndList(updatedFilenames...) - lsTreeStdout, _, err := cmd.RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) + lsTreeStdout, _, err := cmd.RunStdBytes(ctx, &git.RunOpts{Dir: repo.RepoPath()}) if err != nil { return err } @@ -129,8 +129,8 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio changes.Updates = append(changes.Updates, updates...) return nil } - lines := strings.Split(stdout, "\n") - for _, line := range lines { + lines := strings.SplitSeq(stdout, "\n") + for line := range lines { line = strings.TrimSpace(line) if len(line) == 0 { continue diff --git a/modules/indexer/code/gitgrep/gitgrep.go b/modules/indexer/code/gitgrep/gitgrep.go new file mode 100644 index 0000000000..6f6e0b47b9 --- /dev/null +++ b/modules/indexer/code/gitgrep/gitgrep.go @@ -0,0 +1,66 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package gitgrep + +import ( + "context" + "fmt" + "strings" + + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/indexer" + code_indexer "code.gitea.io/gitea/modules/indexer/code" + "code.gitea.io/gitea/modules/setting" +) + +func indexSettingToGitGrepPathspecList() (list []string) { + for _, expr := range setting.Indexer.IncludePatterns { + list = append(list, ":(glob)"+expr.PatternString()) + } + for _, expr := range setting.Indexer.ExcludePatterns { + list = append(list, ":(glob,exclude)"+expr.PatternString()) + } + return list +} + +func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, searchMode indexer.SearchModeType) (searchResults []*code_indexer.Result, total int, err error) { + grepMode := git.GrepModeWords + switch searchMode { + case indexer.SearchModeExact: + grepMode = git.GrepModeExact + case indexer.SearchModeRegexp: + grepMode = git.GrepModeRegexp + } + res, err := git.GrepSearch(ctx, gitRepo, keyword, git.GrepOptions{ + ContextLineNumber: 1, + GrepMode: grepMode, + RefName: ref.String(), + PathspecList: indexSettingToGitGrepPathspecList(), + }) + if err != nil { + // TODO: if no branch exists, it reports: exit status 128, fatal: this operation must be run in a work tree. + return nil, 0, fmt.Errorf("git.GrepSearch: %w", err) + } + commitID, err := gitRepo.GetRefCommitID(ref.String()) + if err != nil { + return nil, 0, fmt.Errorf("gitRepo.GetRefCommitID: %w", err) + } + + total = len(res) + pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res)) + pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res)) + res = res[pageStart:pageEnd] + for _, r := range res { + searchResults = append(searchResults, &code_indexer.Result{ + RepoID: repoID, + Filename: r.Filename, + CommitID: commitID, + // UpdatedUnix: not supported yet + // Language: not supported yet + // Color: not supported yet + Lines: code_indexer.HighlightSearchResultCode(r.Filename, "", r.LineNumbers, strings.Join(r.LineCodes, "\n")), + }) + } + return searchResults, total, nil +} diff --git a/modules/indexer/code/gitgrep/gitgrep_test.go b/modules/indexer/code/gitgrep/gitgrep_test.go new file mode 100644 index 0000000000..97dda9d966 --- /dev/null +++ b/modules/indexer/code/gitgrep/gitgrep_test.go @@ -0,0 +1,19 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package gitgrep + +import ( + "testing" + + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" + + "github.com/stretchr/testify/assert" +) + +func TestIndexSettingToGitGrepPathspecList(t *testing.T) { + defer test.MockVariableValue(&setting.Indexer.IncludePatterns, setting.IndexerGlobFromString("a"))() + defer test.MockVariableValue(&setting.Indexer.ExcludePatterns, setting.IndexerGlobFromString("b"))() + assert.Equal(t, []string{":(glob)a", ":(glob,exclude)b"}, indexSettingToGitGrepPathspecList()) +} diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index 728b37fab6..6035ddfe95 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -14,6 +14,7 @@ import ( "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/code/bleve" "code.gitea.io/gitea/modules/indexer/code/elasticsearch" "code.gitea.io/gitea/modules/indexer/code/internal" @@ -29,13 +30,11 @@ var ( // When the real indexer is not ready, it will be a dummy indexer which will return error to explain it's not ready. // So it's always safe use it as *globalIndexer.Load() and call its methods. globalIndexer atomic.Pointer[internal.Indexer] - dummyIndexer *internal.Indexer ) func init() { - i := internal.NewDummyIndexer() - dummyIndexer = &i - globalIndexer.Store(dummyIndexer) + dummyIndexer := internal.NewDummyIndexer() + globalIndexer.Store(&dummyIndexer) } func index(ctx context.Context, indexer internal.Indexer, repoID int64) error { @@ -304,3 +303,11 @@ func populateRepoIndexer(ctx context.Context) { } log.Info("Done (re)populating the repo indexer with existing repositories") } + +func SupportedSearchModes() []indexer.SearchMode { + gi := globalIndexer.Load() + if gi == nil { + return nil + } + return (*gi).SupportedSearchModes() +} diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index f358bbe785..78fea22f10 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -11,12 +11,13 @@ import ( "code.gitea.io/gitea/models/db" "code.gitea.io/gitea/models/unittest" - "code.gitea.io/gitea/modules/git" + indexer_module "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/code/bleve" "code.gitea.io/gitea/modules/indexer/code/elasticsearch" "code.gitea.io/gitea/modules/indexer/code/internal" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/test" + "code.gitea.io/gitea/modules/util" _ "code.gitea.io/gitea/models" _ "code.gitea.io/gitea/models/actions" @@ -37,13 +38,14 @@ func TestMain(m *testing.M) { func testIndexer(name string, t *testing.T, indexer internal.Indexer) { t.Run(name, func(t *testing.T) { - assert.NoError(t, setupRepositoryIndexes(git.DefaultContext, indexer)) + assert.NoError(t, setupRepositoryIndexes(t.Context(), indexer)) keywords := []struct { - RepoIDs []int64 - Keyword string - Langs int - Results []codeSearchResult + RepoIDs []int64 + Keyword string + Langs int + SearchMode indexer_module.SearchModeType + Results []codeSearchResult }{ // Search for an exact match on the contents of a file // This scenario yields a single result (the file README.md on the repo '1') @@ -184,9 +186,10 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, // Search for matches on the contents of files regardless of case. { - RepoIDs: nil, - Keyword: "dESCRIPTION", - Langs: 1, + RepoIDs: nil, + Keyword: "dESCRIPTION", + Langs: 1, + SearchMode: indexer_module.SearchModeFuzzy, Results: []codeSearchResult{ { Filename: "README.md", @@ -194,7 +197,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, }, }, - // Search for an exact match on the filename within the repo '62' (case insenstive). + // Search for an exact match on the filename within the repo '62' (case-insensitive). // This scenario yields a single result (the file avocado.md on the repo '62') { RepoIDs: []int64{62}, @@ -207,7 +210,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, }, }, - // Search for matches on the contents of files when the criteria is a expression. + // Search for matches on the contents of files when the criteria are an expression. { RepoIDs: []int64{62}, Keyword: "console.log", @@ -219,7 +222,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, }, }, - // Search for matches on the contents of files when the criteria is part of a expression. + // Search for matches on the contents of files when the criteria are parts of an expression. { RepoIDs: []int64{62}, Keyword: "log", @@ -235,17 +238,17 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { for _, kw := range keywords { t.Run(kw.Keyword, func(t *testing.T) { - total, res, langs, err := indexer.Search(context.TODO(), &internal.SearchOptions{ - RepoIDs: kw.RepoIDs, - Keyword: kw.Keyword, + total, res, langs, err := indexer.Search(t.Context(), &internal.SearchOptions{ + RepoIDs: kw.RepoIDs, + Keyword: kw.Keyword, + SearchMode: util.IfZero(kw.SearchMode, indexer_module.SearchModeWords), Paginator: &db.ListOptions{ Page: 1, PageSize: 10, }, - IsKeywordFuzzy: true, }) - assert.NoError(t, err) - assert.Len(t, langs, kw.Langs) + require.NoError(t, err) + require.Len(t, langs, kw.Langs) hits := make([]codeSearchResult, 0, len(res)) @@ -275,7 +278,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }) } - assert.NoError(t, tearDownRepositoryIndexes(indexer)) + assert.NoError(t, tearDownRepositoryIndexes(t.Context(), indexer)) }) } @@ -287,10 +290,10 @@ func TestBleveIndexAndSearch(t *testing.T) { idx := bleve.NewIndexer(dir) defer idx.Close() - _, err := idx.Init(context.Background()) + _, err := idx.Init(t.Context()) require.NoError(t, err) - testIndexer("beleve", t, idx) + testIndexer("bleve", t, idx) } func TestESIndexAndSearch(t *testing.T) { @@ -303,11 +306,11 @@ func TestESIndexAndSearch(t *testing.T) { } indexer := elasticsearch.NewIndexer(u, "gitea_codes") - if _, err := indexer.Init(context.Background()); err != nil { + if _, err := indexer.Init(t.Context()); err != nil { if indexer != nil { indexer.Close() } - assert.FailNow(t, "Unable to init ES indexer Error: %v", err) + require.NoError(t, err, "Unable to init ES indexer") } defer indexer.Close() @@ -324,9 +327,9 @@ func setupRepositoryIndexes(ctx context.Context, indexer internal.Indexer) error return nil } -func tearDownRepositoryIndexes(indexer internal.Indexer) error { +func tearDownRepositoryIndexes(ctx context.Context, indexer internal.Indexer) error { for _, repoID := range repositoriesToSearch() { - if err := indexer.Delete(context.Background(), repoID); err != nil { + if err := indexer.Delete(ctx, repoID); err != nil { return err } } diff --git a/modules/indexer/code/internal/indexer.go b/modules/indexer/code/internal/indexer.go index c259fcd26e..d58b028124 100644 --- a/modules/indexer/code/internal/indexer.go +++ b/modules/indexer/code/internal/indexer.go @@ -5,10 +5,11 @@ package internal import ( "context" - "fmt" + "errors" "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/internal" ) @@ -18,6 +19,7 @@ type Indexer interface { Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error Delete(ctx context.Context, repoID int64) error Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) + SupportedSearchModes() []indexer.SearchMode } type SearchOptions struct { @@ -25,7 +27,7 @@ type SearchOptions struct { Keyword string Language string - IsKeywordFuzzy bool + SearchMode indexer.SearchModeType db.Paginator } @@ -41,14 +43,18 @@ type dummyIndexer struct { internal.Indexer } +func (d *dummyIndexer) SupportedSearchModes() []indexer.SearchMode { + return nil +} + func (d *dummyIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error { - return fmt.Errorf("indexer is not ready") + return errors.New("indexer is not ready") } func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error { - return fmt.Errorf("indexer is not ready") + return errors.New("indexer is not ready") } func (d *dummyIndexer) Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) { - return 0, nil, nil, fmt.Errorf("indexer is not ready") + return 0, nil, nil, errors.New("indexer is not ready") } diff --git a/modules/indexer/code/internal/util.go b/modules/indexer/code/internal/util.go index 5b95783d9f..fa958be473 100644 --- a/modules/indexer/code/internal/util.go +++ b/modules/indexer/code/internal/util.go @@ -10,9 +10,7 @@ import ( "code.gitea.io/gitea/modules/log" ) -const ( - filenameMatchNumberOfLines = 7 // Copied from github search -) +const filenameMatchNumberOfLines = 7 // Copied from GitHub search func FilenameIndexerID(repoID int64, filename string) string { return internal.Base36(repoID) + "_" + filename @@ -35,7 +33,7 @@ func FilenameOfIndexerID(indexerID string) string { return indexerID[index+1:] } -// Given the contents of file, returns the boundaries of its first seven lines. +// FilenameMatchIndexPos returns the boundaries of its first seven lines. func FilenameMatchIndexPos(content string) (int, int) { count := 1 for i, c := range content { diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index 74c957dde6..a7a5d7d2e3 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -77,7 +77,7 @@ func HighlightSearchResultCode(filename, language string, lineNums []int, code s // The lineNums outputted by highlight.Code might not match the original lineNums, because "highlight" removes the last `\n` lines := make([]*ResultLine, min(len(highlightedLines), len(lineNums))) - for i := 0; i < len(lines); i++ { + for i := range lines { lines[i] = &ResultLine{ Num: lineNums[i], FormattedContent: template.HTML(highlightedLines[i]), @@ -129,7 +129,6 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res } // PerformSearch perform a search on a repository -// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2 func PerformSearch(ctx context.Context, opts *SearchOptions) (int, []*Result, []*SearchResultLanguages, error) { if opts == nil || len(opts.Keyword) == 0 { return 0, nil, nil, nil diff --git a/modules/indexer/indexer.go b/modules/indexer/indexer.go new file mode 100644 index 0000000000..1e0f81de89 --- /dev/null +++ b/modules/indexer/indexer.go @@ -0,0 +1,54 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package indexer + +type SearchModeType string + +const ( + SearchModeExact SearchModeType = "exact" + SearchModeWords SearchModeType = "words" + SearchModeFuzzy SearchModeType = "fuzzy" + SearchModeRegexp SearchModeType = "regexp" +) + +type SearchMode struct { + ModeValue SearchModeType + TooltipTrKey string + TitleTrKey string +} + +func SearchModesExactWords() []SearchMode { + return []SearchMode{ + { + ModeValue: SearchModeExact, + TooltipTrKey: "search.exact_tooltip", + TitleTrKey: "search.exact", + }, + { + ModeValue: SearchModeWords, + TooltipTrKey: "search.words_tooltip", + TitleTrKey: "search.words", + }, + } +} + +func SearchModesExactWordsFuzzy() []SearchMode { + return append(SearchModesExactWords(), []SearchMode{ + { + ModeValue: SearchModeFuzzy, + TooltipTrKey: "search.fuzzy_tooltip", + TitleTrKey: "search.fuzzy", + }, + }...) +} + +func GitGrepSupportedSearchModes() []SearchMode { + return append(SearchModesExactWords(), []SearchMode{ + { + ModeValue: SearchModeRegexp, + TooltipTrKey: "search.regexp_tooltip", + TitleTrKey: "search.regexp", + }, + }...) +} diff --git a/modules/indexer/internal/bleve/indexer.go b/modules/indexer/internal/bleve/indexer.go index 01e53ca636..9d1e24a874 100644 --- a/modules/indexer/internal/bleve/indexer.go +++ b/modules/indexer/internal/bleve/indexer.go @@ -5,7 +5,7 @@ package bleve import ( "context" - "fmt" + "errors" "code.gitea.io/gitea/modules/indexer/internal" "code.gitea.io/gitea/modules/log" @@ -39,11 +39,11 @@ func NewIndexer(indexDir string, version int, mappingGetter func() (mapping.Inde // Init initializes the indexer func (i *Indexer) Init(_ context.Context) (bool, error) { if i == nil { - return false, fmt.Errorf("cannot init nil indexer") + return false, errors.New("cannot init nil indexer") } if i.Indexer != nil { - return false, fmt.Errorf("indexer is already initialized") + return false, errors.New("indexer is already initialized") } indexer, version, err := openIndexer(i.indexDir, i.version) @@ -83,10 +83,10 @@ func (i *Indexer) Init(_ context.Context) (bool, error) { // Ping checks if the indexer is available func (i *Indexer) Ping(_ context.Context) error { if i == nil { - return fmt.Errorf("cannot ping nil indexer") + return errors.New("cannot ping nil indexer") } if i.Indexer == nil { - return fmt.Errorf("indexer is not initialized") + return errors.New("indexer is not initialized") } return nil } diff --git a/modules/indexer/internal/bleve/query.go b/modules/indexer/internal/bleve/query.go index 1b18ca1a77..8895ae2c64 100644 --- a/modules/indexer/internal/bleve/query.go +++ b/modules/indexer/internal/bleve/query.go @@ -28,6 +28,16 @@ func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query return q } +// MatchAndQuery generates a match query for the given phrase, field and analyzer +func MatchAndQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchQuery { + q := bleve.NewMatchQuery(matchPhrase) + q.FieldVal = field + q.Analyzer = analyzer + q.Fuzziness = fuzziness + q.Operator = query.MatchQueryOperatorAnd + return q +} + // BoolFieldQuery generates a bool field query for the given value and field func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery { q := bleve.NewBoolFieldQuery(value) diff --git a/modules/indexer/internal/elasticsearch/indexer.go b/modules/indexer/internal/elasticsearch/indexer.go index 395eea3bce..265ce26585 100644 --- a/modules/indexer/internal/elasticsearch/indexer.go +++ b/modules/indexer/internal/elasticsearch/indexer.go @@ -5,6 +5,7 @@ package elasticsearch import ( "context" + "errors" "fmt" "code.gitea.io/gitea/modules/indexer/internal" @@ -36,10 +37,10 @@ func NewIndexer(url, indexName string, version int, mapping string) *Indexer { // Init initializes the indexer func (i *Indexer) Init(ctx context.Context) (bool, error) { if i == nil { - return false, fmt.Errorf("cannot init nil indexer") + return false, errors.New("cannot init nil indexer") } if i.Client != nil { - return false, fmt.Errorf("indexer is already initialized") + return false, errors.New("indexer is already initialized") } client, err := i.initClient() @@ -66,10 +67,10 @@ func (i *Indexer) Init(ctx context.Context) (bool, error) { // Ping checks if the indexer is available func (i *Indexer) Ping(ctx context.Context) error { if i == nil { - return fmt.Errorf("cannot ping nil indexer") + return errors.New("cannot ping nil indexer") } if i.Client == nil { - return fmt.Errorf("indexer is not initialized") + return errors.New("indexer is not initialized") } resp, err := i.Client.ClusterHealth().Do(ctx) diff --git a/modules/indexer/internal/indexer.go b/modules/indexer/internal/indexer.go index c7f356da1e..3442bbaff2 100644 --- a/modules/indexer/internal/indexer.go +++ b/modules/indexer/internal/indexer.go @@ -5,7 +5,7 @@ package internal import ( "context" - "fmt" + "errors" ) // Indexer defines an basic indexer interface @@ -27,11 +27,11 @@ func NewDummyIndexer() Indexer { type dummyIndexer struct{} func (d *dummyIndexer) Init(ctx context.Context) (bool, error) { - return false, fmt.Errorf("indexer is not ready") + return false, errors.New("indexer is not ready") } func (d *dummyIndexer) Ping(ctx context.Context) error { - return fmt.Errorf("indexer is not ready") + return errors.New("indexer is not ready") } func (d *dummyIndexer) Close() {} diff --git a/modules/indexer/internal/meilisearch/indexer.go b/modules/indexer/internal/meilisearch/indexer.go index 01bb49bbfc..65db75bb55 100644 --- a/modules/indexer/internal/meilisearch/indexer.go +++ b/modules/indexer/internal/meilisearch/indexer.go @@ -5,6 +5,7 @@ package meilisearch import ( "context" + "errors" "fmt" "github.com/meilisearch/meilisearch-go" @@ -33,11 +34,11 @@ func NewIndexer(url, apiKey, indexName string, version int, settings *meilisearc // Init initializes the indexer func (i *Indexer) Init(_ context.Context) (bool, error) { if i == nil { - return false, fmt.Errorf("cannot init nil indexer") + return false, errors.New("cannot init nil indexer") } if i.Client != nil { - return false, fmt.Errorf("indexer is already initialized") + return false, errors.New("indexer is already initialized") } i.Client = meilisearch.New(i.url, meilisearch.WithAPIKey(i.apiKey)) @@ -62,10 +63,10 @@ func (i *Indexer) Init(_ context.Context) (bool, error) { // Ping checks if the indexer is available func (i *Indexer) Ping(ctx context.Context) error { if i == nil { - return fmt.Errorf("cannot ping nil indexer") + return errors.New("cannot ping nil indexer") } if i.Client == nil { - return fmt.Errorf("indexer is not initialized") + return errors.New("indexer is not initialized") } resp, err := i.Client.Health() if err != nil { diff --git a/modules/indexer/issues/bleve/bleve.go b/modules/indexer/issues/bleve/bleve.go index bf51bd6c14..39d96cab98 100644 --- a/modules/indexer/issues/bleve/bleve.go +++ b/modules/indexer/issues/bleve/bleve.go @@ -5,10 +5,14 @@ package bleve import ( "context" + "strconv" + "code.gitea.io/gitea/modules/indexer" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" "code.gitea.io/gitea/modules/indexer/issues/internal" + "code.gitea.io/gitea/modules/optional" + "code.gitea.io/gitea/modules/util" "github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" @@ -120,6 +124,10 @@ type Indexer struct { indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much } +func (b *Indexer) SupportedSearchModes() []indexer.SearchMode { + return indexer.SearchModesExactWordsFuzzy() +} + // NewIndexer creates a new bleve local indexer func NewIndexer(indexDir string) *Indexer { inner := inner_bleve.NewIndexer(indexDir, issueIndexerLatestVersion, generateIssueIndexMapping) @@ -157,16 +165,24 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( var queries []query.Query if options.Keyword != "" { - fuzziness := 0 - if options.IsFuzzyKeyword { - fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword) + searchMode := util.IfZero(options.SearchMode, b.SupportedSearchModes()[0].ModeValue) + if searchMode == indexer.SearchModeWords || searchMode == indexer.SearchModeFuzzy { + fuzziness := 0 + if searchMode == indexer.SearchModeFuzzy { + fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword) + } + queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ + inner_bleve.MatchAndQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness), + inner_bleve.MatchAndQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness), + inner_bleve.MatchAndQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness), + }...)) + } else /* exact */ { + queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ + inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, 0), + inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, 0), + inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, 0), + }...)) } - - queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ - inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness), - inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness), - inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness), - }...)) } if len(options.RepoIDs) > 0 || options.AllPublic { @@ -232,12 +248,20 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( queries = append(queries, inner_bleve.NumericEqualityQuery(options.ProjectColumnID.Value(), "project_board_id")) } - if options.PosterID.Has() { - queries = append(queries, inner_bleve.NumericEqualityQuery(options.PosterID.Value(), "poster_id")) + if options.PosterID != "" { + // "(none)" becomes 0, it means no poster + posterIDInt64, _ := strconv.ParseInt(options.PosterID, 10, 64) + queries = append(queries, inner_bleve.NumericEqualityQuery(posterIDInt64, "poster_id")) } - if options.AssigneeID.Has() { - queries = append(queries, inner_bleve.NumericEqualityQuery(options.AssigneeID.Value(), "assignee_id")) + if options.AssigneeID != "" { + if options.AssigneeID == "(any)" { + queries = append(queries, inner_bleve.NumericRangeInclusiveQuery(optional.Some[int64](1), optional.None[int64](), "assignee_id")) + } else { + // "(none)" becomes 0, it means no assignee + assigneeIDInt64, _ := strconv.ParseInt(options.AssigneeID, 10, 64) + queries = append(queries, inner_bleve.NumericEqualityQuery(assigneeIDInt64, "assignee_id")) + } } if options.MentionID.Has() { diff --git a/modules/indexer/issues/db/db.go b/modules/indexer/issues/db/db.go index 6c9cfcf670..50951f9c88 100644 --- a/modules/indexer/issues/db/db.go +++ b/modules/indexer/issues/db/db.go @@ -5,29 +5,35 @@ package db import ( "context" + "strings" + "sync" "code.gitea.io/gitea/models/db" issue_model "code.gitea.io/gitea/models/issues" + "code.gitea.io/gitea/modules/indexer" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_db "code.gitea.io/gitea/modules/indexer/internal/db" "code.gitea.io/gitea/modules/indexer/issues/internal" + "code.gitea.io/gitea/modules/util" "xorm.io/builder" ) -var _ internal.Indexer = &Indexer{} +var _ internal.Indexer = (*Indexer)(nil) // Indexer implements Indexer interface to use database's like search type Indexer struct { indexer_internal.Indexer } -func NewIndexer() *Indexer { - return &Indexer{ - Indexer: &inner_db.Indexer{}, - } +func (i *Indexer) SupportedSearchModes() []indexer.SearchMode { + return indexer.SearchModesExactWords() } +var GetIndexer = sync.OnceValue(func() *Indexer { + return &Indexer{Indexer: &inner_db.Indexer{}} +}) + // Index dummy function func (i *Indexer) Index(_ context.Context, _ ...*internal.IndexerData) error { return nil @@ -38,6 +44,26 @@ func (i *Indexer) Delete(_ context.Context, _ ...int64) error { return nil } +func buildMatchQuery(mode indexer.SearchModeType, colName, keyword string) builder.Cond { + if mode == indexer.SearchModeExact { + return db.BuildCaseInsensitiveLike(colName, keyword) + } + + // match words + cond := builder.NewCond() + fields := strings.Fields(keyword) + if len(fields) == 0 { + return builder.Expr("1=1") + } + for _, field := range fields { + if field == "" { + continue + } + cond = cond.And(db.BuildCaseInsensitiveLike(colName, field)) + } + return cond +} + // Search searches for issues func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) { // FIXME: I tried to avoid importing models here, but it seems to be impossible. @@ -58,16 +84,16 @@ func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( repoCond = builder.Eq{"repo_id": options.RepoIDs[0]} } subQuery := builder.Select("id").From("issue").Where(repoCond) - + searchMode := util.IfZero(options.SearchMode, i.SupportedSearchModes()[0].ModeValue) cond = builder.Or( - db.BuildCaseInsensitiveLike("issue.name", options.Keyword), - db.BuildCaseInsensitiveLike("issue.content", options.Keyword), + buildMatchQuery(searchMode, "issue.name", options.Keyword), + buildMatchQuery(searchMode, "issue.content", options.Keyword), builder.In("issue.id", builder.Select("issue_id"). From("comment"). Where(builder.And( builder.Eq{"type": issue_model.CommentTypeComment}, builder.In("issue_id", subQuery), - db.BuildCaseInsensitiveLike("content", options.Keyword), + buildMatchQuery(searchMode, "content", options.Keyword), )), ), ) @@ -95,7 +121,11 @@ func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( }, nil } - ids, total, err := issue_model.IssueIDs(ctx, opt, cond) + return i.FindWithIssueOptions(ctx, opt, cond) +} + +func (i *Indexer) FindWithIssueOptions(ctx context.Context, opt *issue_model.IssuesOptions, otherConds ...builder.Cond) (*internal.SearchResult, error) { + ids, total, err := issue_model.IssueIDs(ctx, opt, otherConds...) if err != nil { return nil, err } diff --git a/modules/indexer/issues/db/options.go b/modules/indexer/issues/db/options.go index 42834f6e88..380a25dc23 100644 --- a/modules/indexer/issues/db/options.go +++ b/modules/indexer/issues/db/options.go @@ -6,6 +6,7 @@ package db import ( "context" "fmt" + "strings" "code.gitea.io/gitea/models/db" issue_model "code.gitea.io/gitea/models/issues" @@ -34,7 +35,11 @@ func ToDBOptions(ctx context.Context, options *internal.SearchOptions) (*issue_m case internal.SortByDeadlineAsc: sortType = "nearduedate" default: - sortType = "newest" + if strings.HasPrefix(string(options.SortBy), issue_model.ScopeSortPrefix) { + sortType = string(options.SortBy) + } else { + sortType = "newest" + } } // See the comment of issues_model.SearchOptions for the reason why we need to convert @@ -54,7 +59,7 @@ func ToDBOptions(ctx context.Context, options *internal.SearchOptions) (*issue_m RepoIDs: options.RepoIDs, AllPublic: options.AllPublic, RepoCond: nil, - AssigneeID: optional.Some(convertID(options.AssigneeID)), + AssigneeID: options.AssigneeID, PosterID: options.PosterID, MentionedID: convertID(options.MentionID), ReviewRequestedID: convertID(options.ReviewRequestedID), @@ -68,14 +73,13 @@ func ToDBOptions(ctx context.Context, options *internal.SearchOptions) (*issue_m ExcludedLabelNames: nil, IncludeMilestones: nil, SortType: sortType, - IssueIDs: nil, UpdatedAfterUnix: options.UpdatedAfterUnix.Value(), UpdatedBeforeUnix: options.UpdatedBeforeUnix.Value(), PriorityRepoID: 0, IsArchived: options.IsArchived, - Org: nil, + Owner: nil, Team: nil, - User: nil, + Doer: nil, } if len(options.MilestoneIDs) == 1 && options.MilestoneIDs[0] == 0 { diff --git a/modules/indexer/issues/dboptions.go b/modules/indexer/issues/dboptions.go index 4f6ad96d22..f17724664d 100644 --- a/modules/indexer/issues/dboptions.go +++ b/modules/indexer/issues/dboptions.go @@ -4,12 +4,19 @@ package issues import ( + "strings" + "code.gitea.io/gitea/models/db" issues_model "code.gitea.io/gitea/models/issues" + "code.gitea.io/gitea/modules/indexer/issues/internal" "code.gitea.io/gitea/modules/optional" + "code.gitea.io/gitea/modules/setting" ) func ToSearchOptions(keyword string, opts *issues_model.IssuesOptions) *SearchOptions { + if opts.IssueIDs != nil { + setting.PanicInDevOrTesting("Indexer SearchOptions doesn't support IssueIDs") + } searchOpt := &SearchOptions{ Keyword: keyword, RepoIDs: opts.RepoIDs, @@ -45,11 +52,7 @@ func ToSearchOptions(keyword string, opts *issues_model.IssuesOptions) *SearchOp searchOpt.ProjectID = optional.Some[int64](0) // Those issues with no project(projectid==0) } - if opts.AssigneeID.Value() == db.NoConditionID { - searchOpt.AssigneeID = optional.Some[int64](0) // FIXME: this is inconsistent from other places, 0 means "no assignee" - } else if opts.AssigneeID.Value() != 0 { - searchOpt.AssigneeID = opts.AssigneeID - } + searchOpt.AssigneeID = opts.AssigneeID // See the comment of issues_model.SearchOptions for the reason why we need to convert convertID := func(id int64) optional.Option[int64] { @@ -99,7 +102,11 @@ func ToSearchOptions(keyword string, opts *issues_model.IssuesOptions) *SearchOp // Unsupported sort type for search fallthrough default: - searchOpt.SortBy = SortByUpdatedDesc + if strings.HasPrefix(opts.SortType, issues_model.ScopeSortPrefix) { + searchOpt.SortBy = internal.SortBy(opts.SortType) + } else { + searchOpt.SortBy = SortByUpdatedDesc + } } return searchOpt diff --git a/modules/indexer/issues/elasticsearch/elasticsearch.go b/modules/indexer/issues/elasticsearch/elasticsearch.go index 4c293f3f2a..9d627466ef 100644 --- a/modules/indexer/issues/elasticsearch/elasticsearch.go +++ b/modules/indexer/issues/elasticsearch/elasticsearch.go @@ -5,14 +5,15 @@ package elasticsearch import ( "context" - "fmt" "strconv" "strings" "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/indexer" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" "code.gitea.io/gitea/modules/indexer/issues/internal" + "code.gitea.io/gitea/modules/util" "github.com/olivere/elastic/v7" ) @@ -33,6 +34,11 @@ type Indexer struct { indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much } +func (b *Indexer) SupportedSearchModes() []indexer.SearchMode { + // TODO: es supports fuzzy search, but our code doesn't at the moment, and actually the default fuzziness is already "AUTO" + return indexer.SearchModesExactWords() +} + // NewIndexer creates a new elasticsearch indexer func NewIndexer(url, indexerName string) *Indexer { inner := inner_elasticsearch.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping) @@ -89,7 +95,7 @@ func (b *Indexer) Index(ctx context.Context, issues ...*internal.IndexerData) er issue := issues[0] _, err := b.inner.Client.Index(). Index(b.inner.VersionedIndexName()). - Id(fmt.Sprintf("%d", issue.ID)). + Id(strconv.FormatInt(issue.ID, 10)). BodyJson(issue). Do(ctx) return err @@ -100,7 +106,7 @@ func (b *Indexer) Index(ctx context.Context, issues ...*internal.IndexerData) er reqs = append(reqs, elastic.NewBulkIndexRequest(). Index(b.inner.VersionedIndexName()). - Id(fmt.Sprintf("%d", issue.ID)). + Id(strconv.FormatInt(issue.ID, 10)). Doc(issue), ) } @@ -119,7 +125,7 @@ func (b *Indexer) Delete(ctx context.Context, ids ...int64) error { } else if len(ids) == 1 { _, err := b.inner.Client.Delete(). Index(b.inner.VersionedIndexName()). - Id(fmt.Sprintf("%d", ids[0])). + Id(strconv.FormatInt(ids[0], 10)). Do(ctx) return err } @@ -129,7 +135,7 @@ func (b *Indexer) Delete(ctx context.Context, ids ...int64) error { reqs = append(reqs, elastic.NewBulkDeleteRequest(). Index(b.inner.VersionedIndexName()). - Id(fmt.Sprintf("%d", id)), + Id(strconv.FormatInt(id, 10)), ) } @@ -146,12 +152,12 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( query := elastic.NewBoolQuery() if options.Keyword != "" { - searchType := esMultiMatchTypePhrasePrefix - if options.IsFuzzyKeyword { - searchType = esMultiMatchTypeBestFields + searchMode := util.IfZero(options.SearchMode, b.SupportedSearchModes()[0].ModeValue) + if searchMode == indexer.SearchModeExact { + query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypePhrasePrefix)) + } else /* words */ { + query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypeBestFields).Operator("and")) } - - query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(searchType)) } if len(options.RepoIDs) > 0 { @@ -205,12 +211,22 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( query.Must(elastic.NewTermQuery("project_board_id", options.ProjectColumnID.Value())) } - if options.PosterID.Has() { - query.Must(elastic.NewTermQuery("poster_id", options.PosterID.Value())) + if options.PosterID != "" { + // "(none)" becomes 0, it means no poster + posterIDInt64, _ := strconv.ParseInt(options.PosterID, 10, 64) + query.Must(elastic.NewTermQuery("poster_id", posterIDInt64)) } - if options.AssigneeID.Has() { - query.Must(elastic.NewTermQuery("assignee_id", options.AssigneeID.Value())) + if options.AssigneeID != "" { + if options.AssigneeID == "(any)" { + q := elastic.NewRangeQuery("assignee_id") + q.Gte(1) + query.Must(q) + } else { + // "(none)" becomes 0, it means no assignee + assigneeIDInt64, _ := strconv.ParseInt(options.AssigneeID, 10, 64) + query.Must(elastic.NewTermQuery("assignee_id", assigneeIDInt64)) + } } if options.MentionID.Has() { diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index c82dc0867e..8f25c84b76 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -14,6 +14,7 @@ import ( db_model "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/issues/bleve" "code.gitea.io/gitea/modules/indexer/issues/db" "code.gitea.io/gitea/modules/indexer/issues/elasticsearch" @@ -102,7 +103,7 @@ func InitIssueIndexer(syncReindex bool) { log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err) } case "db": - issueIndexer = db.NewIndexer() + issueIndexer = db.GetIndexer() case "meilisearch": issueIndexer = meilisearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName) existed, err = issueIndexer.Init(ctx) @@ -216,7 +217,7 @@ func PopulateIssueIndexer(ctx context.Context) error { return fmt.Errorf("shutdown before completion: %w", ctx.Err()) default: } - repos, _, err := repo_model.SearchRepositoryByName(ctx, &repo_model.SearchRepoOptions{ + repos, _, err := repo_model.SearchRepositoryByName(ctx, repo_model.SearchRepoOptions{ ListOptions: db_model.ListOptions{Page: page, PageSize: repo_model.RepositoryListDefaultPageSize}, OrderBy: db_model.SearchOrderByID, Private: true, @@ -281,7 +282,7 @@ const ( // SearchIssues search issues by options. func SearchIssues(ctx context.Context, opts *SearchOptions) ([]int64, int64, error) { - indexer := *globalIndexer.Load() + ix := *globalIndexer.Load() if opts.Keyword == "" || opts.IsKeywordNumeric() { // This is a conservative shortcut. @@ -290,20 +291,22 @@ func SearchIssues(ctx context.Context, opts *SearchOptions) ([]int64, int64, err // So if the user creates an issue and list issues immediately, the issue may not be listed because the indexer needs time to index the issue. // Even worse, the external indexer like elastic search may not be available for a while, // and the user may not be able to list issues completely until it is available again. - indexer = db.NewIndexer() + ix = db.GetIndexer() } - result, err := indexer.Search(ctx, opts) + result, err := ix.Search(ctx, opts) if err != nil { return nil, 0, err } + return SearchResultToIDSlice(result), result.Total, nil +} +func SearchResultToIDSlice(result *internal.SearchResult) []int64 { ret := make([]int64, 0, len(result.Hits)) for _, hit := range result.Hits { ret = append(ret, hit.ID) } - - return ret, result.Total, nil + return ret } // CountIssues counts issues by options. It is a shortcut of SearchIssues(ctx, opts) but only returns the total count. @@ -313,3 +316,11 @@ func CountIssues(ctx context.Context, opts *SearchOptions) (int64, error) { _, total, err := SearchIssues(ctx, opts) return total, err } + +func SupportedSearchModes() []indexer.SearchMode { + gi := globalIndexer.Load() + if gi == nil { + return nil + } + return (*gi).SupportedSearchModes() +} diff --git a/modules/indexer/issues/indexer_test.go b/modules/indexer/issues/indexer_test.go index 8043d33eeb..3e38ac49b7 100644 --- a/modules/indexer/issues/indexer_test.go +++ b/modules/indexer/issues/indexer_test.go @@ -4,7 +4,6 @@ package issues import ( - "context" "testing" "code.gitea.io/gitea/models/db" @@ -45,6 +44,7 @@ func TestDBSearchIssues(t *testing.T) { t.Run("search issues with order", searchIssueWithOrder) t.Run("search issues in project", searchIssueInProject) t.Run("search issues with paginator", searchIssueWithPaginator) + t.Run("search issues with any assignee", searchIssueWithAnyAssignee) } func searchIssueWithKeyword(t *testing.T) { @@ -83,9 +83,11 @@ func searchIssueWithKeyword(t *testing.T) { } for _, test := range tests { - issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) - require.NoError(t, err) - assert.Equal(t, test.expectedIDs, issueIDs) + t.Run(test.opts.Keyword, func(t *testing.T) { + issueIDs, _, err := SearchIssues(t.Context(), &test.opts) + require.NoError(t, err) + assert.Equal(t, test.expectedIDs, issueIDs) + }) } } @@ -118,7 +120,7 @@ func searchIssueByIndex(t *testing.T) { } for _, test := range tests { - issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + issueIDs, _, err := SearchIssues(t.Context(), &test.opts) require.NoError(t, err) assert.Equal(t, test.expectedIDs, issueIDs) } @@ -162,7 +164,7 @@ func searchIssueInRepo(t *testing.T) { } for _, test := range tests { - issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + issueIDs, _, err := SearchIssues(t.Context(), &test.opts) require.NoError(t, err) assert.Equal(t, test.expectedIDs, issueIDs) } @@ -175,19 +177,19 @@ func searchIssueByID(t *testing.T) { }{ { opts: SearchOptions{ - PosterID: optional.Some(int64(1)), + PosterID: "1", }, expectedIDs: []int64{11, 6, 3, 2, 1}, }, { opts: SearchOptions{ - AssigneeID: optional.Some(int64(1)), + AssigneeID: "1", }, expectedIDs: []int64{6, 1}, }, { - // NOTE: This tests no assignees filtering and also ToSearchOptions() to ensure it will set AssigneeID to 0 when it is passed as -1. - opts: *ToSearchOptions("", &issues.IssuesOptions{AssigneeID: optional.Some(db.NoConditionID)}), + // NOTE: This tests no assignees filtering and also ToSearchOptions() to ensure it handles the filter correctly + opts: *ToSearchOptions("", &issues.IssuesOptions{AssigneeID: "(none)"}), expectedIDs: []int64{22, 21, 16, 15, 14, 13, 12, 11, 20, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2}, }, { @@ -232,7 +234,7 @@ func searchIssueByID(t *testing.T) { } for _, test := range tests { - issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + issueIDs, _, err := SearchIssues(t.Context(), &test.opts) require.NoError(t, err) assert.Equal(t, test.expectedIDs, issueIDs) } @@ -257,7 +259,7 @@ func searchIssueIsPull(t *testing.T) { }, } for _, test := range tests { - issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + issueIDs, _, err := SearchIssues(t.Context(), &test.opts) require.NoError(t, err) assert.Equal(t, test.expectedIDs, issueIDs) } @@ -282,7 +284,7 @@ func searchIssueIsClosed(t *testing.T) { }, } for _, test := range tests { - issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + issueIDs, _, err := SearchIssues(t.Context(), &test.opts) require.NoError(t, err) assert.Equal(t, test.expectedIDs, issueIDs) } @@ -307,7 +309,7 @@ func searchIssueIsArchived(t *testing.T) { }, } for _, test := range tests { - issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + issueIDs, _, err := SearchIssues(t.Context(), &test.opts) require.NoError(t, err) assert.Equal(t, test.expectedIDs, issueIDs) } @@ -332,7 +334,7 @@ func searchIssueByMilestoneID(t *testing.T) { }, } for _, test := range tests { - issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + issueIDs, _, err := SearchIssues(t.Context(), &test.opts) require.NoError(t, err) assert.Equal(t, test.expectedIDs, issueIDs) } @@ -363,7 +365,7 @@ func searchIssueByLabelID(t *testing.T) { }, } for _, test := range tests { - issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + issueIDs, _, err := SearchIssues(t.Context(), &test.opts) require.NoError(t, err) assert.Equal(t, test.expectedIDs, issueIDs) } @@ -382,7 +384,7 @@ func searchIssueByTime(t *testing.T) { }, } for _, test := range tests { - issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + issueIDs, _, err := SearchIssues(t.Context(), &test.opts) require.NoError(t, err) assert.Equal(t, test.expectedIDs, issueIDs) } @@ -401,7 +403,7 @@ func searchIssueWithOrder(t *testing.T) { }, } for _, test := range tests { - issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + issueIDs, _, err := SearchIssues(t.Context(), &test.opts) require.NoError(t, err) assert.Equal(t, test.expectedIDs, issueIDs) } @@ -432,7 +434,7 @@ func searchIssueInProject(t *testing.T) { }, } for _, test := range tests { - issueIDs, _, err := SearchIssues(context.TODO(), &test.opts) + issueIDs, _, err := SearchIssues(t.Context(), &test.opts) require.NoError(t, err) assert.Equal(t, test.expectedIDs, issueIDs) } @@ -455,7 +457,29 @@ func searchIssueWithPaginator(t *testing.T) { }, } for _, test := range tests { - issueIDs, total, err := SearchIssues(context.TODO(), &test.opts) + issueIDs, total, err := SearchIssues(t.Context(), &test.opts) + require.NoError(t, err) + assert.Equal(t, test.expectedIDs, issueIDs) + assert.Equal(t, test.expectedTotal, total) + } +} + +func searchIssueWithAnyAssignee(t *testing.T) { + tests := []struct { + opts SearchOptions + expectedIDs []int64 + expectedTotal int64 + }{ + { + SearchOptions{ + AssigneeID: "(any)", + }, + []int64{17, 6, 1}, + 3, + }, + } + for _, test := range tests { + issueIDs, total, err := SearchIssues(t.Context(), &test.opts) require.NoError(t, err) assert.Equal(t, test.expectedIDs, issueIDs) assert.Equal(t, test.expectedTotal, total) diff --git a/modules/indexer/issues/internal/indexer.go b/modules/indexer/issues/internal/indexer.go index 95740bc598..59c6f48485 100644 --- a/modules/indexer/issues/internal/indexer.go +++ b/modules/indexer/issues/internal/indexer.go @@ -5,8 +5,9 @@ package internal import ( "context" - "fmt" + "errors" + "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/internal" ) @@ -16,6 +17,7 @@ type Indexer interface { Index(ctx context.Context, issue ...*IndexerData) error Delete(ctx context.Context, ids ...int64) error Search(ctx context.Context, options *SearchOptions) (*SearchResult, error) + SupportedSearchModes() []indexer.SearchMode } // NewDummyIndexer returns a dummy indexer @@ -29,14 +31,18 @@ type dummyIndexer struct { internal.Indexer } +func (d *dummyIndexer) SupportedSearchModes() []indexer.SearchMode { + return nil +} + func (d *dummyIndexer) Index(_ context.Context, _ ...*IndexerData) error { - return fmt.Errorf("indexer is not ready") + return errors.New("indexer is not ready") } func (d *dummyIndexer) Delete(_ context.Context, _ ...int64) error { - return fmt.Errorf("indexer is not ready") + return errors.New("indexer is not ready") } func (d *dummyIndexer) Search(_ context.Context, _ *SearchOptions) (*SearchResult, error) { - return nil, fmt.Errorf("indexer is not ready") + return nil, errors.New("indexer is not ready") } diff --git a/modules/indexer/issues/internal/model.go b/modules/indexer/issues/internal/model.go index 09dcbf4804..0d4f0f727d 100644 --- a/modules/indexer/issues/internal/model.go +++ b/modules/indexer/issues/internal/model.go @@ -7,6 +7,7 @@ import ( "strconv" "code.gitea.io/gitea/models/db" + "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/optional" "code.gitea.io/gitea/modules/timeutil" ) @@ -77,7 +78,7 @@ type SearchResult struct { type SearchOptions struct { Keyword string // keyword to search - IsFuzzyKeyword bool // if false the levenshtein distance is 0 + SearchMode indexer.SearchModeType RepoIDs []int64 // repository IDs which the issues belong to AllPublic bool // if include all public repositories @@ -96,9 +97,8 @@ type SearchOptions struct { ProjectID optional.Option[int64] // project the issues belong to ProjectColumnID optional.Option[int64] // project column the issues belong to - PosterID optional.Option[int64] // poster of the issues - - AssigneeID optional.Option[int64] // assignee of the issues, zero means no assignee + PosterID string // poster of the issues, "(none)" or "(any)" or a user ID + AssigneeID string // assignee of the issues, "(none)" or "(any)" or a user ID MentionID optional.Option[int64] // mentioned user of the issues diff --git a/modules/indexer/issues/internal/tests/tests.go b/modules/indexer/issues/internal/tests/tests.go index 94ce8520bf..7aebbbcd58 100644 --- a/modules/indexer/issues/internal/tests/tests.go +++ b/modules/indexer/issues/internal/tests/tests.go @@ -8,7 +8,6 @@ package tests import ( - "context" "fmt" "slices" "testing" @@ -24,10 +23,10 @@ import ( ) func TestIndexer(t *testing.T, indexer internal.Indexer) { - _, err := indexer.Init(context.Background()) + _, err := indexer.Init(t.Context()) require.NoError(t, err) - require.NoError(t, indexer.Ping(context.Background())) + require.NoError(t, indexer.Ping(t.Context())) var ( ids []int64 @@ -39,32 +38,32 @@ func TestIndexer(t *testing.T, indexer internal.Indexer) { ids = append(ids, v.ID) data[v.ID] = v } - require.NoError(t, indexer.Index(context.Background(), d...)) - require.NoError(t, waitData(indexer, int64(len(data)))) + require.NoError(t, indexer.Index(t.Context(), d...)) + waitData(t, indexer, int64(len(data))) } defer func() { - require.NoError(t, indexer.Delete(context.Background(), ids...)) + require.NoError(t, indexer.Delete(t.Context(), ids...)) }() for _, c := range cases { t.Run(c.Name, func(t *testing.T) { if len(c.ExtraData) > 0 { - require.NoError(t, indexer.Index(context.Background(), c.ExtraData...)) + require.NoError(t, indexer.Index(t.Context(), c.ExtraData...)) for _, v := range c.ExtraData { data[v.ID] = v } - require.NoError(t, waitData(indexer, int64(len(data)))) + waitData(t, indexer, int64(len(data))) defer func() { for _, v := range c.ExtraData { - require.NoError(t, indexer.Delete(context.Background(), v.ID)) + require.NoError(t, indexer.Delete(t.Context(), v.ID)) delete(data, v.ID) } - require.NoError(t, waitData(indexer, int64(len(data)))) + waitData(t, indexer, int64(len(data))) }() } - result, err := indexer.Search(context.Background(), c.SearchOptions) + result, err := indexer.Search(t.Context(), c.SearchOptions) require.NoError(t, err) if c.Expected != nil { @@ -80,7 +79,7 @@ func TestIndexer(t *testing.T, indexer internal.Indexer) { // test counting c.SearchOptions.Paginator = &db.ListOptions{PageSize: 0} - countResult, err := indexer.Search(context.Background(), c.SearchOptions) + countResult, err := indexer.Search(t.Context(), c.SearchOptions) require.NoError(t, err) assert.Empty(t, countResult.Hits) assert.Equal(t, result.Total, countResult.Total) @@ -93,7 +92,7 @@ var cases = []*testIndexerCase{ Name: "default", SearchOptions: &internal.SearchOptions{}, Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { - assert.Equal(t, len(data), len(result.Hits)) + assert.Len(t, result.Hits, len(data)) assert.Equal(t, len(data), int(result.Total)) }, }, @@ -379,7 +378,7 @@ var cases = []*testIndexerCase{ Paginator: &db.ListOptions{ PageSize: 5, }, - PosterID: optional.Some(int64(1)), + PosterID: "1", }, Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { assert.Len(t, result.Hits, 5) @@ -397,7 +396,7 @@ var cases = []*testIndexerCase{ Paginator: &db.ListOptions{ PageSize: 5, }, - AssigneeID: optional.Some(int64(1)), + AssigneeID: "1", }, Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { assert.Len(t, result.Hits, 5) @@ -415,7 +414,7 @@ var cases = []*testIndexerCase{ Paginator: &db.ListOptions{ PageSize: 5, }, - AssigneeID: optional.Some(int64(0)), + AssigneeID: "(none)", }, Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { assert.Len(t, result.Hits, 5) @@ -526,7 +525,7 @@ var cases = []*testIndexerCase{ SortBy: internal.SortByCreatedDesc, }, Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { - assert.Equal(t, len(data), len(result.Hits)) + assert.Len(t, result.Hits, len(data)) assert.Equal(t, len(data), int(result.Total)) for i, v := range result.Hits { if i < len(result.Hits)-1 { @@ -542,7 +541,7 @@ var cases = []*testIndexerCase{ SortBy: internal.SortByUpdatedDesc, }, Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { - assert.Equal(t, len(data), len(result.Hits)) + assert.Len(t, result.Hits, len(data)) assert.Equal(t, len(data), int(result.Total)) for i, v := range result.Hits { if i < len(result.Hits)-1 { @@ -558,7 +557,7 @@ var cases = []*testIndexerCase{ SortBy: internal.SortByCommentsDesc, }, Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { - assert.Equal(t, len(data), len(result.Hits)) + assert.Len(t, result.Hits, len(data)) assert.Equal(t, len(data), int(result.Total)) for i, v := range result.Hits { if i < len(result.Hits)-1 { @@ -574,7 +573,7 @@ var cases = []*testIndexerCase{ SortBy: internal.SortByDeadlineDesc, }, Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { - assert.Equal(t, len(data), len(result.Hits)) + assert.Len(t, result.Hits, len(data)) assert.Equal(t, len(data), int(result.Total)) for i, v := range result.Hits { if i < len(result.Hits)-1 { @@ -590,7 +589,7 @@ var cases = []*testIndexerCase{ SortBy: internal.SortByCreatedAsc, }, Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { - assert.Equal(t, len(data), len(result.Hits)) + assert.Len(t, result.Hits, len(data)) assert.Equal(t, len(data), int(result.Total)) for i, v := range result.Hits { if i < len(result.Hits)-1 { @@ -606,7 +605,7 @@ var cases = []*testIndexerCase{ SortBy: internal.SortByUpdatedAsc, }, Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { - assert.Equal(t, len(data), len(result.Hits)) + assert.Len(t, result.Hits, len(data)) assert.Equal(t, len(data), int(result.Total)) for i, v := range result.Hits { if i < len(result.Hits)-1 { @@ -622,7 +621,7 @@ var cases = []*testIndexerCase{ SortBy: internal.SortByCommentsAsc, }, Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { - assert.Equal(t, len(data), len(result.Hits)) + assert.Len(t, result.Hits, len(data)) assert.Equal(t, len(data), int(result.Total)) for i, v := range result.Hits { if i < len(result.Hits)-1 { @@ -638,7 +637,7 @@ var cases = []*testIndexerCase{ SortBy: internal.SortByDeadlineAsc, }, Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { - assert.Equal(t, len(data), len(result.Hits)) + assert.Len(t, result.Hits, len(data)) assert.Equal(t, len(data), int(result.Total)) for i, v := range result.Hits { if i < len(result.Hits)-1 { @@ -647,6 +646,21 @@ var cases = []*testIndexerCase{ } }, }, + { + Name: "SearchAnyAssignee", + SearchOptions: &internal.SearchOptions{ + AssigneeID: "(any)", + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Len(t, result.Hits, 180) + for _, v := range result.Hits { + assert.GreaterOrEqual(t, data[v.ID].AssigneeID, int64(1)) + } + assert.Equal(t, countIndexerData(data, func(v *internal.IndexerData) bool { + return v.AssigneeID >= 1 + }), result.Total) + }, + }, } type testIndexerCase struct { @@ -736,22 +750,10 @@ func countIndexerData(data map[int64]*internal.IndexerData, f func(v *internal.I // waitData waits for the indexer to index all data. // Some engines like Elasticsearch index data asynchronously, so we need to wait for a while. -func waitData(indexer internal.Indexer, total int64) error { - var actual int64 - for i := 0; i < 100; i++ { - result, err := indexer.Search(context.Background(), &internal.SearchOptions{ - Paginator: &db.ListOptions{ - PageSize: 0, - }, - }) - if err != nil { - return err - } - actual = result.Total - if actual == total { - return nil - } - time.Sleep(100 * time.Millisecond) - } - return fmt.Errorf("waitData: expected %d, actual %d", total, actual) +func waitData(t *testing.T, indexer internal.Indexer, total int64) { + assert.Eventually(t, func() bool { + result, err := indexer.Search(t.Context(), &internal.SearchOptions{Paginator: &db.ListOptions{}}) + require.NoError(t, err) + return result.Total == total + }, 10*time.Second, 100*time.Millisecond, "expected total=%d", total) } diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index 1066e96272..759a98473f 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -10,6 +10,7 @@ import ( "strconv" "strings" + "code.gitea.io/gitea/modules/indexer" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_meilisearch "code.gitea.io/gitea/modules/indexer/internal/meilisearch" "code.gitea.io/gitea/modules/indexer/issues/internal" @@ -35,6 +36,10 @@ type Indexer struct { indexer_internal.Indexer // do not composite inner_meilisearch.Indexer directly to avoid exposing too much } +func (b *Indexer) SupportedSearchModes() []indexer.SearchMode { + return indexer.SearchModesExactWords() +} + // NewIndexer creates a new meilisearch indexer func NewIndexer(url, apiKey, indexerName string) *Indexer { settings := &meilisearch.Settings{ @@ -182,12 +187,20 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( query.And(inner_meilisearch.NewFilterEq("project_board_id", options.ProjectColumnID.Value())) } - if options.PosterID.Has() { - query.And(inner_meilisearch.NewFilterEq("poster_id", options.PosterID.Value())) + if options.PosterID != "" { + // "(none)" becomes 0, it means no poster + posterIDInt64, _ := strconv.ParseInt(options.PosterID, 10, 64) + query.And(inner_meilisearch.NewFilterEq("poster_id", posterIDInt64)) } - if options.AssigneeID.Has() { - query.And(inner_meilisearch.NewFilterEq("assignee_id", options.AssigneeID.Value())) + if options.AssigneeID != "" { + if options.AssigneeID == "(any)" { + query.And(inner_meilisearch.NewFilterGte("assignee_id", 1)) + } else { + // "(none)" becomes 0, it means no assignee + assigneeIDInt64, _ := strconv.ParseInt(options.AssigneeID, 10, 64) + query.And(inner_meilisearch.NewFilterEq("assignee_id", assigneeIDInt64)) + } } if options.MentionID.Has() { @@ -230,9 +243,8 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( limit = 1 } - keyword := options.Keyword - if !options.IsFuzzyKeyword { - // to make it non fuzzy ("typo tolerance" in meilisearch terms), we have to quote the keyword(s) + keyword := options.Keyword // default to match "words" + if options.SearchMode == indexer.SearchModeExact { // https://www.meilisearch.com/docs/reference/api/search#phrase-search keyword = doubleQuoteKeyword(keyword) } diff --git a/modules/indexer/issues/meilisearch/meilisearch_test.go b/modules/indexer/issues/meilisearch/meilisearch_test.go index a3a332554a..2fea4004cb 100644 --- a/modules/indexer/issues/meilisearch/meilisearch_test.go +++ b/modules/indexer/issues/meilisearch/meilisearch_test.go @@ -74,13 +74,13 @@ func TestConvertHits(t *testing.T) { } hits, err := convertHits(validResponse) assert.NoError(t, err) - assert.EqualValues(t, []internal.Match{{ID: 11}, {ID: 22}, {ID: 33}}, hits) + assert.Equal(t, []internal.Match{{ID: 11}, {ID: 22}, {ID: 33}}, hits) } func TestDoubleQuoteKeyword(t *testing.T) { - assert.EqualValues(t, "", doubleQuoteKeyword("")) - assert.EqualValues(t, `"a" "b" "c"`, doubleQuoteKeyword("a b c")) - assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g")) - assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g")) - assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword(`a "" "d" """g`)) + assert.Empty(t, doubleQuoteKeyword("")) + assert.Equal(t, `"a" "b" "c"`, doubleQuoteKeyword("a b c")) + assert.Equal(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g")) + assert.Equal(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g")) + assert.Equal(t, `"a" "d" "g"`, doubleQuoteKeyword(`a "" "d" """g`)) } diff --git a/modules/indexer/issues/util.go b/modules/indexer/issues/util.go index deb19adc49..19d835a1d8 100644 --- a/modules/indexer/issues/util.go +++ b/modules/indexer/issues/util.go @@ -92,6 +92,11 @@ func getIssueIndexerData(ctx context.Context, issueID int64) (*internal.IndexerD projectID = issue.Project.ID } + projectColumnID, err := issue.ProjectColumnID(ctx) + if err != nil { + return nil, false, err + } + return &internal.IndexerData{ ID: issue.ID, RepoID: issue.RepoID, @@ -106,7 +111,7 @@ func getIssueIndexerData(ctx context.Context, issueID int64) (*internal.IndexerD NoLabel: len(labels) == 0, MilestoneID: issue.MilestoneID, ProjectID: projectID, - ProjectColumnID: issue.ProjectColumnID(ctx), + ProjectColumnID: projectColumnID, PosterID: issue.PosterID, AssigneeID: issue.AssigneeID, MentionIDs: mentionIDs, diff --git a/modules/indexer/stats/db.go b/modules/indexer/stats/db.go index 98a977c700..199d493e97 100644 --- a/modules/indexer/stats/db.go +++ b/modules/indexer/stats/db.go @@ -8,6 +8,7 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/git/languagestats" "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/log" @@ -49,10 +50,10 @@ func (db *DBIndexer) Index(id int64) error { commitID, err := gitRepo.GetBranchCommitID(repo.DefaultBranch) if err != nil { if git.IsErrBranchNotExist(err) || git.IsErrNotExist(err) || setting.IsInTesting { - log.Debug("Unable to get commit ID for default branch %s in %s ... skipping this repository", repo.DefaultBranch, repo.RepoPath()) + log.Debug("Unable to get commit ID for default branch %s in %s ... skipping this repository", repo.DefaultBranch, repo.FullName()) return nil } - log.Error("Unable to get commit ID for default branch %s in %s. Error: %v", repo.DefaultBranch, repo.RepoPath(), err) + log.Error("Unable to get commit ID for default branch %s in %s. Error: %v", repo.DefaultBranch, repo.FullName(), err) return err } @@ -62,20 +63,20 @@ func (db *DBIndexer) Index(id int64) error { } // Calculate and save language statistics to database - stats, err := gitRepo.GetLanguageStats(commitID) + stats, err := languagestats.GetLanguageStats(gitRepo, commitID) if err != nil { if !setting.IsInTesting { - log.Error("Unable to get language stats for ID %s for default branch %s in %s. Error: %v", commitID, repo.DefaultBranch, repo.RepoPath(), err) + log.Error("Unable to get language stats for ID %s for default branch %s in %s. Error: %v", commitID, repo.DefaultBranch, repo.FullName(), err) } return err } err = repo_model.UpdateLanguageStats(ctx, repo, commitID, stats) if err != nil { - log.Error("Unable to update language stats for ID %s for default branch %s in %s. Error: %v", commitID, repo.DefaultBranch, repo.RepoPath(), err) + log.Error("Unable to update language stats for ID %s for default branch %s in %s. Error: %v", commitID, repo.DefaultBranch, repo.FullName(), err) return err } - log.Debug("DBIndexer completed language stats for ID %s for default branch %s in %s. stats count: %d", commitID, repo.DefaultBranch, repo.RepoPath(), len(stats)) + log.Debug("DBIndexer completed language stats for ID %s for default branch %s in %s. stats count: %d", commitID, repo.DefaultBranch, repo.FullName(), len(stats)) return nil } diff --git a/modules/indexer/stats/indexer_test.go b/modules/indexer/stats/indexer_test.go index 5be45d7a3b..d32a8bf151 100644 --- a/modules/indexer/stats/indexer_test.go +++ b/modules/indexer/stats/indexer_test.go @@ -4,7 +4,6 @@ package stats import ( - "context" "testing" "time" @@ -40,7 +39,7 @@ func TestRepoStatsIndex(t *testing.T) { err = UpdateRepoIndexer(repo) assert.NoError(t, err) - assert.NoError(t, queue.GetManager().FlushAll(context.Background(), 5*time.Second)) + assert.NoError(t, queue.GetManager().FlushAll(t.Context(), 5*time.Second)) status, err := repo_model.GetIndexerStatus(db.DefaultContext, repo, repo_model.RepoIndexerTypeStats) assert.NoError(t, err) diff --git a/modules/indexer/stats/queue.go b/modules/indexer/stats/queue.go index d002bd57cf..69cde321d8 100644 --- a/modules/indexer/stats/queue.go +++ b/modules/indexer/stats/queue.go @@ -4,7 +4,7 @@ package stats import ( - "fmt" + "errors" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/graceful" @@ -31,7 +31,7 @@ func handler(items ...int64) []int64 { func initStatsQueue() error { statsQueue = queue.CreateUniqueQueue(graceful.GetManager().ShutdownContext(), "repo_stats_update", handler) if statsQueue == nil { - return fmt.Errorf("unable to create repo_stats_update queue") + return errors.New("unable to create repo_stats_update queue") } go graceful.GetManager().RunWithCancel(statsQueue) return nil |