diff options
Diffstat (limited to 'modules/indexer/code')
-rw-r--r-- | modules/indexer/code/bleve/bleve.go | 48 | ||||
-rw-r--r-- | modules/indexer/code/bleve/token/path/path.go | 12 | ||||
-rw-r--r-- | modules/indexer/code/elasticsearch/elasticsearch.go | 29 | ||||
-rw-r--r-- | modules/indexer/code/elasticsearch/elasticsearch_test.go | 4 | ||||
-rw-r--r-- | modules/indexer/code/git.go | 20 | ||||
-rw-r--r-- | modules/indexer/code/gitgrep/gitgrep.go | 66 | ||||
-rw-r--r-- | modules/indexer/code/gitgrep/gitgrep_test.go | 19 | ||||
-rw-r--r-- | modules/indexer/code/indexer.go | 15 | ||||
-rw-r--r-- | modules/indexer/code/indexer_test.go | 53 | ||||
-rw-r--r-- | modules/indexer/code/internal/indexer.go | 16 | ||||
-rw-r--r-- | modules/indexer/code/internal/util.go | 6 | ||||
-rw-r--r-- | modules/indexer/code/search.go | 3 |
12 files changed, 205 insertions, 86 deletions
diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 772317fa59..70f0995a01 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -16,7 +16,7 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/gitrepo" + "code.gitea.io/gitea/modules/indexer" path_filter "code.gitea.io/gitea/modules/indexer/code/bleve/token/path" "code.gitea.io/gitea/modules/indexer/code/internal" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" @@ -24,11 +24,11 @@ import ( "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/typesniffer" + "code.gitea.io/gitea/modules/util" "github.com/blevesearch/bleve/v2" analyzer_custom "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" analyzer_keyword "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword" - "github.com/blevesearch/bleve/v2/analysis/token/camelcase" "github.com/blevesearch/bleve/v2/analysis/token/lowercase" "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" "github.com/blevesearch/bleve/v2/analysis/tokenizer/letter" @@ -70,7 +70,7 @@ const ( filenameIndexerAnalyzer = "filenameIndexerAnalyzer" filenameIndexerTokenizer = "filenameIndexerTokenizer" repoIndexerDocType = "repoIndexerDocType" - repoIndexerLatestVersion = 8 + repoIndexerLatestVersion = 9 ) // generateBleveIndexMapping generates a bleve index mapping for the repo indexer @@ -107,7 +107,7 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) { "type": analyzer_custom.Name, "char_filters": []string{}, "tokenizer": letter.Name, - "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, + "token_filters": []string{unicodeNormalizeName, lowercase.Name}, }); err != nil { return nil, err } @@ -136,6 +136,10 @@ type Indexer struct { indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much } +func (b *Indexer) SupportedSearchModes() []indexer.SearchMode { + return indexer.SearchModesExactWords() +} + // NewIndexer creates a new bleve local indexer func NewIndexer(indexDir string) *Indexer { inner := inner_bleve.NewIndexer(indexDir, repoIndexerLatestVersion, generateBleveIndexMapping) @@ -158,7 +162,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro var err error if !update.Sized { var stdout string - stdout, _, err = git.NewCommand(ctx, "cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) + stdout, _, err = git.NewCommand("cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(ctx, &git.RunOpts{Dir: repo.RepoPath()}) if err != nil { return err } @@ -185,7 +189,8 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro return err } else if !typesniffer.DetectContentType(fileContents).IsText() { // FIXME: UTF-16 files will probably fail here - return nil + // Even if the file is not recognized as a "text file", we could still put its name into the indexers to make the filename become searchable, while leave the content to empty. + fileContents = nil } if _, err = batchReader.Discard(1); err != nil { @@ -211,12 +216,7 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, batch func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) if len(changes.Updates) > 0 { - r, err := gitrepo.OpenRepository(ctx, repo) - if err != nil { - return err - } - defer r.Close() - gitBatch, err := r.NewBatch(ctx) + gitBatch, err := git.NewBatch(ctx, repo.RepoPath()) if err != nil { return err } @@ -260,17 +260,31 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int var ( indexerQuery query.Query keywordQuery query.Query + contentQuery query.Query ) pathQuery := bleve.NewPrefixQuery(strings.ToLower(opts.Keyword)) pathQuery.FieldVal = "Filename" pathQuery.SetBoost(10) - contentQuery := bleve.NewMatchQuery(opts.Keyword) - contentQuery.FieldVal = "Content" - - if opts.IsKeywordFuzzy { - contentQuery.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword) + searchMode := util.IfZero(opts.SearchMode, b.SupportedSearchModes()[0].ModeValue) + if searchMode == indexer.SearchModeExact { + // 1.21 used NewPrefixQuery, but it seems not working well, and later releases changed to NewMatchPhraseQuery + q := bleve.NewMatchPhraseQuery(opts.Keyword) + q.Analyzer = repoIndexerAnalyzer + q.FieldVal = "Content" + contentQuery = q + } else /* words */ { + q := bleve.NewMatchQuery(opts.Keyword) + q.FieldVal = "Content" + q.Analyzer = repoIndexerAnalyzer + if searchMode == indexer.SearchModeFuzzy { + // this logic doesn't seem right, it is only used to pass the test-case `Keyword: "dESCRIPTION"`, which doesn't seem to be a real-life use-case. + q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword) + } else { + q.Operator = query.MatchQueryOperatorAnd + } + contentQuery = q } keywordQuery = bleve.NewDisjunctionQuery(contentQuery, pathQuery) diff --git a/modules/indexer/code/bleve/token/path/path.go b/modules/indexer/code/bleve/token/path/path.go index 107e0da109..6dfc12f146 100644 --- a/modules/indexer/code/bleve/token/path/path.go +++ b/modules/indexer/code/bleve/token/path/path.go @@ -51,13 +51,13 @@ func generatePathTokens(input analysis.TokenStream, reversed bool) analysis.Toke slices.Reverse(input) } - for i := 0; i < len(input); i++ { + for i := range input { var sb strings.Builder - sb.WriteString(string(input[0].Term)) + sb.Write(input[0].Term) for j := 1; j < i; j++ { sb.WriteString("/") - sb.WriteString(string(input[j].Term)) + sb.Write(input[j].Term) } term := sb.String() @@ -97,5 +97,9 @@ func generatePathTokens(input analysis.TokenStream, reversed bool) analysis.Toke } func init() { - registry.RegisterTokenFilter(Name, TokenFilterConstructor) + // FIXME: move it to the bleve's init function, but do not call it in global init + err := registry.RegisterTokenFilter(Name, TokenFilterConstructor) + if err != nil { + panic(err) + } } diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 1c4dd39eff..f925ce396a 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -15,7 +15,7 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/gitrepo" + "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/code/internal" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" @@ -24,6 +24,7 @@ import ( "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/typesniffer" + "code.gitea.io/gitea/modules/util" "github.com/go-enry/go-enry/v2" "github.com/olivere/elastic/v7" @@ -45,6 +46,10 @@ type Indexer struct { indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much } +func (b *Indexer) SupportedSearchModes() []indexer.SearchMode { + return indexer.SearchModesExactWords() +} + // NewIndexer creates a new elasticsearch indexer func NewIndexer(url, indexerName string) *Indexer { inner := inner_elasticsearch.NewIndexer(url, indexerName, esRepoIndexerLatestVersion, defaultMapping) @@ -142,7 +147,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro var err error if !update.Sized { var stdout string - stdout, _, err = git.NewCommand(ctx, "cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) + stdout, _, err = git.NewCommand("cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(ctx, &git.RunOpts{Dir: repo.RepoPath()}) if err != nil { return nil, err } @@ -203,12 +208,7 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elasti func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { reqs := make([]elastic.BulkableRequest, 0) if len(changes.Updates) > 0 { - r, err := gitrepo.OpenRepository(ctx, repo) - if err != nil { - return err - } - defer r.Close() - batch, err := r.NewBatch(ctx) + batch, err := git.NewBatch(ctx, repo.RepoPath()) if err != nil { return err } @@ -359,13 +359,16 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan // Search searches for codes and language stats by given conditions. func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { - searchType := esMultiMatchTypePhrasePrefix - if opts.IsKeywordFuzzy { - searchType = esMultiMatchTypeBestFields + var contentQuery elastic.Query + searchMode := util.IfZero(opts.SearchMode, b.SupportedSearchModes()[0].ModeValue) + if searchMode == indexer.SearchModeExact { + // 1.21 used NewMultiMatchQuery().Type(esMultiMatchTypePhrasePrefix), but later releases changed to NewMatchPhraseQuery + contentQuery = elastic.NewMatchPhraseQuery("content", opts.Keyword) + } else /* words */ { + contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).Type(esMultiMatchTypeBestFields).Operator("and") } - kwQuery := elastic.NewBoolQuery().Should( - elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType), + contentQuery, elastic.NewMultiMatchQuery(opts.Keyword, "filename^10").Type(esMultiMatchTypePhrasePrefix), ) query := elastic.NewBoolQuery() diff --git a/modules/indexer/code/elasticsearch/elasticsearch_test.go b/modules/indexer/code/elasticsearch/elasticsearch_test.go index a6d2af92b2..e8f1f202ce 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch_test.go +++ b/modules/indexer/code/elasticsearch/elasticsearch_test.go @@ -11,6 +11,6 @@ import ( func TestIndexPos(t *testing.T) { startIdx, endIdx := contentMatchIndexPos("test index start and end", "start", "end") - assert.EqualValues(t, 11, startIdx) - assert.EqualValues(t, 15, endIdx) + assert.Equal(t, 11, startIdx) + assert.Equal(t, 15, endIdx) } diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index df9783288b..41bc74e6ec 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -16,7 +16,7 @@ import ( ) func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository) (string, error) { - stdout, _, err := git.NewCommand(ctx, "show-ref", "-s").AddDynamicArguments(git.BranchPrefix + repo.DefaultBranch).RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) + stdout, _, err := git.NewCommand("show-ref", "-s").AddDynamicArguments(git.BranchPrefix+repo.DefaultBranch).RunStdString(ctx, &git.RunOpts{Dir: repo.RepoPath()}) if err != nil { return "", err } @@ -32,8 +32,8 @@ func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision s needGenesis := len(status.CommitSha) == 0 if !needGenesis { - hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(status.CommitSha, revision) - stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) + hasAncestorCmd := git.NewCommand("merge-base").AddDynamicArguments(status.CommitSha, revision) + stdout, _, _ := hasAncestorCmd.RunStdString(ctx, &git.RunOpts{Dir: repo.RepoPath()}) needGenesis = len(stdout) == 0 } @@ -86,7 +86,7 @@ func parseGitLsTreeOutput(stdout []byte) ([]internal.FileUpdate, error) { // genesisChanges get changes to add repo to the indexer for the first time func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { var changes internal.RepoChanges - stdout, _, runErr := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision).RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) + stdout, _, runErr := git.NewCommand("ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision).RunStdBytes(ctx, &git.RunOpts{Dir: repo.RepoPath()}) if runErr != nil { return nil, runErr } @@ -98,8 +98,8 @@ func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision s // nonGenesisChanges get changes since the previous indexer update func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { - diffCmd := git.NewCommand(ctx, "diff", "--name-status").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision) - stdout, _, runErr := diffCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) + diffCmd := git.NewCommand("diff", "--name-status").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision) + stdout, _, runErr := diffCmd.RunStdString(ctx, &git.RunOpts{Dir: repo.RepoPath()}) if runErr != nil { // previous commit sha may have been removed by a force push, so // try rebuilding from scratch @@ -115,9 +115,9 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio updatedFilenames := make([]string, 0, 10) updateChanges := func() error { - cmd := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l").AddDynamicArguments(revision). + cmd := git.NewCommand("ls-tree", "--full-tree", "-l").AddDynamicArguments(revision). AddDashesAndList(updatedFilenames...) - lsTreeStdout, _, err := cmd.RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) + lsTreeStdout, _, err := cmd.RunStdBytes(ctx, &git.RunOpts{Dir: repo.RepoPath()}) if err != nil { return err } @@ -129,8 +129,8 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio changes.Updates = append(changes.Updates, updates...) return nil } - lines := strings.Split(stdout, "\n") - for _, line := range lines { + lines := strings.SplitSeq(stdout, "\n") + for line := range lines { line = strings.TrimSpace(line) if len(line) == 0 { continue diff --git a/modules/indexer/code/gitgrep/gitgrep.go b/modules/indexer/code/gitgrep/gitgrep.go new file mode 100644 index 0000000000..6f6e0b47b9 --- /dev/null +++ b/modules/indexer/code/gitgrep/gitgrep.go @@ -0,0 +1,66 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package gitgrep + +import ( + "context" + "fmt" + "strings" + + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/indexer" + code_indexer "code.gitea.io/gitea/modules/indexer/code" + "code.gitea.io/gitea/modules/setting" +) + +func indexSettingToGitGrepPathspecList() (list []string) { + for _, expr := range setting.Indexer.IncludePatterns { + list = append(list, ":(glob)"+expr.PatternString()) + } + for _, expr := range setting.Indexer.ExcludePatterns { + list = append(list, ":(glob,exclude)"+expr.PatternString()) + } + return list +} + +func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, searchMode indexer.SearchModeType) (searchResults []*code_indexer.Result, total int, err error) { + grepMode := git.GrepModeWords + switch searchMode { + case indexer.SearchModeExact: + grepMode = git.GrepModeExact + case indexer.SearchModeRegexp: + grepMode = git.GrepModeRegexp + } + res, err := git.GrepSearch(ctx, gitRepo, keyword, git.GrepOptions{ + ContextLineNumber: 1, + GrepMode: grepMode, + RefName: ref.String(), + PathspecList: indexSettingToGitGrepPathspecList(), + }) + if err != nil { + // TODO: if no branch exists, it reports: exit status 128, fatal: this operation must be run in a work tree. + return nil, 0, fmt.Errorf("git.GrepSearch: %w", err) + } + commitID, err := gitRepo.GetRefCommitID(ref.String()) + if err != nil { + return nil, 0, fmt.Errorf("gitRepo.GetRefCommitID: %w", err) + } + + total = len(res) + pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res)) + pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res)) + res = res[pageStart:pageEnd] + for _, r := range res { + searchResults = append(searchResults, &code_indexer.Result{ + RepoID: repoID, + Filename: r.Filename, + CommitID: commitID, + // UpdatedUnix: not supported yet + // Language: not supported yet + // Color: not supported yet + Lines: code_indexer.HighlightSearchResultCode(r.Filename, "", r.LineNumbers, strings.Join(r.LineCodes, "\n")), + }) + } + return searchResults, total, nil +} diff --git a/modules/indexer/code/gitgrep/gitgrep_test.go b/modules/indexer/code/gitgrep/gitgrep_test.go new file mode 100644 index 0000000000..97dda9d966 --- /dev/null +++ b/modules/indexer/code/gitgrep/gitgrep_test.go @@ -0,0 +1,19 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package gitgrep + +import ( + "testing" + + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" + + "github.com/stretchr/testify/assert" +) + +func TestIndexSettingToGitGrepPathspecList(t *testing.T) { + defer test.MockVariableValue(&setting.Indexer.IncludePatterns, setting.IndexerGlobFromString("a"))() + defer test.MockVariableValue(&setting.Indexer.ExcludePatterns, setting.IndexerGlobFromString("b"))() + assert.Equal(t, []string{":(glob)a", ":(glob,exclude)b"}, indexSettingToGitGrepPathspecList()) +} diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index 728b37fab6..6035ddfe95 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -14,6 +14,7 @@ import ( "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/code/bleve" "code.gitea.io/gitea/modules/indexer/code/elasticsearch" "code.gitea.io/gitea/modules/indexer/code/internal" @@ -29,13 +30,11 @@ var ( // When the real indexer is not ready, it will be a dummy indexer which will return error to explain it's not ready. // So it's always safe use it as *globalIndexer.Load() and call its methods. globalIndexer atomic.Pointer[internal.Indexer] - dummyIndexer *internal.Indexer ) func init() { - i := internal.NewDummyIndexer() - dummyIndexer = &i - globalIndexer.Store(dummyIndexer) + dummyIndexer := internal.NewDummyIndexer() + globalIndexer.Store(&dummyIndexer) } func index(ctx context.Context, indexer internal.Indexer, repoID int64) error { @@ -304,3 +303,11 @@ func populateRepoIndexer(ctx context.Context) { } log.Info("Done (re)populating the repo indexer with existing repositories") } + +func SupportedSearchModes() []indexer.SearchMode { + gi := globalIndexer.Load() + if gi == nil { + return nil + } + return (*gi).SupportedSearchModes() +} diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index f358bbe785..78fea22f10 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -11,12 +11,13 @@ import ( "code.gitea.io/gitea/models/db" "code.gitea.io/gitea/models/unittest" - "code.gitea.io/gitea/modules/git" + indexer_module "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/code/bleve" "code.gitea.io/gitea/modules/indexer/code/elasticsearch" "code.gitea.io/gitea/modules/indexer/code/internal" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/test" + "code.gitea.io/gitea/modules/util" _ "code.gitea.io/gitea/models" _ "code.gitea.io/gitea/models/actions" @@ -37,13 +38,14 @@ func TestMain(m *testing.M) { func testIndexer(name string, t *testing.T, indexer internal.Indexer) { t.Run(name, func(t *testing.T) { - assert.NoError(t, setupRepositoryIndexes(git.DefaultContext, indexer)) + assert.NoError(t, setupRepositoryIndexes(t.Context(), indexer)) keywords := []struct { - RepoIDs []int64 - Keyword string - Langs int - Results []codeSearchResult + RepoIDs []int64 + Keyword string + Langs int + SearchMode indexer_module.SearchModeType + Results []codeSearchResult }{ // Search for an exact match on the contents of a file // This scenario yields a single result (the file README.md on the repo '1') @@ -184,9 +186,10 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, // Search for matches on the contents of files regardless of case. { - RepoIDs: nil, - Keyword: "dESCRIPTION", - Langs: 1, + RepoIDs: nil, + Keyword: "dESCRIPTION", + Langs: 1, + SearchMode: indexer_module.SearchModeFuzzy, Results: []codeSearchResult{ { Filename: "README.md", @@ -194,7 +197,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, }, }, - // Search for an exact match on the filename within the repo '62' (case insenstive). + // Search for an exact match on the filename within the repo '62' (case-insensitive). // This scenario yields a single result (the file avocado.md on the repo '62') { RepoIDs: []int64{62}, @@ -207,7 +210,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, }, }, - // Search for matches on the contents of files when the criteria is a expression. + // Search for matches on the contents of files when the criteria are an expression. { RepoIDs: []int64{62}, Keyword: "console.log", @@ -219,7 +222,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, }, }, - // Search for matches on the contents of files when the criteria is part of a expression. + // Search for matches on the contents of files when the criteria are parts of an expression. { RepoIDs: []int64{62}, Keyword: "log", @@ -235,17 +238,17 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { for _, kw := range keywords { t.Run(kw.Keyword, func(t *testing.T) { - total, res, langs, err := indexer.Search(context.TODO(), &internal.SearchOptions{ - RepoIDs: kw.RepoIDs, - Keyword: kw.Keyword, + total, res, langs, err := indexer.Search(t.Context(), &internal.SearchOptions{ + RepoIDs: kw.RepoIDs, + Keyword: kw.Keyword, + SearchMode: util.IfZero(kw.SearchMode, indexer_module.SearchModeWords), Paginator: &db.ListOptions{ Page: 1, PageSize: 10, }, - IsKeywordFuzzy: true, }) - assert.NoError(t, err) - assert.Len(t, langs, kw.Langs) + require.NoError(t, err) + require.Len(t, langs, kw.Langs) hits := make([]codeSearchResult, 0, len(res)) @@ -275,7 +278,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }) } - assert.NoError(t, tearDownRepositoryIndexes(indexer)) + assert.NoError(t, tearDownRepositoryIndexes(t.Context(), indexer)) }) } @@ -287,10 +290,10 @@ func TestBleveIndexAndSearch(t *testing.T) { idx := bleve.NewIndexer(dir) defer idx.Close() - _, err := idx.Init(context.Background()) + _, err := idx.Init(t.Context()) require.NoError(t, err) - testIndexer("beleve", t, idx) + testIndexer("bleve", t, idx) } func TestESIndexAndSearch(t *testing.T) { @@ -303,11 +306,11 @@ func TestESIndexAndSearch(t *testing.T) { } indexer := elasticsearch.NewIndexer(u, "gitea_codes") - if _, err := indexer.Init(context.Background()); err != nil { + if _, err := indexer.Init(t.Context()); err != nil { if indexer != nil { indexer.Close() } - assert.FailNow(t, "Unable to init ES indexer Error: %v", err) + require.NoError(t, err, "Unable to init ES indexer") } defer indexer.Close() @@ -324,9 +327,9 @@ func setupRepositoryIndexes(ctx context.Context, indexer internal.Indexer) error return nil } -func tearDownRepositoryIndexes(indexer internal.Indexer) error { +func tearDownRepositoryIndexes(ctx context.Context, indexer internal.Indexer) error { for _, repoID := range repositoriesToSearch() { - if err := indexer.Delete(context.Background(), repoID); err != nil { + if err := indexer.Delete(ctx, repoID); err != nil { return err } } diff --git a/modules/indexer/code/internal/indexer.go b/modules/indexer/code/internal/indexer.go index c259fcd26e..d58b028124 100644 --- a/modules/indexer/code/internal/indexer.go +++ b/modules/indexer/code/internal/indexer.go @@ -5,10 +5,11 @@ package internal import ( "context" - "fmt" + "errors" "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/internal" ) @@ -18,6 +19,7 @@ type Indexer interface { Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error Delete(ctx context.Context, repoID int64) error Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) + SupportedSearchModes() []indexer.SearchMode } type SearchOptions struct { @@ -25,7 +27,7 @@ type SearchOptions struct { Keyword string Language string - IsKeywordFuzzy bool + SearchMode indexer.SearchModeType db.Paginator } @@ -41,14 +43,18 @@ type dummyIndexer struct { internal.Indexer } +func (d *dummyIndexer) SupportedSearchModes() []indexer.SearchMode { + return nil +} + func (d *dummyIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error { - return fmt.Errorf("indexer is not ready") + return errors.New("indexer is not ready") } func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error { - return fmt.Errorf("indexer is not ready") + return errors.New("indexer is not ready") } func (d *dummyIndexer) Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) { - return 0, nil, nil, fmt.Errorf("indexer is not ready") + return 0, nil, nil, errors.New("indexer is not ready") } diff --git a/modules/indexer/code/internal/util.go b/modules/indexer/code/internal/util.go index 5b95783d9f..fa958be473 100644 --- a/modules/indexer/code/internal/util.go +++ b/modules/indexer/code/internal/util.go @@ -10,9 +10,7 @@ import ( "code.gitea.io/gitea/modules/log" ) -const ( - filenameMatchNumberOfLines = 7 // Copied from github search -) +const filenameMatchNumberOfLines = 7 // Copied from GitHub search func FilenameIndexerID(repoID int64, filename string) string { return internal.Base36(repoID) + "_" + filename @@ -35,7 +33,7 @@ func FilenameOfIndexerID(indexerID string) string { return indexerID[index+1:] } -// Given the contents of file, returns the boundaries of its first seven lines. +// FilenameMatchIndexPos returns the boundaries of its first seven lines. func FilenameMatchIndexPos(content string) (int, int) { count := 1 for i, c := range content { diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index 74c957dde6..a7a5d7d2e3 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -77,7 +77,7 @@ func HighlightSearchResultCode(filename, language string, lineNums []int, code s // The lineNums outputted by highlight.Code might not match the original lineNums, because "highlight" removes the last `\n` lines := make([]*ResultLine, min(len(highlightedLines), len(lineNums))) - for i := 0; i < len(lines); i++ { + for i := range lines { lines[i] = &ResultLine{ Num: lineNums[i], FormattedContent: template.HTML(highlightedLines[i]), @@ -129,7 +129,6 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res } // PerformSearch perform a search on a repository -// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2 func PerformSearch(ctx context.Context, opts *SearchOptions) (int, []*Result, []*SearchResultLanguages, error) { if opts == nil || len(opts.Keyword) == 0 { return 0, nil, nil, nil |