aboutsummaryrefslogtreecommitdiffstats
path: root/modules/indexer/code
diff options
context:
space:
mode:
Diffstat (limited to 'modules/indexer/code')
-rw-r--r--modules/indexer/code/bleve/bleve.go48
-rw-r--r--modules/indexer/code/bleve/token/path/path.go12
-rw-r--r--modules/indexer/code/elasticsearch/elasticsearch.go29
-rw-r--r--modules/indexer/code/elasticsearch/elasticsearch_test.go4
-rw-r--r--modules/indexer/code/git.go20
-rw-r--r--modules/indexer/code/gitgrep/gitgrep.go66
-rw-r--r--modules/indexer/code/gitgrep/gitgrep_test.go19
-rw-r--r--modules/indexer/code/indexer.go15
-rw-r--r--modules/indexer/code/indexer_test.go53
-rw-r--r--modules/indexer/code/internal/indexer.go16
-rw-r--r--modules/indexer/code/internal/util.go6
-rw-r--r--modules/indexer/code/search.go3
12 files changed, 205 insertions, 86 deletions
diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go
index 772317fa59..70f0995a01 100644
--- a/modules/indexer/code/bleve/bleve.go
+++ b/modules/indexer/code/bleve/bleve.go
@@ -16,7 +16,7 @@ import (
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
- "code.gitea.io/gitea/modules/gitrepo"
+ "code.gitea.io/gitea/modules/indexer"
path_filter "code.gitea.io/gitea/modules/indexer/code/bleve/token/path"
"code.gitea.io/gitea/modules/indexer/code/internal"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
@@ -24,11 +24,11 @@ import (
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"
+ "code.gitea.io/gitea/modules/util"
"github.com/blevesearch/bleve/v2"
analyzer_custom "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
analyzer_keyword "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
- "github.com/blevesearch/bleve/v2/analysis/token/camelcase"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/letter"
@@ -70,7 +70,7 @@ const (
filenameIndexerAnalyzer = "filenameIndexerAnalyzer"
filenameIndexerTokenizer = "filenameIndexerTokenizer"
repoIndexerDocType = "repoIndexerDocType"
- repoIndexerLatestVersion = 8
+ repoIndexerLatestVersion = 9
)
// generateBleveIndexMapping generates a bleve index mapping for the repo indexer
@@ -107,7 +107,7 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) {
"type": analyzer_custom.Name,
"char_filters": []string{},
"tokenizer": letter.Name,
- "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
+ "token_filters": []string{unicodeNormalizeName, lowercase.Name},
}); err != nil {
return nil, err
}
@@ -136,6 +136,10 @@ type Indexer struct {
indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much
}
+func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
+ return indexer.SearchModesExactWords()
+}
+
// NewIndexer creates a new bleve local indexer
func NewIndexer(indexDir string) *Indexer {
inner := inner_bleve.NewIndexer(indexDir, repoIndexerLatestVersion, generateBleveIndexMapping)
@@ -158,7 +162,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
var err error
if !update.Sized {
var stdout string
- stdout, _, err = git.NewCommand(ctx, "cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
+ stdout, _, err = git.NewCommand("cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(ctx, &git.RunOpts{Dir: repo.RepoPath()})
if err != nil {
return err
}
@@ -185,7 +189,8 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
return err
} else if !typesniffer.DetectContentType(fileContents).IsText() {
// FIXME: UTF-16 files will probably fail here
- return nil
+ // Even if the file is not recognized as a "text file", we could still put its name into the indexers to make the filename become searchable, while leave the content to empty.
+ fileContents = nil
}
if _, err = batchReader.Discard(1); err != nil {
@@ -211,12 +216,7 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, batch
func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error {
batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
if len(changes.Updates) > 0 {
- r, err := gitrepo.OpenRepository(ctx, repo)
- if err != nil {
- return err
- }
- defer r.Close()
- gitBatch, err := r.NewBatch(ctx)
+ gitBatch, err := git.NewBatch(ctx, repo.RepoPath())
if err != nil {
return err
}
@@ -260,17 +260,31 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
var (
indexerQuery query.Query
keywordQuery query.Query
+ contentQuery query.Query
)
pathQuery := bleve.NewPrefixQuery(strings.ToLower(opts.Keyword))
pathQuery.FieldVal = "Filename"
pathQuery.SetBoost(10)
- contentQuery := bleve.NewMatchQuery(opts.Keyword)
- contentQuery.FieldVal = "Content"
-
- if opts.IsKeywordFuzzy {
- contentQuery.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
+ searchMode := util.IfZero(opts.SearchMode, b.SupportedSearchModes()[0].ModeValue)
+ if searchMode == indexer.SearchModeExact {
+ // 1.21 used NewPrefixQuery, but it seems not working well, and later releases changed to NewMatchPhraseQuery
+ q := bleve.NewMatchPhraseQuery(opts.Keyword)
+ q.Analyzer = repoIndexerAnalyzer
+ q.FieldVal = "Content"
+ contentQuery = q
+ } else /* words */ {
+ q := bleve.NewMatchQuery(opts.Keyword)
+ q.FieldVal = "Content"
+ q.Analyzer = repoIndexerAnalyzer
+ if searchMode == indexer.SearchModeFuzzy {
+ // this logic doesn't seem right, it is only used to pass the test-case `Keyword: "dESCRIPTION"`, which doesn't seem to be a real-life use-case.
+ q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
+ } else {
+ q.Operator = query.MatchQueryOperatorAnd
+ }
+ contentQuery = q
}
keywordQuery = bleve.NewDisjunctionQuery(contentQuery, pathQuery)
diff --git a/modules/indexer/code/bleve/token/path/path.go b/modules/indexer/code/bleve/token/path/path.go
index 107e0da109..6dfc12f146 100644
--- a/modules/indexer/code/bleve/token/path/path.go
+++ b/modules/indexer/code/bleve/token/path/path.go
@@ -51,13 +51,13 @@ func generatePathTokens(input analysis.TokenStream, reversed bool) analysis.Toke
slices.Reverse(input)
}
- for i := 0; i < len(input); i++ {
+ for i := range input {
var sb strings.Builder
- sb.WriteString(string(input[0].Term))
+ sb.Write(input[0].Term)
for j := 1; j < i; j++ {
sb.WriteString("/")
- sb.WriteString(string(input[j].Term))
+ sb.Write(input[j].Term)
}
term := sb.String()
@@ -97,5 +97,9 @@ func generatePathTokens(input analysis.TokenStream, reversed bool) analysis.Toke
}
func init() {
- registry.RegisterTokenFilter(Name, TokenFilterConstructor)
+ // FIXME: move it to the bleve's init function, but do not call it in global init
+ err := registry.RegisterTokenFilter(Name, TokenFilterConstructor)
+ if err != nil {
+ panic(err)
+ }
}
diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go
index 1c4dd39eff..f925ce396a 100644
--- a/modules/indexer/code/elasticsearch/elasticsearch.go
+++ b/modules/indexer/code/elasticsearch/elasticsearch.go
@@ -15,7 +15,7 @@ import (
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
- "code.gitea.io/gitea/modules/gitrepo"
+ "code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/indexer/code/internal"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
@@ -24,6 +24,7 @@ import (
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"
+ "code.gitea.io/gitea/modules/util"
"github.com/go-enry/go-enry/v2"
"github.com/olivere/elastic/v7"
@@ -45,6 +46,10 @@ type Indexer struct {
indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much
}
+func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
+ return indexer.SearchModesExactWords()
+}
+
// NewIndexer creates a new elasticsearch indexer
func NewIndexer(url, indexerName string) *Indexer {
inner := inner_elasticsearch.NewIndexer(url, indexerName, esRepoIndexerLatestVersion, defaultMapping)
@@ -142,7 +147,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
var err error
if !update.Sized {
var stdout string
- stdout, _, err = git.NewCommand(ctx, "cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
+ stdout, _, err = git.NewCommand("cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(ctx, &git.RunOpts{Dir: repo.RepoPath()})
if err != nil {
return nil, err
}
@@ -203,12 +208,7 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elasti
func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error {
reqs := make([]elastic.BulkableRequest, 0)
if len(changes.Updates) > 0 {
- r, err := gitrepo.OpenRepository(ctx, repo)
- if err != nil {
- return err
- }
- defer r.Close()
- batch, err := r.NewBatch(ctx)
+ batch, err := git.NewBatch(ctx, repo.RepoPath())
if err != nil {
return err
}
@@ -359,13 +359,16 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
// Search searches for codes and language stats by given conditions.
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
- searchType := esMultiMatchTypePhrasePrefix
- if opts.IsKeywordFuzzy {
- searchType = esMultiMatchTypeBestFields
+ var contentQuery elastic.Query
+ searchMode := util.IfZero(opts.SearchMode, b.SupportedSearchModes()[0].ModeValue)
+ if searchMode == indexer.SearchModeExact {
+ // 1.21 used NewMultiMatchQuery().Type(esMultiMatchTypePhrasePrefix), but later releases changed to NewMatchPhraseQuery
+ contentQuery = elastic.NewMatchPhraseQuery("content", opts.Keyword)
+ } else /* words */ {
+ contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).Type(esMultiMatchTypeBestFields).Operator("and")
}
-
kwQuery := elastic.NewBoolQuery().Should(
- elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType),
+ contentQuery,
elastic.NewMultiMatchQuery(opts.Keyword, "filename^10").Type(esMultiMatchTypePhrasePrefix),
)
query := elastic.NewBoolQuery()
diff --git a/modules/indexer/code/elasticsearch/elasticsearch_test.go b/modules/indexer/code/elasticsearch/elasticsearch_test.go
index a6d2af92b2..e8f1f202ce 100644
--- a/modules/indexer/code/elasticsearch/elasticsearch_test.go
+++ b/modules/indexer/code/elasticsearch/elasticsearch_test.go
@@ -11,6 +11,6 @@ import (
func TestIndexPos(t *testing.T) {
startIdx, endIdx := contentMatchIndexPos("test index start and end", "start", "end")
- assert.EqualValues(t, 11, startIdx)
- assert.EqualValues(t, 15, endIdx)
+ assert.Equal(t, 11, startIdx)
+ assert.Equal(t, 15, endIdx)
}
diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go
index df9783288b..41bc74e6ec 100644
--- a/modules/indexer/code/git.go
+++ b/modules/indexer/code/git.go
@@ -16,7 +16,7 @@ import (
)
func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository) (string, error) {
- stdout, _, err := git.NewCommand(ctx, "show-ref", "-s").AddDynamicArguments(git.BranchPrefix + repo.DefaultBranch).RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
+ stdout, _, err := git.NewCommand("show-ref", "-s").AddDynamicArguments(git.BranchPrefix+repo.DefaultBranch).RunStdString(ctx, &git.RunOpts{Dir: repo.RepoPath()})
if err != nil {
return "", err
}
@@ -32,8 +32,8 @@ func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision s
needGenesis := len(status.CommitSha) == 0
if !needGenesis {
- hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(status.CommitSha, revision)
- stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
+ hasAncestorCmd := git.NewCommand("merge-base").AddDynamicArguments(status.CommitSha, revision)
+ stdout, _, _ := hasAncestorCmd.RunStdString(ctx, &git.RunOpts{Dir: repo.RepoPath()})
needGenesis = len(stdout) == 0
}
@@ -86,7 +86,7 @@ func parseGitLsTreeOutput(stdout []byte) ([]internal.FileUpdate, error) {
// genesisChanges get changes to add repo to the indexer for the first time
func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) {
var changes internal.RepoChanges
- stdout, _, runErr := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision).RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()})
+ stdout, _, runErr := git.NewCommand("ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision).RunStdBytes(ctx, &git.RunOpts{Dir: repo.RepoPath()})
if runErr != nil {
return nil, runErr
}
@@ -98,8 +98,8 @@ func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision s
// nonGenesisChanges get changes since the previous indexer update
func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) {
- diffCmd := git.NewCommand(ctx, "diff", "--name-status").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision)
- stdout, _, runErr := diffCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
+ diffCmd := git.NewCommand("diff", "--name-status").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision)
+ stdout, _, runErr := diffCmd.RunStdString(ctx, &git.RunOpts{Dir: repo.RepoPath()})
if runErr != nil {
// previous commit sha may have been removed by a force push, so
// try rebuilding from scratch
@@ -115,9 +115,9 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio
updatedFilenames := make([]string, 0, 10)
updateChanges := func() error {
- cmd := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l").AddDynamicArguments(revision).
+ cmd := git.NewCommand("ls-tree", "--full-tree", "-l").AddDynamicArguments(revision).
AddDashesAndList(updatedFilenames...)
- lsTreeStdout, _, err := cmd.RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()})
+ lsTreeStdout, _, err := cmd.RunStdBytes(ctx, &git.RunOpts{Dir: repo.RepoPath()})
if err != nil {
return err
}
@@ -129,8 +129,8 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio
changes.Updates = append(changes.Updates, updates...)
return nil
}
- lines := strings.Split(stdout, "\n")
- for _, line := range lines {
+ lines := strings.SplitSeq(stdout, "\n")
+ for line := range lines {
line = strings.TrimSpace(line)
if len(line) == 0 {
continue
diff --git a/modules/indexer/code/gitgrep/gitgrep.go b/modules/indexer/code/gitgrep/gitgrep.go
new file mode 100644
index 0000000000..6f6e0b47b9
--- /dev/null
+++ b/modules/indexer/code/gitgrep/gitgrep.go
@@ -0,0 +1,66 @@
+// Copyright 2025 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package gitgrep
+
+import (
+ "context"
+ "fmt"
+ "strings"
+
+ "code.gitea.io/gitea/modules/git"
+ "code.gitea.io/gitea/modules/indexer"
+ code_indexer "code.gitea.io/gitea/modules/indexer/code"
+ "code.gitea.io/gitea/modules/setting"
+)
+
+func indexSettingToGitGrepPathspecList() (list []string) {
+ for _, expr := range setting.Indexer.IncludePatterns {
+ list = append(list, ":(glob)"+expr.PatternString())
+ }
+ for _, expr := range setting.Indexer.ExcludePatterns {
+ list = append(list, ":(glob,exclude)"+expr.PatternString())
+ }
+ return list
+}
+
+func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, searchMode indexer.SearchModeType) (searchResults []*code_indexer.Result, total int, err error) {
+ grepMode := git.GrepModeWords
+ switch searchMode {
+ case indexer.SearchModeExact:
+ grepMode = git.GrepModeExact
+ case indexer.SearchModeRegexp:
+ grepMode = git.GrepModeRegexp
+ }
+ res, err := git.GrepSearch(ctx, gitRepo, keyword, git.GrepOptions{
+ ContextLineNumber: 1,
+ GrepMode: grepMode,
+ RefName: ref.String(),
+ PathspecList: indexSettingToGitGrepPathspecList(),
+ })
+ if err != nil {
+ // TODO: if no branch exists, it reports: exit status 128, fatal: this operation must be run in a work tree.
+ return nil, 0, fmt.Errorf("git.GrepSearch: %w", err)
+ }
+ commitID, err := gitRepo.GetRefCommitID(ref.String())
+ if err != nil {
+ return nil, 0, fmt.Errorf("gitRepo.GetRefCommitID: %w", err)
+ }
+
+ total = len(res)
+ pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res))
+ pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res))
+ res = res[pageStart:pageEnd]
+ for _, r := range res {
+ searchResults = append(searchResults, &code_indexer.Result{
+ RepoID: repoID,
+ Filename: r.Filename,
+ CommitID: commitID,
+ // UpdatedUnix: not supported yet
+ // Language: not supported yet
+ // Color: not supported yet
+ Lines: code_indexer.HighlightSearchResultCode(r.Filename, "", r.LineNumbers, strings.Join(r.LineCodes, "\n")),
+ })
+ }
+ return searchResults, total, nil
+}
diff --git a/modules/indexer/code/gitgrep/gitgrep_test.go b/modules/indexer/code/gitgrep/gitgrep_test.go
new file mode 100644
index 0000000000..97dda9d966
--- /dev/null
+++ b/modules/indexer/code/gitgrep/gitgrep_test.go
@@ -0,0 +1,19 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package gitgrep
+
+import (
+ "testing"
+
+ "code.gitea.io/gitea/modules/setting"
+ "code.gitea.io/gitea/modules/test"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestIndexSettingToGitGrepPathspecList(t *testing.T) {
+ defer test.MockVariableValue(&setting.Indexer.IncludePatterns, setting.IndexerGlobFromString("a"))()
+ defer test.MockVariableValue(&setting.Indexer.ExcludePatterns, setting.IndexerGlobFromString("b"))()
+ assert.Equal(t, []string{":(glob)a", ":(glob,exclude)b"}, indexSettingToGitGrepPathspecList())
+}
diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go
index 728b37fab6..6035ddfe95 100644
--- a/modules/indexer/code/indexer.go
+++ b/modules/indexer/code/indexer.go
@@ -14,6 +14,7 @@ import (
"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/graceful"
+ "code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/indexer/code/bleve"
"code.gitea.io/gitea/modules/indexer/code/elasticsearch"
"code.gitea.io/gitea/modules/indexer/code/internal"
@@ -29,13 +30,11 @@ var (
// When the real indexer is not ready, it will be a dummy indexer which will return error to explain it's not ready.
// So it's always safe use it as *globalIndexer.Load() and call its methods.
globalIndexer atomic.Pointer[internal.Indexer]
- dummyIndexer *internal.Indexer
)
func init() {
- i := internal.NewDummyIndexer()
- dummyIndexer = &i
- globalIndexer.Store(dummyIndexer)
+ dummyIndexer := internal.NewDummyIndexer()
+ globalIndexer.Store(&dummyIndexer)
}
func index(ctx context.Context, indexer internal.Indexer, repoID int64) error {
@@ -304,3 +303,11 @@ func populateRepoIndexer(ctx context.Context) {
}
log.Info("Done (re)populating the repo indexer with existing repositories")
}
+
+func SupportedSearchModes() []indexer.SearchMode {
+ gi := globalIndexer.Load()
+ if gi == nil {
+ return nil
+ }
+ return (*gi).SupportedSearchModes()
+}
diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go
index f358bbe785..78fea22f10 100644
--- a/modules/indexer/code/indexer_test.go
+++ b/modules/indexer/code/indexer_test.go
@@ -11,12 +11,13 @@ import (
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/models/unittest"
- "code.gitea.io/gitea/modules/git"
+ indexer_module "code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/indexer/code/bleve"
"code.gitea.io/gitea/modules/indexer/code/elasticsearch"
"code.gitea.io/gitea/modules/indexer/code/internal"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/test"
+ "code.gitea.io/gitea/modules/util"
_ "code.gitea.io/gitea/models"
_ "code.gitea.io/gitea/models/actions"
@@ -37,13 +38,14 @@ func TestMain(m *testing.M) {
func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
t.Run(name, func(t *testing.T) {
- assert.NoError(t, setupRepositoryIndexes(git.DefaultContext, indexer))
+ assert.NoError(t, setupRepositoryIndexes(t.Context(), indexer))
keywords := []struct {
- RepoIDs []int64
- Keyword string
- Langs int
- Results []codeSearchResult
+ RepoIDs []int64
+ Keyword string
+ Langs int
+ SearchMode indexer_module.SearchModeType
+ Results []codeSearchResult
}{
// Search for an exact match on the contents of a file
// This scenario yields a single result (the file README.md on the repo '1')
@@ -184,9 +186,10 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
},
// Search for matches on the contents of files regardless of case.
{
- RepoIDs: nil,
- Keyword: "dESCRIPTION",
- Langs: 1,
+ RepoIDs: nil,
+ Keyword: "dESCRIPTION",
+ Langs: 1,
+ SearchMode: indexer_module.SearchModeFuzzy,
Results: []codeSearchResult{
{
Filename: "README.md",
@@ -194,7 +197,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
},
},
},
- // Search for an exact match on the filename within the repo '62' (case insenstive).
+ // Search for an exact match on the filename within the repo '62' (case-insensitive).
// This scenario yields a single result (the file avocado.md on the repo '62')
{
RepoIDs: []int64{62},
@@ -207,7 +210,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
},
},
},
- // Search for matches on the contents of files when the criteria is a expression.
+ // Search for matches on the contents of files when the criteria are an expression.
{
RepoIDs: []int64{62},
Keyword: "console.log",
@@ -219,7 +222,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
},
},
},
- // Search for matches on the contents of files when the criteria is part of a expression.
+ // Search for matches on the contents of files when the criteria are parts of an expression.
{
RepoIDs: []int64{62},
Keyword: "log",
@@ -235,17 +238,17 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
for _, kw := range keywords {
t.Run(kw.Keyword, func(t *testing.T) {
- total, res, langs, err := indexer.Search(context.TODO(), &internal.SearchOptions{
- RepoIDs: kw.RepoIDs,
- Keyword: kw.Keyword,
+ total, res, langs, err := indexer.Search(t.Context(), &internal.SearchOptions{
+ RepoIDs: kw.RepoIDs,
+ Keyword: kw.Keyword,
+ SearchMode: util.IfZero(kw.SearchMode, indexer_module.SearchModeWords),
Paginator: &db.ListOptions{
Page: 1,
PageSize: 10,
},
- IsKeywordFuzzy: true,
})
- assert.NoError(t, err)
- assert.Len(t, langs, kw.Langs)
+ require.NoError(t, err)
+ require.Len(t, langs, kw.Langs)
hits := make([]codeSearchResult, 0, len(res))
@@ -275,7 +278,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
})
}
- assert.NoError(t, tearDownRepositoryIndexes(indexer))
+ assert.NoError(t, tearDownRepositoryIndexes(t.Context(), indexer))
})
}
@@ -287,10 +290,10 @@ func TestBleveIndexAndSearch(t *testing.T) {
idx := bleve.NewIndexer(dir)
defer idx.Close()
- _, err := idx.Init(context.Background())
+ _, err := idx.Init(t.Context())
require.NoError(t, err)
- testIndexer("beleve", t, idx)
+ testIndexer("bleve", t, idx)
}
func TestESIndexAndSearch(t *testing.T) {
@@ -303,11 +306,11 @@ func TestESIndexAndSearch(t *testing.T) {
}
indexer := elasticsearch.NewIndexer(u, "gitea_codes")
- if _, err := indexer.Init(context.Background()); err != nil {
+ if _, err := indexer.Init(t.Context()); err != nil {
if indexer != nil {
indexer.Close()
}
- assert.FailNow(t, "Unable to init ES indexer Error: %v", err)
+ require.NoError(t, err, "Unable to init ES indexer")
}
defer indexer.Close()
@@ -324,9 +327,9 @@ func setupRepositoryIndexes(ctx context.Context, indexer internal.Indexer) error
return nil
}
-func tearDownRepositoryIndexes(indexer internal.Indexer) error {
+func tearDownRepositoryIndexes(ctx context.Context, indexer internal.Indexer) error {
for _, repoID := range repositoriesToSearch() {
- if err := indexer.Delete(context.Background(), repoID); err != nil {
+ if err := indexer.Delete(ctx, repoID); err != nil {
return err
}
}
diff --git a/modules/indexer/code/internal/indexer.go b/modules/indexer/code/internal/indexer.go
index c259fcd26e..d58b028124 100644
--- a/modules/indexer/code/internal/indexer.go
+++ b/modules/indexer/code/internal/indexer.go
@@ -5,10 +5,11 @@ package internal
import (
"context"
- "fmt"
+ "errors"
"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
+ "code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/indexer/internal"
)
@@ -18,6 +19,7 @@ type Indexer interface {
Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error
Delete(ctx context.Context, repoID int64) error
Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error)
+ SupportedSearchModes() []indexer.SearchMode
}
type SearchOptions struct {
@@ -25,7 +27,7 @@ type SearchOptions struct {
Keyword string
Language string
- IsKeywordFuzzy bool
+ SearchMode indexer.SearchModeType
db.Paginator
}
@@ -41,14 +43,18 @@ type dummyIndexer struct {
internal.Indexer
}
+func (d *dummyIndexer) SupportedSearchModes() []indexer.SearchMode {
+ return nil
+}
+
func (d *dummyIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error {
- return fmt.Errorf("indexer is not ready")
+ return errors.New("indexer is not ready")
}
func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error {
- return fmt.Errorf("indexer is not ready")
+ return errors.New("indexer is not ready")
}
func (d *dummyIndexer) Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) {
- return 0, nil, nil, fmt.Errorf("indexer is not ready")
+ return 0, nil, nil, errors.New("indexer is not ready")
}
diff --git a/modules/indexer/code/internal/util.go b/modules/indexer/code/internal/util.go
index 5b95783d9f..fa958be473 100644
--- a/modules/indexer/code/internal/util.go
+++ b/modules/indexer/code/internal/util.go
@@ -10,9 +10,7 @@ import (
"code.gitea.io/gitea/modules/log"
)
-const (
- filenameMatchNumberOfLines = 7 // Copied from github search
-)
+const filenameMatchNumberOfLines = 7 // Copied from GitHub search
func FilenameIndexerID(repoID int64, filename string) string {
return internal.Base36(repoID) + "_" + filename
@@ -35,7 +33,7 @@ func FilenameOfIndexerID(indexerID string) string {
return indexerID[index+1:]
}
-// Given the contents of file, returns the boundaries of its first seven lines.
+// FilenameMatchIndexPos returns the boundaries of its first seven lines.
func FilenameMatchIndexPos(content string) (int, int) {
count := 1
for i, c := range content {
diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go
index 74c957dde6..a7a5d7d2e3 100644
--- a/modules/indexer/code/search.go
+++ b/modules/indexer/code/search.go
@@ -77,7 +77,7 @@ func HighlightSearchResultCode(filename, language string, lineNums []int, code s
// The lineNums outputted by highlight.Code might not match the original lineNums, because "highlight" removes the last `\n`
lines := make([]*ResultLine, min(len(highlightedLines), len(lineNums)))
- for i := 0; i < len(lines); i++ {
+ for i := range lines {
lines[i] = &ResultLine{
Num: lineNums[i],
FormattedContent: template.HTML(highlightedLines[i]),
@@ -129,7 +129,6 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
}
// PerformSearch perform a search on a repository
-// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2
func PerformSearch(ctx context.Context, opts *SearchOptions) (int, []*Result, []*SearchResultLanguages, error) {
if opts == nil || len(opts.Keyword) == 0 {
return 0, nil, nil, nil