summaryrefslogtreecommitdiffstats
path: root/modules
diff options
context:
space:
mode:
author6543 <m.huber@kithara.com>2024-03-16 11:32:45 +0100
committerGitHub <noreply@github.com>2024-03-16 10:32:45 +0000
commit1262ff6734543b37d834e63a6a623648c77ee4f4 (patch)
treecc99d1a96d094881a820659ea7fdf2d326b8a862 /modules
parente0ea3811c4178ffa30452b7ca4bd211e59326f91 (diff)
downloadgitea-1262ff6734543b37d834e63a6a623648c77ee4f4.tar.gz
gitea-1262ff6734543b37d834e63a6a623648c77ee4f4.zip
Refactor code_indexer to use a SearchOptions struct for PerformSearch (#29724)
similar to how it's already done for the issue_indexer --- *Sponsored by Kithara Software GmbH*
Diffstat (limited to 'modules')
-rw-r--r--modules/indexer/code/bleve/bleve.go26
-rw-r--r--modules/indexer/code/elasticsearch/elasticsearch.go26
-rw-r--r--modules/indexer/code/git.go2
-rw-r--r--modules/indexer/code/indexer_test.go11
-rw-r--r--modules/indexer/code/internal/indexer.go15
-rw-r--r--modules/indexer/code/search.go8
6 files changed, 53 insertions, 35 deletions
diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go
index 107dd23598..d7f735e957 100644
--- a/modules/indexer/code/bleve/bleve.go
+++ b/modules/indexer/code/bleve/bleve.go
@@ -142,7 +142,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
return err
}
if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
- return fmt.Errorf("Misformatted git cat-file output: %w", err)
+ return fmt.Errorf("misformatted git cat-file output: %w", err)
}
}
@@ -233,26 +233,26 @@ func (b *Indexer) Delete(_ context.Context, repoID int64) error {
// Search searches for files in the specified repo.
// Returns the matching file-paths
-func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
+func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
var (
indexerQuery query.Query
keywordQuery query.Query
)
- if isFuzzy {
- phraseQuery := bleve.NewMatchPhraseQuery(keyword)
+ if opts.IsKeywordFuzzy {
+ phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
phraseQuery.FieldVal = "Content"
phraseQuery.Analyzer = repoIndexerAnalyzer
keywordQuery = phraseQuery
} else {
- prefixQuery := bleve.NewPrefixQuery(keyword)
+ prefixQuery := bleve.NewPrefixQuery(opts.Keyword)
prefixQuery.FieldVal = "Content"
keywordQuery = prefixQuery
}
- if len(repoIDs) > 0 {
- repoQueries := make([]query.Query, 0, len(repoIDs))
- for _, repoID := range repoIDs {
+ if len(opts.RepoIDs) > 0 {
+ repoQueries := make([]query.Query, 0, len(opts.RepoIDs))
+ for _, repoID := range opts.RepoIDs {
repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "RepoID"))
}
@@ -266,8 +266,8 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
// Save for reuse without language filter
facetQuery := indexerQuery
- if len(language) > 0 {
- languageQuery := bleve.NewMatchQuery(language)
+ if len(opts.Language) > 0 {
+ languageQuery := bleve.NewMatchQuery(opts.Language)
languageQuery.FieldVal = "Language"
languageQuery.Analyzer = analyzer_keyword.Name
@@ -277,12 +277,12 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
)
}
- from := (page - 1) * pageSize
+ from, pageSize := opts.GetSkipTake()
searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false)
searchRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
searchRequest.IncludeLocations = true
- if len(language) == 0 {
+ if len(opts.Language) == 0 {
searchRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10))
}
@@ -326,7 +326,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
}
searchResultLanguages := make([]*internal.SearchResultLanguages, 0, 10)
- if len(language) > 0 {
+ if len(opts.Language) > 0 {
// Use separate query to go get all language counts
facetRequest := bleve.NewSearchRequestOptions(facetQuery, 1, 0, false)
facetRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go
index 065b0b2061..e4622fd66e 100644
--- a/modules/indexer/code/elasticsearch/elasticsearch.go
+++ b/modules/indexer/code/elasticsearch/elasticsearch.go
@@ -281,18 +281,18 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
}
// Search searches for codes and language stats by given conditions.
-func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
+func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
searchType := esMultiMatchTypePhrasePrefix
- if isFuzzy {
+ if opts.IsKeywordFuzzy {
searchType = esMultiMatchTypeBestFields
}
- kwQuery := elastic.NewMultiMatchQuery(keyword, "content").Type(searchType)
+ kwQuery := elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType)
query := elastic.NewBoolQuery()
query = query.Must(kwQuery)
- if len(repoIDs) > 0 {
- repoStrs := make([]any, 0, len(repoIDs))
- for _, repoID := range repoIDs {
+ if len(opts.RepoIDs) > 0 {
+ repoStrs := make([]any, 0, len(opts.RepoIDs))
+ for _, repoID := range opts.RepoIDs {
repoStrs = append(repoStrs, repoID)
}
repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...)
@@ -300,16 +300,12 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
}
var (
- start int
- kw = "<em>" + keyword + "</em>"
- aggregation = elastic.NewTermsAggregation().Field("language").Size(10).OrderByCountDesc()
+ start, pageSize = opts.GetSkipTake()
+ kw = "<em>" + opts.Keyword + "</em>"
+ aggregation = elastic.NewTermsAggregation().Field("language").Size(10).OrderByCountDesc()
)
- if page > 0 {
- start = (page - 1) * pageSize
- }
-
- if len(language) == 0 {
+ if len(opts.Language) == 0 {
searchResult, err := b.inner.Client.Search().
Index(b.inner.VersionedIndexName()).
Aggregation("language", aggregation).
@@ -330,7 +326,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
return convertResult(searchResult, kw, pageSize)
}
- langQuery := elastic.NewMatchQuery("language", language)
+ langQuery := elastic.NewMatchQuery("language", opts.Language)
countResult, err := b.inner.Client.Search().
Index(b.inner.VersionedIndexName()).
Aggregation("language", aggregation).
diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go
index f105d032eb..2905a540e5 100644
--- a/modules/indexer/code/git.go
+++ b/modules/indexer/code/git.go
@@ -32,7 +32,7 @@ func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision s
needGenesis := len(status.CommitSha) == 0
if !needGenesis {
- hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision)
+ hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(status.CommitSha, revision)
stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
needGenesis = len(stdout) == 0
}
diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go
index 23dbd63410..8975c5ce40 100644
--- a/modules/indexer/code/indexer_test.go
+++ b/modules/indexer/code/indexer_test.go
@@ -8,6 +8,7 @@ import (
"os"
"testing"
+ "code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/models/unittest"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/indexer/code/bleve"
@@ -70,7 +71,15 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
for _, kw := range keywords {
t.Run(kw.Keyword, func(t *testing.T) {
- total, res, langs, err := indexer.Search(context.TODO(), kw.RepoIDs, "", kw.Keyword, 1, 10, true)
+ total, res, langs, err := indexer.Search(context.TODO(), &internal.SearchOptions{
+ RepoIDs: kw.RepoIDs,
+ Keyword: kw.Keyword,
+ Paginator: &db.ListOptions{
+ Page: 1,
+ PageSize: 10,
+ },
+ IsKeywordFuzzy: true,
+ })
assert.NoError(t, err)
assert.Len(t, kw.IDs, int(total))
assert.Len(t, langs, kw.Langs)
diff --git a/modules/indexer/code/internal/indexer.go b/modules/indexer/code/internal/indexer.go
index c92419deb2..c259fcd26e 100644
--- a/modules/indexer/code/internal/indexer.go
+++ b/modules/indexer/code/internal/indexer.go
@@ -7,6 +7,7 @@ import (
"context"
"fmt"
+ "code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/indexer/internal"
)
@@ -16,7 +17,17 @@ type Indexer interface {
internal.Indexer
Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error
Delete(ctx context.Context, repoID int64) error
- Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error)
+ Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error)
+}
+
+type SearchOptions struct {
+ RepoIDs []int64
+ Keyword string
+ Language string
+
+ IsKeywordFuzzy bool
+
+ db.Paginator
}
// NewDummyIndexer returns a dummy indexer
@@ -38,6 +49,6 @@ func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error {
return fmt.Errorf("indexer is not ready")
}
-func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) {
+func (d *dummyIndexer) Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) {
return 0, nil, nil, fmt.Errorf("indexer is not ready")
}
diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go
index 89a62a8d3e..51c7595cf8 100644
--- a/modules/indexer/code/search.go
+++ b/modules/indexer/code/search.go
@@ -32,6 +32,8 @@ type ResultLine struct {
type SearchResultLanguages = internal.SearchResultLanguages
+type SearchOptions = internal.SearchOptions
+
func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) {
startIndex := selectionStartIndex
numLinesBefore := 0
@@ -125,12 +127,12 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
// PerformSearch perform a search on a repository
// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2
-func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int, []*Result, []*internal.SearchResultLanguages, error) {
- if len(keyword) == 0 {
+func PerformSearch(ctx context.Context, opts *SearchOptions) (int, []*Result, []*SearchResultLanguages, error) {
+ if opts == nil || len(opts.Keyword) == 0 {
return 0, nil, nil, nil
}
- total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isFuzzy)
+ total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, opts)
if err != nil {
return 0, nil, nil, err
}