diff options
Diffstat (limited to 'modules/indexer/code')
-rw-r--r-- | modules/indexer/code/bleve/bleve.go | 7 | ||||
-rw-r--r-- | modules/indexer/code/indexer.go | 3 | ||||
-rw-r--r-- | modules/indexer/code/indexer_test.go | 74 |
3 files changed, 48 insertions, 36 deletions
diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 772317fa59..395c7a0d31 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -28,7 +28,6 @@ import ( "github.com/blevesearch/bleve/v2" analyzer_custom "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" analyzer_keyword "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword" - "github.com/blevesearch/bleve/v2/analysis/token/camelcase" "github.com/blevesearch/bleve/v2/analysis/token/lowercase" "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" "github.com/blevesearch/bleve/v2/analysis/tokenizer/letter" @@ -70,7 +69,7 @@ const ( filenameIndexerAnalyzer = "filenameIndexerAnalyzer" filenameIndexerTokenizer = "filenameIndexerTokenizer" repoIndexerDocType = "repoIndexerDocType" - repoIndexerLatestVersion = 8 + repoIndexerLatestVersion = 9 ) // generateBleveIndexMapping generates a bleve index mapping for the repo indexer @@ -107,7 +106,7 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) { "type": analyzer_custom.Name, "char_filters": []string{}, "tokenizer": letter.Name, - "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, + "token_filters": []string{unicodeNormalizeName, lowercase.Name}, }); err != nil { return nil, err } @@ -266,7 +265,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int pathQuery.FieldVal = "Filename" pathQuery.SetBoost(10) - contentQuery := bleve.NewMatchQuery(opts.Keyword) + contentQuery := bleve.NewMatchPhraseQuery(opts.Keyword) contentQuery.FieldVal = "Content" if opts.IsKeywordFuzzy { diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index c1ab26569c..728b37fab6 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -123,13 +123,12 @@ func Init() { for _, indexerData := range items { log.Trace("IndexerData Process Repo: %d", indexerData.RepoID) if err := index(ctx, indexer, indexerData.RepoID); err != nil { - unhandled = append(unhandled, indexerData) if !setting.IsInTesting { log.Error("Codes indexer handler: index error for repo %v: %v", indexerData.RepoID, err) } } } - return unhandled + return nil // do not re-queue the failed items, otherwise some broken repo will block the queue } indexerQueue = queue.CreateUniqueQueue(ctx, "code_indexer", handler) diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index d04088531a..48afdd1a71 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -15,6 +15,8 @@ import ( "code.gitea.io/gitea/modules/indexer/code/bleve" "code.gitea.io/gitea/modules/indexer/code/elasticsearch" "code.gitea.io/gitea/modules/indexer/code/internal" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" _ "code.gitea.io/gitea/models" _ "code.gitea.io/gitea/models/actions" @@ -163,35 +165,6 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, }, }, - // Search for matches on the contents of files within the repo '62'. - // This scenario yields two results (both are based on contents, the first one is an exact match where as the second is a 'fuzzy' one) - { - RepoIDs: []int64{62}, - Keyword: "This is not cheese", - Langs: 1, - Results: []codeSearchResult{ - { - Filename: "potato/ham.md", - Content: "This is not cheese", - }, - { - Filename: "ham.md", - Content: "This is also not cheese", - }, - }, - }, - // Search for matches on the contents of files regardless of case. - { - RepoIDs: nil, - Keyword: "dESCRIPTION", - Langs: 1, - Results: []codeSearchResult{ - { - Filename: "README.md", - Content: "# repo1\n\nDescription for repo1", - }, - }, - }, // Search for an exact match on the filename within the repo '62' (case insenstive). // This scenario yields a single result (the file avocado.md on the repo '62') { @@ -231,6 +204,47 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, } + if name == "elastic_search" { + // Additional scenarios for elastic_search only + additional := []struct { + RepoIDs []int64 + Keyword string + Langs int + Results []codeSearchResult + }{ + // Search for matches on the contents of files within the repo '62'. + // This scenario yields two results (both are based on contents, the first one is an exact match where as the second is a 'fuzzy' one) + { + RepoIDs: []int64{62}, + Keyword: "This is not cheese", + Langs: 1, + Results: []codeSearchResult{ + { + Filename: "potato/ham.md", + Content: "This is not cheese", + }, + { + Filename: "ham.md", + Content: "This is also not cheese", + }, + }, + }, + // Search for matches on the contents of files regardless of case. + { + RepoIDs: nil, + Keyword: "dESCRIPTION", + Langs: 1, + Results: []codeSearchResult{ + { + Filename: "README.md", + Content: "# repo1\n\nDescription for repo1", + }, + }, + }, + } + keywords = append(keywords, additional...) + } + for _, kw := range keywords { t.Run(kw.Keyword, func(t *testing.T) { total, res, langs, err := indexer.Search(context.TODO(), &internal.SearchOptions{ @@ -279,7 +293,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { func TestBleveIndexAndSearch(t *testing.T) { unittest.PrepareTestEnv(t) - + defer test.MockVariableValue(&setting.Indexer.TypeBleveMaxFuzzniess, 2)() dir := t.TempDir() idx := bleve.NewIndexer(dir) |