aboutsummaryrefslogtreecommitdiffstats
path: root/modules/indexer/code
diff options
context:
space:
mode:
Diffstat (limited to 'modules/indexer/code')
-rw-r--r-- modules/indexer/code/bleve/bleve.go 7
-rw-r--r-- modules/indexer/code/indexer.go 3
-rw-r--r-- modules/indexer/code/indexer_test.go 74
3 files changed, 48 insertions, 36 deletions
diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go
index 772317fa59..395c7a0d31 100644
--- a/modules/indexer/code/bleve/bleve.go
+++ b/modules/indexer/code/bleve/bleve.go
@@ -28,7 +28,6 @@ import (
"github.com/blevesearch/bleve/v2"
analyzer_custom "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
analyzer_keyword "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
- "github.com/blevesearch/bleve/v2/analysis/token/camelcase"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/letter"
@@ -70,7 +69,7 @@ const (
filenameIndexerAnalyzer = "filenameIndexerAnalyzer"
filenameIndexerTokenizer = "filenameIndexerTokenizer"
repoIndexerDocType = "repoIndexerDocType"
- repoIndexerLatestVersion = 8
+ repoIndexerLatestVersion = 9
)
// generateBleveIndexMapping generates a bleve index mapping for the repo indexer
@@ -107,7 +106,7 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) {
"type": analyzer_custom.Name,
"char_filters": []string{},
"tokenizer": letter.Name,
- "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
+ "token_filters": []string{unicodeNormalizeName, lowercase.Name},
}); err != nil {
return nil, err
}
@@ -266,7 +265,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
pathQuery.FieldVal = "Filename"
pathQuery.SetBoost(10)
- contentQuery := bleve.NewMatchQuery(opts.Keyword)
+ contentQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
contentQuery.FieldVal = "Content"
if opts.IsKeywordFuzzy {
diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go
index c1ab26569c..728b37fab6 100644
--- a/modules/indexer/code/indexer.go
+++ b/modules/indexer/code/indexer.go
@@ -123,13 +123,12 @@ func Init() {
for _, indexerData := range items {
log.Trace("IndexerData Process Repo: %d", indexerData.RepoID)
if err := index(ctx, indexer, indexerData.RepoID); err != nil {
- unhandled = append(unhandled, indexerData)
if !setting.IsInTesting {
log.Error("Codes indexer handler: index error for repo %v: %v", indexerData.RepoID, err)
}
}
}
- return unhandled
+ return nil // do not re-queue the failed items, otherwise some broken repo will block the queue
}
indexerQueue = queue.CreateUniqueQueue(ctx, "code_indexer", handler)
diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go
index d04088531a..48afdd1a71 100644
--- a/modules/indexer/code/indexer_test.go
+++ b/modules/indexer/code/indexer_test.go
@@ -15,6 +15,8 @@ import (
"code.gitea.io/gitea/modules/indexer/code/bleve"
"code.gitea.io/gitea/modules/indexer/code/elasticsearch"
"code.gitea.io/gitea/modules/indexer/code/internal"
+ "code.gitea.io/gitea/modules/setting"
+ "code.gitea.io/gitea/modules/test"
_ "code.gitea.io/gitea/models"
_ "code.gitea.io/gitea/models/actions"
@@ -163,35 +165,6 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
},
},
},
- // Search for matches on the contents of files within the repo '62'.
- // This scenario yields two results (both are based on contents, the first one is an exact match where as the second is a 'fuzzy' one)
- {
- RepoIDs: []int64{62},
- Keyword: "This is not cheese",
- Langs: 1,
- Results: []codeSearchResult{
- {
- Filename: "potato/ham.md",
- Content: "This is not cheese",
- },
- {
- Filename: "ham.md",
- Content: "This is also not cheese",
- },
- },
- },
- // Search for matches on the contents of files regardless of case.
- {
- RepoIDs: nil,
- Keyword: "dESCRIPTION",
- Langs: 1,
- Results: []codeSearchResult{
- {
- Filename: "README.md",
- Content: "# repo1\n\nDescription for repo1",
- },
- },
- },
// Search for an exact match on the filename within the repo '62' (case insenstive).
// This scenario yields a single result (the file avocado.md on the repo '62')
{
@@ -231,6 +204,47 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
},
}
+ if name == "elastic_search" {
+ // Additional scenarios for elastic_search only
+ additional := []struct {
+ RepoIDs []int64
+ Keyword string
+ Langs int
+ Results []codeSearchResult
+ }{
+ // Search for matches on the contents of files within the repo '62'.
+ // This scenario yields two results (both are based on contents, the first one is an exact match whereas the second is a 'fuzzy' one)
+ {
+ RepoIDs: []int64{62},
+ Keyword: "This is not cheese",
+ Langs: 1,
+ Results: []codeSearchResult{
+ {
+ Filename: "potato/ham.md",
+ Content: "This is not cheese",
+ },
+ {
+ Filename: "ham.md",
+ Content: "This is also not cheese",
+ },
+ },
+ },
+ // Search for matches on the contents of files regardless of case.
+ {
+ RepoIDs: nil,
+ Keyword: "dESCRIPTION",
+ Langs: 1,
+ Results: []codeSearchResult{
+ {
+ Filename: "README.md",
+ Content: "# repo1\n\nDescription for repo1",
+ },
+ },
+ },
+ }
+ keywords = append(keywords, additional...)
+ }
+
for _, kw := range keywords {
t.Run(kw.Keyword, func(t *testing.T) {
total, res, langs, err := indexer.Search(context.TODO(), &internal.SearchOptions{
@@ -279,7 +293,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
func TestBleveIndexAndSearch(t *testing.T) {
unittest.PrepareTestEnv(t)
-
+ defer test.MockVariableValue(&setting.Indexer.TypeBleveMaxFuzzniess, 2)()
dir := t.TempDir()
idx := bleve.NewIndexer(dir)