aboutsummaryrefslogtreecommitdiffstats
path: root/modules/indexer/code/bleve/bleve.go
diff options
context:
space:
mode:
Diffstat (limited to 'modules/indexer/code/bleve/bleve.go')
-rw-r--r--modules/indexer/code/bleve/bleve.go48
1 files changed, 31 insertions, 17 deletions
diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go
index 772317fa59..70f0995a01 100644
--- a/modules/indexer/code/bleve/bleve.go
+++ b/modules/indexer/code/bleve/bleve.go
@@ -16,7 +16,7 @@ import (
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
- "code.gitea.io/gitea/modules/gitrepo"
+ "code.gitea.io/gitea/modules/indexer"
path_filter "code.gitea.io/gitea/modules/indexer/code/bleve/token/path"
"code.gitea.io/gitea/modules/indexer/code/internal"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
@@ -24,11 +24,11 @@ import (
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"
+ "code.gitea.io/gitea/modules/util"
"github.com/blevesearch/bleve/v2"
analyzer_custom "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
analyzer_keyword "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
- "github.com/blevesearch/bleve/v2/analysis/token/camelcase"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/letter"
@@ -70,7 +70,7 @@ const (
filenameIndexerAnalyzer = "filenameIndexerAnalyzer"
filenameIndexerTokenizer = "filenameIndexerTokenizer"
repoIndexerDocType = "repoIndexerDocType"
- repoIndexerLatestVersion = 8
+ repoIndexerLatestVersion = 9
)
// generateBleveIndexMapping generates a bleve index mapping for the repo indexer
@@ -107,7 +107,7 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) {
"type": analyzer_custom.Name,
"char_filters": []string{},
"tokenizer": letter.Name,
- "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
+ "token_filters": []string{unicodeNormalizeName, lowercase.Name},
}); err != nil {
return nil, err
}
@@ -136,6 +136,10 @@ type Indexer struct {
indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much
}
+func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
+ return indexer.SearchModesExactWords()
+}
+
// NewIndexer creates a new bleve local indexer
func NewIndexer(indexDir string) *Indexer {
inner := inner_bleve.NewIndexer(indexDir, repoIndexerLatestVersion, generateBleveIndexMapping)
@@ -158,7 +162,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
var err error
if !update.Sized {
var stdout string
- stdout, _, err = git.NewCommand(ctx, "cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
+ stdout, _, err = git.NewCommand("cat-file", "-s").AddDynamicArguments(update.BlobSha).RunStdString(ctx, &git.RunOpts{Dir: repo.RepoPath()})
if err != nil {
return err
}
@@ -185,7 +189,8 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
return err
} else if !typesniffer.DetectContentType(fileContents).IsText() {
// FIXME: UTF-16 files will probably fail here
- return nil
+ // Even if the file is not recognized as a "text file", we could still put its name into the indexers to make the filename become searchable, while leave the content to empty.
+ fileContents = nil
}
if _, err = batchReader.Discard(1); err != nil {
@@ -211,12 +216,7 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, batch
func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error {
batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
if len(changes.Updates) > 0 {
- r, err := gitrepo.OpenRepository(ctx, repo)
- if err != nil {
- return err
- }
- defer r.Close()
- gitBatch, err := r.NewBatch(ctx)
+ gitBatch, err := git.NewBatch(ctx, repo.RepoPath())
if err != nil {
return err
}
@@ -260,17 +260,31 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
var (
indexerQuery query.Query
keywordQuery query.Query
+ contentQuery query.Query
)
pathQuery := bleve.NewPrefixQuery(strings.ToLower(opts.Keyword))
pathQuery.FieldVal = "Filename"
pathQuery.SetBoost(10)
- contentQuery := bleve.NewMatchQuery(opts.Keyword)
- contentQuery.FieldVal = "Content"
-
- if opts.IsKeywordFuzzy {
- contentQuery.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
+ searchMode := util.IfZero(opts.SearchMode, b.SupportedSearchModes()[0].ModeValue)
+ if searchMode == indexer.SearchModeExact {
+ // 1.21 used NewPrefixQuery, but it seems not working well, and later releases changed to NewMatchPhraseQuery
+ q := bleve.NewMatchPhraseQuery(opts.Keyword)
+ q.Analyzer = repoIndexerAnalyzer
+ q.FieldVal = "Content"
+ contentQuery = q
+ } else /* words */ {
+ q := bleve.NewMatchQuery(opts.Keyword)
+ q.FieldVal = "Content"
+ q.Analyzer = repoIndexerAnalyzer
+ if searchMode == indexer.SearchModeFuzzy {
+ // this logic doesn't seem right, it is only used to pass the test-case `Keyword: "dESCRIPTION"`, which doesn't seem to be a real-life use-case.
+ q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
+ } else {
+ q.Operator = query.MatchQueryOperatorAnd
+ }
+ contentQuery = q
}
keywordQuery = bleve.NewDisjunctionQuery(contentQuery, pathQuery)