aboutsummaryrefslogtreecommitdiffstats
path: root/modules
diff options
context:
space:
mode:
author6543 <6543@obermui.de>2024-03-23 16:45:13 +0100
committerGitHub <noreply@github.com>2024-03-23 16:45:13 +0100
commitb9c57fb78e8e0d80d786d8e1da433b6c7ebf2f1c (patch)
tree93a9068e327499c1d92ee53066892a5d27bc6b67 /modules
parent1cdc6c3a4ea28396788b2697f9cf257df161ff9a (diff)
downloadgitea-b9c57fb78e8e0d80d786d8e1da433b6c7ebf2f1c.tar.gz
gitea-b9c57fb78e8e0d80d786d8e1da433b6c7ebf2f1c.zip
Determine fuzziness of bleve indexer by keyword length (#29706)
Previously, bleve performed a non-fuzzy match when a fuzzy search was requested, and vice versa; this commit also fixes that bug.
Diffstat (limited to 'modules')
-rw-r--r--modules/indexer/code/bleve/bleve.go15
-rw-r--r--modules/indexer/internal/bleve/query.go10
-rw-r--r--modules/indexer/issues/bleve/bleve.go25
3 files changed, 22 insertions, 28 deletions
diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go
index d7f735e957..c607d780ef 100644
--- a/modules/indexer/code/bleve/bleve.go
+++ b/modules/indexer/code/bleve/bleve.go
@@ -39,6 +39,8 @@ import (
const (
unicodeNormalizeName = "unicodeNormalize"
maxBatchSize = 16
+ // fuzzyDenominator is the divisor applied to a keyword's byte length to derive the allowed Levenshtein distance (one edit per fuzzyDenominator bytes)
+ fuzzyDenominator = 4
)
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
@@ -239,15 +241,12 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
keywordQuery query.Query
)
+ phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
+ phraseQuery.FieldVal = "Content"
+ phraseQuery.Analyzer = repoIndexerAnalyzer
+ keywordQuery = phraseQuery
if opts.IsKeywordFuzzy {
- phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
- phraseQuery.FieldVal = "Content"
- phraseQuery.Analyzer = repoIndexerAnalyzer
- keywordQuery = phraseQuery
- } else {
- prefixQuery := bleve.NewPrefixQuery(opts.Keyword)
- prefixQuery.FieldVal = "Content"
- keywordQuery = prefixQuery
+ phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator
}
if len(opts.RepoIDs) > 0 {
diff --git a/modules/indexer/internal/bleve/query.go b/modules/indexer/internal/bleve/query.go
index b96875343e..21422b281c 100644
--- a/modules/indexer/internal/bleve/query.go
+++ b/modules/indexer/internal/bleve/query.go
@@ -20,17 +20,11 @@ func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
}
// MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer
-func MatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery {
+func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchPhraseQuery {
q := bleve.NewMatchPhraseQuery(matchPhrase)
q.FieldVal = field
q.Analyzer = analyzer
- return q
-}
-
-// PrefixQuery generates a match prefix query for the given prefix and field
-func PrefixQuery(matchPrefix, field string) *query.PrefixQuery {
- q := bleve.NewPrefixQuery(matchPrefix)
- q.FieldVal = field
+ q.Fuzziness = fuzziness
return q
}
diff --git a/modules/indexer/issues/bleve/bleve.go b/modules/indexer/issues/bleve/bleve.go
index 927ad58cd4..1f54be721b 100644
--- a/modules/indexer/issues/bleve/bleve.go
+++ b/modules/indexer/issues/bleve/bleve.go
@@ -35,7 +35,11 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
})
}
-const maxBatchSize = 16
+const (
+ maxBatchSize = 16
+ // fuzzyDenominator is the divisor applied to a keyword's byte length to derive the allowed Levenshtein distance (one edit per fuzzyDenominator bytes)
+ fuzzyDenominator = 4
+)
// IndexerData an update to the issue indexer
type IndexerData internal.IndexerData
@@ -156,19 +160,16 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
var queries []query.Query
if options.Keyword != "" {
+ fuzziness := 0
if options.IsFuzzyKeyword {
- queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
- inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer),
- inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer),
- inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer),
- }...))
- } else {
- queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
- inner_bleve.PrefixQuery(options.Keyword, "title"),
- inner_bleve.PrefixQuery(options.Keyword, "content"),
- inner_bleve.PrefixQuery(options.Keyword, "comments"),
- }...))
+ fuzziness = len(options.Keyword) / fuzzyDenominator
}
+
+ queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
+ inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
+ inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
+ inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
+ }...))
}
if len(options.RepoIDs) > 0 || options.AllPublic {