diff options
author | 6543 <6543@obermui.de> | 2024-03-23 16:45:13 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-23 16:45:13 +0100 |
commit | b9c57fb78e8e0d80d786d8e1da433b6c7ebf2f1c (patch) | |
tree | 93a9068e327499c1d92ee53066892a5d27bc6b67 /modules | |
parent | 1cdc6c3a4ea28396788b2697f9cf257df161ff9a (diff) | |
download | gitea-b9c57fb78e8e0d80d786d8e1da433b6c7ebf2f1c.tar.gz gitea-b9c57fb78e8e0d80d786d8e1da433b6c7ebf2f1c.zip |
Determine fuzziness of bleve indexer by keyword length (#29706)
also bleve did match on fuzzy search and the other way around. this also fix that bug.
Diffstat (limited to 'modules')
-rw-r--r-- | modules/indexer/code/bleve/bleve.go | 15 | ||||
-rw-r--r-- | modules/indexer/internal/bleve/query.go | 10 | ||||
-rw-r--r-- | modules/indexer/issues/bleve/bleve.go | 25 |
3 files changed, 22 insertions, 28 deletions
diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index d7f735e957..c607d780ef 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -39,6 +39,8 @@ import ( const ( unicodeNormalizeName = "unicodeNormalize" maxBatchSize = 16 + // fuzzyDenominator determines the levenshtein distance per each character of a keyword + fuzzyDenominator = 4 ) func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { @@ -239,15 +241,12 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int keywordQuery query.Query ) + phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword) + phraseQuery.FieldVal = "Content" + phraseQuery.Analyzer = repoIndexerAnalyzer + keywordQuery = phraseQuery if opts.IsKeywordFuzzy { - phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword) - phraseQuery.FieldVal = "Content" - phraseQuery.Analyzer = repoIndexerAnalyzer - keywordQuery = phraseQuery - } else { - prefixQuery := bleve.NewPrefixQuery(opts.Keyword) - prefixQuery.FieldVal = "Content" - keywordQuery = prefixQuery + phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator } if len(opts.RepoIDs) > 0 { diff --git a/modules/indexer/internal/bleve/query.go b/modules/indexer/internal/bleve/query.go index b96875343e..21422b281c 100644 --- a/modules/indexer/internal/bleve/query.go +++ b/modules/indexer/internal/bleve/query.go @@ -20,17 +20,11 @@ func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery { } // MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer -func MatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery { +func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchPhraseQuery { q := bleve.NewMatchPhraseQuery(matchPhrase) q.FieldVal = field q.Analyzer = analyzer - return q -} - -// PrefixQuery generates a match prefix query for the given prefix and field -func PrefixQuery(matchPrefix, field string) *query.PrefixQuery { - q := bleve.NewPrefixQuery(matchPrefix) - q.FieldVal = field + q.Fuzziness = fuzziness return q } diff --git a/modules/indexer/issues/bleve/bleve.go b/modules/indexer/issues/bleve/bleve.go index 927ad58cd4..1f54be721b 100644 --- a/modules/indexer/issues/bleve/bleve.go +++ b/modules/indexer/issues/bleve/bleve.go @@ -35,7 +35,11 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { }) } -const maxBatchSize = 16 +const ( + maxBatchSize = 16 + // fuzzyDenominator determines the levenshtein distance per each character of a keyword + fuzzyDenominator = 4 +) // IndexerData an update to the issue indexer type IndexerData internal.IndexerData @@ -156,19 +160,16 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( var queries []query.Query if options.Keyword != "" { + fuzziness := 0 if options.IsFuzzyKeyword { - queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ - inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer), - inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer), - inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer), - }...)) - } else { - queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ - inner_bleve.PrefixQuery(options.Keyword, "title"), - inner_bleve.PrefixQuery(options.Keyword, "content"), - inner_bleve.PrefixQuery(options.Keyword, "comments"), - }...)) + fuzziness = len(options.Keyword) / fuzzyDenominator } + + queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ + inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness), + inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness), + inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness), + }...)) } if len(options.RepoIDs) > 0 || options.AllPublic { |