From b9c57fb78e8e0d80d786d8e1da433b6c7ebf2f1c Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Sat, 23 Mar 2024 16:45:13 +0100 Subject: Determine fuzziness of bleve indexer by keyword length (#29706) also bleve did match on fuzzy search and the other way around. this also fix that bug. --- modules/indexer/code/bleve/bleve.go | 15 +++++++-------- modules/indexer/internal/bleve/query.go | 10 ++-------- modules/indexer/issues/bleve/bleve.go | 25 +++++++++++++------------ 3 files changed, 22 insertions(+), 28 deletions(-) (limited to 'modules') diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index d7f735e957..c607d780ef 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -39,6 +39,8 @@ import ( const ( unicodeNormalizeName = "unicodeNormalize" maxBatchSize = 16 + // fuzzyDenominator determines the levenshtein distance per each character of a keyword + fuzzyDenominator = 4 ) func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { @@ -239,15 +241,12 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int keywordQuery query.Query ) + phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword) + phraseQuery.FieldVal = "Content" + phraseQuery.Analyzer = repoIndexerAnalyzer + keywordQuery = phraseQuery if opts.IsKeywordFuzzy { - phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword) - phraseQuery.FieldVal = "Content" - phraseQuery.Analyzer = repoIndexerAnalyzer - keywordQuery = phraseQuery - } else { - prefixQuery := bleve.NewPrefixQuery(opts.Keyword) - prefixQuery.FieldVal = "Content" - keywordQuery = prefixQuery + phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator } if len(opts.RepoIDs) > 0 { diff --git a/modules/indexer/internal/bleve/query.go b/modules/indexer/internal/bleve/query.go index b96875343e..21422b281c 100644 --- a/modules/indexer/internal/bleve/query.go +++ b/modules/indexer/internal/bleve/query.go @@ -20,17 +20,11 @@ func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery { } // MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer -func MatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery { +func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchPhraseQuery { q := bleve.NewMatchPhraseQuery(matchPhrase) q.FieldVal = field q.Analyzer = analyzer - return q -} - -// PrefixQuery generates a match prefix query for the given prefix and field -func PrefixQuery(matchPrefix, field string) *query.PrefixQuery { - q := bleve.NewPrefixQuery(matchPrefix) - q.FieldVal = field + q.Fuzziness = fuzziness return q } diff --git a/modules/indexer/issues/bleve/bleve.go b/modules/indexer/issues/bleve/bleve.go index 927ad58cd4..1f54be721b 100644 --- a/modules/indexer/issues/bleve/bleve.go +++ b/modules/indexer/issues/bleve/bleve.go @@ -35,7 +35,11 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { }) } -const maxBatchSize = 16 +const ( + maxBatchSize = 16 + // fuzzyDenominator determines the levenshtein distance per each character of a keyword + fuzzyDenominator = 4 +) // IndexerData an update to the issue indexer type IndexerData internal.IndexerData @@ -156,19 +160,16 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( var queries []query.Query if options.Keyword != "" { + fuzziness := 0 if options.IsFuzzyKeyword { - queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ - inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer), - inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer), - inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer), - }...)) - } else { - queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ - inner_bleve.PrefixQuery(options.Keyword, "title"), - inner_bleve.PrefixQuery(options.Keyword, "content"), - inner_bleve.PrefixQuery(options.Keyword, "comments"), - }...)) + fuzziness = len(options.Keyword) / fuzzyDenominator } + + queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ + inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness), + inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness), + inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness), + }...)) } if len(options.RepoIDs) > 0 || options.AllPublic { -- cgit v1.2.3