Also, bleve performed a match search when a fuzzy search was requested, and the other way around. This change fixes that bug as well.
tags/v1.22.0-rc0
@@ -39,6 +39,8 @@ import ( | |||
const ( | |||
unicodeNormalizeName = "unicodeNormalize" | |||
maxBatchSize = 16 | |||
// fuzzyDenominator is the divisor applied to the keyword length to obtain the allowed Levenshtein distance of a fuzzy search (one edit per fuzzyDenominator characters) | |||
fuzzyDenominator = 4 | |||
) | |||
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { | |||
@@ -239,15 +241,12 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int | |||
keywordQuery query.Query | |||
) | |||
phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword) | |||
phraseQuery.FieldVal = "Content" | |||
phraseQuery.Analyzer = repoIndexerAnalyzer | |||
keywordQuery = phraseQuery | |||
if opts.IsKeywordFuzzy { | |||
phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword) | |||
phraseQuery.FieldVal = "Content" | |||
phraseQuery.Analyzer = repoIndexerAnalyzer | |||
keywordQuery = phraseQuery | |||
} else { | |||
prefixQuery := bleve.NewPrefixQuery(opts.Keyword) | |||
prefixQuery.FieldVal = "Content" | |||
keywordQuery = prefixQuery | |||
phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator | |||
} | |||
if len(opts.RepoIDs) > 0 { |
@@ -20,17 +20,11 @@ func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery { | |||
} | |||
// MatchPhraseQuery generates a match phrase query for the given phrase, field, analyzer and fuzziness | |||
func MatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery { | |||
func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchPhraseQuery { | |||
q := bleve.NewMatchPhraseQuery(matchPhrase) | |||
q.FieldVal = field | |||
q.Analyzer = analyzer | |||
return q | |||
} | |||
// PrefixQuery generates a match prefix query for the given prefix and field | |||
func PrefixQuery(matchPrefix, field string) *query.PrefixQuery { | |||
q := bleve.NewPrefixQuery(matchPrefix) | |||
q.FieldVal = field | |||
q.Fuzziness = fuzziness | |||
return q | |||
} | |||
@@ -35,7 +35,11 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { | |||
}) | |||
} | |||
const maxBatchSize = 16 | |||
const ( | |||
maxBatchSize = 16 | |||
// fuzzyDenominator is the divisor applied to the keyword length to obtain the allowed Levenshtein distance of a fuzzy search (one edit per fuzzyDenominator characters) | |||
fuzzyDenominator = 4 | |||
) | |||
// IndexerData an update to the issue indexer | |||
type IndexerData internal.IndexerData | |||
@@ -156,19 +160,16 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( | |||
var queries []query.Query | |||
if options.Keyword != "" { | |||
fuzziness := 0 | |||
if options.IsFuzzyKeyword { | |||
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ | |||
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer), | |||
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer), | |||
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer), | |||
}...)) | |||
} else { | |||
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ | |||
inner_bleve.PrefixQuery(options.Keyword, "title"), | |||
inner_bleve.PrefixQuery(options.Keyword, "content"), | |||
inner_bleve.PrefixQuery(options.Keyword, "comments"), | |||
}...)) | |||
fuzziness = len(options.Keyword) / fuzzyDenominator | |||
} | |||
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ | |||
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness), | |||
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness), | |||
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness), | |||
}...)) | |||
} | |||
if len(options.RepoIDs) > 0 || options.AllPublic { |
@@ -32,7 +32,7 @@ func TestSearchRepo(t *testing.T) { | |||
repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "repo1") | |||
assert.NoError(t, err) | |||
executeIndexer(t, repo, code_indexer.UpdateRepoIndexer) | |||
code_indexer.UpdateRepoIndexer(repo) | |||
testSearch(t, "/user2/repo1/search?q=Description&page=1", []string{"README.md"}) | |||
@@ -42,12 +42,14 @@ func TestSearchRepo(t *testing.T) { | |||
repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob") | |||
assert.NoError(t, err) | |||
executeIndexer(t, repo, code_indexer.UpdateRepoIndexer) | |||
code_indexer.UpdateRepoIndexer(repo) | |||
testSearch(t, "/user2/glob/search?q=loren&page=1", []string{"a.txt"}) | |||
testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt"}) | |||
testSearch(t, "/user2/glob/search?q=file4&page=1", []string{}) | |||
testSearch(t, "/user2/glob/search?q=file5&page=1", []string{}) | |||
testSearch(t, "/user2/glob/search?q=loren&page=1&t=match", []string{"a.txt"}) | |||
testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt", "a.txt"}) | |||
testSearch(t, "/user2/glob/search?q=file3&page=1&t=match", []string{"x/b.txt", "a.txt"}) | |||
testSearch(t, "/user2/glob/search?q=file4&page=1&t=match", []string{"x/b.txt", "a.txt"}) | |||
testSearch(t, "/user2/glob/search?q=file5&page=1&t=match", []string{"x/b.txt", "a.txt"}) | |||
} | |||
func testSearch(t *testing.T, url string, expected []string) { | |||
@@ -57,7 +59,3 @@ func testSearch(t *testing.T, url string, expected []string) { | |||
filenames := resultFilenames(t, NewHTMLParser(t, resp.Body)) | |||
assert.EqualValues(t, expected, filenames) | |||
} | |||
func executeIndexer(t *testing.T, repo *repo_model.Repository, op func(*repo_model.Repository)) { | |||
op(repo) | |||
} |