// Search searches for files in the specified repo.
// Returns the matching file-paths
-func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
+func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
var (
indexerQuery query.Query
keywordQuery query.Query
)
- if isMatch {
- prefixQuery := bleve.NewPrefixQuery(keyword)
- prefixQuery.FieldVal = "Content"
- keywordQuery = prefixQuery
- } else {
+ if isFuzzy {
phraseQuery := bleve.NewMatchPhraseQuery(keyword)
phraseQuery.FieldVal = "Content"
phraseQuery.Analyzer = repoIndexerAnalyzer
keywordQuery = phraseQuery
+ } else {
+ prefixQuery := bleve.NewPrefixQuery(keyword)
+ prefixQuery.FieldVal = "Content"
+ keywordQuery = prefixQuery
}
if len(repoIDs) > 0 {
}
// Search searches for codes and language stats by given conditions.
-func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
- searchType := esMultiMatchTypeBestFields
- if isMatch {
- searchType = esMultiMatchTypePhrasePrefix
+func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
+ searchType := esMultiMatchTypePhrasePrefix
+ if isFuzzy {
+ searchType = esMultiMatchTypeBestFields
}
kwQuery := elastic.NewMultiMatchQuery(keyword, "content").Type(searchType)
for _, kw := range keywords {
t.Run(kw.Keyword, func(t *testing.T) {
- total, res, langs, err := indexer.Search(context.TODO(), kw.RepoIDs, "", kw.Keyword, 1, 10, false)
+ total, res, langs, err := indexer.Search(context.TODO(), kw.RepoIDs, "", kw.Keyword, 1, 10, true)
assert.NoError(t, err)
assert.Len(t, kw.IDs, int(total))
assert.Len(t, langs, kw.Langs)
internal.Indexer
Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error
Delete(ctx context.Context, repoID int64) error
- Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error)
+ Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error)
}
// NewDummyIndexer returns a dummy indexer
return fmt.Errorf("indexer is not ready")
}
-func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) {
+func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) {
return 0, nil, nil, fmt.Errorf("indexer is not ready")
}
}
// PerformSearch perform a search on a repository
-func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int, []*Result, []*internal.SearchResultLanguages, error) {
+// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2
+func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int, []*Result, []*internal.SearchResultLanguages, error) {
if len(keyword) == 0 {
return 0, nil, nil, nil
}
- total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isMatch)
+ total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isFuzzy)
if err != nil {
return 0, nil, nil, err
}
return q
}
+// PrefixQuery generates a match prefix query for the given prefix and field
+func PrefixQuery(matchPrefix, field string) *query.PrefixQuery {
+ q := bleve.NewPrefixQuery(matchPrefix)
+ q.FieldVal = field
+ return q
+}
+
// BoolFieldQuery generates a bool field query for the given value and field
func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery {
q := bleve.NewBoolFieldQuery(value)
var queries []query.Query
if options.Keyword != "" {
- keywordQueries := []query.Query{
- inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer),
- inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer),
- inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer),
+ if options.IsFuzzyKeyword {
+ queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
+ inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer),
+ inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer),
+ inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer),
+ }...))
+ } else {
+ queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
+ inner_bleve.PrefixQuery(options.Keyword, "title"),
+ inner_bleve.PrefixQuery(options.Keyword, "content"),
+ inner_bleve.PrefixQuery(options.Keyword, "comments"),
+ }...))
}
- queries = append(queries, bleve.NewDisjunctionQuery(keywordQueries...))
}
if len(options.RepoIDs) > 0 || options.AllPublic {
const (
issueIndexerLatestVersion = 1
+ // multi-match-types, currently only 2 types are used
+ // Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
+ esMultiMatchTypeBestFields = "best_fields"
+ esMultiMatchTypePhrasePrefix = "phrase_prefix"
)
var _ internal.Indexer = &Indexer{}
query := elastic.NewBoolQuery()
if options.Keyword != "" {
- query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments"))
+
+ searchType := esMultiMatchTypePhrasePrefix
+ if options.IsFuzzyKeyword {
+ searchType = esMultiMatchTypeBestFields
+ }
+
+ query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(searchType))
}
if len(options.RepoIDs) > 0 {
type SearchOptions struct {
Keyword string // keyword to search
+ IsFuzzyKeyword bool // if false the levenshtein distance is 0
+
RepoIDs []int64 // repository IDs which the issues belong to
AllPublic bool // if include all public repositories
import (
"context"
+ "errors"
"strconv"
"strings"
)
const (
- issueIndexerLatestVersion = 2
+ issueIndexerLatestVersion = 3
// TODO: make this configurable if necessary
maxTotalHits = 10000
)
+// ErrMalformedResponse is never expected as we initialize the indexer ourself and so define the types.
+var ErrMalformedResponse = errors.New("meilisearch returned unexpected malformed content")
+
var _ internal.Indexer = &Indexer{}
// Indexer implements Indexer interface
},
DisplayedAttributes: []string{
"id",
+ "title",
+ "content",
+ "comments",
},
FilterableAttributes: []string{
"repo_id",
return nil, err
}
- hits := make([]internal.Match, 0, len(searchRes.Hits))
- for _, hit := range searchRes.Hits {
- hits = append(hits, internal.Match{
- ID: int64(hit.(map[string]any)["id"].(float64)),
- })
+ hits, err := nonFuzzyWorkaround(searchRes, options.Keyword, options.IsFuzzyKeyword)
+ if err != nil {
+ return nil, err
}
return &internal.SearchResult{
}
return field + ":asc"
}
+
+// nonFuzzyWorkaround is needed as meilisearch does not have an exact search
+// and you can only change "typo tolerance" per index. So we have to post-filter the results
+// https://www.meilisearch.com/docs/learn/configuration/typo_tolerance#configuring-typo-tolerance
+// TODO: remove once https://github.com/orgs/meilisearch/discussions/377 is addressed
+func nonFuzzyWorkaround(searchRes *meilisearch.SearchResponse, keyword string, isFuzzy bool) ([]internal.Match, error) {
+ hits := make([]internal.Match, 0, len(searchRes.Hits))
+ for _, hit := range searchRes.Hits {
+ hit, ok := hit.(map[string]any)
+ if !ok {
+ return nil, ErrMalformedResponse
+ }
+
+ if !isFuzzy {
+ keyword = strings.ToLower(keyword)
+
+ // declare a anon func to check if the title, content or at least one comment contains the keyword
+ found, err := func() (bool, error) {
+ // check if title match first
+ title, ok := hit["title"].(string)
+ if !ok {
+ return false, ErrMalformedResponse
+ } else if strings.Contains(strings.ToLower(title), keyword) {
+ return true, nil
+ }
+
+ // check if content has a match
+ content, ok := hit["content"].(string)
+ if !ok {
+ return false, ErrMalformedResponse
+ } else if strings.Contains(strings.ToLower(content), keyword) {
+ return true, nil
+ }
+
+ // now check for each comment if one has a match
+ // so we first try to cast and skip if there are no comments
+ comments, ok := hit["comments"].([]any)
+ if !ok {
+ return false, ErrMalformedResponse
+ } else if len(comments) == 0 {
+ return false, nil
+ }
+
+ // now we iterate over all and report as soon as we detect one match
+ for i := range comments {
+ comment, ok := comments[i].(string)
+ if !ok {
+ return false, ErrMalformedResponse
+ }
+ if strings.Contains(strings.ToLower(comment), keyword) {
+ return true, nil
+ }
+ }
+
+ // we got no match
+ return false, nil
+ }()
+
+ if err != nil {
+ return nil, err
+ } else if !found {
+ continue
+ }
+ }
+ issueID, ok := hit["id"].(float64)
+ if !ok {
+ return nil, ErrMalformedResponse
+ }
+ hits = append(hits, internal.Match{
+ ID: int64(issueID),
+ })
+ }
+ return hits, nil
+}
"testing"
"time"
+ "code.gitea.io/gitea/modules/indexer/issues/internal"
"code.gitea.io/gitea/modules/indexer/issues/internal/tests"
+
+ "github.com/meilisearch/meilisearch-go"
+ "github.com/stretchr/testify/assert"
)
func TestMeilisearchIndexer(t *testing.T) {
tests.TestIndexer(t, indexer)
}
+
+func TestNonFuzzyWorkaround(t *testing.T) {
+ // get unexpected return
+ _, err := nonFuzzyWorkaround(&meilisearch.SearchResponse{
+ Hits: []any{"aa", "bb", "cc", "dd"},
+ }, "bowling", false)
+ assert.ErrorIs(t, err, ErrMalformedResponse)
+
+ validResponse := &meilisearch.SearchResponse{
+ Hits: []any{
+ map[string]any{
+ "id": float64(11),
+ "title": "a title",
+ "content": "issue body with no match",
+ "comments": []any{"hey whats up?", "I'm currently bowling", "nice"},
+ },
+ map[string]any{
+ "id": float64(22),
+ "title": "Bowling as title",
+ "content": "",
+ "comments": []any{},
+ },
+ map[string]any{
+ "id": float64(33),
+ "title": "Bowl-ing as fuzzy match",
+ "content": "",
+ "comments": []any{},
+ },
+ },
+ }
+
+ // nonFuzzy
+ hits, err := nonFuzzyWorkaround(validResponse, "bowling", false)
+ assert.NoError(t, err)
+ assert.EqualValues(t, []internal.Match{{ID: 11}, {ID: 22}}, hits)
+
+ // fuzzy
+ hits, err = nonFuzzyWorkaround(validResponse, "bowling", true)
+ assert.NoError(t, err)
+ assert.EqualValues(t, []internal.Match{{ID: 11}, {ID: 22}, {ID: 33}}, hits)
+}
keyword := ctx.FormTrim("q")
queryType := ctx.FormTrim("t")
- isMatch := queryType == "match"
+ isFuzzy := queryType != "match"
ctx.Data["Keyword"] = keyword
ctx.Data["Language"] = language
)
if (len(repoIDs) > 0) || isAdmin {
- total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isMatch)
+ total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy)
if err != nil {
if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err)
keyword := ctx.FormTrim("q")
queryType := ctx.FormTrim("t")
- isMatch := queryType == "match"
+ isFuzzy := queryType != "match"
ctx.Data["Keyword"] = keyword
ctx.Data["Language"] = language
}
total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, []int64{ctx.Repo.Repository.ID},
- language, keyword, page, setting.UI.RepoSearchPagingNum, isMatch)
+ language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy)
if err != nil {
if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err)
keyword := ctx.FormTrim("q")
queryType := ctx.FormTrim("t")
- isMatch := queryType == "match"
+ isFuzzy := queryType != "match"
ctx.Data["Keyword"] = keyword
ctx.Data["Language"] = language
)
if len(repoIDs) > 0 {
- total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isMatch)
+ total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy)
if err != nil {
if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err)