summaryrefslogtreecommitdiffstats
path: root/modules/indexer
diff options
context:
space:
mode:
author6543 <m.huber@kithara.com>2024-03-16 14:19:41 +0100
committerGitHub <noreply@github.com>2024-03-16 13:19:41 +0000
commitc6e5ec51bd5d2d3ede30b7506e7cc47f18a49ca8 (patch)
treedb252a3c927a2127ce2286933ceb9cfe0af5dfb6 /modules/indexer
parent3cd64949ae1402a4ff45fba0a27c4acca1c5aead (diff)
downloadgitea-c6e5ec51bd5d2d3ede30b7506e7cc47f18a49ca8.tar.gz
gitea-c6e5ec51bd5d2d3ede30b7506e7cc47f18a49ca8.zip
Meilisearch double quote on "match" query (#29740)
make `nonFuzzyWorkaround` unnecessary cc @Kerollmops
Diffstat (limited to 'modules/indexer')
-rw-r--r--modules/indexer/issues/meilisearch/meilisearch.go83
-rw-r--r--modules/indexer/issues/meilisearch/meilisearch_test.go24
2 files changed, 37 insertions, 70 deletions
diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go
index 34066bf559..b735c26968 100644
--- a/modules/indexer/issues/meilisearch/meilisearch.go
+++ b/modules/indexer/issues/meilisearch/meilisearch.go
@@ -6,6 +6,7 @@ package meilisearch
import (
"context"
"errors"
+ "fmt"
"strconv"
"strings"
@@ -217,7 +218,14 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxTotalHits)
- searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(options.Keyword, &meilisearch.SearchRequest{
+ keyword := options.Keyword
+ if !options.IsFuzzyKeyword {
+ // to make it non fuzzy ("typo tolerance" in meilisearch terms), we have to quote the keyword(s)
+ // https://www.meilisearch.com/docs/reference/api/search#phrase-search
+ keyword = doubleQuoteKeyword(keyword)
+ }
+
+ searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(keyword, &meilisearch.SearchRequest{
Filter: query.Statement(),
Limit: int64(limit),
Offset: int64(skip),
@@ -228,7 +236,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
return nil, err
}
- hits, err := nonFuzzyWorkaround(searchRes, options.Keyword, options.IsFuzzyKeyword)
+ hits, err := convertHits(searchRes)
if err != nil {
return nil, err
}
@@ -247,11 +255,20 @@ func parseSortBy(sortBy internal.SortBy) string {
return field + ":asc"
}
-// nonFuzzyWorkaround is needed as meilisearch does not have an exact search
-// and you can only change "typo tolerance" per index. So we have to post-filter the results
-// https://www.meilisearch.com/docs/learn/configuration/typo_tolerance#configuring-typo-tolerance
-// TODO: remove once https://github.com/orgs/meilisearch/discussions/377 is addressed
-func nonFuzzyWorkaround(searchRes *meilisearch.SearchResponse, keyword string, isFuzzy bool) ([]internal.Match, error) {
+func doubleQuoteKeyword(k string) string {
+ kp := strings.Split(k, " ")
+ parts := 0
+ for i := range kp {
+ part := strings.Trim(kp[i], "\"")
+ if part != "" {
+ kp[parts] = fmt.Sprintf(`"%s"`, part)
+ parts++
+ }
+ }
+ return strings.Join(kp[:parts], " ")
+}
+
+func convertHits(searchRes *meilisearch.SearchResponse) ([]internal.Match, error) {
hits := make([]internal.Match, 0, len(searchRes.Hits))
for _, hit := range searchRes.Hits {
hit, ok := hit.(map[string]any)
@@ -259,61 +276,11 @@ func nonFuzzyWorkaround(searchRes *meilisearch.SearchResponse, keyword string, i
return nil, ErrMalformedResponse
}
- if !isFuzzy {
- keyword = strings.ToLower(keyword)
-
- // declare a anon func to check if the title, content or at least one comment contains the keyword
- found, err := func() (bool, error) {
- // check if title match first
- title, ok := hit["title"].(string)
- if !ok {
- return false, ErrMalformedResponse
- } else if strings.Contains(strings.ToLower(title), keyword) {
- return true, nil
- }
-
- // check if content has a match
- content, ok := hit["content"].(string)
- if !ok {
- return false, ErrMalformedResponse
- } else if strings.Contains(strings.ToLower(content), keyword) {
- return true, nil
- }
-
- // now check for each comment if one has a match
- // so we first try to cast and skip if there are no comments
- comments, ok := hit["comments"].([]any)
- if !ok {
- return false, ErrMalformedResponse
- } else if len(comments) == 0 {
- return false, nil
- }
-
- // now we iterate over all and report as soon as we detect one match
- for i := range comments {
- comment, ok := comments[i].(string)
- if !ok {
- return false, ErrMalformedResponse
- }
- if strings.Contains(strings.ToLower(comment), keyword) {
- return true, nil
- }
- }
-
- // we got no match
- return false, nil
- }()
-
- if err != nil {
- return nil, err
- } else if !found {
- continue
- }
- }
issueID, ok := hit["id"].(float64)
if !ok {
return nil, ErrMalformedResponse
}
+
hits = append(hits, internal.Match{
ID: int64(issueID),
})
diff --git a/modules/indexer/issues/meilisearch/meilisearch_test.go b/modules/indexer/issues/meilisearch/meilisearch_test.go
index ecce704236..4666df136a 100644
--- a/modules/indexer/issues/meilisearch/meilisearch_test.go
+++ b/modules/indexer/issues/meilisearch/meilisearch_test.go
@@ -53,11 +53,10 @@ func TestMeilisearchIndexer(t *testing.T) {
tests.TestIndexer(t, indexer)
}
-func TestNonFuzzyWorkaround(t *testing.T) {
- // get unexpected return
- _, err := nonFuzzyWorkaround(&meilisearch.SearchResponse{
+func TestConvertHits(t *testing.T) {
+ _, err := convertHits(&meilisearch.SearchResponse{
Hits: []any{"aa", "bb", "cc", "dd"},
- }, "bowling", false)
+ })
assert.ErrorIs(t, err, ErrMalformedResponse)
validResponse := &meilisearch.SearchResponse{
@@ -82,14 +81,15 @@ func TestNonFuzzyWorkaround(t *testing.T) {
},
},
}
-
- // nonFuzzy
- hits, err := nonFuzzyWorkaround(validResponse, "bowling", false)
- assert.NoError(t, err)
- assert.EqualValues(t, []internal.Match{{ID: 11}, {ID: 22}}, hits)
-
- // fuzzy
- hits, err = nonFuzzyWorkaround(validResponse, "bowling", true)
+ hits, err := convertHits(validResponse)
assert.NoError(t, err)
assert.EqualValues(t, []internal.Match{{ID: 11}, {ID: 22}, {ID: 33}}, hits)
}
+
+func TestDoubleQuoteKeyword(t *testing.T) {
+ assert.EqualValues(t, "", doubleQuoteKeyword(""))
+ assert.EqualValues(t, `"a" "b" "c"`, doubleQuoteKeyword("a b c"))
+ assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g"))
+ assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g"))
+ assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword(`a "" "d" """g`))
+}