Fixes #22714 ### Changes: 1. Add a token filter named "camelCase" between the custom unicode token filter and the "to_lower" token filter when adding the custom analyzer. ### Notice: If users want this feature to work, they should delete the folder under {giteaPath}/data/indexers and restart the application. The application will then create a new IndexMapping. ### Screenshots: ![image](https://user-images.githubusercontent.com/33891828/217715692-c18c41f2-57a1-4727-861c-470935c8e0c8.png) ### Others: I originally attempted to give users the ability to configure the "token_filters" in the "app.ini" file. But I found that if users do not strictly follow the correct order when registering "token_filters", they won't get the expected results. I think it is difficult to ask users to do this, so I finally gave up on this idea. --------- Co-authored-by: Jason Song <i@wolfogre.com> Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com> tags/v1.19.0-rc0
@@ -27,6 +27,7 @@ import ( | |||
"github.com/blevesearch/bleve/v2" | |||
analyzer_custom "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" | |||
analyzer_keyword "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword" | |||
"github.com/blevesearch/bleve/v2/analysis/token/camelcase" | |||
"github.com/blevesearch/bleve/v2/analysis/token/lowercase" | |||
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" | |||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" | |||
@@ -107,7 +108,7 @@ func (d *RepoIndexerData) Type() string { | |||
const ( | |||
repoIndexerAnalyzer = "repoIndexerAnalyzer" | |||
repoIndexerDocType = "repoIndexerDocType" | |||
repoIndexerLatestVersion = 5 | |||
repoIndexerLatestVersion = 6 | |||
) | |||
// createBleveIndexer create a bleve repo indexer if one does not already exist | |||
@@ -138,7 +139,7 @@ func createBleveIndexer(path string, latestVersion int) (bleve.Index, error) { | |||
"type": analyzer_custom.Name, | |||
"char_filters": []string{}, | |||
"tokenizer": unicode.Name, | |||
"token_filters": []string{unicodeNormalizeName, lowercase.Name}, | |||
"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, | |||
}); err != nil { | |||
return nil, err | |||
} |
@@ -15,6 +15,7 @@ import ( | |||
"github.com/blevesearch/bleve/v2" | |||
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom" | |||
"github.com/blevesearch/bleve/v2/analysis/token/camelcase" | |||
"github.com/blevesearch/bleve/v2/analysis/token/lowercase" | |||
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" | |||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" | |||
@@ -27,7 +28,7 @@ import ( | |||
const ( | |||
issueIndexerAnalyzer = "issueIndexer" | |||
issueIndexerDocType = "issueIndexerDocType" | |||
issueIndexerLatestVersion = 1 | |||
issueIndexerLatestVersion = 2 | |||
) | |||
// indexerID a bleve-compatible unique identifier for an integer id | |||
@@ -134,7 +135,7 @@ func createIssueIndexer(path string, latestVersion int) (bleve.Index, error) { | |||
"type": custom.Name, | |||
"char_filters": []string{}, | |||
"tokenizer": unicode.Name, | |||
"token_filters": []string{unicodeNormalizeName, lowercase.Name}, | |||
"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, | |||
}); err != nil { | |||
return nil, err | |||
} |