aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorsillyguodong <33891828+sillyguodong@users.noreply.github.com>2023-02-12 18:09:03 +0800
committerGitHub <noreply@github.com>2023-02-12 18:09:03 +0800
commit34399cfd7a618198822ddb2d0052adc39e5568e4 (patch)
tree18ab25cc36eaa8b8c5c99e54f2b9bf49cf21b66b
parent00f695da482be70eaca544472e7f2df4ec5c2f68 (diff)
downloadgitea-34399cfd7a618198822ddb2d0052adc39e5568e4.tar.gz
gitea-34399cfd7a618198822ddb2d0052adc39e5568e4.zip
Make issue and code search support camel case (#22829)
Fixes #22714 ### Changes: 1. Add a token filter named "camelCase" between the custom unicode token filter and the "to_lower" token filter when adding the custom analyzer. ### Notice: If users want this feature to work, they should delete the folder under {giteaPath}/data/indexers and restart the application. The application will then create a new IndexMapping. ### Screenshots: ![image](https://user-images.githubusercontent.com/33891828/217715692-c18c41f2-57a1-4727-861c-470935c8e0c8.png) ### Others: I originally attempted to give users the ability to configure the "token_filters" in the "app.ini" file. But I found that if users do not strictly follow the right order when registering "token_filters", they won't get the expected results. I think it is difficult to ask users to do this, so I finally gave up on this idea. --------- Co-authored-by: Jason Song <i@wolfogre.com> Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
-rw-r--r--modules/indexer/code/bleve.go5
-rw-r--r--modules/indexer/issues/bleve.go5
2 files changed, 6 insertions, 4 deletions
diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve.go
index 6ee2639d34..e9085f4107 100644
--- a/modules/indexer/code/bleve.go
+++ b/modules/indexer/code/bleve.go
@@ -27,6 +27,7 @@ import (
"github.com/blevesearch/bleve/v2"
analyzer_custom "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
analyzer_keyword "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
+ "github.com/blevesearch/bleve/v2/analysis/token/camelcase"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
@@ -107,7 +108,7 @@ func (d *RepoIndexerData) Type() string {
const (
repoIndexerAnalyzer = "repoIndexerAnalyzer"
repoIndexerDocType = "repoIndexerDocType"
- repoIndexerLatestVersion = 5
+ repoIndexerLatestVersion = 6
)
// createBleveIndexer create a bleve repo indexer if one does not already exist
@@ -138,7 +139,7 @@ func createBleveIndexer(path string, latestVersion int) (bleve.Index, error) {
"type": analyzer_custom.Name,
"char_filters": []string{},
"tokenizer": unicode.Name,
- "token_filters": []string{unicodeNormalizeName, lowercase.Name},
+ "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
}); err != nil {
return nil, err
}
diff --git a/modules/indexer/issues/bleve.go b/modules/indexer/issues/bleve.go
index 952bddfb29..e3ef9af5b9 100644
--- a/modules/indexer/issues/bleve.go
+++ b/modules/indexer/issues/bleve.go
@@ -15,6 +15,7 @@ import (
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
+ "github.com/blevesearch/bleve/v2/analysis/token/camelcase"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
@@ -27,7 +28,7 @@ import (
const (
issueIndexerAnalyzer = "issueIndexer"
issueIndexerDocType = "issueIndexerDocType"
- issueIndexerLatestVersion = 1
+ issueIndexerLatestVersion = 2
)
// indexerID a bleve-compatible unique identifier for an integer id
@@ -134,7 +135,7 @@ func createIssueIndexer(path string, latestVersion int) (bleve.Index, error) {
"type": custom.Name,
"char_filters": []string{},
"tokenizer": unicode.Name,
- "token_filters": []string{unicodeNormalizeName, lowercase.Name},
+ "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
}); err != nil {
return nil, err
}