You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

custom.go 3.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package custom
  15. import (
  16. "fmt"
  17. "github.com/blevesearch/bleve/analysis"
  18. "github.com/blevesearch/bleve/registry"
  19. )
  // Name is the identifier under which init registers AnalyzerConstructor
  // with the analyzer registry.
  const Name = "custom"
  21. func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
  22. var err error
  23. var charFilters []analysis.CharFilter
  24. charFiltersValue, ok := config["char_filters"]
  25. if ok {
  26. switch charFiltersValue := charFiltersValue.(type) {
  27. case []string:
  28. charFilters, err = getCharFilters(charFiltersValue, cache)
  29. if err != nil {
  30. return nil, err
  31. }
  32. case []interface{}:
  33. charFiltersNames, err := convertInterfaceSliceToStringSlice(charFiltersValue, "char filter")
  34. if err != nil {
  35. return nil, err
  36. }
  37. charFilters, err = getCharFilters(charFiltersNames, cache)
  38. if err != nil {
  39. return nil, err
  40. }
  41. default:
  42. return nil, fmt.Errorf("unsupported type for char_filters, must be slice")
  43. }
  44. }
  45. var tokenizerName string
  46. tokenizerValue, ok := config["tokenizer"]
  47. if ok {
  48. tokenizerName, ok = tokenizerValue.(string)
  49. if !ok {
  50. return nil, fmt.Errorf("must specify tokenizer as string")
  51. }
  52. } else {
  53. return nil, fmt.Errorf("must specify tokenizer")
  54. }
  55. tokenizer, err := cache.TokenizerNamed(tokenizerName)
  56. if err != nil {
  57. return nil, err
  58. }
  59. var tokenFilters []analysis.TokenFilter
  60. tokenFiltersValue, ok := config["token_filters"]
  61. if ok {
  62. switch tokenFiltersValue := tokenFiltersValue.(type) {
  63. case []string:
  64. tokenFilters, err = getTokenFilters(tokenFiltersValue, cache)
  65. if err != nil {
  66. return nil, err
  67. }
  68. case []interface{}:
  69. tokenFiltersNames, err := convertInterfaceSliceToStringSlice(tokenFiltersValue, "token filter")
  70. if err != nil {
  71. return nil, err
  72. }
  73. tokenFilters, err = getTokenFilters(tokenFiltersNames, cache)
  74. if err != nil {
  75. return nil, err
  76. }
  77. default:
  78. return nil, fmt.Errorf("unsupported type for token_filters, must be slice")
  79. }
  80. }
  81. rv := analysis.Analyzer{
  82. Tokenizer: tokenizer,
  83. }
  84. if charFilters != nil {
  85. rv.CharFilters = charFilters
  86. }
  87. if tokenFilters != nil {
  88. rv.TokenFilters = tokenFilters
  89. }
  90. return &rv, nil
  91. }
  92. func init() {
  93. registry.RegisterAnalyzer(Name, AnalyzerConstructor)
  94. }
  95. func getCharFilters(charFilterNames []string, cache *registry.Cache) ([]analysis.CharFilter, error) {
  96. charFilters := make([]analysis.CharFilter, len(charFilterNames))
  97. for i, charFilterName := range charFilterNames {
  98. charFilter, err := cache.CharFilterNamed(charFilterName)
  99. if err != nil {
  100. return nil, err
  101. }
  102. charFilters[i] = charFilter
  103. }
  104. return charFilters, nil
  105. }
  106. func getTokenFilters(tokenFilterNames []string, cache *registry.Cache) ([]analysis.TokenFilter, error) {
  107. tokenFilters := make([]analysis.TokenFilter, len(tokenFilterNames))
  108. for i, tokenFilterName := range tokenFilterNames {
  109. tokenFilter, err := cache.TokenFilterNamed(tokenFilterName)
  110. if err != nil {
  111. return nil, err
  112. }
  113. tokenFilters[i] = tokenFilter
  114. }
  115. return tokenFilters, nil
  116. }
  // convertInterfaceSliceToStringSlice converts interfaceSlice into a slice of
  // strings of the same length and order.
  //
  // objType is a human-readable label for the kind of object being named (the
  // callers in this file pass "char filter" and "token filter"); it is used
  // only to build the error message.
  //
  // If any element is not a string, a nil slice and an error of the form
  // "<objType> name must be a string" are returned.
  func convertInterfaceSliceToStringSlice(interfaceSlice []interface{}, objType string) ([]string, error) {
  	stringSlice := make([]string, len(interfaceSlice))
  	for i, interfaceObj := range interfaceSlice {
  		stringObj, ok := interfaceObj.(string)
  		if !ok {
  			// objType is passed as an argument rather than concatenated into
  			// the format string, so a '%' inside it is reported verbatim
  			// instead of being interpreted as a formatting verb.
  			return nil, fmt.Errorf("%s name must be a string", objType)
  		}
  		stringSlice[i] = stringObj
  	}
  	return stringSlice, nil
  }