You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

stemmer_en_snowball.go 1.4KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. // Copyright (c) 2020 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package en
  15. import (
  16. "github.com/blevesearch/bleve/analysis"
  17. "github.com/blevesearch/bleve/registry"
  18. "github.com/blevesearch/snowballstem"
  19. "github.com/blevesearch/snowballstem/english"
  20. )
  // SnowballStemmerName is the name under which this package registers its
  // English Snowball stemmer token filter.
  const (
  	SnowballStemmerName = "stemmer_en_snowball"
  )
  // EnglishStemmerFilter is a token filter that applies the English Snowball
  // stemmer. It has no fields and therefore carries no per-instance state.
  type EnglishStemmerFilter struct{}
  24. func NewEnglishStemmerFilter() *EnglishStemmerFilter {
  25. return &EnglishStemmerFilter{}
  26. }
  27. func (s *EnglishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
  28. for _, token := range input {
  29. env := snowballstem.NewEnv(string(token.Term))
  30. english.Stem(env)
  31. token.Term = []byte(env.Current())
  32. }
  33. return input
  34. }
  35. func EnglishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
  36. return NewEnglishStemmerFilter(), nil
  37. }
  38. func init() {
  39. registry.RegisterTokenFilter(SnowballStemmerName, EnglishStemmerFilterConstructor)
  40. }