Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

unique.go 1.5KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. // Copyright (c) 2018 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package unique
  15. import (
  16. "github.com/blevesearch/bleve/analysis"
  17. "github.com/blevesearch/bleve/registry"
  18. )
// Name is the identifier under which this token filter is registered with
// the registry (see init).
const Name = "unique"
// UniqueTermFilter retains only the tokens which mark the first occurrence of
// a term. Tokens whose term appears in a preceding token are dropped.
// The type carries no state, so its zero value is ready to use.
type UniqueTermFilter struct{}
  23. func NewUniqueTermFilter() *UniqueTermFilter {
  24. return &UniqueTermFilter{}
  25. }
  26. func (f *UniqueTermFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
  27. encounteredTerms := make(map[string]struct{}, len(input)/4)
  28. j := 0
  29. for _, token := range input {
  30. term := string(token.Term)
  31. if _, ok := encounteredTerms[term]; ok {
  32. continue
  33. }
  34. encounteredTerms[term] = struct{}{}
  35. input[j] = token
  36. j++
  37. }
  38. return input[:j]
  39. }
  40. func UniqueTermFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
  41. return NewUniqueTermFilter(), nil
  42. }
// init registers UniqueTermFilterConstructor with the registry under Name
// when the package is loaded.
func init() {
	registry.RegisterTokenFilter(Name, UniqueTermFilterConstructor)
}