You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

search_fuzzy.go 2.1KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package searcher
  15. import (
  16. "github.com/blevesearch/bleve/index"
  17. "github.com/blevesearch/bleve/search"
  18. )
  19. func NewFuzzySearcher(indexReader index.IndexReader, term string,
  20. prefix, fuzziness int, field string, boost float64,
  21. options search.SearcherOptions) (search.Searcher, error) {
  22. // Note: we don't byte slice the term for a prefix because of runes.
  23. prefixTerm := ""
  24. for i, r := range term {
  25. if i < prefix {
  26. prefixTerm += string(r)
  27. } else {
  28. break
  29. }
  30. }
  31. candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness,
  32. field, prefixTerm)
  33. if err != nil {
  34. return nil, err
  35. }
  36. return NewMultiTermSearcher(indexReader, candidateTerms, field,
  37. boost, options, true)
  38. }
  39. func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
  40. fuzziness int, field, prefixTerm string) (rv []string, err error) {
  41. rv = make([]string, 0)
  42. var fieldDict index.FieldDict
  43. if len(prefixTerm) > 0 {
  44. fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
  45. } else {
  46. fieldDict, err = indexReader.FieldDict(field)
  47. }
  48. defer func() {
  49. if cerr := fieldDict.Close(); cerr != nil && err == nil {
  50. err = cerr
  51. }
  52. }()
  53. // enumerate terms and check levenshtein distance
  54. tfd, err := fieldDict.Next()
  55. for err == nil && tfd != nil {
  56. ld, exceeded := search.LevenshteinDistanceMax(term, tfd.Term, fuzziness)
  57. if !exceeded && ld <= fuzziness {
  58. rv = append(rv, tfd.Term)
  59. if tooManyClauses(len(rv)) {
  60. return rv, tooManyClausesErr()
  61. }
  62. }
  63. tfd, err = fieldDict.Next()
  64. }
  65. return rv, err
  66. }