You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

levenshtein.go 2.2KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364
  1. // Copyright (c) 2018 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package levenshtein
  15. import "fmt"
  // StateLimit is the largest number of states a DFA is allowed to contain;
  // ErrTooManyStates embeds this value in its message.
  const StateLimit = 10000
  18. // ErrTooManyStates is returned if you attempt to build a Levenshtein
  19. // automaton which requires too many states.
  20. var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states",
  21. StateLimit)
  22. // LevenshteinAutomatonBuilder wraps a precomputed
  23. // datastructure that allows to produce small (but not minimal) DFA.
  24. type LevenshteinAutomatonBuilder struct {
  25. pDfa *ParametricDFA
  26. }
  27. // NewLevenshteinAutomatonBuilder creates a
  28. // reusable, threadsafe Levenshtein automaton builder.
  29. // `maxDistance` - maximum distance considered by the automaton.
  30. // `transposition` - assign a distance of 1 for transposition
  31. //
  32. // Building this automaton builder is computationally intensive.
  33. // While it takes only a few milliseconds for `d=2`, it grows
  34. // exponentially with `d`. It is only reasonable to `d <= 5`.
  35. func NewLevenshteinAutomatonBuilder(maxDistance uint8,
  36. transposition bool) (*LevenshteinAutomatonBuilder, error) {
  37. lnfa := newLevenshtein(maxDistance, transposition)
  38. pdfa, err := fromNfa(lnfa)
  39. if err != nil {
  40. return nil, err
  41. }
  42. return &LevenshteinAutomatonBuilder{pDfa: pdfa}, nil
  43. }
  44. // BuildDfa builds the levenshtein automaton for serving
  45. // queries with a given edit distance.
  46. func (lab *LevenshteinAutomatonBuilder) BuildDfa(query string,
  47. fuzziness uint8) (*DFA, error) {
  48. return lab.pDfa.buildDfa(query, fuzziness, false)
  49. }
  50. // MaxDistance returns the MaxEdit distance supported by the
  51. // LevenshteinAutomatonBuilder builder.
  52. func (lab *LevenshteinAutomatonBuilder) MaxDistance() uint8 {
  53. return lab.pDfa.maxDistance
  54. }