summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go')
-rw-r--r--vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go125
1 files changed, 125 insertions, 0 deletions
diff --git a/vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go b/vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go
new file mode 100644
index 0000000000..4bf64fef2e
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go
@@ -0,0 +1,125 @@
+// Copyright (c) 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package levenshtein2
+
+import (
+ "fmt"
+ "sort"
+ "unicode/utf8"
+)
+
+type FullCharacteristicVector []uint32
+
+func (fcv FullCharacteristicVector) shiftAndMask(offset, mask uint32) uint32 {
+ bucketID := offset / 32
+ align := offset - bucketID*32
+ if align == 0 {
+ return fcv[bucketID] & mask
+ }
+ left := fcv[bucketID] >> align
+ right := fcv[bucketID+1] << (32 - align)
+ return (left | right) & mask
+}
+
+type tuple struct {
+ char rune
+ fcv FullCharacteristicVector
+}
+
+type sortRunes []rune
+
+func (s sortRunes) Less(i, j int) bool {
+ return s[i] < s[j]
+}
+
+func (s sortRunes) Swap(i, j int) {
+ s[i], s[j] = s[j], s[i]
+}
+
+func (s sortRunes) Len() int {
+ return len(s)
+}
+
+func sortRune(r []rune) []rune {
+ sort.Sort(sortRunes(r))
+ return r
+}
+
+type Alphabet struct {
+ charset []tuple
+ index uint32
+}
+
+func (a *Alphabet) resetNext() {
+ a.index = 0
+}
+
+func (a *Alphabet) next() (rune, FullCharacteristicVector, error) {
+ if int(a.index) >= len(a.charset) {
+ return 0, nil, fmt.Errorf("eof")
+ }
+
+ rv := a.charset[a.index]
+ a.index++
+ return rv.char, rv.fcv, nil
+}
+
+func dedupe(in string) string {
+ lookUp := make(map[rune]struct{}, len(in))
+ var rv string
+ for len(in) > 0 {
+ r, size := utf8.DecodeRuneInString(in)
+ in = in[size:]
+ if _, ok := lookUp[r]; !ok {
+ rv += string(r)
+ lookUp[r] = struct{}{}
+ }
+ }
+ return rv
+}
+
+func queryChars(qChars string) Alphabet {
+ chars := dedupe(qChars)
+ inChars := sortRune([]rune(chars))
+ charsets := make([]tuple, 0, len(inChars))
+
+ for _, c := range inChars {
+ tempChars := qChars
+ var bits []uint32
+ for len(tempChars) > 0 {
+ var chunk string
+ if len(tempChars) > 32 {
+ chunk = tempChars[0:32]
+ tempChars = tempChars[32:]
+ } else {
+ chunk = tempChars
+ tempChars = tempChars[:0]
+ }
+
+ chunkBits := uint32(0)
+ bit := uint32(1)
+ for _, chr := range chunk {
+ if chr == c {
+ chunkBits |= bit
+ }
+ bit <<= 1
+ }
+ bits = append(bits, chunkBits)
+ }
+ bits = append(bits, 0)
+ charsets = append(charsets, tuple{char: c, fcv: FullCharacteristicVector(bits)})
+ }
+ return Alphabet{charset: charsets}
+}