summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/blevesearch/snowballstem/env.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/blevesearch/snowballstem/env.go')
-rw-r--r--vendor/github.com/blevesearch/snowballstem/env.go389
1 files changed, 389 insertions, 0 deletions
diff --git a/vendor/github.com/blevesearch/snowballstem/env.go b/vendor/github.com/blevesearch/snowballstem/env.go
new file mode 100644
index 0000000000..6636994ac7
--- /dev/null
+++ b/vendor/github.com/blevesearch/snowballstem/env.go
@@ -0,0 +1,389 @@
+package snowballstem
+
+import (
+ "log"
+ "strings"
+ "unicode/utf8"
+)
+
// Env represents the Snowball execution environment: the string being
// processed together with the byte offsets that the methods on Env read and
// update.
type Env struct {
	current       string // text the environment operates on
	Cursor        int    // current position; starts at 0
	Limit         int    // starts at len(current)
	LimitBackward int    // starts at 0
	Bra           int    // start of the slice used by SliceTo/SliceFrom; starts at 0
	Ket           int    // end of that slice; starts at len(current)
}

// NewEnv creates a new Snowball execution environment on the provided string.
// Cursor, LimitBackward and Bra start at zero; Limit and Ket start at
// len(val).
func NewEnv(val string) *Env {
	size := len(val)
	env := new(Env)
	env.current = val
	env.Limit = size
	env.Ket = size
	return env
}
+
+func (env *Env) Current() string {
+ return env.current
+}
+
+func (env *Env) SetCurrent(s string) {
+ env.current = s
+ env.Cursor = 0
+ env.Limit = len(s)
+ env.LimitBackward = 0
+ env.Bra = 0
+ env.Ket = len(s)
+}
+
+func (env *Env) ReplaceS(bra, ket int, s string) int32 {
+ adjustment := int32(len(s)) - (int32(ket) - int32(bra))
+ result, _ := splitAt(env.current, bra)
+ rsplit := ket
+ if ket < bra {
+ rsplit = bra
+ }
+ _, rhs := splitAt(env.current, rsplit)
+ result += s
+ result += rhs
+
+ newLim := int32(env.Limit) + adjustment
+ env.Limit = int(newLim)
+
+ if env.Cursor >= ket {
+ newCur := int32(env.Cursor) + adjustment
+ env.Cursor = int(newCur)
+ } else if env.Cursor > bra {
+ env.Cursor = bra
+ }
+
+ env.current = result
+ return adjustment
+}
+
+func (env *Env) EqS(s string) bool {
+ if env.Cursor >= env.Limit {
+ return false
+ }
+
+ if strings.HasPrefix(env.current[env.Cursor:], s) {
+ env.Cursor += len(s)
+ for !onCharBoundary(env.current, env.Cursor) {
+ env.Cursor++
+ }
+ return true
+ }
+ return false
+}
+
+func (env *Env) EqSB(s string) bool {
+ if int32(env.Cursor)-int32(env.LimitBackward) < int32(len(s)) {
+ return false
+ } else if !onCharBoundary(env.current, env.Cursor-len(s)) ||
+ !strings.HasPrefix(env.current[env.Cursor-len(s):], s) {
+ return false
+ } else {
+ env.Cursor -= len(s)
+ return true
+ }
+}
+
+func (env *Env) SliceFrom(s string) bool {
+ bra, ket := env.Bra, env.Ket
+ env.ReplaceS(bra, ket, s)
+ return true
+}
+
+func (env *Env) NextChar() {
+ env.Cursor++
+ for !onCharBoundary(env.current, env.Cursor) {
+ env.Cursor++
+ }
+}
+
+func (env *Env) PrevChar() {
+ env.Cursor--
+ for !onCharBoundary(env.current, env.Cursor) {
+ env.Cursor--
+ }
+}
+
+func (env *Env) ByteIndexForHop(delta int32) int32 {
+ if delta > 0 {
+ res := env.Cursor
+ for delta > 0 {
+ res++
+ delta--
+ for res <= len(env.current) && !onCharBoundary(env.current, res) {
+ res++
+ }
+ }
+ return int32(res)
+ } else if delta < 0 {
+ res := env.Cursor
+ for delta < 0 {
+ res--
+ delta++
+ for res >= 0 && !onCharBoundary(env.current, res) {
+ res--
+ }
+ }
+ return int32(res)
+ } else {
+ return int32(env.Cursor)
+ }
+}
+
+func (env *Env) InGrouping(chars []byte, min, max int32) bool {
+ if env.Cursor >= env.Limit {
+ return false
+ }
+
+ r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
+ if r != utf8.RuneError {
+ if r > max || r < min {
+ return false
+ }
+ r -= min
+ if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
+ return false
+ }
+ env.NextChar()
+ return true
+ }
+ return false
+}
+
+func (env *Env) InGroupingB(chars []byte, min, max int32) bool {
+ if env.Cursor <= env.LimitBackward {
+ return false
+ }
+ env.PrevChar()
+ r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
+ if r != utf8.RuneError {
+ env.NextChar()
+ if r > max || r < min {
+ return false
+ }
+ r -= min
+ if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
+ return false
+ }
+ env.PrevChar()
+ return true
+ }
+ return false
+}
+
+func (env *Env) OutGrouping(chars []byte, min, max int32) bool {
+ if env.Cursor >= env.Limit {
+ return false
+ }
+ r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
+ if r != utf8.RuneError {
+ if r > max || r < min {
+ env.NextChar()
+ return true
+ }
+ r -= min
+ if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
+ env.NextChar()
+ return true
+ }
+ }
+ return false
+}
+
+func (env *Env) OutGroupingB(chars []byte, min, max int32) bool {
+ if env.Cursor <= env.LimitBackward {
+ return false
+ }
+ env.PrevChar()
+ r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
+ if r != utf8.RuneError {
+ env.NextChar()
+ if r > max || r < min {
+ env.PrevChar()
+ return true
+ }
+ r -= min
+ if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
+ env.PrevChar()
+ return true
+ }
+ }
+ return false
+}
+
+func (env *Env) SliceDel() bool {
+ return env.SliceFrom("")
+}
+
+func (env *Env) Insert(bra, ket int, s string) {
+ adjustment := env.ReplaceS(bra, ket, s)
+ if bra <= env.Bra {
+ env.Bra = int(int32(env.Bra) + adjustment)
+ }
+ if bra <= env.Ket {
+ env.Ket = int(int32(env.Ket) + adjustment)
+ }
+}
+
+func (env *Env) SliceTo() string {
+ return env.current[env.Bra:env.Ket]
+}
+
+func (env *Env) FindAmong(amongs []*Among, ctx interface{}) int32 {
+ var i int32
+ j := int32(len(amongs))
+
+ c := env.Cursor
+ l := env.Limit
+
+ var commonI, commonJ int
+
+ firstKeyInspected := false
+ for {
+ k := i + ((j - i) >> 1)
+ var diff int32
+ common := min(commonI, commonJ)
+ w := amongs[k]
+ for lvar := common; lvar < len(w.Str); lvar++ {
+ if c+common == l {
+ diff--
+ break
+ }
+ diff = int32(env.current[c+common]) - int32(w.Str[lvar])
+ if diff != 0 {
+ break
+ }
+ common++
+ }
+ if diff < 0 {
+ j = k
+ commonJ = common
+ } else {
+ i = k
+ commonI = common
+ }
+ if j-i <= 1 {
+ if i > 0 {
+ break
+ }
+ if j == i {
+ break
+ }
+ if firstKeyInspected {
+ break
+ }
+ firstKeyInspected = true
+ }
+ }
+
+ for {
+ w := amongs[i]
+ if commonI >= len(w.Str) {
+ env.Cursor = c + len(w.Str)
+ if w.F != nil {
+ res := w.F(env, ctx)
+ env.Cursor = c + len(w.Str)
+ if res {
+ return w.B
+ }
+ } else {
+ return w.B
+ }
+ }
+ i = w.A
+ if i < 0 {
+ return 0
+ }
+ }
+}
+
+func (env *Env) FindAmongB(amongs []*Among, ctx interface{}) int32 {
+ var i int32
+ j := int32(len(amongs))
+
+ c := env.Cursor
+ lb := env.LimitBackward
+
+ var commonI, commonJ int
+
+ firstKeyInspected := false
+
+ for {
+ k := i + ((j - i) >> 1)
+ diff := int32(0)
+ common := min(commonI, commonJ)
+ w := amongs[k]
+ for lvar := len(w.Str) - int(common) - 1; lvar >= 0; lvar-- {
+ if c-common == lb {
+ diff--
+ break
+ }
+ diff = int32(env.current[c-common-1]) - int32(w.Str[lvar])
+ if diff != 0 {
+ break
+ }
+ // Count up commons. But not one character but the byte width of that char
+ common++
+ }
+ if diff < 0 {
+ j = k
+ commonJ = common
+ } else {
+ i = k
+ commonI = common
+ }
+ if j-i <= 1 {
+ if i > 0 {
+ break
+ }
+ if j == i {
+ break
+ }
+ if firstKeyInspected {
+ break
+ }
+ firstKeyInspected = true
+ }
+ }
+ for {
+ w := amongs[i]
+ if commonI >= len(w.Str) {
+ env.Cursor = c - len(w.Str)
+ if w.F != nil {
+ res := w.F(env, ctx)
+ env.Cursor = c - len(w.Str)
+ if res {
+ return w.B
+ }
+ } else {
+ return w.B
+ }
+ }
+ i = w.A
+ if i < 0 {
+ return 0
+ }
+ }
+}
+
+func (env *Env) Debug(count, lineNumber int) {
+ log.Printf("snowball debug, count: %d, line: %d", count, lineNumber)
+}
+
+func (env *Env) Clone() *Env {
+ clone := *env
+ return &clone
+}
+
+func (env *Env) AssignTo() string {
+ return env.Current()
+}