123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389 |
- package snowballstem
-
- import (
- "log"
- "strings"
- "unicode/utf8"
- )
-
// Env is the mutable state a Snowball stemmer works on: the text being
// rewritten together with the positions that track progress through it.
// Every position is a byte offset into the text, not a rune count.
type Env struct {
	// current is the text being processed; it is exposed through Current.
	current string

	// Cursor is the offset at which the next test or match is attempted.
	Cursor int

	// Limit and LimitBackward are the offsets at which forward and backward
	// tests, respectively, stop succeeding.
	Limit, LimitBackward int

	// Bra and Ket delimit the slice current[Bra:Ket] that SliceTo reads and
	// SliceFrom replaces.
	Bra, Ket int
}
-
- // NewEnv creates a new Snowball execution environment on the provided string
- func NewEnv(val string) *Env {
- return &Env{
- current: val,
- Cursor: 0,
- Limit: len(val),
- LimitBackward: 0,
- Bra: 0,
- Ket: len(val),
- }
- }
-
- func (env *Env) Current() string {
- return env.current
- }
-
- func (env *Env) SetCurrent(s string) {
- env.current = s
- env.Cursor = 0
- env.Limit = len(s)
- env.LimitBackward = 0
- env.Bra = 0
- env.Ket = len(s)
- }
-
- func (env *Env) ReplaceS(bra, ket int, s string) int32 {
- adjustment := int32(len(s)) - (int32(ket) - int32(bra))
- result, _ := splitAt(env.current, bra)
- rsplit := ket
- if ket < bra {
- rsplit = bra
- }
- _, rhs := splitAt(env.current, rsplit)
- result += s
- result += rhs
-
- newLim := int32(env.Limit) + adjustment
- env.Limit = int(newLim)
-
- if env.Cursor >= ket {
- newCur := int32(env.Cursor) + adjustment
- env.Cursor = int(newCur)
- } else if env.Cursor > bra {
- env.Cursor = bra
- }
-
- env.current = result
- return adjustment
- }
-
- func (env *Env) EqS(s string) bool {
- if env.Cursor >= env.Limit {
- return false
- }
-
- if strings.HasPrefix(env.current[env.Cursor:], s) {
- env.Cursor += len(s)
- for !onCharBoundary(env.current, env.Cursor) {
- env.Cursor++
- }
- return true
- }
- return false
- }
-
- func (env *Env) EqSB(s string) bool {
- if int32(env.Cursor)-int32(env.LimitBackward) < int32(len(s)) {
- return false
- } else if !onCharBoundary(env.current, env.Cursor-len(s)) ||
- !strings.HasPrefix(env.current[env.Cursor-len(s):], s) {
- return false
- } else {
- env.Cursor -= len(s)
- return true
- }
- }
-
- func (env *Env) SliceFrom(s string) bool {
- bra, ket := env.Bra, env.Ket
- env.ReplaceS(bra, ket, s)
- return true
- }
-
- func (env *Env) NextChar() {
- env.Cursor++
- for !onCharBoundary(env.current, env.Cursor) {
- env.Cursor++
- }
- }
-
- func (env *Env) PrevChar() {
- env.Cursor--
- for !onCharBoundary(env.current, env.Cursor) {
- env.Cursor--
- }
- }
-
- func (env *Env) ByteIndexForHop(delta int32) int32 {
- if delta > 0 {
- res := env.Cursor
- for delta > 0 {
- res++
- delta--
- for res <= len(env.current) && !onCharBoundary(env.current, res) {
- res++
- }
- }
- return int32(res)
- } else if delta < 0 {
- res := env.Cursor
- for delta < 0 {
- res--
- delta++
- for res >= 0 && !onCharBoundary(env.current, res) {
- res--
- }
- }
- return int32(res)
- } else {
- return int32(env.Cursor)
- }
- }
-
- func (env *Env) InGrouping(chars []byte, min, max int32) bool {
- if env.Cursor >= env.Limit {
- return false
- }
-
- r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
- if r != utf8.RuneError {
- if r > max || r < min {
- return false
- }
- r -= min
- if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
- return false
- }
- env.NextChar()
- return true
- }
- return false
- }
-
- func (env *Env) InGroupingB(chars []byte, min, max int32) bool {
- if env.Cursor <= env.LimitBackward {
- return false
- }
- env.PrevChar()
- r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
- if r != utf8.RuneError {
- env.NextChar()
- if r > max || r < min {
- return false
- }
- r -= min
- if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
- return false
- }
- env.PrevChar()
- return true
- }
- return false
- }
-
- func (env *Env) OutGrouping(chars []byte, min, max int32) bool {
- if env.Cursor >= env.Limit {
- return false
- }
- r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
- if r != utf8.RuneError {
- if r > max || r < min {
- env.NextChar()
- return true
- }
- r -= min
- if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
- env.NextChar()
- return true
- }
- }
- return false
- }
-
- func (env *Env) OutGroupingB(chars []byte, min, max int32) bool {
- if env.Cursor <= env.LimitBackward {
- return false
- }
- env.PrevChar()
- r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
- if r != utf8.RuneError {
- env.NextChar()
- if r > max || r < min {
- env.PrevChar()
- return true
- }
- r -= min
- if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
- env.PrevChar()
- return true
- }
- }
- return false
- }
-
- func (env *Env) SliceDel() bool {
- return env.SliceFrom("")
- }
-
- func (env *Env) Insert(bra, ket int, s string) {
- adjustment := env.ReplaceS(bra, ket, s)
- if bra <= env.Bra {
- env.Bra = int(int32(env.Bra) + adjustment)
- }
- if bra <= env.Ket {
- env.Ket = int(int32(env.Ket) + adjustment)
- }
- }
-
- func (env *Env) SliceTo() string {
- return env.current[env.Bra:env.Ket]
- }
-
- func (env *Env) FindAmong(amongs []*Among, ctx interface{}) int32 {
- var i int32
- j := int32(len(amongs))
-
- c := env.Cursor
- l := env.Limit
-
- var commonI, commonJ int
-
- firstKeyInspected := false
- for {
- k := i + ((j - i) >> 1)
- var diff int32
- common := min(commonI, commonJ)
- w := amongs[k]
- for lvar := common; lvar < len(w.Str); lvar++ {
- if c+common == l {
- diff--
- break
- }
- diff = int32(env.current[c+common]) - int32(w.Str[lvar])
- if diff != 0 {
- break
- }
- common++
- }
- if diff < 0 {
- j = k
- commonJ = common
- } else {
- i = k
- commonI = common
- }
- if j-i <= 1 {
- if i > 0 {
- break
- }
- if j == i {
- break
- }
- if firstKeyInspected {
- break
- }
- firstKeyInspected = true
- }
- }
-
- for {
- w := amongs[i]
- if commonI >= len(w.Str) {
- env.Cursor = c + len(w.Str)
- if w.F != nil {
- res := w.F(env, ctx)
- env.Cursor = c + len(w.Str)
- if res {
- return w.B
- }
- } else {
- return w.B
- }
- }
- i = w.A
- if i < 0 {
- return 0
- }
- }
- }
-
- func (env *Env) FindAmongB(amongs []*Among, ctx interface{}) int32 {
- var i int32
- j := int32(len(amongs))
-
- c := env.Cursor
- lb := env.LimitBackward
-
- var commonI, commonJ int
-
- firstKeyInspected := false
-
- for {
- k := i + ((j - i) >> 1)
- diff := int32(0)
- common := min(commonI, commonJ)
- w := amongs[k]
- for lvar := len(w.Str) - int(common) - 1; lvar >= 0; lvar-- {
- if c-common == lb {
- diff--
- break
- }
- diff = int32(env.current[c-common-1]) - int32(w.Str[lvar])
- if diff != 0 {
- break
- }
- // Count up commons. But not one character but the byte width of that char
- common++
- }
- if diff < 0 {
- j = k
- commonJ = common
- } else {
- i = k
- commonI = common
- }
- if j-i <= 1 {
- if i > 0 {
- break
- }
- if j == i {
- break
- }
- if firstKeyInspected {
- break
- }
- firstKeyInspected = true
- }
- }
- for {
- w := amongs[i]
- if commonI >= len(w.Str) {
- env.Cursor = c - len(w.Str)
- if w.F != nil {
- res := w.F(env, ctx)
- env.Cursor = c - len(w.Str)
- if res {
- return w.B
- }
- } else {
- return w.B
- }
- }
- i = w.A
- if i < 0 {
- return 0
- }
- }
- }
-
- func (env *Env) Debug(count, lineNumber int) {
- log.Printf("snowball debug, count: %d, line: %d", count, lineNumber)
- }
-
- func (env *Env) Clone() *Env {
- clone := *env
- return &clone
- }
-
- func (env *Env) AssignTo() string {
- return env.Current()
- }
|