summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/couchbase/vellum/regexp/dfa.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/couchbase/vellum/regexp/dfa.go')
-rw-r--r--vendor/github.com/couchbase/vellum/regexp/dfa.go188
1 files changed, 188 insertions, 0 deletions
diff --git a/vendor/github.com/couchbase/vellum/regexp/dfa.go b/vendor/github.com/couchbase/vellum/regexp/dfa.go
new file mode 100644
index 0000000000..9864606b6a
--- /dev/null
+++ b/vendor/github.com/couchbase/vellum/regexp/dfa.go
@@ -0,0 +1,188 @@
+// Copyright (c) 2017 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package regexp
+
+import (
+ "encoding/binary"
+ "fmt"
+)
+
+// StateLimit is the maximum number of states allowed
+const StateLimit = 10000
+
+// ErrTooManyStates is returned if you attempt to build a Levenshtein
+// automaton which requries too many states.
+var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states",
+ StateLimit)
+
+type dfaBuilder struct {
+ dfa *dfa
+ cache map[string]int
+ keyBuf []byte
+}
+
+func newDfaBuilder(insts prog) *dfaBuilder {
+ d := &dfaBuilder{
+ dfa: &dfa{
+ insts: insts,
+ states: make([]*state, 0, 16),
+ },
+ cache: make(map[string]int, 1024),
+ }
+ // add 0 state that is invalid
+ d.dfa.states = append(d.dfa.states, &state{
+ next: make([]int, 256),
+ match: false,
+ })
+ return d
+}
+
+func (d *dfaBuilder) build() (*dfa, error) {
+ cur := newSparseSet(uint(len(d.dfa.insts)))
+ next := newSparseSet(uint(len(d.dfa.insts)))
+
+ d.dfa.add(cur, 0)
+ states := intStack{d.cachedState(cur)}
+ seen := make(map[int]struct{})
+ var s int
+ states, s = states.Pop()
+ for s != 0 {
+ for b := 0; b < 256; b++ {
+ ns := d.runState(cur, next, s, byte(b))
+ if ns != 0 {
+ if _, ok := seen[ns]; !ok {
+ seen[ns] = struct{}{}
+ states = states.Push(ns)
+ }
+ }
+ if len(d.dfa.states) > StateLimit {
+ return nil, ErrTooManyStates
+ }
+ }
+ states, s = states.Pop()
+ }
+ return d.dfa, nil
+}
+
+func (d *dfaBuilder) runState(cur, next *sparseSet, state int, b byte) int {
+ cur.Clear()
+ for _, ip := range d.dfa.states[state].insts {
+ cur.Add(ip)
+ }
+ d.dfa.run(cur, next, b)
+ nextState := d.cachedState(next)
+ d.dfa.states[state].next[b] = nextState
+ return nextState
+}
+
+func instsKey(insts []uint, buf []byte) []byte {
+ if cap(buf) < 8*len(insts) {
+ buf = make([]byte, 8*len(insts))
+ } else {
+ buf = buf[0 : 8*len(insts)]
+ }
+ for i, inst := range insts {
+ binary.LittleEndian.PutUint64(buf[i*8:], uint64(inst))
+ }
+ return buf
+}
+
+func (d *dfaBuilder) cachedState(set *sparseSet) int {
+ var insts []uint
+ var isMatch bool
+ for i := uint(0); i < uint(set.Len()); i++ {
+ ip := set.Get(i)
+ switch d.dfa.insts[ip].op {
+ case OpRange:
+ insts = append(insts, ip)
+ case OpMatch:
+ isMatch = true
+ insts = append(insts, ip)
+ }
+ }
+ if len(insts) == 0 {
+ return 0
+ }
+ d.keyBuf = instsKey(insts, d.keyBuf)
+ v, ok := d.cache[string(d.keyBuf)]
+ if ok {
+ return v
+ }
+ d.dfa.states = append(d.dfa.states, &state{
+ insts: insts,
+ next: make([]int, 256),
+ match: isMatch,
+ })
+ newV := len(d.dfa.states) - 1
+ d.cache[string(d.keyBuf)] = newV
+ return newV
+}
+
+type dfa struct {
+ insts prog
+ states []*state
+}
+
+func (d *dfa) add(set *sparseSet, ip uint) {
+ if set.Contains(ip) {
+ return
+ }
+ set.Add(ip)
+ switch d.insts[ip].op {
+ case OpJmp:
+ d.add(set, d.insts[ip].to)
+ case OpSplit:
+ d.add(set, d.insts[ip].splitA)
+ d.add(set, d.insts[ip].splitB)
+ }
+}
+
+func (d *dfa) run(from, to *sparseSet, b byte) bool {
+ to.Clear()
+ var isMatch bool
+ for i := uint(0); i < uint(from.Len()); i++ {
+ ip := from.Get(i)
+ switch d.insts[ip].op {
+ case OpMatch:
+ isMatch = true
+ case OpRange:
+ if d.insts[ip].rangeStart <= b &&
+ b <= d.insts[ip].rangeEnd {
+ d.add(to, ip+1)
+ }
+ }
+ }
+ return isMatch
+}
+
+type state struct {
+ insts []uint
+ next []int
+ match bool
+}
+
+type intStack []int
+
+func (s intStack) Push(v int) intStack {
+ return append(s, v)
+}
+
+func (s intStack) Pop() (intStack, int) {
+ l := len(s)
+ if l < 1 {
+ return s, 0
+ }
+ return s[:l-1], s[l-1]
+}