diff options
Diffstat (limited to 'vendor/github.com/couchbase/vellum/regexp/regexp.go')
-rw-r--r-- | vendor/github.com/couchbase/vellum/regexp/regexp.go | 113 |
1 files changed, 113 insertions, 0 deletions
diff --git a/vendor/github.com/couchbase/vellum/regexp/regexp.go b/vendor/github.com/couchbase/vellum/regexp/regexp.go new file mode 100644 index 0000000000..ed0e7823e1 --- /dev/null +++ b/vendor/github.com/couchbase/vellum/regexp/regexp.go @@ -0,0 +1,113 @@ +// Copyright (c) 2017 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package regexp + +import ( + "fmt" + "regexp/syntax" +) + +// ErrNoEmpty returned when "zero width assertions" are used +var ErrNoEmpty = fmt.Errorf("zero width assertions not allowed") + +// ErrNoWordBoundary returned when word boundaries are used +var ErrNoWordBoundary = fmt.Errorf("word boundaries are not allowed") + +// ErrNoBytes returned when byte literals are used +var ErrNoBytes = fmt.Errorf("byte literals are not allowed") + +// ErrNoLazy returned when lazy quantifiers are used +var ErrNoLazy = fmt.Errorf("lazy quantifiers are not allowed") + +// ErrCompiledTooBig returned when regular expression parses into +// too many instructions +var ErrCompiledTooBig = fmt.Errorf("too many instructions") + +// Regexp implements the vellum.Automaton interface for matcing a user +// specified regular expression. +type Regexp struct { + orig string + dfa *dfa +} + +// NewRegexp creates a new Regular Expression automaton with the specified +// expression. By default it is limited to approximately 10MB for the +// compiled finite state automaton. If this size is exceeded, +// ErrCompiledTooBig will be returned. +func New(expr string) (*Regexp, error) { + return NewWithLimit(expr, 10*(1<<20)) +} + +// NewRegexpWithLimit creates a new Regular Expression automaton with +// the specified expression. The size of the compiled finite state +// automaton exceeds the user specified size, ErrCompiledTooBig will be +// returned. +func NewWithLimit(expr string, size uint) (*Regexp, error) { + parsed, err := syntax.Parse(expr, syntax.Perl) + if err != nil { + return nil, err + } + compiler := newCompiler(size) + insts, err := compiler.compile(parsed) + if err != nil { + return nil, err + } + dfaBuilder := newDfaBuilder(insts) + dfa, err := dfaBuilder.build() + if err != nil { + return nil, err + } + return &Regexp{ + orig: expr, + dfa: dfa, + }, nil +} + +// Start returns the start state of this automaton. +func (r *Regexp) Start() int { + return 1 +} + +// IsMatch returns if the specified state is a matching state. +func (r *Regexp) IsMatch(s int) bool { + if s < len(r.dfa.states) { + return r.dfa.states[s].match + } + return false +} + +// CanMatch returns if the specified state can ever transition to a matching +// state. +func (r *Regexp) CanMatch(s int) bool { + if s < len(r.dfa.states) && s > 0 { + return true + } + return false +} + +// WillAlwaysMatch returns if the specified state will always end in a +// matching state. +func (r *Regexp) WillAlwaysMatch(int) bool { + return false +} + +// Accept returns the new state, resulting from the transite byte b +// when currently in the state s. +func (r *Regexp) Accept(s int, b byte) int { + if s < len(r.dfa.states) { + return r.dfa.states[s].next[b] + } + return 0 +} |