aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/couchbase/vellum/regexp/regexp.go
blob: ed0e7823e101b551b6e7061d82b8e46fc620b6c2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
//  Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package regexp

import (
	"fmt"
	"regexp/syntax"
)

// Errors reported for regular expressions that cannot be turned into an
// automaton.
var (
	// ErrNoEmpty is returned when "zero width assertions" are used.
	ErrNoEmpty = fmt.Errorf("zero width assertions not allowed")

	// ErrNoWordBoundary is returned when word boundaries are used.
	ErrNoWordBoundary = fmt.Errorf("word boundaries are not allowed")

	// ErrNoBytes is returned when byte literals are used.
	ErrNoBytes = fmt.Errorf("byte literals are not allowed")

	// ErrNoLazy is returned when lazy quantifiers are used.
	ErrNoLazy = fmt.Errorf("lazy quantifiers are not allowed")

	// ErrCompiledTooBig is returned when the regular expression parses into
	// too many instructions.
	ErrCompiledTooBig = fmt.Errorf("too many instructions")
)

// Regexp implements the vellum.Automaton interface for matching a user
// specified regular expression.
type Regexp struct {
	// orig is the expression text exactly as it was passed to the
	// constructor.
	orig string
	// dfa is the automaton built from the expression; the methods on
	// Regexp answer their queries by indexing into its states.
	dfa  *dfa
}

// New creates a new regular expression automaton for expr using the default
// size limit of approximately 10MB for the compiled finite state automaton.
// If this size is exceeded, ErrCompiledTooBig will be returned.
//
// It is shorthand for calling NewWithLimit with that default limit.
func New(expr string) (*Regexp, error) {
	// 10 * 2^20, the same value the limit has always had.
	const defaultSizeLimit uint = 10 << 20
	return NewWithLimit(expr, defaultSizeLimit)
}

// NewWithLimit creates a new regular expression automaton for expr, using
// size as the limit handed to the compiler. If the size of the compiled
// finite state automaton exceeds the user specified size, ErrCompiledTooBig
// will be returned.
//
// The expression is parsed with the Perl syntax flags, compiled into
// instructions, and then built into a DFA. The first error from any of those
// three steps is returned unchanged together with a nil *Regexp.
func NewWithLimit(expr string, size uint) (*Regexp, error) {
	// Step 1: text -> syntax tree.
	tree, err := syntax.Parse(expr, syntax.Perl)
	if err != nil {
		return nil, err
	}

	// Step 2: syntax tree -> instructions, bounded by size.
	c := newCompiler(size)
	program, err := c.compile(tree)
	if err != nil {
		return nil, err
	}

	// Step 3: instructions -> DFA.
	builder := newDfaBuilder(program)
	automaton, err := builder.build()
	if err != nil {
		return nil, err
	}

	return &Regexp{orig: expr, dfa: automaton}, nil
}

// Start returns the state from which matching begins. It is the same state
// (1) for every Regexp and does not depend on the receiver's contents.
func (r *Regexp) Start() int {
	const startState = 1
	return startState
}

// IsMatch reports whether the specified state is a matching state.
//
// A state that is not a valid index into the DFA's state table (negative, or
// at or beyond its length) is never a match; false is returned for it rather
// than indexing out of range.
func (r *Regexp) IsMatch(s int) bool {
	// Guard both ends: a negative s would otherwise panic on the index below.
	if s < 0 || s >= len(r.dfa.states) {
		return false
	}
	return r.dfa.states[s].match
}

// CanMatch reports whether the specified state can ever transition to a
// matching state. Every state strictly greater than 0 that is a valid index
// into the DFA's state table reports true; state 0 and anything outside the
// table report false.
func (r *Regexp) CanMatch(s int) bool {
	return s > 0 && s < len(r.dfa.states)
}

// WillAlwaysMatch reports whether the specified state will always end in a
// matching state. This implementation ignores its argument and answers false
// for every state.
func (r *Regexp) WillAlwaysMatch(_ int) bool {
	return false
}

// Accept returns the new state resulting from the transition on byte b
// when currently in the state s.
//
// If s is not a valid index into the DFA's state table (negative, or at or
// beyond its length), 0 is returned; CanMatch reports state 0 as unable to
// match.
func (r *Regexp) Accept(s int, b byte) int {
	// Guard both ends: a negative s would otherwise panic on the index below.
	if s < 0 || s >= len(r.dfa.states) {
		return 0
	}
	return r.dfa.states[s].next[b]
}