summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/dlclark
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/dlclark')
-rw-r--r--vendor/github.com/dlclark/regexp2/.gitignore27
-rw-r--r--vendor/github.com/dlclark/regexp2/.travis.yml5
-rw-r--r--vendor/github.com/dlclark/regexp2/ATTRIB133
-rw-r--r--vendor/github.com/dlclark/regexp2/LICENSE21
-rw-r--r--vendor/github.com/dlclark/regexp2/README.md82
-rw-r--r--vendor/github.com/dlclark/regexp2/match.go347
-rw-r--r--vendor/github.com/dlclark/regexp2/regexp.go358
-rw-r--r--vendor/github.com/dlclark/regexp2/replace.go177
-rw-r--r--vendor/github.com/dlclark/regexp2/runner.go1621
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/charclass.go854
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/code.go274
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/escape.go94
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/fuzz.go20
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/parser.go2202
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/prefix.go896
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/replacerdata.go87
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/tree.go654
-rw-r--r--vendor/github.com/dlclark/regexp2/syntax/writer.go500
-rw-r--r--vendor/github.com/dlclark/regexp2/testoutput17061
19 files changed, 15413 insertions, 0 deletions
diff --git a/vendor/github.com/dlclark/regexp2/.gitignore b/vendor/github.com/dlclark/regexp2/.gitignore
new file mode 100644
index 0000000000..fb844c330c
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/.gitignore
@@ -0,0 +1,27 @@
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
+*.prof
+*.out
+
+.DS_Store
diff --git a/vendor/github.com/dlclark/regexp2/.travis.yml b/vendor/github.com/dlclark/regexp2/.travis.yml
new file mode 100644
index 0000000000..a24aededa9
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/.travis.yml
@@ -0,0 +1,5 @@
+language: go
+
+go:
+ - 1.5
+ - tip \ No newline at end of file
diff --git a/vendor/github.com/dlclark/regexp2/ATTRIB b/vendor/github.com/dlclark/regexp2/ATTRIB
new file mode 100644
index 0000000000..cdf4560b9e
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/ATTRIB
@@ -0,0 +1,133 @@
+============
+These pieces of code were ported from dotnet/corefx:
+
+syntax/charclass.go (from RegexCharClass.cs): ported to use the built-in Go unicode classes. Canonicalize is
+ a direct port, but most of the other code required large changes because the C# implementation
+ used a string to represent the CharSet data structure and I cleaned that up in my implementation.
+
+syntax/code.go (from RegexCode.cs): ported literally with various cleanups and layout to make it more Go-ish.
+
+syntax/escape.go (from RegexParser.cs): ported Escape method and added some optimizations. Unescape is inspired by
+ the C# implementation but couldn't be directly ported because of the lack of do-while syntax in Go.
+
+syntax/parser.go (from RegexpParser.cs and RegexOptions.cs): ported parser struct and associated methods as
+ literally as possible. Several language differences required changes. E.g. lack pre/post-fix increments as
+ expressions, lack of do-while loops, lack of overloads, etc.
+
+syntax/prefix.go (from RegexFCD.cs and RegexBoyerMoore.cs): ported as literally as possible and added support
+ for unicode chars that are longer than the 16-bit char in C# for the 32-bit rune in Go.
+
+syntax/replacerdata.go (from RegexReplacement.cs): conceptually ported and re-organized to handle differences
+ in charclass implementation, and fix odd code layout between RegexParser.cs, Regex.cs, and RegexReplacement.cs.
+
+syntax/tree.go (from RegexTree.cs and RegexNode.cs): ported literally as possible.
+
+syntax/writer.go (from RegexWriter.cs): ported literally with minor changes to make it more Go-ish.
+
+match.go (from RegexMatch.cs): ported, simplified, and changed to handle Go's lack of inheritance.
+
+regexp.go (from Regex.cs and RegexOptions.cs): conceptually serves the same "starting point", but is simplified
+ and changed to handle differences in C# strings and Go strings/runes.
+
+replace.go (from RegexReplacement.cs): ported closely and then cleaned up to combine the MatchEvaluator and
+ simple string replace implementations.
+
+runner.go (from RegexRunner.cs): ported literally as possible.
+
+regexp_test.go (from CaptureTests.cs and GroupNamesAndNumbers.cs): conceptually ported, but the code was
+ manually structured like Go tests.
+
+replace_test.go (from RegexReplaceStringTest0.cs): conceptually ported
+
+rtl_test.go (from RightToLeft.cs): conceptually ported
+---
+dotnet/corefx was released under this license:
+
+The MIT License (MIT)
+
+Copyright (c) Microsoft Corporation
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+============
+These pieces of code are copied from the Go framework:
+
+- The overall directory structure of regexp2 was inspired by the Go runtime regexp package.
+- The optimization in the escape method of syntax/escape.go is from the Go runtime QuoteMeta() func in regexp/regexp.go
+- The method signatures in regexp.go are designed to match the Go framework regexp methods closely
+- func regexp2.MustCompile and func quote are almost identical to the regexp package versions
+- BenchmarkMatch* and TestProgramTooLong* funcs in regexp_performance_test.go were copied from the framework
+ regexp/exec_test.go
+---
+The Go framework was released under this license:
+
+Copyright (c) 2012 The Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+============
+Some test data were gathered from the Mono project.
+
+regexp_mono_test.go: ported from https://github.com/mono/mono/blob/master/mcs/class/System/Test/System.Text.RegularExpressions/PerlTrials.cs
+---
+Mono tests released under this license:
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
diff --git a/vendor/github.com/dlclark/regexp2/LICENSE b/vendor/github.com/dlclark/regexp2/LICENSE
new file mode 100644
index 0000000000..fe83dfdc92
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) Doug Clark
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/vendor/github.com/dlclark/regexp2/README.md b/vendor/github.com/dlclark/regexp2/README.md
new file mode 100644
index 0000000000..ec7b5abed4
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/README.md
@@ -0,0 +1,82 @@
+# regexp2 - full featured regular expressions for Go
+Regexp2 is a feature-rich RegExp engine for Go. It doesn't have constant time guarantees like the built-in `regexp` package, but it allows backtracking and is compatible with Perl5 and .NET. You'll likely be better off with the RE2 engine from the `regexp` package and should only use this if you need to write very complex patterns or require compatibility with .NET.
+
+## Basis of the engine
+The engine is ported from the .NET framework's System.Text.RegularExpressions.Regex engine. That engine was open sourced in 2015 under the MIT license. There are some fundamental differences between .NET strings and Go strings that required a bit of borrowing from the Go framework regex engine as well. I cleaned up a couple of the dirtier bits during the port (regexcharclass.cs was terrible), but the parse tree, code emitted, and therefore patterns matched should be identical.
+
+## Installing
+This is a go-gettable library, so install is easy:
+
+ go get github.com/dlclark/regexp2/...
+
+## Usage
+Usage is similar to the Go `regexp` package. Just like in `regexp`, you start by converting a regex into a state machine via the `Compile` or `MustCompile` methods. They ultimately do the same thing, but `MustCompile` will panic if the regex is invalid. You can then use the provided `Regexp` struct to find matches repeatedly. A `Regexp` struct is safe to use across goroutines.
+
+```go
+re := regexp2.MustCompile(`Your pattern`, 0)
+if isMatch, _ := re.MatchString(`Something to match`); isMatch {
+ //do something
+}
+```
+
+The only error that the `*Match*` methods *should* return is a Timeout if you set the `re.MatchTimeout` field. Any other error is a bug in the `regexp2` package. If you need more details about capture groups in a match then use the `FindStringMatch` method, like so:
+
+```go
+if m, _ := re.FindStringMatch(`Something to match`); m != nil {
+ // the whole match is always group 0
+ fmt.Printf("Group 0: %v\n", m.String())
+
+ // you can get all the groups too
+ gps := m.Groups()
+
+ // a group can be captured multiple times, so each cap is separately addressable
+ fmt.Printf("Group 1, first capture: %v\n", gps[1].Captures[0].String())
+ fmt.Printf("Group 1, second capture: %v\n", gps[1].Captures[1].String())
+}
+```
+
+Group 0 is embedded in the Match. Group 0 is an automatically-assigned group that encompasses the whole pattern. This means that `m.String()` is the same as `m.Group.String()` and `m.Groups()[0].String()`
+
+The __last__ capture is embedded in each group, so `g.String()` will return the same thing as `g.Capture.String()` and `g.Captures[len(g.Captures)-1].String()`.
+
+## Compare `regexp` and `regexp2`
+| Category | regexp | regexp2 |
+| --- | --- | --- |
+| Catastrophic backtracking possible | no, constant execution time guarantees | yes, if your pattern is at risk you can use the `re.MatchTimeout` field |
+| Python-style capture groups `(?P<name>re)` | yes | no |
+| .NET-style capture groups `(?<name>re)` or `(?'name're)` | no | yes |
+| comments `(?#comment)` | no | yes |
+| branch numbering reset `(?\|a\|b)` | no | no |
+| possessive match `(?>re)` | no | yes |
+| positive lookahead `(?=re)` | no | yes |
+| negative lookahead `(?!re)` | no | yes |
+| positive lookbehind `(?<=re)` | no | yes |
+| negative lookbehind `(?<!re)` | no | yes |
+| back reference `\1` | no | yes |
+| named back reference `\k'name'` | no | yes |
+| named ascii character class `[[:foo:]]`| yes | no |
+| conditionals `(?(expr)yes\|no)` | no | yes |
+
+## RE2 compatibility mode
+The default behavior of `regexp2` is to match the .NET regexp engine, however the `RE2` option is provided to change the parsing to increase compatibility with RE2. Using the `RE2` option when compiling a regexp will not take away any features, but will change the following behaviors:
+* add support for named ascii character classes (e.g. `[[:foo:]]`)
+* add support for python-style capture groups (e.g. `(?P<name>re)`)
+
+```go
+re := regexp2.MustCompile(`Your RE2-compatible pattern`, regexp2.RE2)
+if isMatch, _ := re.MatchString(`Something to match`); isMatch {
+ //do something
+}
+```
+
+This feature is a work in progress and I'm open to ideas for more things to put here (maybe more relaxed character escaping rules?).
+
+
+## Library features that I'm still working on
+- Regex split
+
+## Potential bugs
+I've run a battery of tests against regexp2 from various sources and found the debug output matches the .NET engine, but .NET and Go handle strings very differently. I've attempted to handle these differences, but most of my testing deals with basic ASCII with a little bit of multi-byte Unicode. There's a chance that there are bugs in the string handling related to character sets with supplementary Unicode chars. Right-to-Left support is coded, but not well tested either.
+
+## Find a bug?
+I'm open to new issues and pull requests with tests if you find something odd!
diff --git a/vendor/github.com/dlclark/regexp2/match.go b/vendor/github.com/dlclark/regexp2/match.go
new file mode 100644
index 0000000000..1871cffe30
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/match.go
@@ -0,0 +1,347 @@
+package regexp2
+
+import (
+ "bytes"
+ "fmt"
+)
+
+// Match is a single regex result. It embeds group 0 (the whole match) and
+// gives access to the remaining groups and their repeated captures through
+// Groups, GroupByName and GroupByNumber.
+type Match struct {
+ Group // embedded group 0
+
+ // regex is the pattern that produced this match; it is used to translate
+ // between group names and numbers.
+ regex *Regexp
+ // otherGroups holds groups 1..n and is built lazily by populateOtherGroups.
+ otherGroups []Group
+
+ // input to the match
+ textpos int
+ textstart int
+
+ // capcount is set by tidy to the number of captures recorded for group 0.
+ capcount int
+ caps []int
+ // sparseCaps, when non-nil, maps a group number to its slot in
+ // matches/matchcount (consulted by GroupByNumber).
+ sparseCaps map[int]int
+
+ // output from the match
+ // matches[g] is a flat list of (start, length) pairs for group slot g and
+ // matchcount[g] is how many of those pairs are currently in use.
+ matches [][]int
+ matchcount []int
+
+ // whether we've done any balancing with this match. If we
+ // have done balancing, we'll need to do extra work in tidy().
+ balancing bool
+}
+
+// Group is an explicit or implicit (group 0) matched group within the pattern.
+type Group struct {
+ Capture // the last capture of this group is embedded for ease of use
+
+ Name string // group name
+ Captures []Capture // captures of this group
+}
+
+// Capture is a single capture of text within the larger original string.
+// Index and Length are offsets into the rune slice held in text, so they
+// count runes rather than bytes.
+type Capture struct {
+ // the original string, as runes
+ text []rune
+ // the position in the original string where the first character of
+ // captured substring was found.
+ Index int
+ // the length of the captured substring.
+ Length int
+}
+
+// String converts the captured span of the input back into a string.
+func (c *Capture) String() string {
+	captured := c.text[c.Index : c.Index+c.Length]
+	return string(captured)
+}
+
+// Runes returns the captured span as a sub-slice of the input runes.
+// The result shares its backing array with the original input.
+func (c *Capture) Runes() []rune {
+	captured := c.text[c.Index : c.Index+c.Length]
+	return captured
+}
+
+// newMatch allocates a Match for regex with room for capcount group slots.
+// Group 0 is named "0" and gets its single (start, length) pair allocated
+// up front; the other groups' capture arrays are allocated on demand by
+// addMatch.
+func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match {
+	m := &Match{
+		regex:      regex,
+		textstart:  startpos,
+		matches:    make([][]int, capcount),
+		matchcount: make([]int, capcount),
+		balancing:  false,
+	}
+	m.text = text
+	m.Name = "0"
+	m.matches[0] = make([]int, 2)
+	return m
+}
+
+// newMatchSparse builds a Match exactly like newMatch and additionally
+// attaches caps as the group-number-to-slot table used by GroupByNumber.
+func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match {
+	sparse := newMatch(regex, capcount, text, startpos)
+	sparse.sparseCaps = caps
+	return sparse
+}
+
+// reset prepares the Match for reuse against new input: it swaps in the new
+// text and start position, zeroes every group's capture count and clears
+// the balancing flag. The capture arrays themselves are kept for reuse.
+func (m *Match) reset(text []rune, textstart int) {
+	m.text, m.textstart = text, textstart
+	for slot := range m.matchcount {
+		m.matchcount[slot] = 0
+	}
+	m.balancing = false
+}
+
+// tidy finalizes the match state. It copies group 0's first (start, length)
+// pair into the embedded Capture, records textpos and the number of group-0
+// captures, and makes the root capture the sole entry of Group.Captures.
+// If any balancing was recorded, every group's capture array is compacted so
+// that only real (non-negative) entries remain and matchcount is updated.
+func (m *Match) tidy(textpos int) {
+
+ interval := m.matches[0]
+ m.Index = interval[0]
+ m.Length = interval[1]
+ m.textpos = textpos
+ m.capcount = m.matchcount[0]
+ //copy our root capture to the list
+ m.Group.Captures = []Capture{m.Group.Capture}
+
+ if m.balancing {
+ // The idea here is that we want to compact all of our unbalanced captures. To do that we
+ // use j basically as a count of how many unbalanced captures we have at any given time
+ // (really j is an index, but j/2 is the count). First we skip past all of the real captures
+ // until we find a balance capture. Then we check each subsequent entry. If it's a balance
+ // capture (it's negative), we decrement j. If it's a real capture, we increment j and copy
+ // it down to the last free position.
+ for cap := 0; cap < len(m.matchcount); cap++ {
+ // limit is the number of ints in use for this group (two per capture)
+ limit := m.matchcount[cap] * 2
+ matcharray := m.matches[cap]
+
+ var i, j int
+
+ // find the first balance (negative) entry; everything before it is already compact
+ for i = 0; i < limit; i++ {
+ if matcharray[i] < 0 {
+ break
+ }
+ }
+
+ for j = i; i < limit; i++ {
+ if matcharray[i] < 0 {
+ // skip negative values
+ j--
+ } else {
+ // but if we find something positive (an actual capture), copy it back to the last
+ // unbalanced position.
+ if i != j {
+ matcharray[j] = matcharray[i]
+ }
+ j++
+ }
+ }
+
+ // j counts ints, so halve it to get the number of surviving captures
+ m.matchcount[cap] = j / 2
+ }
+
+ m.balancing = false
+ }
+}
+
+// isMatched reports whether the group in slot cap currently holds a capture.
+// A group counts as matched when it has at least one recorded capture and
+// the length entry of its latest capture is not the sentinel value -2.
+func (m *Match) isMatched(cap int) bool {
+	if cap >= len(m.matchcount) {
+		return false
+	}
+	count := m.matchcount[cap]
+	if count <= 0 {
+		return false
+	}
+	return m.matches[cap][count*2-1] != (-3 + 1)
+}
+
+// matchIndex returns the start index of the most recent capture of the group
+// in slot cap. A negative stored value is a balancing reference: it is
+// decoded with -3-value into the position of the entry that holds the real
+// index.
+func (m *Match) matchIndex(cap int) int {
+	start := m.matches[cap][m.matchcount[cap]*2-2]
+	if start < 0 {
+		return m.matches[cap][-3-start]
+	}
+	return start
+}
+
+// matchLength returns the length of the most recent capture of the group in
+// slot cap. A negative stored value is a balancing reference: it is decoded
+// with -3-value into the position of the entry that holds the real length.
+func (m *Match) matchLength(cap int) int {
+	length := m.matches[cap][m.matchcount[cap]*2-1]
+	if length < 0 {
+		return m.matches[cap][-3-length]
+	}
+	return length
+}
+
+// addMatch appends a (start, l) capture to the group in slot c. The group's
+// capture array is allocated on first use and regrown (to eight ints per
+// existing capture) whenever the next pair would not fit.
+func (m *Match) addMatch(c, start, l int) {
+	if m.matches[c] == nil {
+		m.matches[c] = make([]int, 2)
+	}
+
+	used := m.matchcount[c]
+	slot := used * 2
+
+	if slot+2 > len(m.matches[c]) {
+		grown := make([]int, used*8)
+		copy(grown, m.matches[c][:slot])
+		m.matches[c] = grown
+	}
+
+	m.matches[c][slot] = start
+	m.matches[c][slot+1] = l
+	m.matchcount[c] = used + 1
+}
+
+// Nonpublic builder: Add a capture to balance the specified group. This is used by the
+// balanced match construct. (?<foo-foo2>...)
+//
+// If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(c).
+// However, since we have backtracking, we need to keep track of everything.
+//
+// Instead of removing a capture, a new entry holding negative values is appended. A negative
+// value v is a reference to position -3-v in the same capture array, so the original captures
+// stay in place and the balancing entry can be undone with removeMatch. Setting m.balancing
+// makes tidy compact these reference entries away afterwards.
+func (m *Match) balanceMatch(c int) {
+ m.balancing = true
+
+ // we'll look at the last capture first
+ capcount := m.matchcount[c]
+ target := capcount*2 - 2
+
+ // first see if it is negative, and therefore is a reference to the next available
+ // capture group for balancing. If it is, we'll reset target to point to that capture.
+ if m.matches[c][target] < 0 {
+ target = -3 - m.matches[c][target]
+ }
+
+ // move back to the previous capture
+ target -= 2
+
+ // if the previous capture is a reference, just copy that reference to the end. Otherwise, point to it.
+ if target >= 0 && m.matches[c][target] < 0 {
+ m.addMatch(c, m.matches[c][target], m.matches[c][target+1])
+ } else {
+ // encode positions target (index) and target+1 (length) as negative references
+ m.addMatch(c, -3-target, -4-target /* == -3 - (target + 1) */)
+ }
+}
+
+// Nonpublic builder: removes the most recent capture of the group in slot c
+// by decrementing its capture count. The stored values are left in place and
+// are overwritten by the next addMatch.
+func (m *Match) removeMatch(c int) {
+ m.matchcount[c]--
+}
+
+// GroupCount returns the number of group slots tracked by this match,
+// including group 0. It is the length of the per-group count table, so it
+// also counts groups that did not capture anything.
+func (m *Match) GroupCount() int {
+ return len(m.matchcount)
+}
+
+// GroupByName looks up a group by its name. It returns nil when the regex
+// does not know the name; otherwise it behaves like GroupByNumber for the
+// number the name resolves to.
+func (m *Match) GroupByName(name string) *Group {
+	if num := m.regex.GroupNumberFromName(name); num >= 0 {
+		return m.GroupByNumber(num)
+	}
+	return nil
+}
+
+// GroupByNumber looks up a group by its number and returns nil when the
+// number is out of range. When a sparse capture table is present the number
+// is first translated to its slot; numbers missing from that table are used
+// unchanged.
+func (m *Match) GroupByNumber(num int) *Group {
+	// indexing a nil map is safe and simply reports !ok
+	if slot, ok := m.sparseCaps[num]; ok {
+		num = slot
+	}
+
+	switch {
+	case num < 0 || num >= len(m.matchcount):
+		return nil
+	case num == 0:
+		return &m.Group
+	}
+
+	m.populateOtherGroups()
+	return &m.otherGroups[num-1]
+}
+
+// Groups returns a fresh slice holding every group of the match, with
+// group 0 (the full match) first followed by groups 1..n.
+func (m *Match) Groups() []Group {
+	m.populateOtherGroups()
+
+	all := make([]Group, 0, len(m.otherGroups)+1)
+	all = append(all, m.Group)
+	all = append(all, m.otherGroups...)
+	return all
+}
+
+// populateOtherGroups lazily builds the Group values for slots 1..n from the
+// raw capture arrays. It does nothing after the first call.
+func (m *Match) populateOtherGroups() {
+	if m.otherGroups != nil {
+		return
+	}
+
+	m.otherGroups = make([]Group, len(m.matchcount)-1)
+	for i := range m.otherGroups {
+		slot := i + 1
+		m.otherGroups[i] = newGroup(m.regex.GroupNameFromNumber(slot), m.text, m.matches[slot], m.matchcount[slot])
+	}
+}
+
+// groupValueAppendToBuf writes the text of the last capture of the group in
+// slot groupnum to buf. Nothing is written when the group has no captures.
+func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) {
+	count := m.matchcount[groupnum]
+	if count == 0 {
+		return
+	}
+
+	caps := m.matches[groupnum]
+	start := caps[(count-1)*2]
+	end := start + caps[(count*2)-1]
+
+	for pos := start; pos < end; pos++ {
+		buf.WriteRune(m.text[pos])
+	}
+}
+
+// newGroup builds a Group called name from the first capcount
+// (index, length) pairs in caps. Every pair becomes one entry of Captures
+// and the last pair is also exposed through the embedded Capture.
+func newGroup(name string, text []rune, caps []int, capcount int) Group {
+	captures := make([]Capture, capcount)
+	for i := range captures {
+		captures[i] = Capture{
+			text:   text,
+			Index:  caps[i*2],
+			Length: caps[i*2+1],
+		}
+	}
+
+	g := Group{Name: name, Captures: captures}
+	// text is set even when the group captured nothing
+	g.text = text
+	if capcount > 0 {
+		last := (capcount - 1) * 2
+		g.Index = caps[last]
+		g.Length = caps[last+1]
+	}
+	return g
+}
+
+func (m *Match) dump() string {
+ buf := &bytes.Buffer{}
+ buf.WriteRune('\n')
+ if len(m.sparseCaps) > 0 {
+ for k, v := range m.sparseCaps {
+ fmt.Fprintf(buf, "Slot %v -> %v\n", k, v)
+ }
+ }
+
+ for i, g := range m.Groups() {
+ fmt.Fprintf(buf, "Group %v (%v), %v caps:\n", i, g.Name, len(g.Captures))
+
+ for _, c := range g.Captures {
+ fmt.Fprintf(buf, " (%v, %v) %v\n", c.Index, c.Length, c.String())
+ }
+ }
+ /*
+ for i := 0; i < len(m.matchcount); i++ {
+ fmt.Fprintf(buf, "\nGroup %v (%v):\n", i, m.regex.GroupNameFromNumber(i))
+
+ for j := 0; j < m.matchcount[i]; j++ {
+ text := ""
+
+ if m.matches[i][j*2] >= 0 {
+ start := m.matches[i][j*2]
+ text = m.text[start : start+m.matches[i][j*2+1]]
+ }
+
+ fmt.Fprintf(buf, " (%v, %v) %v\n", m.matches[i][j*2], m.matches[i][j*2+1], text)
+ }
+ }
+ */
+ return buf.String()
+}
diff --git a/vendor/github.com/dlclark/regexp2/regexp.go b/vendor/github.com/dlclark/regexp2/regexp.go
new file mode 100644
index 0000000000..709e034259
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/regexp.go
@@ -0,0 +1,358 @@
+/*
+Package regexp2 is a regexp package that has an interface similar to Go's framework regexp engine but uses a
+more full-featured regex engine behind the scenes.
+
+It doesn't have constant time guarantees, but it allows backtracking and is compatible with Perl5 and .NET.
+You'll likely be better off with the RE2 engine from the regexp package and should only use this if you
+need to write very complex patterns or require compatibility with .NET.
+*/
+package regexp2
+
+import (
+ "errors"
+ "math"
+ "strconv"
+ "sync"
+ "time"
+
+ "github.com/dlclark/regexp2/syntax"
+)
+
+// DefaultMatchTimeout is the timeout assigned to every Regexp created by
+// Compile. It is the largest representable duration, i.e. effectively
+// "forever".
+var DefaultMatchTimeout = time.Duration(math.MaxInt64)
+
+// Regexp is the representation of a compiled regular expression.
+// A Regexp is safe for concurrent use by multiple goroutines.
+type Regexp struct {
+ // timeout when trying to find matches; Compile initializes it to
+ // DefaultMatchTimeout
+ MatchTimeout time.Duration
+
+ // read-only after Compile
+ pattern string // as passed to Compile
+ options RegexOptions // options
+
+ caps map[int]int // capnum->index
+ capnames map[string]int //capture group name -> index
+ capslist []string //sorted list of capture group names
+ capsize int // size of the capture array
+
+ code *syntax.Code // compiled program
+
+ // cache of machines for running regexp
+ // NOTE(review): muRun presumably guards runner; the code using them is
+ // outside this file.
+ muRun sync.Mutex
+ runner []*runner
+}
+
+// Compile parses expr under the given options and translates the parse tree
+// into an executable program. On success it returns a Regexp ready for
+// matching, with MatchTimeout set to DefaultMatchTimeout; any parse or
+// code-generation error is returned unchanged.
+func Compile(expr string, opt RegexOptions) (*Regexp, error) {
+	tree, err := syntax.Parse(expr, syntax.RegexOptions(opt))
+	if err != nil {
+		return nil, err
+	}
+
+	code, err := syntax.Write(tree)
+	if err != nil {
+		return nil, err
+	}
+
+	re := &Regexp{
+		MatchTimeout: DefaultMatchTimeout,
+		pattern:      expr,
+		options:      opt,
+		code:         code,
+		caps:         code.Caps,
+		capsize:      code.Capsize,
+		capnames:     tree.Capnames,
+		capslist:     tree.Caplist,
+	}
+	return re, nil
+}
+
+// MustCompile is like Compile but panics if the expression cannot be parsed.
+// It simplifies safe initialization of global variables holding compiled
+// regular expressions.
+func MustCompile(str string, opt RegexOptions) *Regexp {
+	re, err := Compile(str, opt)
+	if err == nil {
+		return re
+	}
+	panic(`regexp2: Compile(` + quote(str) + `): ` + err.Error())
+}
+
+// Escape adds backslashes to any special characters in the input string.
+// It is a thin wrapper around syntax.Escape.
+func Escape(input string) string {
+ return syntax.Escape(input)
+}
+
+// Unescape removes any backslashes from previously-escaped special characters
+// in the input string. It is a thin wrapper around syntax.Unescape and
+// returns whatever error that function reports.
+func Unescape(input string) (string, error) {
+ return syntax.Unescape(input)
+}
+
+// String returns the source text used to compile the regular expression,
+// exactly as it was passed to Compile.
+func (re *Regexp) String() string {
+ return re.pattern
+}
+
+// quote renders s as a Go string literal for use in messages: a raw
+// back-quoted literal when s can be written that way, otherwise a
+// double-quoted literal with escapes.
+func quote(s string) string {
+	if !strconv.CanBackquote(s) {
+		return strconv.Quote(s)
+	}
+	return "`" + s + "`"
+}
+
+// RegexOptions impact the runtime and parsing behavior
+// for each specific regex. They are settable in code as well
+// as in the regex pattern itself.
+type RegexOptions int32
+
+// Option flags; combine them with bitwise OR. The quoted letter next to each
+// flag is its short code as given in the original comments. Only None is
+// declared with the RegexOptions type; the remaining flags are untyped
+// integer constants.
+const (
+ None RegexOptions = 0x0
+ IgnoreCase = 0x0001 // "i"
+ Multiline = 0x0002 // "m"
+ ExplicitCapture = 0x0004 // "n"
+ Compiled = 0x0008 // "c"
+ Singleline = 0x0010 // "s"
+ IgnorePatternWhitespace = 0x0020 // "x"
+ RightToLeft = 0x0040 // "r"
+ Debug = 0x0080 // "d"
+ ECMAScript = 0x0100 // "e"
+ RE2 = 0x0200 // RE2 (regexp package) compatibility mode
+)
+
+// RightToLeft reports whether the RightToLeft option flag is set on this
+// regexp.
+func (re *Regexp) RightToLeft() bool {
+ return re.options&RightToLeft != 0
+}
+
+// Debug reports whether the Debug option flag is set on this regexp.
+func (re *Regexp) Debug() bool {
+ return re.options&Debug != 0
+}
+
+// Replace searches the input string and replaces each match found with the replacement text.
+// Count will limit the number of matches attempted and startAt will allow
+// us to skip past possible matches at the start of the input (left or right depending on RightToLeft option).
+// Set startAt and count to -1 to go through the whole string.
+//
+// The replacement pattern is parsed on every call; an error from parsing it
+// is returned together with an empty string.
+func (re *Regexp) Replace(input, replacement string, startAt, count int) (string, error) {
+ data, err := syntax.NewReplacerData(replacement, re.caps, re.capsize, re.capnames, syntax.RegexOptions(re.options))
+ if err != nil {
+ return "", err
+ }
+ //TODO: cache ReplacerData
+
+ // a nil evaluator selects the pattern-based replacement path
+ return replace(re, data, nil, input, startAt, count)
+}
+
+// ReplaceFunc searches the input string and replaces each match found using the string
+// returned by the evaluator.
+// Count will limit the number of matches attempted and startAt will allow
+// us to skip past possible matches at the start of the input (left or right depending on RightToLeft option).
+// Set startAt and count to -1 to go through the whole string.
+func (re *Regexp) ReplaceFunc(input string, evaluator MatchEvaluator, startAt, count int) (string, error) {
+ // no replacement pattern data is supplied; the evaluator produces each replacement
+ return replace(re, nil, evaluator, input, startAt, count)
+}
+
+// FindStringMatch searches the input string for a Regexp match.
+// The string is converted to runes first, so positions in the returned Match
+// are rune offsets.
+func (re *Regexp) FindStringMatch(s string) (*Match, error) {
+ // convert string to runes
+ // NOTE(review): -1 presumably tells run to use the default start position — confirm in run.
+ return re.run(false, -1, getRunes(s))
+}
+
+// FindRunesMatch searches the input rune slice for a Regexp match.
+// The slice is handed to the matcher as-is, without copying.
+func (re *Regexp) FindRunesMatch(r []rune) (*Match, error) {
+ return re.run(false, -1, r)
+}
+
+// FindStringMatchStartingAt searches the input string for a Regexp match
+// starting at the startAt index. startAt is a byte offset into s: it must
+// not exceed len(s) and has to fall on the first byte of a rune, otherwise
+// an error is returned.
+func (re *Regexp) FindStringMatchStartingAt(s string, startAt int) (*Match, error) {
+	if startAt > len(s) {
+		return nil, errors.New("startAt must be less than the length of the input string")
+	}
+
+	runes, runeStart := re.getRunesAndStart(s, startAt)
+	if runeStart == -1 {
+		// the byte offset never lined up with the start of a rune
+		return nil, errors.New("startAt must align to the start of a valid rune in the input string")
+	}
+
+	return re.run(false, runeStart, runes)
+}
+
+// FindRunesMatchStartingAt searches the input rune slice for a Regexp match
+// starting at the startAt index. startAt is a rune index and is passed to
+// the matcher without validation here.
+func (re *Regexp) FindRunesMatchStartingAt(r []rune, startAt int) (*Match, error) {
+ return re.run(false, startAt, r)
+}
+
+// FindNextMatch returns the next match in the same input as m.
+// It returns nil if there is no next match or if m is nil.
+func (re *Regexp) FindNextMatch(m *Match) (*Match, error) {
+	if m == nil {
+		return nil, nil
+	}
+
+	next := m.textpos
+	if m.Length == 0 {
+		// An empty match would be found again at the same position, so step
+		// one rune in the scan direction to avoid looping forever.
+		switch {
+		case m.textpos == len(m.text):
+			return nil, nil
+		case re.RightToLeft():
+			next--
+		default:
+			next++
+		}
+	}
+
+	return re.run(false, next, m.text)
+}
+
+// MatchString reports whether the string contains a match of the regex.
+// A non-nil error (for example a timeout) is returned together with false.
+func (re *Regexp) MatchString(s string) (bool, error) {
+	runes := getRunes(s)
+
+	found, err := re.run(true, -1, runes)
+	if err != nil {
+		return false, err
+	}
+	return found != nil, nil
+}
+
+// getRunesAndStart converts s to runes and translates startAt, a byte offset
+// into s, to the corresponding rune index.
+//
+// A negative startAt selects the default start: 0, or the rune count when
+// the regexp is right-to-left. Otherwise the returned index is the number of
+// runes that precede byte offset startAt. startAt == len(s) is valid and
+// maps to the rune count (the position just past the last rune). The index
+// is -1 when startAt does not fall on the first byte of a rune.
+func (re *Regexp) getRunesAndStart(s string, startAt int) ([]rune, int) {
+	if startAt < 0 {
+		if re.RightToLeft() {
+			r := getRunes(s)
+			return r, len(r)
+		}
+		return getRunes(s), 0
+	}
+
+	// len(s) is an upper bound on the rune count
+	ret := make([]rune, len(s))
+	i := 0
+	runeIdx := -1
+	for strIdx, r := range s {
+		if strIdx == startAt {
+			runeIdx = i
+		}
+		ret[i] = r
+		i++
+	}
+	// The loop only visits offsets where a rune begins, so end-of-string is
+	// never seen there; without this, startAt == len(s) was wrongly reported
+	// as a misaligned offset.
+	if startAt == len(s) {
+		runeIdx = i
+	}
+	return ret[:i], runeIdx
+}
+
+// getRunes decodes s into a slice of runes. The slice is allocated with
+// capacity len(s), which is an upper bound on the number of runes.
+func getRunes(s string) []rune {
+	runes := make([]rune, 0, len(s))
+	for _, r := range s {
+		runes = append(runes, r)
+	}
+	return runes
+}
+
+// MatchRunes reports whether the rune slice contains a match of the regex.
+// A non-nil error (for example a timeout) is returned together with false.
+func (re *Regexp) MatchRunes(r []rune) (bool, error) {
+	found, err := re.run(true, -1, r)
+	if err != nil {
+		return false, err
+	}
+	return found != nil, nil
+}
+
+// GetGroupNames Returns the set of strings used to name capturing groups in the expression.
+func (re *Regexp) GetGroupNames() []string {
+ var result []string
+
+ if re.capslist == nil {
+ result = make([]string, re.capsize)
+
+ for i := 0; i < len(result); i++ {
+ result[i] = strconv.Itoa(i)
+ }
+ } else {
+ result = make([]string, len(re.capslist))
+ copy(result, re.capslist)
+ }
+
+ return result
+}
+
+// GetGroupNumbers returns the integer group numbers corresponding to a group name.
+func (re *Regexp) GetGroupNumbers() []int {
+ var result []int
+
+ if re.caps == nil {
+ result = make([]int, re.capsize)
+
+ for i := 0; i < len(result); i++ {
+ result[i] = i
+ }
+ } else {
+ result = make([]int, len(re.caps))
+
+ for k, v := range re.caps {
+ result[v] = k
+ }
+ }
+
+ return result
+}
+
+// GroupNameFromNumber retrieves a group name that corresponds to a group number.
+// It will return "" for and unknown group number. Unnamed groups automatically
+// receive a name that is the decimal string equivalent of its number.
+func (re *Regexp) GroupNameFromNumber(i int) string {
+ if re.capslist == nil {
+ if i >= 0 && i < re.capsize {
+ return strconv.Itoa(i)
+ }
+
+ return ""
+ }
+
+ if re.caps != nil {
+ var ok bool
+ if i, ok = re.caps[i]; !ok {
+ return ""
+ }
+ }
+
+ if i >= 0 && i < len(re.capslist) {
+ return re.capslist[i]
+ }
+
+ return ""
+}
+
+// GroupNumberFromName returns the group number that corresponds to a group
+// name, or -1 if the name is not a recognized group name. Numbered groups
+// automatically get a group name that is the decimal string equivalent of
+// their number.
+func (re *Regexp) GroupNumberFromName(name string) int {
+	// with a name table, it is the only source of truth
+	if re.capnames != nil {
+		if num, known := re.capnames[name]; known {
+			return num
+		}
+		return -1
+	}
+
+	// otherwise the name must be made up of ASCII digits only
+	num := 0
+	for i := 0; i < len(name); i++ {
+		digit := name[i]
+		if digit < '0' || digit > '9' {
+			return -1
+		}
+		num = num*10 + int(digit-'0')
+	}
+
+	// accept the number only if it is in range
+	if num < 0 || num >= re.capsize {
+		return -1
+	}
+	return num
+}
diff --git a/vendor/github.com/dlclark/regexp2/replace.go b/vendor/github.com/dlclark/regexp2/replace.go
new file mode 100644
index 0000000000..0376bd9d37
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/replace.go
@@ -0,0 +1,177 @@
+package regexp2
+
+import (
+ "bytes"
+ "errors"
+
+ "github.com/dlclark/regexp2/syntax"
+)
+
+// Rule encoding used by the replacement routines in this file. A rule r >= 0
+// is an index into ReplacerData.Strings; for r < 0 the value
+// -replaceSpecials-1-r is a group number when it is non-negative and one of
+// the negative special codes below otherwise.
+const (
+ replaceSpecials = 4 // number of special (non-group) insertion codes
+ replaceLeftPortion = -1 // insert the input text preceding the match
+ replaceRightPortion = -2 // insert the input text following the match
+ replaceLastGroup = -3 // insert the value of group GroupCount()-1
+ replaceWholeString = -4 // insert the entire input text
+)
+
+// MatchEvaluator is a callback that receives a match (passed by value) and
+// returns the string that should replace it.
+type MatchEvaluator func(Match) string
+
+// Three very similar algorithms appear below: replace (pattern),
+// replace (evaluator), and split.
+
+// replace substitutes matches of regex found in input, searching from startAt.
+//
+// Each match is replaced with the string returned by evaluator when evaluator
+// is non-nil, otherwise with the expansion of the replacement pattern in data.
+// count limits the number of replacements: -1 replaces every match, 0 replaces
+// nothing (input is returned unchanged) and values below -1 are an error.
+//
+// Note that the special case of no matches is handled on its own:
+// with no matches, the input string is returned unchanged.
+// The right-to-left case is split out: its pieces are collected in the order
+// the matches are found (from the end of the text) and written back-to-front
+// once the search is finished.
+//
+// Any error reported while searching (for example a match timeout) is
+// returned to the caller together with an empty string.
+func replace(regex *Regexp, data *syntax.ReplacerData, evaluator MatchEvaluator, input string, startAt, count int) (string, error) {
+	if count < -1 {
+		return "", errors.New("Count too small")
+	}
+	if count == 0 {
+		// replacing zero occurrences must not discard the input
+		return input, nil
+	}
+
+	m, err := regex.FindStringMatchStartingAt(input, startAt)
+
+	if err != nil {
+		return "", err
+	}
+	if m == nil {
+		return input, nil
+	}
+
+	buf := &bytes.Buffer{}
+	text := m.text
+
+	if !regex.RightToLeft() {
+		// prevat is the end of the previous match (start of unmatched text)
+		prevat := 0
+		for m != nil {
+			if m.Index != prevat {
+				buf.WriteString(string(text[prevat:m.Index]))
+			}
+			prevat = m.Index + m.Length
+			if evaluator == nil {
+				replacementImpl(data, buf, m)
+			} else {
+				buf.WriteString(evaluator(*m))
+			}
+
+			count--
+			if count == 0 {
+				break
+			}
+			m, err = regex.FindNextMatch(m)
+			if err != nil {
+				// propagate the failure instead of reporting an empty success
+				return "", err
+			}
+		}
+
+		if prevat < len(text) {
+			buf.WriteString(string(text[prevat:]))
+		}
+	} else {
+		// prevat is the start of the previous match (end of unmatched text)
+		prevat := len(text)
+		var al []string
+
+		for m != nil {
+			if m.Index+m.Length != prevat {
+				al = append(al, string(text[m.Index+m.Length:prevat]))
+			}
+			prevat = m.Index
+			if evaluator == nil {
+				replacementImplRTL(data, &al, m)
+			} else {
+				al = append(al, evaluator(*m))
+			}
+
+			count--
+			if count == 0 {
+				break
+			}
+			m, err = regex.FindNextMatch(m)
+			if err != nil {
+				// propagate the failure instead of reporting an empty success
+				return "", err
+			}
+		}
+
+		if prevat > 0 {
+			buf.WriteString(string(text[:prevat]))
+		}
+
+		// pieces were gathered from the end of the text towards the start
+		for i := len(al) - 1; i >= 0; i-- {
+			buf.WriteString(al[i])
+		}
+	}
+
+	return buf.String(), nil
+}
+
+// replacementImpl expands the replacement rules in data for the match m and
+// appends the resulting text to buf.
+func replacementImpl(data *syntax.ReplacerData, buf *bytes.Buffer, m *Match) {
+	// emit appends the runes m.text[from:to) to buf, one at a time
+	emit := func(from, to int) {
+		for pos := from; pos < to; pos++ {
+			buf.WriteRune(m.text[pos])
+		}
+	}
+
+	for _, rule := range data.Rules {
+		if rule >= 0 {
+			// literal text
+			buf.WriteString(data.Strings[rule])
+			continue
+		}
+
+		code := -replaceSpecials - 1 - rule
+		if code >= 0 {
+			// group reference
+			m.groupValueAppendToBuf(code, buf)
+			continue
+		}
+
+		// special insertion patterns
+		switch code {
+		case replaceLeftPortion:
+			emit(0, m.Index)
+		case replaceRightPortion:
+			emit(m.Index+m.Length, len(m.text))
+		case replaceLastGroup:
+			m.groupValueAppendToBuf(m.GroupCount()-1, buf)
+		case replaceWholeString:
+			emit(0, len(m.text))
+		}
+	}
+}
+
+// replacementImplRTL expands the replacement rules in data for the match m and
+// appends one string per rule to the list pointed to by al.
+//
+// NOTE(review): the pieces are appended in rule order, while replace writes
+// the list out back-to-front; confirm that data.Rules is ordered so that a
+// multi-part replacement comes out in the intended order for right-to-left
+// patterns.
+func replacementImplRTL(data *syntax.ReplacerData, al *[]string, m *Match) {
+	pieces := *al
+	scratch := &bytes.Buffer{}
+
+	// emit appends the runes m.text[from:to) to scratch, one at a time
+	emit := func(from, to int) {
+		for pos := from; pos < to; pos++ {
+			scratch.WriteRune(m.text[pos])
+		}
+	}
+
+	for _, rule := range data.Rules {
+		scratch.Reset()
+
+		if rule >= 0 {
+			// literal text
+			pieces = append(pieces, data.Strings[rule])
+			continue
+		}
+
+		code := -replaceSpecials - 1 - rule
+		if code >= 0 {
+			// group reference
+			m.groupValueAppendToBuf(code, scratch)
+		} else {
+			// special insertion patterns
+			switch code {
+			case replaceLeftPortion:
+				emit(0, m.Index)
+			case replaceRightPortion:
+				emit(m.Index+m.Length, len(m.text))
+			case replaceLastGroup:
+				m.groupValueAppendToBuf(m.GroupCount()-1, scratch)
+			case replaceWholeString:
+				emit(0, len(m.text))
+			}
+		}
+		pieces = append(pieces, scratch.String())
+	}
+
+	*al = pieces
+}
diff --git a/vendor/github.com/dlclark/regexp2/runner.go b/vendor/github.com/dlclark/regexp2/runner.go
new file mode 100644
index 0000000000..2d84a934b0
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/runner.go
@@ -0,0 +1,1621 @@
+package regexp2
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "math"
+ "strconv"
+ "strings"
+ "time"
+ "unicode"
+
+ "github.com/dlclark/regexp2/syntax"
+)
+
+// runner holds the mutable state used while scanning one input for a compiled
+// regex: the text and current position, the backtracking, grouping and crawl
+// stacks, the match being built, and the timeout bookkeeping.
+type runner struct {
+ re *Regexp
+ code *syntax.Code
+
+ runtextstart int // starting point for search
+
+ runtext []rune // text to search
+ runtextpos int // current position in text
+ runtextend int
+
+ // The backtracking stack. Opcodes use this to store data regarding
+ // what they have matched and where to backtrack to. Each "frame" on
+ // the stack takes the form of [CodePosition Data1 Data2...], where
+ // CodePosition is the position of the current opcode and
+ // the data values are all optional. The CodePosition can be negative, and
+ // these values (also called "back2") are used by the BranchMark family of opcodes
+ // to indicate whether they are backtracking after a successful or failed
+ // match.
+ // When we backtrack, we pop the CodePosition off the stack, set the current
+ // instruction pointer to that code position, and mark the opcode
+ // with a backtracking flag ("Back"). Each opcode then knows how to
+ // handle its own data.
+ runtrack []int
+ runtrackpos int
+
+ // This stack is used to track text positions across different opcodes.
+ // For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark
+ // pair. SetMark records the text position before we match a*b. Then
+ // CaptureMark uses that position to figure out where the capture starts.
+ // Opcodes which push onto this stack are always paired with other opcodes
+ // which will pop the value from it later. A successful match should mean
+ // that this stack is empty.
+ runstack []int
+ runstackpos int
+
+ // The crawl stack is used to keep track of captures. Every time a group
+ // has a capture, we push its group number onto the runcrawl stack. In
+ // the case of a balanced match, we push BOTH groups onto the stack.
+ runcrawl []int
+ runcrawlpos int
+
+ runtrackcount int // count of states that may do backtracking
+
+ runmatch *Match // result object
+
+ ignoreTimeout bool // set by scan when the timeout equals math.MaxInt64
+ timeout time.Duration // time budget passed to scan (a time.Duration, not a millisecond count)
+ timeoutChecksToSkip int
+ timeoutAt time.Time
+
+ operator syntax.InstOp // current opcode with the Rtl and Ci bits removed (see setOperator)
+ codepos int // index of the current instruction in code.Codes
+ rightToLeft bool // Rtl bit of the current opcode
+ caseInsensitive bool // Ci bit of the current opcode
+}
+
+// run searches input for a match using a cached runner, which is handed back
+// to the cache when the call returns.
+//
+// quick is usually false; when true the match is not tidied for the caller (see tidyMatch)
+// textstart is an index into input, or negative to start at the "beginning":
+// index 0, or len(input) for Right-To-Left patterns
+// input is the text to search for our regex pattern
+func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) {
+	rnr := re.getRunner()
+	defer re.putRunner(rnr)
+
+	start := textstart
+	if start < 0 {
+		start = 0
+		if re.RightToLeft() {
+			start = len(input)
+		}
+	}
+
+	return rnr.scan(input, start, quick, re.MatchTimeout)
+}
+
+// scan searches rt for the first match, beginning at textstart.
+//
+// It stores the timeout settings and text bounds on the runner and then
+// loops: findFirstChar moves runtextpos to a candidate starting position and
+// execute runs the compiled program there. If group 0 has a capture
+// afterwards the match is returned through tidyMatch(quick). Otherwise the
+// three stacks are emptied, runtextpos is bumped by one (forward, or backward
+// for right-to-left patterns) and the search is retried until runtextpos
+// reaches stoppos, in which case (nil, nil) is returned.
+//
+// An error from checkTimeout or execute aborts the scan and is returned.
+func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) {
+ r.timeout = timeout
+ r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout)
+ r.runtextstart = textstart
+ r.runtext = rt
+ r.runtextend = len(rt)
+
+ stoppos := r.runtextend
+ bump := 1
+
+ if r.re.RightToLeft() {
+ bump = -1
+ stoppos = 0
+ }
+
+ r.runtextpos = textstart
+ initted := false
+
+ r.startTimeoutWatch()
+ for {
+ if r.re.Debug() {
+ //fmt.Printf("\nSearch content: %v\n", string(r.runtext))
+ fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend)
+ fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos)
+ }
+
+ if r.findFirstChar() {
+ if err := r.checkTimeout(); err != nil {
+ return nil, err
+ }
+
+ if !initted {
+ r.initMatch()
+ initted = true
+ }
+
+ if r.re.Debug() {
+ fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos)
+ }
+
+ if err := r.execute(); err != nil {
+ return nil, err
+ }
+
+ if r.runmatch.matchcount[0] > 0 {
+ // We'll return a match even if it touches a previous empty match
+ return r.tidyMatch(quick), nil
+ }
+
+ // reset state for another go
+ r.runtrackpos = len(r.runtrack)
+ r.runstackpos = len(r.runstack)
+ r.runcrawlpos = len(r.runcrawl)
+ }
+
+ // failure!
+
+ if r.runtextpos == stoppos {
+ r.tidyMatch(true)
+ return nil, nil
+ }
+
+ // Recognize leading []* and various anchors, and bump on failure accordingly
+
+ // r.bump by one and start again
+
+ r.runtextpos += bump
+ }
+ // We never get here
+}
+
+// execute runs the compiled program starting at code position 0.
+//
+// Every loop iteration first checks the timeout and then dispatches on the
+// current operator. A case that ends in "continue" has already moved to the
+// next instruction (via advance or goTo); a case that leaves the switch with
+// "break" (or jumps to BreakBackward) has failed and falls through to
+// r.backtrack(), which selects the next instruction from the backtracking
+// stack. Operators combined with syntax.Back or syntax.Back2 are the
+// backtracking variants selected by backtrack.
+//
+// It returns nil when a syntax.Stop instruction is reached, the error from
+// checkTimeout if one occurs, or an error for an unrecognized operator.
+func (r *runner) execute() error {
+
+ r.goTo(0)
+
+ for {
+
+ if r.re.Debug() {
+ r.dumpState()
+ }
+
+ if err := r.checkTimeout(); err != nil {
+ return err
+ }
+
+ switch r.operator {
+ case syntax.Stop:
+ return nil
+
+ case syntax.Nothing:
+ break
+
+ case syntax.Goto:
+ r.goTo(r.operand(0))
+ continue
+
+ case syntax.Testref:
+ if !r.runmatch.isMatched(r.operand(0)) {
+ break
+ }
+ r.advance(1)
+ continue
+
+ case syntax.Lazybranch:
+ r.trackPush1(r.textPos())
+ r.advance(1)
+ continue
+
+ case syntax.Lazybranch | syntax.Back:
+ r.trackPop()
+ r.textto(r.trackPeek())
+ r.goTo(r.operand(0))
+ continue
+
+ case syntax.Setmark:
+ r.stackPush(r.textPos())
+ r.trackPush()
+ r.advance(0)
+ continue
+
+ case syntax.Nullmark:
+ r.stackPush(-1)
+ r.trackPush()
+ r.advance(0)
+ continue
+
+ case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back:
+ r.stackPop()
+ break
+
+ case syntax.Getmark:
+ r.stackPop()
+ r.trackPush1(r.stackPeek())
+ r.textto(r.stackPeek())
+ r.advance(0)
+ continue
+
+ case syntax.Getmark | syntax.Back:
+ r.trackPop()
+ r.stackPush(r.trackPeek())
+ break
+
+ case syntax.Capturemark:
+ if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) {
+ break
+ }
+ r.stackPop()
+ if r.operand(1) != -1 {
+ r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos())
+ } else {
+ r.capture(r.operand(0), r.stackPeek(), r.textPos())
+ }
+ r.trackPush1(r.stackPeek())
+
+ r.advance(2)
+
+ continue
+
+ case syntax.Capturemark | syntax.Back:
+ r.trackPop()
+ r.stackPush(r.trackPeek())
+ r.uncapture()
+ if r.operand(0) != -1 && r.operand(1) != -1 {
+ r.uncapture()
+ }
+
+ break
+
+ case syntax.Branchmark:
+ r.stackPop()
+
+ matched := r.textPos() - r.stackPeek()
+
+ if matched != 0 { // Nonempty match -> loop now
+ r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos
+ r.stackPush(r.textPos()) // Make new mark
+ r.goTo(r.operand(0)) // Loop
+ } else { // Empty match -> straight now
+ r.trackPushNeg1(r.stackPeek()) // Save old mark
+ r.advance(1) // Straight
+ }
+ continue
+
+ case syntax.Branchmark | syntax.Back:
+ r.trackPopN(2)
+ r.stackPop()
+ r.textto(r.trackPeekN(1)) // Recall position
+ r.trackPushNeg1(r.trackPeek()) // Save old mark
+ r.advance(1) // Straight
+ continue
+
+ case syntax.Branchmark | syntax.Back2:
+ r.trackPop()
+ r.stackPush(r.trackPeek()) // Recall old mark
+ break // Backtrack
+
+ case syntax.Lazybranchmark:
+ {
+ // We hit this the first time through a lazy loop and after each
+ // successful match of the inner expression. It simply continues
+ // on and doesn't loop.
+ r.stackPop()
+
+ oldMarkPos := r.stackPeek()
+
+ if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state
+ if oldMarkPos != -1 {
+ r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos
+ } else {
+ r.trackPush2(r.textPos(), r.textPos())
+ }
+ } else {
+ // The inner expression found an empty match, so we'll go directly to 'back2' if we
+ // backtrack. In this case, we need to push something on the stack, since back2 pops.
+ // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
+ // position associated with that empty match.
+ r.stackPush(oldMarkPos)
+
+ r.trackPushNeg1(r.stackPeek()) // Save old mark
+ }
+ r.advance(1)
+ continue
+ }
+
+ case syntax.Lazybranchmark | syntax.Back:
+
+ // After the first time, Lazybranchmark | syntax.Back occurs
+ // with each iteration of the loop, and therefore with every attempted
+ // match of the inner expression. We'll try to match the inner expression,
+ // then go back to Lazybranchmark if successful. If the inner expression
+ // fails, we go to Lazybranchmark | syntax.Back2
+
+ r.trackPopN(2)
+ pos := r.trackPeekN(1)
+ r.trackPushNeg1(r.trackPeek()) // Save old mark
+ r.stackPush(pos) // Make new mark
+ r.textto(pos) // Recall position
+ r.goTo(r.operand(0)) // Loop
+ continue
+
+ case syntax.Lazybranchmark | syntax.Back2:
+ // The lazy loop has failed. We'll do a true backtrack and
+ // start over before the lazy loop.
+ r.stackPop()
+ r.trackPop()
+ r.stackPush(r.trackPeek()) // Recall old mark
+ break
+
+ case syntax.Setcount:
+ r.stackPush2(r.textPos(), r.operand(0))
+ r.trackPush()
+ r.advance(1)
+ continue
+
+ case syntax.Nullcount:
+ r.stackPush2(-1, r.operand(0))
+ r.trackPush()
+ r.advance(1)
+ continue
+
+ case syntax.Setcount | syntax.Back:
+ r.stackPopN(2)
+ break
+
+ case syntax.Nullcount | syntax.Back:
+ r.stackPopN(2)
+ break
+
+ case syntax.Branchcount:
+ // r.stackPush:
+ // 0: Mark
+ // 1: Count
+
+ r.stackPopN(2)
+ mark := r.stackPeek()
+ count := r.stackPeekN(1)
+ matched := r.textPos() - mark
+
+ if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now
+ r.trackPushNeg2(mark, count) // Save old mark, count
+ r.advance(2) // Straight
+ } else { // Nonempty match -> count+loop now
+ r.trackPush1(mark) // remember mark
+ r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
+ r.goTo(r.operand(0)) // Loop
+ }
+ continue
+
+ case syntax.Branchcount | syntax.Back:
+ // r.trackPush:
+ // 0: Previous mark
+ // r.stackPush:
+ // 0: Mark (= current pos, discarded)
+ // 1: Count
+ r.trackPop()
+ r.stackPopN(2)
+ if r.stackPeekN(1) > 0 { // Positive -> can go straight
+ r.textto(r.stackPeek()) // Zap to mark
+ r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count
+ r.advance(2) // Straight
+ continue
+ }
+ r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count
+ break
+
+ case syntax.Branchcount | syntax.Back2:
+ // r.trackPush:
+ // 0: Previous mark
+ // 1: Previous count
+ r.trackPopN(2)
+ r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count
+ break // Backtrack
+
+ case syntax.Lazybranchcount:
+ // r.stackPush:
+ // 0: Mark
+ // 1: Count
+
+ r.stackPopN(2)
+ mark := r.stackPeek()
+ count := r.stackPeekN(1)
+
+ if count < 0 { // Negative count -> loop now
+ r.trackPushNeg1(mark) // Save old mark
+ r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
+ r.goTo(r.operand(0)) // Loop
+ } else { // Nonneg count -> straight now
+ r.trackPush3(mark, count, r.textPos()) // Save mark, count, position
+ r.advance(2) // Straight
+ }
+ continue
+
+ case syntax.Lazybranchcount | syntax.Back:
+ // r.trackPush:
+ // 0: Mark
+ // 1: Count
+ // 2: r.textPos
+
+ r.trackPopN(3)
+ mark := r.trackPeek()
+ textpos := r.trackPeekN(2)
+
+ if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop
+ r.textto(textpos) // Recall position
+ r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count
+ r.trackPushNeg1(mark) // Save old mark
+ r.goTo(r.operand(0)) // Loop
+ continue
+ } else { // Max loops or empty match -> backtrack
+ r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count
+ break // backtrack
+ }
+
+ case syntax.Lazybranchcount | syntax.Back2:
+ // r.trackPush:
+ // 0: Previous mark
+ // r.stackPush:
+ // 0: Mark (== current pos, discarded)
+ // 1: Count
+ r.trackPop()
+ r.stackPopN(2)
+ r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count
+ break // Backtrack
+
+ case syntax.Setjump:
+ r.stackPush2(r.trackpos(), r.crawlpos())
+ r.trackPush()
+ r.advance(0)
+ continue
+
+ case syntax.Setjump | syntax.Back:
+ r.stackPopN(2)
+ break
+
+ case syntax.Backjump:
+ // r.stackPush:
+ // 0: Saved trackpos
+ // 1: r.crawlpos
+ r.stackPopN(2)
+ r.trackto(r.stackPeek())
+
+ for r.crawlpos() != r.stackPeekN(1) {
+ r.uncapture()
+ }
+
+ break
+
+ case syntax.Forejump:
+ // r.stackPush:
+ // 0: Saved trackpos
+ // 1: r.crawlpos
+ r.stackPopN(2)
+ r.trackto(r.stackPeek())
+ r.trackPush1(r.stackPeekN(1))
+ r.advance(0)
+ continue
+
+ case syntax.Forejump | syntax.Back:
+ // r.trackPush:
+ // 0: r.crawlpos
+ r.trackPop()
+
+ for r.crawlpos() != r.trackPeek() {
+ r.uncapture()
+ }
+
+ break
+
+ case syntax.Bol:
+ if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.Eol:
+ if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.Boundary:
+ if !r.isBoundary(r.textPos(), 0, r.runtextend) {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.Nonboundary:
+ if r.isBoundary(r.textPos(), 0, r.runtextend) {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.ECMABoundary:
+ if !r.isECMABoundary(r.textPos(), 0, r.runtextend) {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.NonECMABoundary:
+ if r.isECMABoundary(r.textPos(), 0, r.runtextend) {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.Beginning:
+ if r.leftchars() > 0 {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.Start:
+ if r.textPos() != r.textstart() {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.EndZ:
+ if r.rightchars() > 1 || r.rightchars() == 1 && r.charAt(r.textPos()) != '\n' {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.End:
+ if r.rightchars() > 0 {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.One:
+ if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) {
+ break
+ }
+
+ r.advance(1)
+ continue
+
+ case syntax.Notone:
+ if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) {
+ break
+ }
+
+ r.advance(1)
+ continue
+
+ case syntax.Set:
+
+ if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
+ break
+ }
+
+ r.advance(1)
+ continue
+
+ case syntax.Multi:
+ if !r.runematch(r.code.Strings[r.operand(0)]) {
+ break
+ }
+
+ r.advance(1)
+ continue
+
+ case syntax.Ref:
+
+ capnum := r.operand(0)
+
+ if r.runmatch.isMatched(capnum) {
+ if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) {
+ break
+ }
+ } else {
+ if (r.re.options & ECMAScript) == 0 {
+ break
+ }
+ }
+
+ r.advance(1)
+ continue
+
+ case syntax.Onerep:
+
+ c := r.operand(1)
+
+ if r.forwardchars() < c {
+ break
+ }
+
+ ch := rune(r.operand(0))
+
+ for c > 0 {
+ if r.forwardcharnext() != ch {
+ goto BreakBackward
+ }
+ c--
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Notonerep:
+
+ c := r.operand(1)
+
+ if r.forwardchars() < c {
+ break
+ }
+ ch := rune(r.operand(0))
+
+ for c > 0 {
+ if r.forwardcharnext() == ch {
+ goto BreakBackward
+ }
+ c--
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Setrep:
+
+ c := r.operand(1)
+
+ if r.forwardchars() < c {
+ break
+ }
+
+ set := r.code.Sets[r.operand(0)]
+
+ for c > 0 {
+ if !set.CharIn(r.forwardcharnext()) {
+ goto BreakBackward
+ }
+ c--
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Oneloop:
+
+ c := r.operand(1)
+
+ if c > r.forwardchars() {
+ c = r.forwardchars()
+ }
+
+ ch := rune(r.operand(0))
+ i := c
+
+ for ; i > 0; i-- {
+ if r.forwardcharnext() != ch {
+ r.backwardnext()
+ break
+ }
+ }
+
+ if c > i {
+ r.trackPush2(c-i-1, r.textPos()-r.bump())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Notoneloop:
+
+ c := r.operand(1)
+
+ if c > r.forwardchars() {
+ c = r.forwardchars()
+ }
+
+ ch := rune(r.operand(0))
+ i := c
+
+ for ; i > 0; i-- {
+ if r.forwardcharnext() == ch {
+ r.backwardnext()
+ break
+ }
+ }
+
+ if c > i {
+ r.trackPush2(c-i-1, r.textPos()-r.bump())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Setloop:
+
+ c := r.operand(1)
+
+ if c > r.forwardchars() {
+ c = r.forwardchars()
+ }
+
+ set := r.code.Sets[r.operand(0)]
+ i := c
+
+ for ; i > 0; i-- {
+ if !set.CharIn(r.forwardcharnext()) {
+ r.backwardnext()
+ break
+ }
+ }
+
+ if c > i {
+ r.trackPush2(c-i-1, r.textPos()-r.bump())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back:
+
+ r.trackPopN(2)
+ i := r.trackPeek()
+ pos := r.trackPeekN(1)
+
+ r.textto(pos)
+
+ if i > 0 {
+ r.trackPush2(i-1, pos-r.bump())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Setloop | syntax.Back:
+
+ r.trackPopN(2)
+ i := r.trackPeek()
+ pos := r.trackPeekN(1)
+
+ r.textto(pos)
+
+ if i > 0 {
+ r.trackPush2(i-1, pos-r.bump())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Onelazy, syntax.Notonelazy:
+
+ c := r.operand(1)
+
+ if c > r.forwardchars() {
+ c = r.forwardchars()
+ }
+
+ if c > 0 {
+ r.trackPush2(c-1, r.textPos())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Setlazy:
+
+ c := r.operand(1)
+
+ if c > r.forwardchars() {
+ c = r.forwardchars()
+ }
+
+ if c > 0 {
+ r.trackPush2(c-1, r.textPos())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Onelazy | syntax.Back:
+
+ r.trackPopN(2)
+ pos := r.trackPeekN(1)
+ r.textto(pos)
+
+ if r.forwardcharnext() != rune(r.operand(0)) {
+ break
+ }
+
+ i := r.trackPeek()
+
+ if i > 0 {
+ r.trackPush2(i-1, pos+r.bump())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Notonelazy | syntax.Back:
+
+ r.trackPopN(2)
+ pos := r.trackPeekN(1)
+ r.textto(pos)
+
+ if r.forwardcharnext() == rune(r.operand(0)) {
+ break
+ }
+
+ i := r.trackPeek()
+
+ if i > 0 {
+ r.trackPush2(i-1, pos+r.bump())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Setlazy | syntax.Back:
+
+ r.trackPopN(2)
+ pos := r.trackPeekN(1)
+ r.textto(pos)
+
+ if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
+ break
+ }
+
+ i := r.trackPeek()
+
+ if i > 0 {
+ r.trackPush2(i-1, pos+r.bump())
+ }
+
+ r.advance(2)
+ continue
+
+ default:
+ return errors.New("unknown state in regex runner")
+ }
+
+ BreakBackward:
+ ;
+
+ // "break Backward" comes here:
+ r.backtrack()
+ }
+}
+
+// ensureStorage doubles the grouping stack and/or the backtracking stack
+// whenever fewer than runtrackcount*4 free slots remain in it.
+func (r *runner) ensureStorage() {
+	threshold := r.runtrackcount * 4
+
+	if r.runstackpos < threshold {
+		doubleIntSlice(&r.runstack, &r.runstackpos)
+	}
+	if r.runtrackpos < threshold {
+		doubleIntSlice(&r.runtrack, &r.runtrackpos)
+	}
+}
+
+// doubleIntSlice replaces *s with a slice of twice the length whose upper
+// half holds the old contents, and shifts *pos by the old length so that it
+// keeps referring to the same element.
+func doubleIntSlice(s *[]int, pos *int) {
+	old := *s
+	grown := make([]int, 2*len(old))
+
+	copy(grown[len(old):], old)
+	*s = grown
+	*pos += len(old)
+}
+
+// crawl pushes i onto the crawl stack. The stack grows downward from the end
+// of runcrawl; when no free slot is left (runcrawlpos == 0) the storage is
+// doubled first.
+func (r *runner) crawl(i int) {
+ if r.runcrawlpos == 0 {
+ doubleIntSlice(&r.runcrawl, &r.runcrawlpos)
+ }
+ r.runcrawlpos--
+ r.runcrawl[r.runcrawlpos] = i
+}
+
+// popcrawl removes the top value from the crawl stack and returns it.
+func (r *runner) popcrawl() int {
+ val := r.runcrawl[r.runcrawlpos]
+ r.runcrawlpos++
+ return val
+}
+
+// crawlpos returns the height of the crawl stack (number of values on it).
+func (r *runner) crawlpos() int {
+ return len(r.runcrawl) - r.runcrawlpos
+}
+
+// advance moves to the next instruction by skipping the current opcode plus
+// its i operands, and loads the operator found there.
+func (r *runner) advance(i int) {
+ r.codepos += (i + 1)
+ r.setOperator(r.code.Codes[r.codepos])
+}
+
+// goTo jumps to the instruction at code position newpos and loads its operator.
+func (r *runner) goTo(newpos int) {
+ // when branching backward, ensure storage
+ if newpos < r.codepos {
+ r.ensureStorage()
+ }
+
+ r.setOperator(r.code.Codes[newpos])
+ r.codepos = newpos
+}
+
+// textto sets the current text position.
+func (r *runner) textto(newpos int) {
+ r.runtextpos = newpos
+}
+
+// trackto resets the backtracking stack to the height newpos (a value
+// previously obtained from trackpos).
+func (r *runner) trackto(newpos int) {
+ r.runtrackpos = len(r.runtrack) - newpos
+}
+
+// textstart returns the position at which the search started.
+func (r *runner) textstart() int {
+ return r.runtextstart
+}
+
+// textPos returns the current text position.
+func (r *runner) textPos() int {
+ return r.runtextpos
+}
+
+// trackpos returns the height of the backtracking stack (number of values on it).
+func (r *runner) trackpos() int {
+ return len(r.runtrack) - r.runtrackpos
+}
+
+// trackPush pushes a frame holding only the current code position onto the
+// backtracking stack. Like the other stacks, it grows downward in runtrack.
+func (r *runner) trackPush() {
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = r.codepos
+}
+
+// trackPush1 pushes a frame with one data value followed by the current code position.
+func (r *runner) trackPush1(I1 int) {
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I1
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = r.codepos
+}
+
+// trackPush2 pushes a frame with two data values followed by the current code position.
+func (r *runner) trackPush2(I1, I2 int) {
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I1
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I2
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = r.codepos
+}
+
+// trackPush3 pushes a frame with three data values followed by the current code position.
+func (r *runner) trackPush3(I1, I2, I3 int) {
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I1
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I2
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I3
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = r.codepos
+}
+
+// trackPushNeg1 pushes a frame with one data value followed by the negated
+// code position; backtrack treats a negative position as a "back2" frame.
+func (r *runner) trackPushNeg1(I1 int) {
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I1
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = -r.codepos
+}
+
+// trackPushNeg2 pushes a frame with two data values followed by the negated
+// code position; backtrack treats a negative position as a "back2" frame.
+func (r *runner) trackPushNeg2(I1, I2 int) {
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I1
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I2
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = -r.codepos
+}
+
+// backtrack pops a code position from the backtracking stack and makes it the
+// current instruction. A negative entry selects the syntax.Back2 variant of
+// the opcode at the negated position, a non-negative one the syntax.Back
+// variant.
+func (r *runner) backtrack() {
+	target := r.runtrack[r.runtrackpos]
+	r.runtrackpos++
+
+	isBack2 := target < 0
+	if isBack2 {
+		target = -target
+	}
+
+	if r.re.Debug() {
+		if isBack2 {
+			fmt.Printf(" Backtracking (back2) to code position %v\n", target)
+		} else {
+			fmt.Printf(" Backtracking to code position %v\n", target)
+		}
+	}
+
+	if isBack2 {
+		r.setOperator(r.code.Codes[target] | syntax.Back2)
+	} else {
+		r.setOperator(r.code.Codes[target] | syntax.Back)
+	}
+
+	// When branching backward, ensure storage
+	if target < r.codepos {
+		r.ensureStorage()
+	}
+
+	r.codepos = target
+}
+
+func (r *runner) setOperator(op int) {
+ r.caseInsensitive = (0 != (op & syntax.Ci))
+ r.rightToLeft = (0 != (op & syntax.Rtl))
+ r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci))
+}
+
+// trackPop pops one item from the backtracking stack; the value stays
+// readable through trackPeek.
+func (r *runner) trackPop() {
+ r.runtrackpos++
+}
+
+// pop framesize items from the backtracking stack
+func (r *runner) trackPopN(framesize int) {
+ r.runtrackpos += framesize
+}
+
+// Technically we are actually peeking at items already popped. So if you want to
+// get and pop the top item from the stack, you do
+// r.trackPop();
+// r.trackPeek();
+func (r *runner) trackPeek() int {
+ return r.runtrack[r.runtrackpos-1]
+}
+
+// trackPeekN returns the already-popped item i slots below the one returned
+// by trackPeek (trackPeekN(0) == trackPeek()).
+func (r *runner) trackPeekN(i int) int {
+ return r.runtrack[r.runtrackpos-i-1]
+}
+
+// stackPush pushes one value onto the grouping stack, which grows downward in runstack.
+func (r *runner) stackPush(I1 int) {
+ r.runstackpos--
+ r.runstack[r.runstackpos] = I1
+}
+
+// stackPush2 pushes two values onto the grouping stack, I1 first.
+func (r *runner) stackPush2(I1, I2 int) {
+ r.runstackpos--
+ r.runstack[r.runstackpos] = I1
+ r.runstackpos--
+ r.runstack[r.runstackpos] = I2
+}
+
+// stackPop pops one item from the grouping stack; the value stays readable
+// through stackPeek.
+func (r *runner) stackPop() {
+ r.runstackpos++
+}
+
+// pop framesize items from the grouping stack
+func (r *runner) stackPopN(framesize int) {
+ r.runstackpos += framesize
+}
+
+// Technically we are actually peeking at items already popped. So if you want to
+// get and pop the top item from the stack, you do
+// r.stackPop();
+// r.stackPeek();
+func (r *runner) stackPeek() int {
+ return r.runstack[r.runstackpos-1]
+}
+
+// stackPeekN returns the already-popped item i slots below the one returned
+// by stackPeek (stackPeekN(0) == stackPeek()).
+func (r *runner) stackPeekN(i int) int {
+ return r.runstack[r.runstackpos-i-1]
+}
+
+// operand returns the i-th operand (0-based) of the current instruction; the
+// operands follow the opcode in code.Codes.
+func (r *runner) operand(i int) int {
+ return r.code.Codes[r.codepos+i+1]
+}
+
+// leftchars returns the number of runes before the current text position.
+func (r *runner) leftchars() int {
+ return r.runtextpos
+}
+
+// rightchars returns the number of runes from the current text position to the end of the text.
+func (r *runner) rightchars() int {
+ return r.runtextend - r.runtextpos
+}
+
+// bump returns the step for the current scan direction: -1 when the current
+// opcode is right-to-left, 1 otherwise.
+func (r *runner) bump() int {
+ if r.rightToLeft {
+ return -1
+ }
+ return 1
+}
+
+// forwardchars returns the number of runes remaining in the current scan
+// direction.
+func (r *runner) forwardchars() int {
+ if r.rightToLeft {
+ return r.runtextpos
+ }
+ return r.runtextend - r.runtextpos
+}
+
+// forwardcharnext consumes the next rune in the current scan direction,
+// moving runtextpos past it, and returns the rune (lower-cased when the
+// current opcode is case-insensitive). It does no bounds checking; callers
+// check forwardchars first.
+func (r *runner) forwardcharnext() rune {
+ var ch rune
+ if r.rightToLeft {
+ r.runtextpos--
+ ch = r.runtext[r.runtextpos]
+ } else {
+ ch = r.runtext[r.runtextpos]
+ r.runtextpos++
+ }
+
+ if r.caseInsensitive {
+ return unicode.ToLower(ch)
+ }
+ return ch
+}
+
+// runematch reports whether str occurs at the current text position in the
+// current scan direction (ending at runtextpos when scanning right-to-left,
+// starting at it otherwise). On success the text position is moved past the
+// matched runes; on failure it is left unchanged. When the current opcode is
+// case-insensitive the text runes are lower-cased before comparison (str is
+// compared as-is).
+func (r *runner) runematch(str []rune) bool {
+	n := len(str)
+
+	// end is one past the last text rune that takes part in the comparison
+	var end int
+	if r.rightToLeft {
+		if r.runtextpos < n {
+			return false
+		}
+		end = r.runtextpos
+	} else {
+		if r.runtextend-r.runtextpos < n {
+			return false
+		}
+		end = r.runtextpos + n
+	}
+	start := end - n
+
+	// compare from the last rune towards the first
+	for i := n - 1; i >= 0; i-- {
+		got := r.runtext[start+i]
+		if r.caseInsensitive {
+			got = unicode.ToLower(got)
+		}
+		if str[i] != got {
+			return false
+		}
+	}
+
+	if r.rightToLeft {
+		r.runtextpos = start
+	} else {
+		r.runtextpos = end
+	}
+
+	return true
+}
+
+// refmatch reports whether the text at the current position (in the current
+// scan direction) equals the earlier text runtext[index:index+length], as
+// used for backreferences. On success the text position is moved past the
+// matched runes; on failure it is left unchanged. When the current opcode is
+// case-insensitive both sides are lower-cased before comparison.
+func (r *runner) refmatch(index, length int) bool {
+	// textEnd is one past the last text rune that takes part in the comparison
+	var textEnd int
+	if r.rightToLeft {
+		if r.runtextpos < length {
+			return false
+		}
+		textEnd = r.runtextpos
+	} else {
+		if r.runtextend-r.runtextpos < length {
+			return false
+		}
+		textEnd = r.runtextpos + length
+	}
+	textStart := textEnd - length
+
+	// compare from the last rune towards the first
+	for remaining := length; remaining != 0; {
+		remaining--
+
+		want := r.runtext[index+remaining]
+		got := r.runtext[textStart+remaining]
+		if r.caseInsensitive {
+			want, got = unicode.ToLower(want), unicode.ToLower(got)
+		}
+		if want != got {
+			return false
+		}
+	}
+
+	if r.rightToLeft {
+		r.runtextpos = textStart
+	} else {
+		r.runtextpos = textEnd
+	}
+
+	return true
+}
+
+// backwardnext moves the text position one rune against the current scan
+// direction, undoing one forwardcharnext.
+func (r *runner) backwardnext() {
+ if r.rightToLeft {
+ r.runtextpos++
+ } else {
+ r.runtextpos--
+ }
+}
+
+// charAt returns the rune at index j of the text being searched.
+func (r *runner) charAt(j int) rune {
+ return r.runtext[j]
+}
+
+// findFirstChar moves runtextpos to the next position at which a match could
+// start and reports whether such a position exists.
+//
+// Three strategies are tried in order, depending on what the compiled code
+// provides: leading/trailing anchors (which either rule out any further match
+// or let the position jump straight to where the anchor can hold), the
+// BmPrefix scanner, and finally the FcPrefix set of possible first
+// characters. When none of them is available every position is a candidate.
+// When false is returned because an anchor or BmPrefix rules out a match,
+// runtextpos has been moved to the end of the scan range.
+func (r *runner) findFirstChar() bool {
+
+ if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) {
+ if !r.code.RightToLeft {
+ if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) ||
+ (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) {
+ r.runtextpos = r.runtextend
+ return false
+ }
+ if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 {
+ r.runtextpos = r.runtextend - 1
+ } else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend {
+ r.runtextpos = r.runtextend
+ }
+ } else {
+ if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) ||
+ (0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 ||
+ (r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) ||
+ (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) {
+ r.runtextpos = 0
+ return false
+ }
+ if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 {
+ r.runtextpos = 0
+ }
+ }
+
+ if r.code.BmPrefix != nil {
+ return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend)
+ }
+
+ return true // found a valid start or end anchor
+ } else if r.code.BmPrefix != nil {
+ r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend)
+
+ if r.runtextpos == -1 {
+ if r.code.RightToLeft {
+ r.runtextpos = 0
+ } else {
+ r.runtextpos = r.runtextend
+ }
+ return false
+ }
+
+ return true
+ } else if r.code.FcPrefix == nil {
+ return true
+ }
+
+ // forwardchars/forwardcharnext/backwardnext below depend on these flags
+ r.rightToLeft = r.code.RightToLeft
+ r.caseInsensitive = r.code.FcPrefix.CaseInsensitive
+
+ set := r.code.FcPrefix.PrefixSet
+ if set.IsSingleton() {
+ ch := set.SingletonChar()
+ for i := r.forwardchars(); i > 0; i-- {
+ if ch == r.forwardcharnext() {
+ r.backwardnext()
+ return true
+ }
+ }
+ } else {
+ for i := r.forwardchars(); i > 0; i-- {
+ n := r.forwardcharnext()
+ //fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n))
+ if set.CharIn(n) {
+ r.backwardnext()
+ return true
+ }
+ }
+ }
+
+ return false
+}
+
+func (r *runner) initMatch() {
+ // Use a hashtable'ed Match object if the capture numbers are sparse
+
+ if r.runmatch == nil {
+ if r.re.caps != nil {
+ r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart)
+ } else {
+ r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart)
+ }
+ } else {
+ r.runmatch.reset(r.runtext, r.runtextstart)
+ }
+
+ // note we test runcrawl, because it is the last one to be allocated
+ // If there is an alloc failure in the middle of the three allocations,
+ // we may still return to reuse this instance, and we want to behave
+ // as if the allocations didn't occur. (we used to test _trackcount != 0)
+
+ if r.runcrawl != nil {
+ r.runtrackpos = len(r.runtrack)
+ r.runstackpos = len(r.runstack)
+ r.runcrawlpos = len(r.runcrawl)
+ return
+ }
+
+ r.initTrackCount()
+
+ tracksize := r.runtrackcount * 8
+ stacksize := r.runtrackcount * 8
+
+ if tracksize < 32 {
+ tracksize = 32
+ }
+ if stacksize < 16 {
+ stacksize = 16
+ }
+
+ r.runtrack = make([]int, tracksize)
+ r.runtrackpos = tracksize
+
+ r.runstack = make([]int, stacksize)
+ r.runstackpos = stacksize
+
+ r.runcrawl = make([]int, 32)
+ r.runcrawlpos = 32
+}
+
+// tidyMatch hands back the Match built by the current run.
+//
+// With quick set, the runner's own Match is returned untouched and stays
+// attached to the runner; the original author notes this is safe because the
+// value does not leave the package, and that it saves allocations for the
+// bool-only "IsMatch" style calls. Otherwise the Match is detached from the
+// runner, tidied at the current text position and returned.
+func (r *runner) tidyMatch(quick bool) *Match {
+	if quick {
+		return r.runmatch
+	}
+
+	result := r.runmatch
+	r.runmatch = nil
+	result.tidy(r.runtextpos)
+	return result
+}
+
+// capture records a capture for group capnum spanning start..end; the two
+// positions may be given in either order. Per the original note, capnum has
+// already been mapped to a non-sparse index by the code generator.
+func (r *runner) capture(capnum, start, end int) {
+	if start > end {
+		start, end = end, start
+	}
+
+	r.crawl(capnum)
+	r.runmatch.addMatch(capnum, start, end-start)
+}
+
+// transferCapture captures a subexpression. Note that the
+// capnum used here has already been mapped to a non-sparse
+// index (by the code generator RegexWriter).
+func (r *runner) transferCapture(capnum, uncapnum, start, end int) {
+ var start2, end2 int
+
+ // these are the two intervals that are cancelling each other
+
+ if end < start {
+ T := end
+ end = start
+ start = T
+ }
+
+ start2 = r.runmatch.matchIndex(uncapnum)
+ end2 = start2 + r.runmatch.matchLength(uncapnum)
+
+ // The new capture gets the innermost defined interval
+
+ if start >= end2 {
+ end = start
+ start = end2
+ } else if end <= start2 {
+ start = start2
+ } else {
+ if end > end2 {
+ end = end2
+ }
+ if start2 > start {
+ start = start2
+ }
+ }
+
+ r.crawl(uncapnum)
+ r.runmatch.balanceMatch(uncapnum)
+
+ if capnum != -1 {
+ r.crawl(capnum)
+ r.runmatch.addMatch(capnum, start, end-start)
+ }
+}
+
+// uncapture reverts the most recent capture: it pops the last group number
+// from the crawl stack and removes that group's latest match.
+func (r *runner) uncapture() {
+	group := r.popcrawl()
+	r.runmatch.removeMatch(group)
+}
+
+//debug
+
+func (r *runner) dumpState() {
+ back := ""
+ if r.operator&syntax.Back != 0 {
+ back = " Back"
+ }
+ if r.operator&syntax.Back2 != 0 {
+ back += " Back2"
+ }
+ fmt.Printf("Text: %v\nTrack: %v\nStack: %v\n %s%s\n\n",
+ r.textposDescription(),
+ r.stackDescription(r.runtrack, r.runtrackpos),
+ r.stackDescription(r.runstack, r.runstackpos),
+ r.code.OpcodeDescription(r.codepos),
+ back)
+}
+
+func (r *runner) stackDescription(a []int, index int) string {
+ buf := &bytes.Buffer{}
+
+ fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a))
+ if buf.Len() < 8 {
+ buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
+ }
+
+ buf.WriteRune('(')
+ for i := index; i < len(a); i++ {
+ if i > index {
+ buf.WriteRune(' ')
+ }
+
+ buf.WriteString(strconv.Itoa(a[i]))
+ }
+
+ buf.WriteRune(')')
+
+ return buf.String()
+}
+
+func (r *runner) textposDescription() string {
+ buf := &bytes.Buffer{}
+
+ buf.WriteString(strconv.Itoa(r.runtextpos))
+
+ if buf.Len() < 8 {
+ buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
+ }
+
+ if r.runtextpos > 0 {
+ buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1]))
+ } else {
+ buf.WriteRune('^')
+ }
+
+ buf.WriteRune('>')
+
+ for i := r.runtextpos; i < r.runtextend; i++ {
+ buf.WriteString(syntax.CharDescription(r.runtext[i]))
+ }
+ if buf.Len() >= 64 {
+ buf.Truncate(61)
+ buf.WriteString("...")
+ } else {
+ buf.WriteRune('$')
+ }
+
+ return buf.String()
+}
+
+// isBoundary reports whether index is a word boundary within
+// [startpos, endpos): exactly one of the characters on either side of index
+// is a word character. Positions outside the window count as non-word.
+func (r *runner) isBoundary(index, startpos, endpos int) bool {
+	wordBefore := index > startpos && syntax.IsWordChar(r.runtext[index-1])
+	wordAfter := index < endpos && syntax.IsWordChar(r.runtext[index])
+	return wordBefore != wordAfter
+}
+
+// isECMABoundary is the counterpart of isBoundary that classifies characters
+// with syntax.IsECMAWordChar: it reports whether exactly one side of index
+// holds a word character, treating positions outside [startpos, endpos) as
+// non-word.
+func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
+	wordBefore := index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])
+	wordAfter := index < endpos && syntax.IsECMAWordChar(r.runtext[index])
+	return wordBefore != wordAfter
+}
+
+// timeoutCheckFrequency is the value the runner's timeoutChecksToSkip counter is reset to.
+// Rationale carried over with the original comment: the value was determined in a series of
+// experiments where x86 retail builds (ono-lab-optimized) were run on different pattern/input pairs.
+// Larger values did not tend to increase performance; smaller values tended to slow down
+// the execution.
+const timeoutCheckFrequency int = 1000
+
+// startTimeoutWatch arms the timeout for the run that is about to start: it
+// resets the skip counter and computes the deadline as now plus r.timeout.
+// It does nothing when the runner ignores timeouts.
+func (r *runner) startTimeoutWatch() {
+	if !r.ignoreTimeout {
+		r.timeoutChecksToSkip = timeoutCheckFrequency
+		r.timeoutAt = time.Now().Add(r.timeout)
+	}
+}
+
+// checkTimeout counts down the skip counter and, only when it reaches zero,
+// resets it and performs the real deadline check. It returns the error from
+// doCheckTimeout in that case and nil otherwise, including whenever the
+// runner ignores timeouts.
+func (r *runner) checkTimeout() error {
+	if r.ignoreTimeout {
+		return nil
+	}
+
+	r.timeoutChecksToSkip--
+	if r.timeoutChecksToSkip == 0 {
+		r.timeoutChecksToSkip = timeoutCheckFrequency
+		return r.doCheckTimeout()
+	}
+	return nil
+}
+
+// doCheckTimeout compares the current time with the deadline in r.timeoutAt.
+// It returns nil while the deadline lies in the future and otherwise an error
+// that names the configured timeout and contains the complete input text.
+func (r *runner) doCheckTimeout() error {
+	if time.Now().Before(r.timeoutAt) {
+		return nil
+	}
+
+	if r.re.Debug() {
+		// Intentionally empty: the code this was ported from logged the timeout,
+		// pattern and input here; no debug output is produced at the moment.
+	}
+
+	return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext))
+}
+
+// initTrackCount copies the track count of the compiled code into the
+// runner, where initMatch uses it to size the track and stack buffers.
+func (r *runner) initTrackCount() {
+	r.runtrackcount = r.code.TrackCount
+}
+
+// getRunner returns a runner to use for matching re. The most recently
+// cached runner is popped from re's cache when one is available, which avoids
+// an allocation; otherwise a new runner bound to re and its code is created.
+func (re *Regexp) getRunner() *runner {
+	re.muRun.Lock()
+	if last := len(re.runner) - 1; last >= 0 {
+		cached := re.runner[last]
+		re.runner = re.runner[:last]
+		re.muRun.Unlock()
+		return cached
+	}
+	re.muRun.Unlock()
+
+	return &runner{
+		re:   re,
+		code: re.code,
+	}
+}
+
+// putRunner returns a runner to re's cache so a later getRunner can reuse it.
+// The cache is never trimmed: it grows to the largest number of runners that
+// were in use at the same time, and is released together with re.
+func (re *Regexp) putRunner(r *runner) {
+	re.muRun.Lock()
+	defer re.muRun.Unlock()
+
+	re.runner = append(re.runner, r)
+}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/charclass.go b/vendor/github.com/dlclark/regexp2/syntax/charclass.go
new file mode 100644
index 0000000000..53974d1013
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/charclass.go
@@ -0,0 +1,854 @@
+package syntax
+
+import (
+ "bytes"
+ "encoding/binary"
+ "fmt"
+ "sort"
+ "unicode"
+ "unicode/utf8"
+)
+
+// CharSet combines start-end rune ranges and unicode categories representing a set of characters; CharIn is the membership test.
+type CharSet struct {
+ ranges []singleRange // inclusive rune ranges; CharIn matches ch when first <= ch <= last for any of them
+ categories []category // category tests that CharIn consults only when no range matched
+ sub *CharSet //optional subtractor: CharIn rejects a rune that is in sub
+ negate bool // inverts the combined range/category result; applied before sub is consulted
+ anything bool // set by makeAnything; addSet, addCategories and addRanges return early once it is true
+}
+
+type category struct { // category is one named category test inside a CharSet
+ negate bool // true when the test is inverted (rendered by String as \S, \W or \P{...})
+ cat string // spaceCategoryText, wordCategoryText, or a key of unicodeCategories
+}
+
+type singleRange struct { // singleRange is one contiguous run of runes in a CharSet
+ first rune // lowest rune of the range
+ last rune // highest rune of the range; inclusive, CharIn tests ch <= last
+}
+
+const ( // names of the two categories CharIn resolves itself instead of through unicodeCategories
+ spaceCategoryText = " " // tested with unicode.IsSpace
+ wordCategoryText = "W" // tested with IsWordChar
+)
+
+var ( // boundary lists as consumed by getCharSetFromOldString: each pair is (first rune, first rune after the range)
+ ecmaSpace = []rune{0x0009, 0x000e, 0x0020, 0x0021, 0x00a0, 0x00a1, 0x1680, 0x1681, 0x2000, 0x200b, 0x2028, 0x202a, 0x202f, 0x2030, 0x205f, 0x2060, 0x3000, 0x3001, 0xfeff, 0xff00}
+ ecmaWord = []rune{0x0030, 0x003a, 0x0041, 0x005b, 0x005f, 0x0060, 0x0061, 0x007b} // 0-9, A-Z, _, a-z
+ ecmaDigit = []rune{0x0030, 0x003a} // 0-9
+)
+
+var ( // predefined classes; each value is a constructor that returns a fresh *CharSet copy on every call
+ AnyClass = getCharSetFromOldString([]rune{0}, false)
+ ECMAAnyClass = getCharSetFromOldString([]rune{0, 0x000a, 0x000b, 0x000d, 0x000e}, false)
+ NoneClass = getCharSetFromOldString(nil, false)
+ ECMAWordClass = getCharSetFromOldString(ecmaWord, false)
+ NotECMAWordClass = getCharSetFromOldString(ecmaWord, true)
+ ECMASpaceClass = getCharSetFromOldString(ecmaSpace, false)
+ NotECMASpaceClass = getCharSetFromOldString(ecmaSpace, true)
+ ECMADigitClass = getCharSetFromOldString(ecmaDigit, false)
+ NotECMADigitClass = getCharSetFromOldString(ecmaDigit, true)
+
+ WordClass = getCharSetFromCategoryString(false, false, wordCategoryText)
+ NotWordClass = getCharSetFromCategoryString(true, false, wordCategoryText) // negates the whole set
+ SpaceClass = getCharSetFromCategoryString(false, false, spaceCategoryText)
+ NotSpaceClass = getCharSetFromCategoryString(true, false, spaceCategoryText) // negates the whole set
+ DigitClass = getCharSetFromCategoryString(false, false, "Nd")
+ NotDigitClass = getCharSetFromCategoryString(false, true, "Nd") // negates the category, not the set
+)
+
+// unicodeCategories maps every name accepted in \p{...} to its range table.
+// It merges the script, category and property tables of the unicode package,
+// in that order, so on a name clash the later table wins.
+var unicodeCategories = func() map[string]*unicode.RangeTable {
+	sources := []map[string]*unicode.RangeTable{
+		unicode.Scripts,
+		unicode.Categories,
+		unicode.Properties,
+	}
+
+	merged := make(map[string]*unicode.RangeTable)
+	for _, src := range sources {
+		for name, table := range src {
+			merged[name] = table
+		}
+	}
+	return merged
+}()
+
+// getCharSetFromCategoryString builds a template CharSet holding the named
+// categories and returns a constructor for it. negateSet negates the whole
+// set, negateCat negates each category; asking for both panics. Every call of
+// the returned function yields a pointer to a new copy of the template (a
+// struct copy, so the categories slice is shared with the template).
+func getCharSetFromCategoryString(negateSet bool, negateCat bool, cats ...string) func() *CharSet {
+	if negateSet && negateCat {
+		panic("BUG! You should only negate the set OR the category in a constant setup, but not both")
+	}
+
+	template := CharSet{
+		negate:     negateSet,
+		categories: make([]category, 0, len(cats)),
+	}
+	for _, name := range cats {
+		template.categories = append(template.categories, category{cat: name, negate: negateCat})
+	}
+
+	return func() *CharSet {
+		clone := template
+		return &clone
+	}
+}
+
+// getCharSetFromOldString builds a template CharSet from a boundary list and
+// returns a constructor for it.
+//
+// setText alternates between the first rune of a range and the first rune
+// after it; a trailing unpaired start means the range runs to utf8.MaxRune.
+// With negate set the complement is built instead, by shifting the roles of
+// the boundaries: a leading 0 boundary is dropped, otherwise a range starting
+// at 0 is inserted in front.
+//
+// Every call of the returned function yields a pointer to a new copy of the
+// template (a struct copy, so the ranges slice is shared with the template).
+func getCharSetFromOldString(setText []rune, negate bool) func() *CharSet {
+	c := CharSet{}
+	if len(setText) > 0 {
+		fillFirst := false
+		if negate {
+			if setText[0] == 0 {
+				setText = setText[1:]
+			} else {
+				fillFirst = true
+			}
+		}
+
+		// Count the boundaries only after the adjustment above. Sizing from
+		// the unstripped length left a spurious trailing {0, 0} range when a
+		// leading 0 was dropped from an odd-length list.
+		boundaries := len(setText)
+		if fillFirst {
+			boundaries++
+		}
+		c.ranges = make([]singleRange, (boundaries+1)/2)
+
+		// atStart is true when the next boundary opens a new range.
+		atStart := true
+		if fillFirst {
+			c.ranges[0] = singleRange{first: 0}
+			atStart = false
+		}
+
+		i := 0
+		for _, r := range setText {
+			if atStart {
+				// lower bound of a new range
+				c.ranges[i] = singleRange{first: r}
+				atStart = false
+			} else {
+				// boundaries are exclusive, ranges are inclusive
+				c.ranges[i].last = r - 1
+				i++
+				atStart = true
+			}
+		}
+		if !atStart {
+			// the last range was left open: it extends to the end of Unicode
+			c.ranges[i].last = utf8.MaxRune
+		}
+	}
+
+	return func() *CharSet {
+		local := c
+		return &local
+	}
+}
+
+// Copy returns a deep copy of the set: ranges and categories are copied into
+// new slices and a subtracted set, if present, is copied recursively, so the
+// result can be modified without affecting the receiver.
+func (c CharSet) Copy() CharSet {
+	dup := CharSet{
+		anything:   c.anything,
+		negate:     c.negate,
+		ranges:     append([]singleRange(nil), c.ranges...),
+		categories: append([]category(nil), c.categories...),
+	}
+
+	if c.sub != nil {
+		subCopy := c.sub.Copy()
+		dup.sub = &subCopy
+	}
+
+	return dup
+}
+
+// gets a human-readable description for a set string
+func (c CharSet) String() string {
+ buf := &bytes.Buffer{}
+ buf.WriteRune('[')
+
+ if c.IsNegated() {
+ buf.WriteRune('^')
+ }
+
+ for _, r := range c.ranges {
+
+ buf.WriteString(CharDescription(r.first))
+ if r.first != r.last {
+ if r.last-r.first != 1 {
+ //groups that are 1 char apart skip the dash
+ buf.WriteRune('-')
+ }
+ buf.WriteString(CharDescription(r.last))
+ }
+ }
+
+ for _, c := range c.categories {
+ buf.WriteString(c.String())
+ }
+
+ if c.sub != nil {
+ buf.WriteRune('-')
+ buf.WriteString(c.sub.String())
+ }
+
+ buf.WriteRune(']')
+
+ return buf.String()
+}
+
+// mapHashFill converts a charset into a buffer for use in maps
+func (c CharSet) mapHashFill(buf *bytes.Buffer) {
+ if c.negate {
+ buf.WriteByte(0)
+ } else {
+ buf.WriteByte(1)
+ }
+
+ binary.Write(buf, binary.LittleEndian, len(c.ranges))
+ binary.Write(buf, binary.LittleEndian, len(c.categories))
+ for _, r := range c.ranges {
+ buf.WriteRune(r.first)
+ buf.WriteRune(r.last)
+ }
+ for _, ct := range c.categories {
+ buf.WriteString(ct.cat)
+ if ct.negate {
+ buf.WriteByte(1)
+ } else {
+ buf.WriteByte(0)
+ }
+ }
+
+ if c.sub != nil {
+ c.sub.mapHashFill(buf)
+ }
+}
+
+// CharIn returns true if the rune is in our character set (either ranges or categories).
+// It handles negations and subtracted sub-charsets.
+func (c CharSet) CharIn(ch rune) bool {
+ val := false
+ // in s && !s.subtracted
+
+ //check ranges
+ for _, r := range c.ranges {
+ if ch < r.first {
+ continue
+ }
+ if ch <= r.last {
+ val = true
+ break
+ }
+ }
+
+ //check categories if we haven't already found a range
+ if !val && len(c.categories) > 0 {
+ for _, ct := range c.categories {
+ // special categories...then unicode
+ if ct.cat == spaceCategoryText {
+ if unicode.IsSpace(ch) {
+ // we found a space so we're done
+ // negate means this is a "bad" thing
+ val = !ct.negate
+ break
+ } else if ct.negate {
+ val = true
+ break
+ }
+ } else if ct.cat == wordCategoryText {
+ if IsWordChar(ch) {
+ val = !ct.negate
+ break
+ } else if ct.negate {
+ val = true
+ break
+ }
+ } else if unicode.Is(unicodeCategories[ct.cat], ch) {
+ // if we're in this unicode category then we're done
+ // if negate=true on this category then we "failed" our test
+ // otherwise we're good that we found it
+ val = !ct.negate
+ break
+ } else if ct.negate {
+ val = true
+ break
+ }
+ }
+ }
+
+ // negate the whole char set
+ if c.negate {
+ val = !val
+ }
+
+ // get subtracted recurse
+ if val && c.sub != nil {
+ val = !c.sub.CharIn(ch)
+ }
+
+ //log.Printf("Char '%v' in %v == %v", string(ch), c.String(), val)
+ return val
+}
+
+// String renders the category in regex syntax: \s or \w for the two special
+// categories and \p{name} for a known unicode name, with the upper-case
+// letter (\S, \W, \P) when the category is negated. Names that are not in
+// unicodeCategories yield an "Unknown category" message instead.
+func (c category) String() string {
+	switch c.cat {
+	case spaceCategoryText:
+		if c.negate {
+			return "\\S"
+		}
+		return "\\s"
+	case wordCategoryText:
+		if c.negate {
+			return "\\W"
+		}
+		return "\\w"
+	}
+
+	if _, known := unicodeCategories[c.cat]; !known {
+		return "Unknown category: " + c.cat
+	}
+	if c.negate {
+		return "\\P{" + c.cat + "}"
+	}
+	return "\\p{" + c.cat + "}"
+}
+
+// CharDescription produces a human-readable description for a single
+// character by passing it through escape.
+func CharDescription(ch rune) string {
+	desc := &bytes.Buffer{}
+	escape(desc, ch, false)
+	return desc.String()
+}
+
+// IsWordChar reports whether r is a word character: a rune in one of the
+// unicode categories L, Mn, Nd or Pc, or one of U+200C ZERO WIDTH NON-JOINER
+// and U+200D ZERO WIDTH JOINER. The original comment refers to UTS#18 Unicode
+// Regular Expressions (http://www.unicode.org/reports/tr18/), RL 1.4 Simple
+// Word Boundaries, for this definition.
+func IsWordChar(r rune) bool {
+	if r == '\u200C' || r == '\u200D' {
+		return true
+	}
+	return unicode.In(r, unicode.L, unicode.Mn, unicode.Nd, unicode.Pc)
+}
+
+// IsECMAWordChar reports whether r is a word character under ECMAScript
+// rules: an ASCII letter, an ASCII digit or the underscore. This is the same
+// set that the ecmaWord table describes for ECMAWordClass, so a word-boundary
+// test based on this function agrees with that class. It previously applied
+// the Unicode categories L, Mn, Nd and Pc, which also accepted non-ASCII
+// letters and digits.
+func IsECMAWordChar(r rune) bool {
+	switch {
+	case 'a' <= r && r <= 'z',
+		'A' <= r && r <= 'Z',
+		'0' <= r && r <= '9',
+		r == '_':
+		return true
+	}
+	return false
+}
+
+// SingletonChar returns the first rune of the first range without any
+// validation. Callers are expected to have checked IsSingleton or
+// IsSingletonInverse first; a set without ranges makes it panic.
+func (c CharSet) SingletonChar() rune {
+	return c.ranges[0].first
+}
+
+// IsSingleton reports whether the set consists of exactly one character: it
+// is not negated, has no subtraction and no categories, and its only range
+// starts and ends on the same rune.
+func (c CharSet) IsSingleton() bool {
+	if c.negate || c.sub != nil || len(c.categories) != 0 || len(c.ranges) != 1 {
+		return false
+	}
+	return c.ranges[0].first == c.ranges[0].last
+}
+
+// IsSingletonInverse reports whether the set is the negation of exactly one
+// character: it is negated, has no subtraction and no categories, and its
+// only range starts and ends on the same rune.
+func (c CharSet) IsSingletonInverse() bool {
+	if !c.negate || c.sub != nil || len(c.categories) != 0 || len(c.ranges) != 1 {
+		return false
+	}
+	return c.ranges[0].first == c.ranges[0].last
+}
+
+// IsMergeable reports whether the set is neither negated nor has a
+// subtracted set.
+func (c CharSet) IsMergeable() bool {
+	return !(c.IsNegated() || c.HasSubtraction())
+}
+
+// IsNegated reports whether the set as a whole is negated.
+func (c CharSet) IsNegated() bool {
+	return c.negate
+}
+
+// HasSubtraction reports whether a subtracted set is attached.
+func (c CharSet) HasSubtraction() bool {
+	return c.sub != nil
+}
+
+// IsEmpty reports whether the set has no ranges, no categories and no
+// subtracted set. The negate flag is not taken into account.
+func (c CharSet) IsEmpty() bool {
+	return c.sub == nil && len(c.ranges)+len(c.categories) == 0
+}
+
+// addDigit adds the digit class to the set, or its complement when negate is
+// set. In ECMA mode the ranges of the predefined ECMA digit classes are
+// merged in; otherwise the unicode category Nd is added. pattern is unused.
+func (c *CharSet) addDigit(ecma, negate bool, pattern string) {
+	switch {
+	case !ecma:
+		c.addCategories(category{cat: "Nd", negate: negate})
+	case negate:
+		c.addRanges(NotECMADigitClass().ranges)
+	default:
+		c.addRanges(ECMADigitClass().ranges)
+	}
+}
+
+// addChar adds the single character ch to the set, as a range of length one.
+func (c *CharSet) addChar(ch rune) {
+	c.addRange(ch, ch)
+}
+
+// addSpace adds the whitespace class to the set, or its complement when
+// negate is set. In ECMA mode the ranges of the predefined ECMA space classes
+// are merged in; otherwise the special space category is added.
+func (c *CharSet) addSpace(ecma, negate bool) {
+	switch {
+	case !ecma:
+		c.addCategories(category{cat: spaceCategoryText, negate: negate})
+	case negate:
+		c.addRanges(NotECMASpaceClass().ranges)
+	default:
+		c.addRanges(ECMASpaceClass().ranges)
+	}
+}
+
+// addWord adds the word-character class to the set, or its complement when
+// negate is set. In ECMA mode the ranges of the predefined ECMA word classes
+// are merged in; otherwise the special word category is added.
+func (c *CharSet) addWord(ecma, negate bool) {
+	switch {
+	case !ecma:
+		c.addCategories(category{cat: wordCategoryText, negate: negate})
+	case negate:
+		c.addRanges(NotECMAWordClass().ranges)
+	default:
+		c.addRanges(ECMAWordClass().ranges)
+	}
+}
+
+// addSet merges the ranges and categories of set into c. Nothing happens
+// when c already matches anything, and c is turned into "anything" when set
+// is. Only set's ranges, categories and anything flag are read; its negate
+// flag and subtracted set are not consulted.
+func (c *CharSet) addSet(set CharSet) {
+	switch {
+	case c.anything:
+		return
+	case set.anything:
+		c.makeAnything()
+		return
+	}
+
+	// append directly instead of going through addRanges, so the ranges are
+	// canonicalized only once, at the end
+	c.ranges = append(c.ranges, set.ranges...)
+	c.addCategories(set.categories...)
+	c.canonicalize()
+}
+
+// makeAnything turns the set into one that is flagged as matching anything:
+// the categories are dropped and the ranges are replaced with those of
+// AnyClass.
+func (c *CharSet) makeAnything() {
+	c.anything = true
+	c.categories = []category{}
+	c.ranges = AnyClass().ranges
+}
+
+// addCategories adds the given categories to the set, skipping those already
+// present. If a category is already present with the opposite negation, the
+// two together cover everything: the set becomes "anything" and the remaining
+// categories are not processed. Nothing happens when the set already matches
+// anything.
+func (c *CharSet) addCategories(cats ...category) {
+	if c.anything {
+		return
+	}
+
+	for _, incoming := range cats {
+		present := false
+		for i := range c.categories {
+			existing := c.categories[i]
+			if existing.cat != incoming.cat {
+				continue
+			}
+			if existing.negate != incoming.negate {
+				// same category, positive and negative
+				c.makeAnything()
+				return
+			}
+			present = true
+			break
+		}
+
+		if !present {
+			c.categories = append(c.categories, incoming)
+		}
+	}
+}
+
+// addRanges merges the given ranges into the set and re-canonicalizes it.
+// Nothing happens when the set already matches anything.
+func (c *CharSet) addRanges(ranges []singleRange) {
+	if !c.anything {
+		c.ranges = append(c.ranges, ranges...)
+		c.canonicalize()
+	}
+}
+
+// addNegativeRanges merges the complement of the given ranges into the set:
+// every rune from 0 to utf8.MaxRune that none of the ranges covers. The
+// ranges are assumed to be sorted in ascending order and not to overlap.
+// Nothing happens when the set already matches anything.
+func (c *CharSet) addNegativeRanges(ranges []singleRange) {
+	if c.anything {
+		return
+	}
+
+	// next is the lowest rune not yet accounted for
+	var next rune
+
+	for _, r := range ranges {
+		if next < r.first {
+			// the gap in front of this range belongs to the complement
+			c.ranges = append(c.ranges, singleRange{next, r.first - 1})
+		}
+		next = r.last + 1
+	}
+
+	// Everything after the last range belongs to the complement as well. The
+	// comparison is inclusive so that a remainder consisting of utf8.MaxRune
+	// alone is not dropped.
+	if next <= utf8.MaxRune {
+		c.ranges = append(c.ranges, singleRange{next, utf8.MaxRune})
+	}
+
+	c.canonicalize()
+}
+
+// isValidUnicodeCat reports whether catName is a key of unicodeCategories.
+func isValidUnicodeCat(catName string) bool {
+	_, known := unicodeCategories[catName]
+	return known
+}
+
+// addCategory adds the named unicode category, script or property to the
+// set, negated if negate is set. It panics with an error value when the name
+// is unknown. With caseInsensitive set, asking for any of the cased letter
+// categories Ll, Lu or Lt adds all three. pattern is unused.
+func (c *CharSet) addCategory(categoryName string, negate, caseInsensitive bool, pattern string) {
+	if !isValidUnicodeCat(categoryName) {
+		panic(fmt.Errorf("Unknown unicode category, script, or property '%v'", categoryName))
+	}
+
+	if caseInsensitive {
+		switch categoryName {
+		case "Ll", "Lu", "Lt":
+			// ignoring case, the three cased letter categories all have to match
+			c.addCategories(
+				category{cat: "Ll", negate: negate},
+				category{cat: "Lu", negate: negate},
+				category{cat: "Lt", negate: negate})
+		}
+	}
+
+	c.addCategories(category{cat: categoryName, negate: negate})
+}
+
+// addSubtraction installs sub as the set subtracted from c, replacing any
+// subtracted set that was there before.
+func (c *CharSet) addSubtraction(sub *CharSet) {
+	c.sub = sub
+}
+
+// addRange adds the inclusive range chMin..chMax to the set and
+// re-canonicalizes it.
+func (c *CharSet) addRange(chMin, chMax rune) {
+	added := singleRange{first: chMin, last: chMax}
+	c.ranges = append(c.ranges, added)
+	c.canonicalize()
+}
+
+// addNamedASCII adds the named ASCII class (such as "alpha" or "xdigit") to
+// the set, or its complement when negate is set, and reports whether the name
+// was recognized. "digit" is delegated to addDigit in non-ECMA mode, "space"
+// and "word" to addSpace and addWord in ECMA mode; all other names map to
+// fixed ASCII ranges.
+func (c *CharSet) addNamedASCII(name string, negate bool) bool {
+	var rs []singleRange
+
+	switch name {
+	case "digit":
+		c.addDigit(false, negate, "")
+		return true
+	case "space":
+		c.addSpace(true, negate)
+		return true
+	case "word":
+		c.addWord(true, negate)
+		return true
+
+	case "alnum":
+		rs = []singleRange{{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}
+	case "alpha":
+		rs = []singleRange{{'A', 'Z'}, {'a', 'z'}}
+	case "ascii":
+		rs = []singleRange{{0, 0x7f}}
+	case "blank":
+		rs = []singleRange{{'\t', '\t'}, {' ', ' '}}
+	case "cntrl":
+		rs = []singleRange{{0, 0x1f}, {0x7f, 0x7f}}
+	case "graph":
+		rs = []singleRange{{'!', '~'}}
+	case "lower":
+		rs = []singleRange{{'a', 'z'}}
+	case "print":
+		rs = []singleRange{{' ', '~'}}
+	case "punct": //[!-/:-@[-`{-~]
+		rs = []singleRange{{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}
+	case "upper":
+		rs = []singleRange{{'A', 'Z'}}
+	case "xdigit":
+		rs = []singleRange{{'0', '9'}, {'A', 'F'}, {'a', 'f'}}
+	default:
+		return false
+	}
+
+	if negate {
+		c.addNegativeRanges(rs)
+	} else {
+		c.addRanges(rs)
+	}
+	return true
+}
+
+// singleRangeSorter implements sort.Interface, ordering ranges by their
+// first rune.
+type singleRangeSorter []singleRange
+
+// Len returns the number of ranges.
+func (p singleRangeSorter) Len() int {
+	return len(p)
+}
+
+// Less reports whether range i starts before range j.
+func (p singleRangeSorter) Less(i, j int) bool {
+	return p[i].first < p[j].first
+}
+
+// Swap exchanges ranges i and j.
+func (p singleRangeSorter) Swap(i, j int) {
+	p[i], p[j] = p[j], p[i]
+}
+
+// Logic to reduce a character class to a unique, sorted form.
+func (c *CharSet) canonicalize() {
+ var i, j int
+ var last rune
+
+ //
+ // Find and eliminate overlapping or abutting ranges
+ //
+
+ if len(c.ranges) > 1 {
+ sort.Sort(singleRangeSorter(c.ranges))
+
+ done := false
+
+ for i, j = 1, 0; ; i++ {
+ for last = c.ranges[j].last; ; i++ {
+ if i == len(c.ranges) || last == utf8.MaxRune {
+ done = true
+ break
+ }
+
+ CurrentRange := c.ranges[i]
+ if CurrentRange.first > last+1 {
+ break
+ }
+
+ if last < CurrentRange.last {
+ last = CurrentRange.last
+ }
+ }
+
+ c.ranges[j] = singleRange{first: c.ranges[j].first, last: last}
+
+ j++
+
+ if done {
+ break
+ }
+
+ if j < i {
+ c.ranges[j] = c.ranges[i]
+ }
+ }
+
+ c.ranges = append(c.ranges[:j], c.ranges[len(c.ranges):]...)
+ }
+}
+
+// addLowercase prepares the set for case-insensitive matching. A range that
+// holds a single character is replaced by the lowercase form of that
+// character; for every wider range the lowercase counterparts are added via
+// addLowercaseRange. Nothing happens when the set already matches anything.
+func (c *CharSet) addLowercase() {
+	if c.anything {
+		return
+	}
+
+	// Wider ranges are collected first and expanded afterwards, because
+	// expanding them modifies c.ranges.
+	wide := []singleRange{}
+	for i, rg := range c.ranges {
+		if rg.first != rg.last {
+			wide = append(wide, rg)
+			continue
+		}
+		lower := unicode.ToLower(rg.first)
+		c.ranges[i] = singleRange{first: lower, last: lower}
+	}
+
+	for _, rg := range wide {
+		c.addLowercaseRange(rg.first, rg.last)
+	}
+	c.canonicalize()
+}
+
+/**************************************************************************
+ Let U be the set of Unicode character values and let L be the lowercase
+ function, mapping from U to U. To perform case insensitive matching of
+ character sets, we need to be able to map an interval I in U, say
+
+ I = [chMin, chMax] = { ch : chMin <= ch <= chMax }
+
+ to a set A such that A contains L(I) and A is contained in the union of
+ I and L(I).
+
+ The table below partitions U into intervals on which L is non-decreasing.
+ Thus, for any interval J = [a, b] contained in one of these intervals,
+ L(J) is contained in [L(a), L(b)].
+
+ It is also true that for any such J, [L(a), L(b)] is contained in the
+ union of J and L(J). This does not follow from L being non-decreasing on
+ these intervals. It follows from the nature of the L on each interval.
+ On each interval, L has one of the following forms:
+
+ (1) L(ch) = constant (LowercaseSet)
+ (2) L(ch) = ch + offset (LowercaseAdd)
+ (3) L(ch) = ch | 1 (LowercaseBor)
+ (4) L(ch) = ch + (ch & 1) (LowercaseBad)
+
+ It is easy to verify that for any of these forms [L(a), L(b)] is
+ contained in the union of [a, b] and L([a, b]).
+***************************************************************************/
+
+const ( // operations an lcMap entry can apply to a rune; see addLowercaseRange
+ LowercaseSet = 0 // Set to arg: L(ch) = data.
+ LowercaseAdd = 1 // Add arg: L(ch) = ch + data.
+ LowercaseBor = 2 // Bitwise or with 1: L(ch) = ch | 1.
+ LowercaseBad = 3 // Bitwise and with 1 and add original: L(ch) = ch + (ch & 1).
+)
+
+type lcMap struct { // lcMap is one row of lcTable: how the runes in chMin..chMax map to lowercase
+ chMin, chMax rune // inclusive bounds of the interval the row covers
+ op, data int32 // op is one of the Lowercase* constants; data is its argument, read for LowercaseSet and LowercaseAdd
+}
+
+// lcTable lists the intervals of upper-case runes together with the
+// operation that maps them to lowercase. The rows are in ascending order,
+// which addLowercaseRange relies on for its binary search.
+var lcTable = []lcMap{
+	{'\u0041', '\u005A', LowercaseAdd, 32},
+	{'\u00C0', '\u00DE', LowercaseAdd, 32},
+	{'\u0100', '\u012E', LowercaseBor, 0},
+	{'\u0130', '\u0130', LowercaseSet, 0x0069},
+	{'\u0132', '\u0136', LowercaseBor, 0},
+	{'\u0139', '\u0147', LowercaseBad, 0},
+	{'\u014A', '\u0176', LowercaseBor, 0},
+	{'\u0178', '\u0178', LowercaseSet, 0x00FF},
+	{'\u0179', '\u017D', LowercaseBad, 0},
+	{'\u0181', '\u0181', LowercaseSet, 0x0253},
+	{'\u0182', '\u0184', LowercaseBor, 0},
+	{'\u0186', '\u0186', LowercaseSet, 0x0254},
+	{'\u0187', '\u0187', LowercaseSet, 0x0188},
+	{'\u0189', '\u018A', LowercaseAdd, 205},
+	{'\u018B', '\u018B', LowercaseSet, 0x018C},
+	{'\u018E', '\u018E', LowercaseSet, 0x01DD},
+	{'\u018F', '\u018F', LowercaseSet, 0x0259},
+	{'\u0190', '\u0190', LowercaseSet, 0x025B},
+	{'\u0191', '\u0191', LowercaseSet, 0x0192},
+	{'\u0193', '\u0193', LowercaseSet, 0x0260},
+	{'\u0194', '\u0194', LowercaseSet, 0x0263},
+	{'\u0196', '\u0196', LowercaseSet, 0x0269},
+	{'\u0197', '\u0197', LowercaseSet, 0x0268},
+	{'\u0198', '\u0198', LowercaseSet, 0x0199},
+	{'\u019C', '\u019C', LowercaseSet, 0x026F},
+	{'\u019D', '\u019D', LowercaseSet, 0x0272},
+	{'\u019F', '\u019F', LowercaseSet, 0x0275},
+	{'\u01A0', '\u01A4', LowercaseBor, 0},
+	{'\u01A7', '\u01A7', LowercaseSet, 0x01A8},
+	{'\u01A9', '\u01A9', LowercaseSet, 0x0283},
+	{'\u01AC', '\u01AC', LowercaseSet, 0x01AD},
+	{'\u01AE', '\u01AE', LowercaseSet, 0x0288},
+	{'\u01AF', '\u01AF', LowercaseSet, 0x01B0},
+	{'\u01B1', '\u01B2', LowercaseAdd, 217},
+	{'\u01B3', '\u01B5', LowercaseBad, 0},
+	{'\u01B7', '\u01B7', LowercaseSet, 0x0292},
+	{'\u01B8', '\u01B8', LowercaseSet, 0x01B9},
+	{'\u01BC', '\u01BC', LowercaseSet, 0x01BD},
+	{'\u01C4', '\u01C5', LowercaseSet, 0x01C6},
+	{'\u01C7', '\u01C8', LowercaseSet, 0x01C9},
+	{'\u01CA', '\u01CB', LowercaseSet, 0x01CC},
+	{'\u01CD', '\u01DB', LowercaseBad, 0},
+	{'\u01DE', '\u01EE', LowercaseBor, 0},
+	{'\u01F1', '\u01F2', LowercaseSet, 0x01F3},
+	{'\u01F4', '\u01F4', LowercaseSet, 0x01F5},
+	{'\u01FA', '\u0216', LowercaseBor, 0},
+	{'\u0386', '\u0386', LowercaseSet, 0x03AC},
+	{'\u0388', '\u038A', LowercaseAdd, 37},
+	{'\u038C', '\u038C', LowercaseSet, 0x03CC},
+	{'\u038E', '\u038F', LowercaseAdd, 63},
+	{'\u0391', '\u03AB', LowercaseAdd, 32},
+	{'\u03E2', '\u03EE', LowercaseBor, 0},
+	{'\u0401', '\u040F', LowercaseAdd, 80},
+	{'\u0410', '\u042F', LowercaseAdd, 32},
+	{'\u0460', '\u0480', LowercaseBor, 0},
+	{'\u0490', '\u04BE', LowercaseBor, 0},
+	{'\u04C1', '\u04C3', LowercaseBad, 0},
+	{'\u04C7', '\u04C7', LowercaseSet, 0x04C8},
+	{'\u04CB', '\u04CB', LowercaseSet, 0x04CC},
+	{'\u04D0', '\u04EA', LowercaseBor, 0},
+	{'\u04EE', '\u04F4', LowercaseBor, 0},
+	{'\u04F8', '\u04F8', LowercaseSet, 0x04F9},
+	{'\u0531', '\u0556', LowercaseAdd, 48},
+	{'\u10A0', '\u10C5', LowercaseAdd, 48},
+	{'\u1E00', '\u1EF8', LowercaseBor, 0},
+	{'\u1F08', '\u1F0F', LowercaseAdd, -8},
+	{'\u1F18', '\u1F1F', LowercaseAdd, -8},
+	{'\u1F28', '\u1F2F', LowercaseAdd, -8},
+	{'\u1F38', '\u1F3F', LowercaseAdd, -8},
+	{'\u1F48', '\u1F4D', LowercaseAdd, -8},
+	{'\u1F59', '\u1F59', LowercaseSet, 0x1F51},
+	{'\u1F5B', '\u1F5B', LowercaseSet, 0x1F53},
+	{'\u1F5D', '\u1F5D', LowercaseSet, 0x1F55},
+	{'\u1F5F', '\u1F5F', LowercaseSet, 0x1F57},
+	{'\u1F68', '\u1F6F', LowercaseAdd, -8},
+	{'\u1F88', '\u1F8F', LowercaseAdd, -8},
+	{'\u1F98', '\u1F9F', LowercaseAdd, -8},
+	{'\u1FA8', '\u1FAF', LowercaseAdd, -8},
+	{'\u1FB8', '\u1FB9', LowercaseAdd, -8},
+	{'\u1FBA', '\u1FBB', LowercaseAdd, -74},
+	{'\u1FBC', '\u1FBC', LowercaseSet, 0x1FB3},
+	{'\u1FC8', '\u1FCB', LowercaseAdd, -86},
+	{'\u1FCC', '\u1FCC', LowercaseSet, 0x1FC3},
+	{'\u1FD8', '\u1FD9', LowercaseAdd, -8},
+	{'\u1FDA', '\u1FDB', LowercaseAdd, -100},
+	{'\u1FE8', '\u1FE9', LowercaseAdd, -8},
+	{'\u1FEA', '\u1FEB', LowercaseAdd, -112},
+	{'\u1FEC', '\u1FEC', LowercaseSet, 0x1FE5},
+	{'\u1FF8', '\u1FF9', LowercaseAdd, -128},
+	{'\u1FFA', '\u1FFB', LowercaseAdd, -126},
+	{'\u1FFC', '\u1FFC', LowercaseSet, 0x1FF3},
+	{'\u2160', '\u216F', LowercaseAdd, 16},
+	{'\u24B6', '\u24D0', LowercaseAdd, 26},
+	{'\uFF21', '\uFF3A', LowercaseAdd, 32},
+}
+
+func (c *CharSet) addLowercaseRange(chMin, chMax rune) {
+ var i, iMax, iMid int
+ var chMinT, chMaxT rune
+ var lc lcMap
+
+ for i, iMax = 0, len(lcTable); i < iMax; {
+ iMid = (i + iMax) / 2
+ if lcTable[iMid].chMax < chMin {
+ i = iMid + 1
+ } else {
+ iMax = iMid
+ }
+ }
+
+ for ; i < len(lcTable); i++ {
+ lc = lcTable[i]
+ if lc.chMin > chMax {
+ return
+ }
+ chMinT = lc.chMin
+ if chMinT < chMin {
+ chMinT = chMin
+ }
+
+ chMaxT = lc.chMax
+ if chMaxT > chMax {
+ chMaxT = chMax
+ }
+
+ switch lc.op {
+ case LowercaseSet:
+ chMinT = rune(lc.data)
+ chMaxT = rune(lc.data)
+ break
+ case LowercaseAdd:
+ chMinT += lc.data
+ chMaxT += lc.data
+ break
+ case LowercaseBor:
+ chMinT |= 1
+ chMaxT |= 1
+ break
+ case LowercaseBad:
+ chMinT += (chMinT & 1)
+ chMaxT += (chMaxT & 1)
+ break
+ }
+
+ if chMinT < chMin || chMaxT > chMax {
+ c.addRange(chMinT, chMaxT)
+ }
+ }
+}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/code.go b/vendor/github.com/dlclark/regexp2/syntax/code.go
new file mode 100644
index 0000000000..686e822af8
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/code.go
@@ -0,0 +1,274 @@
+package syntax
+
+import (
+ "bytes"
+ "fmt"
+ "math"
+)
+
+// similar to prog.go in the go regex package...also with comment 'may not belong in this package'
+
+// File provides operator constants for use by the Builder and the Machine.
+
+// Implementation notes:
+//
+// Regexps are built into RegexCodes, which contain an operation array,
+// a string table, and some constants.
+//
+// Each operation is one of the codes below, followed by the integer
+// operands specified for each op.
+//
+// Strings and sets are indices into a string table.
+
+// InstOp is the numeric code of one operation in a compiled regex program,
+// optionally combined with the modifier bits declared at the end of the
+// constant block below.
+type InstOp int
+
+// Opcode values. Only Onerep is declared with the InstOp type; every other
+// constant in this block has an explicit value and no type, so it is an
+// untyped integer constant.
+const (
+ // Each trailing comment lists: modes (lef/back), operands, description.
+
+ Onerep InstOp = 0 // lef,back char,min,max a {n}
+ Notonerep = 1 // lef,back char,min,max .{n}
+ Setrep = 2 // lef,back set,min,max [\d]{n}
+
+ Oneloop = 3 // lef,back char,min,max a {,n}
+ Notoneloop = 4 // lef,back char,min,max .{,n}
+ Setloop = 5 // lef,back set,min,max [\d]{,n}
+
+ Onelazy = 6 // lef,back char,min,max a {,n}?
+ Notonelazy = 7 // lef,back char,min,max .{,n}?
+ Setlazy = 8 // lef,back set,min,max [\d]{,n}?
+
+ One = 9 // lef char a
+ Notone = 10 // lef char [^a]
+ Set = 11 // lef set [a-z\s] \w \s \d
+
+ Multi = 12 // lef string abcd
+ Ref = 13 // lef group \#
+
+ Bol = 14 // ^
+ Eol = 15 // $
+ Boundary = 16 // \b
+ Nonboundary = 17 // \B
+ Beginning = 18 // \A
+ Start = 19 // \G
+ EndZ = 20 // \Z
+ End = 21 // \z
+
+ Nothing = 22 // Reject!
+
+ // Primitive control structures
+
+ Lazybranch = 23 // back jump straight first
+ Branchmark = 24 // back jump branch first for loop
+ Lazybranchmark = 25 // back jump straight first for loop
+ Nullcount = 26 // back val set counter, null mark
+ Setcount = 27 // back val set counter, make mark
+ Branchcount = 28 // back jump,limit branch++ if zero<=c<limit
+ Lazybranchcount = 29 // back jump,limit same, but straight first
+ Nullmark = 30 // back save position
+ Setmark = 31 // back save position
+ Capturemark = 32 // back group define group
+ Getmark = 33 // back recall position
+ Setjump = 34 // back save backtrack state
+ Backjump = 35 // zap back to saved state
+ Forejump = 36 // zap backtracking state
+ Testref = 37 // backtrack if ref undefined
+ Goto = 38 // jump just go
+
+ Prune = 39 // prune it baby
+ Stop = 40 // done!
+
+ ECMABoundary = 41 // \b
+ NonECMABoundary = 42 // \B
+
+ // Modifiers for alternate modes. They are bit flags or-ed onto one of the
+ // opcodes above; "op & Mask" recovers the plain opcode.
+
+ Mask = 63 // Mask to get unmodified ordinary operator
+ Rtl = 64 // bit to indicate that we're reverse scanning.
+ Back = 128 // bit to indicate that we're backtracking.
+ Back2 = 256 // bit to indicate that we're backtracking on a second branch.
+ Ci = 512 // bit to indicate that we're case-insensitive.
+)
+
+// Code is a compiled regex program: the flat operation array together with
+// the string and set tables its operands index into, plus capture-group
+// bookkeeping and the optional prefix and anchor data.
+type Code struct {
+ Codes []int // the code: opcodes, each followed by its integer operands
+ Strings [][]rune // string table
+ Sets []*CharSet // character set table
+ TrackCount int // how many instructions use backtracking
+ Caps map[int]int // mapping of user group numbers -> impl group slots
+ Capsize int // number of impl group slots
+ FcPrefix *Prefix // the set of candidate first characters (may be nil)
+ BmPrefix *BmPrefix // the fixed prefix string as a Boyer-Moore machine (may be nil)
+ Anchors AnchorLoc // the set of zero-length start anchors (RegexFCD.Bol, etc)
+ RightToLeft bool // true if right to left
+}
+
+// opcodeBacktracks reports whether the operation, with its modifier bits
+// stripped, is one of the opcodes listed below as using backtracking.
+func opcodeBacktracks(op InstOp) bool {
+	switch op & Mask {
+	case Oneloop, Notoneloop, Setloop,
+		Onelazy, Notonelazy, Setlazy,
+		Lazybranch, Branchmark, Lazybranchmark,
+		Nullcount, Setcount, Branchcount, Lazybranchcount,
+		Setmark, Capturemark, Getmark,
+		Setjump, Backjump, Forejump, Goto:
+		return true
+	}
+
+	return false
+}
+
+// opcodeSize returns how many slots of the code array the operation occupies:
+// the opcode itself plus its operands. Modifier bits are ignored. It panics
+// when the masked value is not a known opcode.
+func opcodeSize(op InstOp) int {
+	base := op & Mask
+
+	switch base {
+	// opcode only
+	case Nothing, Bol, Eol, Boundary, Nonboundary, ECMABoundary, NonECMABoundary,
+		Beginning, Start, EndZ, End,
+		Nullmark, Setmark, Getmark, Setjump, Backjump, Forejump, Stop:
+		return 1
+
+	// opcode + one operand
+	case One, Notone, Set, Multi, Ref, Testref, Goto,
+		Nullcount, Setcount, Lazybranch, Branchmark, Lazybranchmark, Prune:
+		return 2
+
+	// opcode + two operands
+	case Capturemark, Branchcount, Lazybranchcount,
+		Onerep, Notonerep, Setrep,
+		Oneloop, Notoneloop, Setloop,
+		Onelazy, Notonelazy, Setlazy:
+		return 3
+
+	default:
+		panic(fmt.Errorf("Unexpected op code: %v", base))
+	}
+}
+
+// codeStr holds the display name of every base opcode. It is indexed by the
+// opcode's numeric value (0 = Onerep ... 42 = NonECMABoundary), so the order
+// here must stay in step with the opcode constants.
+var codeStr = []string{
+ "Onerep", "Notonerep", "Setrep",
+ "Oneloop", "Notoneloop", "Setloop",
+ "Onelazy", "Notonelazy", "Setlazy",
+ "One", "Notone", "Set",
+ "Multi", "Ref",
+ "Bol", "Eol", "Boundary", "Nonboundary", "Beginning", "Start", "EndZ", "End",
+ "Nothing",
+ "Lazybranch", "Branchmark", "Lazybranchmark",
+ "Nullcount", "Setcount", "Branchcount", "Lazybranchcount",
+ "Nullmark", "Setmark", "Capturemark", "Getmark",
+ "Setjump", "Backjump", "Forejump", "Testref", "Goto",
+ "Prune", "Stop",
+ "ECMABoundary", "NonECMABoundary",
+}
+
+// operatorDescription returns the opcode's name followed by one suffix for
+// each modifier bit that is set, always in the order -Ci, -Rtl, -Back, -Back2.
+func operatorDescription(op InstOp) string {
+	suffixes := []struct {
+		bit  InstOp
+		text string
+	}{
+		{Ci, "-Ci"},
+		{Rtl, "-Rtl"},
+		{Back, "-Back"},
+		{Back2, "-Back2"},
+	}
+
+	name := codeStr[op&Mask]
+	for _, s := range suffixes {
+		if op&s.bit != 0 {
+			name += s.text
+		}
+	}
+
+	return name
+}
+
+// OpcodeDescription is a human readable string of the operation stored at
+// the given offset of c.Codes: the zero-padded offset, a "*" marker when the
+// opcode backtracks, the operator name with its modifiers, and the operands
+// in parentheses.
+func (c *Code) OpcodeDescription(offset int) string {
+	var buf bytes.Buffer
+
+	full := InstOp(c.Codes[offset])
+	base := full & Mask
+
+	fmt.Fprintf(&buf, "%06d ", offset)
+
+	marker := " "
+	if opcodeBacktracks(base) {
+		marker = "*"
+	}
+	buf.WriteString(marker)
+	buf.WriteString(operatorDescription(full))
+	buf.WriteString("(")
+
+	// writeBound prints the second operand, using "inf" for an unbounded count.
+	writeBound := func(label string) {
+		buf.WriteString(label)
+		if bound := c.Codes[offset+2]; bound == math.MaxInt32 {
+			buf.WriteString("inf")
+		} else {
+			fmt.Fprintf(&buf, "%d", bound)
+		}
+	}
+
+	// First operand.
+	switch base {
+	case One, Notone, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy:
+		buf.WriteString("Ch = ")
+		buf.WriteString(CharDescription(rune(c.Codes[offset+1])))
+
+	case Set, Setrep, Setloop, Setlazy:
+		buf.WriteString("Set = ")
+		buf.WriteString(c.Sets[c.Codes[offset+1]].String())
+
+	case Multi:
+		fmt.Fprintf(&buf, "String = %s", string(c.Strings[c.Codes[offset+1]]))
+
+	case Ref, Testref:
+		fmt.Fprintf(&buf, "Index = %d", c.Codes[offset+1])
+
+	case Capturemark:
+		fmt.Fprintf(&buf, "Index = %d", c.Codes[offset+1])
+		if unindex := c.Codes[offset+2]; unindex != -1 {
+			fmt.Fprintf(&buf, ", Unindex = %d", unindex)
+		}
+
+	case Nullcount, Setcount:
+		fmt.Fprintf(&buf, "Value = %d", c.Codes[offset+1])
+
+	case Goto, Lazybranch, Branchmark, Lazybranchmark, Branchcount, Lazybranchcount:
+		fmt.Fprintf(&buf, "Addr = %d", c.Codes[offset+1])
+	}
+
+	// Second operand, for the opcodes that carry a repeat count or a limit.
+	switch base {
+	case Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy, Setrep, Setloop, Setlazy:
+		writeBound(", Rep = ")
+
+	case Branchcount, Lazybranchcount:
+		writeBound(", Limit = ")
+	}
+
+	buf.WriteString(")")
+
+	return buf.String()
+}
+
+// Dump returns a multi-line description of the program: scan direction,
+// first-character set, fixed prefix, anchors, the Boyer-Moore table when a
+// prefix exists, and then one line per operation.
+func (c *Code) Dump() string {
+	var buf bytes.Buffer
+
+	switch {
+	case c.RightToLeft:
+		fmt.Fprintln(&buf, "Direction: right-to-left")
+	default:
+		fmt.Fprintln(&buf, "Direction: left-to-right")
+	}
+
+	if c.FcPrefix != nil {
+		fmt.Fprintf(&buf, "Firstchars: %v\n", c.FcPrefix.PrefixSet.String())
+	} else {
+		fmt.Fprintln(&buf, "Firstchars: n/a")
+	}
+
+	hasPrefix := c.BmPrefix != nil
+	if hasPrefix {
+		fmt.Fprintf(&buf, "Prefix: %v\n", Escape(c.BmPrefix.String()))
+	} else {
+		fmt.Fprintln(&buf, "Prefix: n/a")
+	}
+
+	fmt.Fprintf(&buf, "Anchors: %v\n", c.Anchors)
+	fmt.Fprintln(&buf)
+
+	if hasPrefix {
+		fmt.Fprintln(&buf, "BoyerMoore:")
+		fmt.Fprintln(&buf, c.BmPrefix.Dump(" "))
+	}
+
+	// Step through the code array one whole operation at a time.
+	for i := 0; i < len(c.Codes); {
+		fmt.Fprintln(&buf, c.OpcodeDescription(i))
+		i += opcodeSize(InstOp(c.Codes[i]))
+	}
+
+	return buf.String()
+}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/escape.go b/vendor/github.com/dlclark/regexp2/syntax/escape.go
new file mode 100644
index 0000000000..609df10731
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/escape.go
@@ -0,0 +1,94 @@
+package syntax
+
+import (
+ "bytes"
+ "strconv"
+ "strings"
+ "unicode"
+)
+
+// Escape returns input with every rune passed through escape: printable
+// metacharacters get a leading backslash and non-printable runes are written
+// as a symbolic (\n, \t, ...) or hexadecimal escape.
+func Escape(input string) string {
+	b := &bytes.Buffer{}
+	for _, r := range input {
+		escape(b, r, false)
+	}
+	return b.String()
+}
+
+// meta lists the printable characters that escape always prefixes with a
+// backslash.
+const meta = `\.+*?()|[]{}^$# `
+
+// escape appends the escaped form of r to b.
+//
+// Printable runes are written as-is, preceded by a backslash when they appear
+// in meta or when force is true. Non-printable runes are written as \a, \f,
+// \n, \r, \t, \v, as \xHH when below 0x100, and as \uHHHH otherwise.
+func escape(b *bytes.Buffer, r rune, force bool) {
+	if unicode.IsPrint(r) {
+		if strings.IndexRune(meta, r) >= 0 || force {
+			b.WriteRune('\\')
+		}
+		b.WriteRune(r)
+		return
+	}
+
+	switch r {
+	case '\a':
+		b.WriteString(`\a`)
+	case '\f':
+		b.WriteString(`\f`)
+	case '\n':
+		b.WriteString(`\n`)
+	case '\r':
+		b.WriteString(`\r`)
+	case '\t':
+		b.WriteString(`\t`)
+	case '\v':
+		b.WriteString(`\v`)
+	default:
+		hex := strconv.FormatInt(int64(r), 16)
+		if r < 0x100 {
+			b.WriteString(`\x`)
+			if len(hex) == 1 {
+				b.WriteRune('0')
+			}
+			b.WriteString(hex)
+			break
+		}
+
+		// Zero-pad to four digits so that runes in 0x100..0xFFF produce a
+		// complete \uHHHH escape instead of a three-digit one.
+		// NOTE(review): runes above 0xFFFF still yield more than four digits,
+		// as before; confirm how the parser's \u handling treats those.
+		b.WriteString(`\u`)
+		for pad := len(hex); pad < 4; pad++ {
+			b.WriteRune('0')
+		}
+		b.WriteString(hex)
+	}
+}
+
+// Unescape converts the backslash escapes in input back into the runes they
+// stand for. Input without a backslash is returned unchanged. A backslash at
+// the very end of input, or an escape the parser rejects, yields an error.
+func Unescape(input string) (string, error) {
+	first := strings.IndexRune(input, '\\')
+	if first < 0 {
+		// nothing to unescape
+		return input, nil
+	}
+
+	// Everything before the first backslash is copied verbatim; the text
+	// after it is walked rune by rune with a parser.
+	out := bytes.NewBufferString(input[:first])
+
+	var p parser
+	p.setPattern(input[first+1:])
+
+	for {
+		// The parser is positioned right after a backslash here.
+		if p.rightMost() {
+			return "", p.getErr(ErrIllegalEndEscape)
+		}
+		decoded, err := p.scanCharEscape()
+		if err != nil {
+			return "", err
+		}
+		out.WriteRune(decoded)
+
+		// Copy literal runes until the next backslash or the end of input.
+		for {
+			if p.rightMost() {
+				return out.String(), nil
+			}
+			next := p.moveRightGetChar()
+			if next == '\\' {
+				break
+			}
+			out.WriteRune(next)
+		}
+	}
+}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/fuzz.go b/vendor/github.com/dlclark/regexp2/syntax/fuzz.go
new file mode 100644
index 0000000000..ee863866db
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/fuzz.go
@@ -0,0 +1,20 @@
+// +build gofuzz
+
+package syntax
+
+// Fuzz is the input point for go-fuzz. It returns 0 when data does not parse
+// as a regex and 1 when it parses and is translated to code; a failure while
+// translating a successfully parsed tree panics.
+func Fuzz(data []byte) int {
+	tree, err := Parse(string(data), RegexOptions(0))
+	if err != nil {
+		return 0
+	}
+
+	// translate it to code
+	if _, err = Write(tree); err != nil {
+		panic(err)
+	}
+
+	return 1
+}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/parser.go b/vendor/github.com/dlclark/regexp2/syntax/parser.go
new file mode 100644
index 0000000000..f13f779cf6
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/parser.go
@@ -0,0 +1,2202 @@
+package syntax
+
+import (
+ "fmt"
+ "math"
+ "os"
+ "sort"
+ "strconv"
+ "unicode"
+)
+
+// RegexOptions is a set of bit flags that control how a pattern is parsed.
+type RegexOptions int32
+
+// Option flags; the quoted letter is the option's inline code. Only
+// IgnoreCase is declared with the RegexOptions type; the remaining constants
+// are untyped integer constants.
+const (
+ IgnoreCase RegexOptions = 0x0001 // "i"
+ Multiline = 0x0002 // "m"
+ ExplicitCapture = 0x0004 // "n"
+ Compiled = 0x0008 // "c"
+ Singleline = 0x0010 // "s"
+ IgnorePatternWhitespace = 0x0020 // "x"
+ RightToLeft = 0x0040 // "r"
+ Debug = 0x0080 // "d"
+ ECMAScript = 0x0100 // "e"
+ RE2 = 0x0200 // RE2 compat mode
+)
+
+// optionFromCode maps an inline option letter, in either ASCII case, to its
+// option flag. Letters without a mapping return 0.
+func optionFromCode(ch rune) RegexOptions {
+	// Fold ASCII upper case onto lower case so each option is listed once.
+	if 'A' <= ch && ch <= 'Z' {
+		ch += 'a' - 'A'
+	}
+
+	switch ch {
+	case 'i':
+		return IgnoreCase
+	case 'r':
+		return RightToLeft
+	case 'm':
+		return Multiline
+	case 'n':
+		return ExplicitCapture
+	case 's':
+		return Singleline
+	case 'x':
+		return IgnorePatternWhitespace
+	case 'd':
+		return Debug
+	case 'e':
+		return ECMAScript
+	}
+
+	return 0
+}
+
+// An Error describes a failure to parse a regular expression
+// and gives the offending expression.
+type Error struct {
+ Code ErrorCode // which failure occurred; its text may contain fmt verbs
+ Expr string // the expression being parsed
+ Args []interface{} // values substituted into Code's text, if any
+}
+
+// Error formats the failure as "error parsing regexp: <message> in `<expr>`".
+// The code's text is used as a fmt format string only when Args is non-empty.
+func (e *Error) Error() string {
+	msg := e.Code.String()
+	if len(e.Args) != 0 {
+		msg = fmt.Sprintf(msg, e.Args...)
+	}
+	return "error parsing regexp: " + msg + " in `" + e.Expr + "`"
+}
+
+// An ErrorCode describes a failure to parse a regular expression.
+// Its value is the message text; codes containing %v are format strings
+// whose arguments are carried in Error.Args.
+type ErrorCode string
+
+// Error codes. Only ErrInternalError is declared with the ErrorCode type;
+// the remaining constants are untyped string constants.
+const (
+ // internal issue
+ ErrInternalError ErrorCode = "regexp/syntax: internal error"
+ // Parser errors
+ ErrUnterminatedComment = "unterminated comment"
+ ErrInvalidCharRange = "invalid character class range"
+ ErrInvalidRepeatSize = "invalid repeat count"
+ ErrInvalidUTF8 = "invalid UTF-8"
+ ErrCaptureGroupOutOfRange = "capture group number out of range"
+ ErrUnexpectedParen = "unexpected )"
+ ErrMissingParen = "missing closing )"
+ ErrMissingBrace = "missing closing }"
+ ErrInvalidRepeatOp = "invalid nested repetition operator"
+ ErrMissingRepeatArgument = "missing argument to repetition operator"
+ ErrConditionalExpression = "illegal conditional (?(...)) expression"
+ ErrTooManyAlternates = "too many | in (?()|)"
+ ErrUnrecognizedGrouping = "unrecognized grouping construct: (%v"
+ ErrInvalidGroupName = "invalid group name: group names must begin with a word character and have a matching terminator"
+ ErrCapNumNotZero = "capture number cannot be zero"
+ ErrUndefinedBackRef = "reference to undefined group number %v"
+ ErrUndefinedNameRef = "reference to undefined group name %v"
+ ErrAlternationCantCapture = "alternation conditions do not capture and cannot be named"
+ ErrAlternationCantHaveComment = "alternation conditions cannot be comments"
+ ErrMalformedReference = "(?(%v) ) malformed"
+ ErrUndefinedReference = "(?(%v) ) reference to undefined group"
+ ErrIllegalEndEscape = "illegal \\ at end of pattern"
+ ErrMalformedSlashP = "malformed \\p{X} character escape"
+ ErrIncompleteSlashP = "incomplete \\p{X} character escape"
+ ErrUnknownSlashP = "unknown unicode category, script, or property '%v'"
+ ErrUnrecognizedEscape = "unrecognized escape sequence \\%v"
+ ErrMissingControl = "missing control character"
+ ErrUnrecognizedControl = "unrecognized control character"
+ ErrTooFewHex = "insufficient hexadecimal digits"
+ ErrInvalidHex = "hex values may not be larger than 0x10FFFF"
+ ErrMalformedNameRef = "malformed \\k<...> named back reference"
+ ErrBadClassInCharRange = "cannot include class \\%v in character range"
+ ErrUnterminatedBracket = "unterminated [] set"
+ ErrSubtractionMustBeLast = "a subtraction must be the last element in a character class"
+ ErrReversedCharRange = "[x-y] range in reverse order"
+)
+
+// String returns the code's message text unchanged.
+func (e ErrorCode) String() string {
+ return string(e)
+}
+
+// parser holds the state of one parse of a pattern: the node tree under
+// construction, the pattern text and scan position, the capture-group
+// bookkeeping, and the active options.
+type parser struct {
+ // Nodes under construction.
+ // NOTE(review): the helpers that maintain these fields are outside this
+ // section - the names suggest the current group, alternation,
+ // concatenation and unit; confirm against those helpers.
+ stack *regexNode
+ group *regexNode
+ alternation *regexNode
+ concatenation *regexNode
+ unit *regexNode
+
+ patternRaw string // the pattern as given; quoted in error values
+ pattern []rune // the pattern decoded into runes
+
+ currentPos int // scan position; reset to 0 before the main scan
+ specialCase *unicode.SpecialCase
+
+ autocap int // next number to hand out to an unnamed capture group
+ capcount int // number of distinct capture slots noted so far
+ captop int // one past the highest capture slot noted (capped at math.MaxInt32)
+ capsize int
+
+ caps map[int]int // capture slot number -> pattern position where it was noted
+ capnames map[string]int // group name -> pattern position, later rewritten to slot number
+
+ capnumlist []int // sorted slot numbers; only built when the numbering has gaps
+ capnamelist []string // group names in the order they were first noted
+
+ options RegexOptions // options currently in effect
+ optionsStack []RegexOptions
+ ignoreNextParen bool // set when the next "(" opens an alternation condition that must not capture
+}
+
+// Limits for detecting overflow before a decimal number is extended by
+// another digit: a value above maxValueDiv10, or equal to it with a next
+// digit above maxValueMod10, would exceed math.MaxInt32.
+const (
+ maxValueDiv10 int = math.MaxInt32 / 10
+ maxValueMod10 = math.MaxInt32 % 10
+)
+
+// Parse converts a regex string into a parse tree.
+//
+// The pattern is scanned twice: a prescan that works out the capture slots,
+// then the main scan that builds the node tree. When the Debug option is set
+// the resulting tree is also dumped to standard output.
+func Parse(re string, op RegexOptions) (*RegexTree, error) {
+	p := parser{options: op, caps: make(map[int]int)}
+	p.setPattern(re)
+
+	// pass 1: capture bookkeeping
+	if err := p.countCaptures(); err != nil {
+		return nil, err
+	}
+
+	// pass 2: build the tree from a clean scan state
+	p.reset(op)
+	root, err := p.scanRegex()
+	if err != nil {
+		return nil, err
+	}
+
+	tree := &RegexTree{
+		root:       root,
+		caps:       p.caps,
+		capnumlist: p.capnumlist,
+		captop:     p.captop,
+		Capnames:   p.capnames,
+		Caplist:    p.capnamelist,
+		options:    op,
+	}
+
+	if op&Debug != 0 {
+		os.Stdout.WriteString(tree.Dump())
+	}
+
+	return tree, nil
+}
+
+// setPattern stores the pattern both as the raw string and as a rune slice,
+// so the scanner can index it by character rather than by UTF-8 byte.
+func (p *parser) setPattern(pattern string) {
+	p.patternRaw = pattern
+	p.pattern = []rune(pattern)
+}
+// getErr builds an *Error for the given code, attaching the raw pattern and
+// any arguments for the code's format verbs.
+func (p *parser) getErr(code ErrorCode, args ...interface{}) error {
+ return &Error{Code: code, Expr: p.patternRaw, Args: args}
+}
+
+// noteCaptureSlot records capture slot i, first seen at pattern position
+// pos. A slot that is already recorded is left untouched. captop is kept at
+// one past the highest slot seen, without going beyond math.MaxInt32.
+func (p *parser) noteCaptureSlot(i, pos int) {
+	if _, seen := p.caps[i]; seen {
+		return
+	}
+
+	// the rhs of the hashtable isn't used in the parser
+	p.caps[i] = pos
+	p.capcount++
+
+	if p.captop > i {
+		return
+	}
+	p.captop = i
+	if i != math.MaxInt32 {
+		p.captop++
+	}
+}
+
+// noteCaptureName records a named group first seen at pattern position pos.
+// The name map is created on first use; a name that is already known keeps
+// its original position and is not listed twice.
+func (p *parser) noteCaptureName(name string, pos int) {
+	if p.capnames == nil {
+		p.capnames = make(map[string]int)
+	}
+
+	if _, known := p.capnames[name]; known {
+		return
+	}
+
+	p.capnames[name] = pos
+	p.capnamelist = append(p.capnamelist, name)
+}
+
+// assignNameSlots gives every named group a slot number and then builds the
+// combined name list.
+//
+// It works in three steps: named groups get the next slot numbers not already
+// taken; if the slot numbering has gaps, the sorted list of used slots is
+// built; finally capnamelist is rebuilt with one entry per slot, using the
+// group's name where it has one and the decimal slot number otherwise.
+func (p *parser) assignNameSlots() {
+ if p.capnames != nil {
+ for _, name := range p.capnamelist {
+ // skip over slot numbers that are already in use
+ for p.isCaptureSlot(p.autocap) {
+ p.autocap++
+ }
+ // capnames held the pattern position until now; replace it with the slot
+ pos := p.capnames[name]
+ p.capnames[name] = p.autocap
+ p.noteCaptureSlot(p.autocap, pos)
+
+ p.autocap++
+ }
+ }
+
+ // if the caps array has at least one gap, construct the list of used slots
+ if p.capcount < p.captop {
+ p.capnumlist = make([]int, p.capcount)
+ i := 0
+
+ for k := range p.caps {
+ p.capnumlist[i] = k
+ i++
+ }
+
+ // map iteration order is random, so sort to get ascending slot numbers
+ sort.Ints(p.capnumlist)
+ }
+
+ // merge capsnumlist into capnamelist
+ if p.capnames != nil || p.capnumlist != nil {
+ var oldcapnamelist []string
+ var next int // slot of the next named group still to be merged, -1 when none remain
+ var k int // index into oldcapnamelist
+
+ if p.capnames == nil {
+ oldcapnamelist = nil
+ p.capnames = make(map[string]int)
+ p.capnamelist = []string{}
+ next = -1
+ } else {
+ oldcapnamelist = p.capnamelist
+ p.capnamelist = []string{}
+ next = p.capnames[oldcapnamelist[0]]
+ }
+
+ for i := 0; i < p.capcount; i++ {
+ // j is the slot number of the i-th capture
+ j := i
+ if p.capnumlist != nil {
+ j = p.capnumlist[i]
+ }
+
+ if next == j {
+ // this slot belongs to the next named group
+ p.capnamelist = append(p.capnamelist, oldcapnamelist[k])
+ k++
+
+ if k == len(oldcapnamelist) {
+ next = -1
+ } else {
+ next = p.capnames[oldcapnamelist[k]]
+ }
+
+ } else {
+ // unnamed slot: its name is its number
+ //feature: culture?
+ str := strconv.Itoa(j)
+ p.capnamelist = append(p.capnamelist, str)
+ p.capnames[str] = j
+ }
+ }
+ }
+}
+
+// consumeAutocap returns the current automatic capture number and advances
+// the counter by one.
+func (p *parser) consumeAutocap() int {
+	p.autocap++
+	return p.autocap - 1
+}
+
+// countCaptures is a prescanner for deducing the slots used for
+// captures by doing a partial tokenization of the pattern.
+//
+// Slot 0 is always noted. Explicitly numbered groups and named groups are
+// noted as they are found, unnamed groups take the next automatic number, and
+// assignNameSlots is run at the end. The only error returned from here is a
+// failure to scan an explicit group number; results of the other scan
+// helpers are discarded in this pass.
+func (p *parser) countCaptures() error {
+ var ch rune
+
+ p.noteCaptureSlot(0, 0)
+
+ p.autocap = 1
+
+ for p.charsRight() > 0 {
+ pos := p.textpos()
+ ch = p.moveRightGetChar()
+ switch ch {
+ case '\\':
+ // skip the escape so its characters are not read as syntax
+ if p.charsRight() > 0 {
+ p.scanBackslash(true)
+ }
+
+ case '#':
+ if p.useOptionX() {
+ p.moveLeft()
+ p.scanBlank()
+ }
+
+ case '[':
+ // skip the whole character class
+ p.scanCharSet(false, true)
+
+ case ')':
+ if !p.emptyOptionsStack() {
+ p.popOptions()
+ }
+
+ case '(':
+ if p.charsRight() >= 2 && p.rightChar(1) == '#' && p.rightChar(0) == '?' {
+ // "(?#": back up onto the paren and let scanBlank handle it
+ p.moveLeft()
+ p.scanBlank()
+ } else {
+ p.pushOptions()
+ if p.charsRight() > 0 && p.rightChar(0) == '?' {
+ // we have (?...
+ p.moveRight(1)
+
+ if p.charsRight() > 1 && (p.rightChar(0) == '<' || p.rightChar(0) == '\'') {
+ // named group: (?<... or (?'...
+
+ p.moveRight(1)
+ ch = p.rightChar(0)
+
+ if ch != '0' && IsWordChar(ch) {
+ if ch >= '1' && ch <= '9' {
+ // explicitly numbered group
+ dec, err := p.scanDecimal()
+ if err != nil {
+ return err
+ }
+ p.noteCaptureSlot(dec, pos)
+ } else {
+ p.noteCaptureName(p.scanCapname(), pos)
+ }
+ }
+ } else if p.useRE2() && p.charsRight() > 2 && (p.rightChar(0) == 'P' && p.rightChar(1) == '<') {
+ // RE2-compat (?P<)
+ p.moveRight(2)
+ ch = p.rightChar(0)
+ if IsWordChar(ch) {
+ p.noteCaptureName(p.scanCapname(), pos)
+ }
+
+ } else {
+ // (?...
+
+ // get the options if it's an option construct (?cimsx-cimsx...)
+ p.scanOptions()
+
+ if p.charsRight() > 0 {
+ if p.rightChar(0) == ')' {
+ // (?cimsx-cimsx)
+ p.moveRight(1)
+ p.popKeepOptions()
+ } else if p.rightChar(0) == '(' {
+ // alternation construct: (?(foo)yes|no)
+ // ignore the next paren so we don't capture the condition
+ p.ignoreNextParen = true
+
+ // continue the outer loop from here so we don't reset ignoreNextParen
+ continue
+ }
+ }
+ }
+ } else {
+ // plain "(": takes the next automatic number unless captures are
+ // explicit-only or this paren is an alternation condition
+ if !p.useOptionN() && !p.ignoreNextParen {
+ p.noteCaptureSlot(p.consumeAutocap(), pos)
+ }
+ }
+ }
+
+ p.ignoreNextParen = false
+
+ }
+ }
+
+ p.assignNameSlots()
+ return nil
+}
+
+// reset puts the scanner back at the start of the pattern with the given
+// top-level options: position 0, automatic capture numbering restarting at
+// 1, an emptied options stack and no node stack.
+func (p *parser) reset(topopts RegexOptions) {
+	p.stack = nil
+	p.options = topopts
+
+	// Truncating keeps the backing array; a nil stack stays nil.
+	p.optionsStack = p.optionsStack[:0]
+
+	p.ignoreNextParen = false
+	p.autocap = 1
+	p.currentPos = 0
+}
+
+// scanRegex is the main scanning loop. It walks the pattern from the current
+// position, building nodes for literal runs, sets, groups, anchors, escapes
+// and quantifiers, and returns the root node once the pattern is exhausted.
+//
+// An error is returned for an unbalanced paren, a quantifier with nothing to
+// repeat, a repeat count with min greater than max, or any error reported by
+// the scan helpers it calls.
+func (p *parser) scanRegex() (*regexNode, error) {
+	ch := '@' // nonspecial ch, means at beginning
+	isQuant := false
+
+	p.startGroup(newRegexNodeMN(ntCapture, p.options, 0, -1))
+
+	for p.charsRight() > 0 {
+		wasPrevQuantifier := isQuant
+		isQuant = false
+
+		if err := p.scanBlank(); err != nil {
+			return nil, err
+		}
+
+		startpos := p.textpos()
+
+		// move past all of the normal characters. We'll stop when we hit some kind of control character,
+		// or if IgnorePatternWhiteSpace is on, we'll stop when we see some whitespace.
+		if p.useOptionX() {
+			for p.charsRight() > 0 {
+				ch = p.rightChar(0)
+				//UGLY: clean up, this is ugly
+				if !(!isStopperX(ch) || (ch == '{' && !p.isTrueQuantifier())) {
+					break
+				}
+				p.moveRight(1)
+			}
+		} else {
+			for p.charsRight() > 0 {
+				ch = p.rightChar(0)
+				if !(!isSpecial(ch) || ch == '{' && !p.isTrueQuantifier()) {
+					break
+				}
+				p.moveRight(1)
+			}
+		}
+
+		endpos := p.textpos()
+
+		// Checked like every other scanBlank call in this function, so an
+		// error found after a literal run is reported rather than dropped.
+		if err := p.scanBlank(); err != nil {
+			return nil, err
+		}
+
+		if p.charsRight() == 0 {
+			ch = '!' // nonspecial, means at end
+		} else if ch = p.rightChar(0); isSpecial(ch) {
+			isQuant = isQuantifier(ch)
+			p.moveRight(1)
+		} else {
+			ch = ' ' // nonspecial, means at ordinary char
+		}
+
+		if startpos < endpos {
+			// A quantifier binds only to the last character of the literal
+			// run, so that character is split off into its own unit.
+			cchUnquantified := endpos - startpos
+			if isQuant {
+				cchUnquantified--
+			}
+			wasPrevQuantifier = false
+
+			if cchUnquantified > 0 {
+				p.addToConcatenate(startpos, cchUnquantified, false)
+			}
+
+			if isQuant {
+				p.addUnitOne(p.charAt(endpos - 1))
+			}
+		}
+
+		switch ch {
+		case '!':
+			goto BreakOuterScan
+
+		case ' ':
+			goto ContinueOuterScan
+
+		case '[':
+			cc, err := p.scanCharSet(p.useOptionI(), false)
+			if err != nil {
+				return nil, err
+			}
+			p.addUnitSet(cc)
+
+		case '(':
+			p.pushOptions()
+
+			if grouper, err := p.scanGroupOpen(); err != nil {
+				return nil, err
+			} else if grouper == nil {
+				p.popKeepOptions()
+			} else {
+				p.pushGroup()
+				p.startGroup(grouper)
+			}
+
+			continue
+
+		case '|':
+			p.addAlternate()
+			goto ContinueOuterScan
+
+		case ')':
+			if p.emptyStack() {
+				return nil, p.getErr(ErrUnexpectedParen)
+			}
+
+			if err := p.addGroup(); err != nil {
+				return nil, err
+			}
+			if err := p.popGroup(); err != nil {
+				return nil, err
+			}
+			p.popOptions()
+
+			if p.unit == nil {
+				goto ContinueOuterScan
+			}
+
+		case '\\':
+			n, err := p.scanBackslash(false)
+			if err != nil {
+				return nil, err
+			}
+			p.addUnitNode(n)
+
+		case '^':
+			if p.useOptionM() {
+				p.addUnitType(ntBol)
+			} else {
+				p.addUnitType(ntBeginning)
+			}
+
+		case '$':
+			if p.useOptionM() {
+				p.addUnitType(ntEol)
+			} else {
+				p.addUnitType(ntEndZ)
+			}
+
+		case '.':
+			if p.useOptionE() {
+				p.addUnitSet(ECMAAnyClass())
+			} else if p.useOptionS() {
+				p.addUnitSet(AnyClass())
+			} else {
+				p.addUnitNotone('\n')
+			}
+
+		case '{', '*', '+', '?':
+			if p.unit == nil {
+				if wasPrevQuantifier {
+					return nil, p.getErr(ErrInvalidRepeatOp)
+				} else {
+					return nil, p.getErr(ErrMissingRepeatArgument)
+				}
+			}
+			// step back onto the quantifier so the code below re-reads it
+			p.moveLeft()
+
+		default:
+			return nil, p.getErr(ErrInternalError)
+		}
+
+		if err := p.scanBlank(); err != nil {
+			return nil, err
+		}
+
+		if p.charsRight() > 0 {
+			isQuant = p.isTrueQuantifier()
+		}
+		if p.charsRight() == 0 || !isQuant {
+			//maintain odd C# assignment order -- not sure if required, could clean up?
+			p.addConcatenate()
+			goto ContinueOuterScan
+		}
+
+		ch = p.moveRightGetChar()
+
+		// Handle quantifiers
+		for p.unit != nil {
+			var min, max int
+			var lazy bool
+
+			switch ch {
+			case '*':
+				min = 0
+				max = math.MaxInt32
+
+			case '?':
+				min = 0
+				max = 1
+
+			case '+':
+				min = 1
+				max = math.MaxInt32
+
+			case '{':
+				{
+					var err error
+					startpos = p.textpos()
+					if min, err = p.scanDecimal(); err != nil {
+						return nil, err
+					}
+					max = min
+					if startpos < p.textpos() {
+						if p.charsRight() > 0 && p.rightChar(0) == ',' {
+							p.moveRight(1)
+							if p.charsRight() == 0 || p.rightChar(0) == '}' {
+								max = math.MaxInt32
+							} else {
+								if max, err = p.scanDecimal(); err != nil {
+									return nil, err
+								}
+							}
+						}
+					}
+
+					// Not a well-formed {n}, {n,} or {n,m}: rewind to the
+					// brace and carry on with the outer scan.
+					if startpos == p.textpos() || p.charsRight() == 0 || p.moveRightGetChar() != '}' {
+						p.addConcatenate()
+						p.textto(startpos - 1)
+						goto ContinueOuterScan
+					}
+				}
+
+			default:
+				return nil, p.getErr(ErrInternalError)
+			}
+
+			if err := p.scanBlank(); err != nil {
+				return nil, err
+			}
+
+			// a '?' directly after the quantifier makes it lazy
+			if p.charsRight() == 0 || p.rightChar(0) != '?' {
+				lazy = false
+			} else {
+				p.moveRight(1)
+				lazy = true
+			}
+
+			if min > max {
+				return nil, p.getErr(ErrInvalidRepeatSize)
+			}
+
+			p.addConcatenate3(lazy, min, max)
+		}
+
+	ContinueOuterScan:
+	}
+
+BreakOuterScan:
+	;
+
+	// anything left on the stack is a group that was never closed
+	if !p.emptyStack() {
+		return nil, p.getErr(ErrMissingParen)
+	}
+
+	if err := p.addGroup(); err != nil {
+		return nil, err
+	}
+
+	return p.unit, nil
+
+}
+
+// scanReplacement does the simple parsing needed for replacement patterns:
+// runs of literal text are added as-is and every '$' is handed to scanDollar.
+// It returns the resulting concatenation node.
+func (p *parser) scanReplacement() (*regexNode, error) {
+	p.concatenation = newRegexNode(ntConcatenate, p.options)
+
+	for remaining := p.charsRight(); remaining != 0; remaining = p.charsRight() {
+		literalStart := p.textpos()
+
+		// advance over literal text up to the next '$' or the end
+		for ; remaining > 0 && p.rightChar(0) != '$'; remaining-- {
+			p.moveRight(1)
+		}
+
+		p.addToConcatenate(literalStart, p.textpos()-literalStart, true)
+
+		if remaining <= 0 {
+			continue
+		}
+
+		if p.moveRightGetChar() == '$' {
+			node, err := p.scanDollar()
+			if err != nil {
+				return nil, err
+			}
+			p.addUnitNode(node)
+		}
+		p.addConcatenate()
+	}
+
+	return p.concatenation, nil
+}
+
+// scanDollar scans the $ patterns recognized within replacement patterns.
+// It is called with the scan position just after a '$' and returns either a
+// reference node for the substitution it recognized or, when nothing valid
+// follows, a literal '$' node with the position restored to just after the
+// '$'.
+func (p *parser) scanDollar() (*regexNode, error) {
+ // a '$' at the very end is a literal
+ if p.charsRight() == 0 {
+ return newRegexNodeCh(ntOne, p.options, '$'), nil
+ }
+
+ ch := p.rightChar(0)
+ angled := false
+ backpos := p.textpos() // where to rewind to if this turns out to be a literal '$'
+ lastEndPos := backpos
+
+ // Note the opening brace of a ${...} form
+
+ if ch == '{' && p.charsRight() > 1 {
+ angled = true
+ p.moveRight(1)
+ ch = p.rightChar(0)
+ }
+
+ // Try to parse a group reference: $1 or ${1} or ${cap}
+
+ if ch >= '0' && ch <= '9' {
+ if !angled && p.useOptionE() {
+ // ECMAScript mode: read digits one at a time and keep the longest
+ // prefix that names an existing capture slot
+ capnum := -1
+ newcapnum := int(ch - '0')
+ p.moveRight(1)
+ if p.isCaptureSlot(newcapnum) {
+ capnum = newcapnum
+ lastEndPos = p.textpos()
+ }
+
+ for p.charsRight() > 0 {
+ ch = p.rightChar(0)
+ if ch < '0' || ch > '9' {
+ break
+ }
+ digit := int(ch - '0')
+ // refuse to add a digit that would push the number past math.MaxInt32
+ if newcapnum > maxValueDiv10 || (newcapnum == maxValueDiv10 && digit > maxValueMod10) {
+ return nil, p.getErr(ErrCaptureGroupOutOfRange)
+ }
+
+ newcapnum = newcapnum*10 + digit
+
+ p.moveRight(1)
+ if p.isCaptureSlot(newcapnum) {
+ capnum = newcapnum
+ lastEndPos = p.textpos()
+ }
+ }
+ // rewind to the end of the longest valid number; later digits are literal text
+ p.textto(lastEndPos)
+ if capnum >= 0 {
+ return newRegexNodeM(ntRef, p.options, capnum), nil
+ }
+ } else {
+ capnum, err := p.scanDecimal()
+ if err != nil {
+ return nil, err
+ }
+ // the braced form must be closed by '}'
+ if !angled || p.charsRight() > 0 && p.moveRightGetChar() == '}' {
+ if p.isCaptureSlot(capnum) {
+ return newRegexNodeM(ntRef, p.options, capnum), nil
+ }
+ }
+ }
+ } else if angled && IsWordChar(ch) {
+ capname := p.scanCapname()
+
+ if p.charsRight() > 0 && p.moveRightGetChar() == '}' {
+ if p.isCaptureName(capname) {
+ return newRegexNodeM(ntRef, p.options, p.captureSlotFromName(capname)), nil
+ }
+ }
+ } else if !angled {
+ // single-character substitutions; capnum stays 1 when ch is not one of them
+ capnum := 1
+
+ switch ch {
+ case '$':
+ // "$$" is an escaped dollar sign
+ p.moveRight(1)
+ return newRegexNodeCh(ntOne, p.options, '$'), nil
+ case '&':
+ capnum = 0
+ case '`':
+ capnum = replaceLeftPortion
+ case '\'':
+ capnum = replaceRightPortion
+ case '+':
+ capnum = replaceLastGroup
+ case '_':
+ capnum = replaceWholeString
+ }
+
+ if capnum != 1 {
+ p.moveRight(1)
+ return newRegexNodeM(ntRef, p.options, capnum), nil
+ }
+ }
+
+ // unrecognized $: literalize
+
+ p.textto(backpos)
+ return newRegexNodeCh(ntOne, p.options, '$'), nil
+}
+
+// scanGroupOpen scans chars following a '(' (not counting the '('), and returns
+// a RegexNode for the type of group scanned, or nil if the group
+// simply changed options (?cimsx-cimsx) or was a comment (#...).
+func (p *parser) scanGroupOpen() (*regexNode, error) {
+ var ch rune
+ var nt nodeType
+ var err error
+ close := '>'
+ start := p.textpos()
+
+ // just return a RegexNode if we have:
+ // 1. "(" followed by nothing
+ // 2. "(x" where x != ?
+ // 3. "(?)"
+ if p.charsRight() == 0 || p.rightChar(0) != '?' || (p.rightChar(0) == '?' && (p.charsRight() > 1 && p.rightChar(1) == ')')) {
+ if p.useOptionN() || p.ignoreNextParen {
+ p.ignoreNextParen = false
+ return newRegexNode(ntGroup, p.options), nil
+ }
+ return newRegexNodeMN(ntCapture, p.options, p.consumeAutocap(), -1), nil
+ }
+
+ p.moveRight(1)
+
+ for {
+ if p.charsRight() == 0 {
+ break
+ }
+
+ switch ch = p.moveRightGetChar(); ch {
+ case ':':
+ nt = ntGroup
+
+ case '=':
+ p.options &= ^RightToLeft
+ nt = ntRequire
+
+ case '!':
+ p.options &= ^RightToLeft
+ nt = ntPrevent
+
+ case '>':
+ nt = ntGreedy
+
+ case '\'':
+ close = '\''
+ fallthrough
+
+ case '<':
+ if p.charsRight() == 0 {
+ goto BreakRecognize
+ }
+
+ switch ch = p.moveRightGetChar(); ch {
+ case '=':
+ if close == '\'' {
+ goto BreakRecognize
+ }
+
+ p.options |= RightToLeft
+ nt = ntRequire
+
+ case '!':
+ if close == '\'' {
+ goto BreakRecognize
+ }
+
+ p.options |= RightToLeft
+ nt = ntPrevent
+
+ default:
+ p.moveLeft()
+ capnum := -1
+ uncapnum := -1
+ proceed := false
+
+ // grab part before -
+
+ if ch >= '0' && ch <= '9' {
+ if capnum, err = p.scanDecimal(); err != nil {
+ return nil, err
+ }
+
+ if !p.isCaptureSlot(capnum) {
+ capnum = -1
+ }
+
+ // check if we have bogus characters after the number
+ if p.charsRight() > 0 && !(p.rightChar(0) == close || p.rightChar(0) == '-') {
+ return nil, p.getErr(ErrInvalidGroupName)
+ }
+ if capnum == 0 {
+ return nil, p.getErr(ErrCapNumNotZero)
+ }
+ } else if IsWordChar(ch) {
+ capname := p.scanCapname()
+
+ if p.isCaptureName(capname) {
+ capnum = p.captureSlotFromName(capname)
+ }
+
+ // check if we have bogus character after the name
+ if p.charsRight() > 0 && !(p.rightChar(0) == close || p.rightChar(0) == '-') {
+ return nil, p.getErr(ErrInvalidGroupName)
+ }
+ } else if ch == '-' {
+ proceed = true
+ } else {
+ // bad group name - starts with something other than a word character and isn't a number
+ return nil, p.getErr(ErrInvalidGroupName)
+ }
+
+ // grab part after - if any
+
+ if (capnum != -1 || proceed == true) && p.charsRight() > 0 && p.rightChar(0) == '-' {
+ p.moveRight(1)
+
+ //no more chars left, no closing char, etc
+ if p.charsRight() == 0 {
+ return nil, p.getErr(ErrInvalidGroupName)
+ }
+
+ ch = p.rightChar(0)
+ if ch >= '0' && ch <= '9' {
+ if uncapnum, err = p.scanDecimal(); err != nil {
+ return nil, err
+ }
+
+ if !p.isCaptureSlot(uncapnum) {
+ return nil, p.getErr(ErrUndefinedBackRef, uncapnum)
+ }
+
+ // check if we have bogus characters after the number
+ if p.charsRight() > 0 && p.rightChar(0) != close {
+ return nil, p.getErr(ErrInvalidGroupName)
+ }
+ } else if IsWordChar(ch) {
+ uncapname := p.scanCapname()
+
+ if !p.isCaptureName(uncapname) {
+ return nil, p.getErr(ErrUndefinedNameRef, uncapname)
+ }
+ uncapnum = p.captureSlotFromName(uncapname)
+
+ // check if we have bogus character after the name
+ if p.charsRight() > 0 && p.rightChar(0) != close {
+ return nil, p.getErr(ErrInvalidGroupName)
+ }
+ } else {
+ // bad group name - starts with something other than a word character and isn't a number
+ return nil, p.getErr(ErrInvalidGroupName)
+ }
+ }
+
+ // actually make the node
+
+ if (capnum != -1 || uncapnum != -1) && p.charsRight() > 0 && p.moveRightGetChar() == close {
+ return newRegexNodeMN(ntCapture, p.options, capnum, uncapnum), nil
+ }
+ goto BreakRecognize
+ }
+
+ case '(':
+ // alternation construct (?(...) | )
+
+ parenPos := p.textpos()
+ if p.charsRight() > 0 {
+ ch = p.rightChar(0)
+
+ // check if the alternation condition is a backref
+ if ch >= '0' && ch <= '9' {
+ var capnum int
+ if capnum, err = p.scanDecimal(); err != nil {
+ return nil, err
+ }
+ if p.charsRight() > 0 && p.moveRightGetChar() == ')' {
+ if p.isCaptureSlot(capnum) {
+ return newRegexNodeM(ntTestref, p.options, capnum), nil
+ }
+ return nil, p.getErr(ErrUndefinedReference, capnum)
+ }
+
+ return nil, p.getErr(ErrMalformedReference, capnum)
+
+ } else if IsWordChar(ch) {
+ capname := p.scanCapname()
+
+ if p.isCaptureName(capname) && p.charsRight() > 0 && p.moveRightGetChar() == ')' {
+ return newRegexNodeM(ntTestref, p.options, p.captureSlotFromName(capname)), nil
+ }
+ }
+ }
+ // not a backref
+ nt = ntTestgroup
+ p.textto(parenPos - 1) // jump to the start of the parentheses
+ p.ignoreNextParen = true // but make sure we don't try to capture the insides
+
+ charsRight := p.charsRight()
+ if charsRight >= 3 && p.rightChar(1) == '?' {
+ rightchar2 := p.rightChar(2)
+ // disallow comments in the condition
+ if rightchar2 == '#' {
+ return nil, p.getErr(ErrAlternationCantHaveComment)
+ }
+
+ // disallow named capture group (?<..>..) in the condition
+ if rightchar2 == '\'' {
+ return nil, p.getErr(ErrAlternationCantCapture)
+ }
+
+ if charsRight >= 4 && (rightchar2 == '<' && p.rightChar(3) != '!' && p.rightChar(3) != '=') {
+ return nil, p.getErr(ErrAlternationCantCapture)
+ }
+ }
+
+ case 'P':
+ if p.useRE2() {
+ // support for P<name> syntax
+ if p.charsRight() < 3 {
+ goto BreakRecognize
+ }
+
+ ch = p.moveRightGetChar()
+ if ch != '<' {
+ goto BreakRecognize
+ }
+
+ ch = p.moveRightGetChar()
+ p.moveLeft()
+
+ if IsWordChar(ch) {
+ capnum := -1
+ capname := p.scanCapname()
+
+ if p.isCaptureName(capname) {
+ capnum = p.captureSlotFromName(capname)
+ }
+
+ // check if we have bogus character after the name
+ if p.charsRight() > 0 && p.rightChar(0) != '>' {
+ return nil, p.getErr(ErrInvalidGroupName)
+ }
+
+ // actually make the node
+
+ if capnum != -1 && p.charsRight() > 0 && p.moveRightGetChar() == '>' {
+ return newRegexNodeMN(ntCapture, p.options, capnum, -1), nil
+ }
+ goto BreakRecognize
+
+ } else {
+ // bad group name - starts with something other than a word character and isn't a number
+ return nil, p.getErr(ErrInvalidGroupName)
+ }
+ }
+ // if we're not using RE2 compat mode then
+ // we just behave like normal
+ fallthrough
+
+ default:
+ p.moveLeft()
+
+ nt = ntGroup
+ // disallow options in the children of a testgroup node
+ if p.group.t != ntTestgroup {
+ p.scanOptions()
+ }
+ if p.charsRight() == 0 {
+ goto BreakRecognize
+ }
+
+ if ch = p.moveRightGetChar(); ch == ')' {
+ return nil, nil
+ }
+
+ if ch != ':' {
+ goto BreakRecognize
+ }
+
+ }
+
+ return newRegexNode(nt, p.options), nil
+ }
+
+BreakRecognize:
+
+ // break Recognize comes here
+
+ return nil, p.getErr(ErrUnrecognizedGrouping, string(p.pattern[start:p.textpos()]))
+}
+
+// scans backslash specials and basics
+func (p *parser) scanBackslash(scanOnly bool) (*regexNode, error) {
+
+ if p.charsRight() == 0 {
+ return nil, p.getErr(ErrIllegalEndEscape)
+ }
+
+ switch ch := p.rightChar(0); ch {
+ case 'b', 'B', 'A', 'G', 'Z', 'z':
+ p.moveRight(1)
+ return newRegexNode(p.typeFromCode(ch), p.options), nil
+
+ case 'w':
+ p.moveRight(1)
+ if p.useOptionE() {
+ return newRegexNodeSet(ntSet, p.options, ECMAWordClass()), nil
+ }
+ return newRegexNodeSet(ntSet, p.options, WordClass()), nil
+
+ case 'W':
+ p.moveRight(1)
+ if p.useOptionE() {
+ return newRegexNodeSet(ntSet, p.options, NotECMAWordClass()), nil
+ }
+ return newRegexNodeSet(ntSet, p.options, NotWordClass()), nil
+
+ case 's':
+ p.moveRight(1)
+ if p.useOptionE() {
+ return newRegexNodeSet(ntSet, p.options, ECMASpaceClass()), nil
+ }
+ return newRegexNodeSet(ntSet, p.options, SpaceClass()), nil
+
+ case 'S':
+ p.moveRight(1)
+ if p.useOptionE() {
+ return newRegexNodeSet(ntSet, p.options, NotECMASpaceClass()), nil
+ }
+ return newRegexNodeSet(ntSet, p.options, NotSpaceClass()), nil
+
+ case 'd':
+ p.moveRight(1)
+ if p.useOptionE() {
+ return newRegexNodeSet(ntSet, p.options, ECMADigitClass()), nil
+ }
+ return newRegexNodeSet(ntSet, p.options, DigitClass()), nil
+
+ case 'D':
+ p.moveRight(1)
+ if p.useOptionE() {
+ return newRegexNodeSet(ntSet, p.options, NotECMADigitClass()), nil
+ }
+ return newRegexNodeSet(ntSet, p.options, NotDigitClass()), nil
+
+ case 'p', 'P':
+ p.moveRight(1)
+ prop, err := p.parseProperty()
+ if err != nil {
+ return nil, err
+ }
+ cc := &CharSet{}
+ cc.addCategory(prop, (ch != 'p'), p.useOptionI(), p.patternRaw)
+ if p.useOptionI() {
+ cc.addLowercase()
+ }
+
+ return newRegexNodeSet(ntSet, p.options, cc), nil
+
+ default:
+ return p.scanBasicBackslash(scanOnly)
+ }
+}
+
+// Scans \-style backreferences (\1, \k<name>, \k'name', and the deprecated \<name> / \'name' forms); anything that is not a backreference is rewound and scanned as a single-character escape.
+func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
+ if p.charsRight() == 0 {
+ return nil, p.getErr(ErrIllegalEndEscape)
+ }
+ angled := false // set once an opening '<' or '\'' delimiter has been consumed
+ close := '\x00' // delimiter expected after the reference: '>' or '\''
+
+ backpos := p.textpos() // rewind point used when this turns out not to be a backreference
+ ch := p.rightChar(0)
+
+ // allow \k<foo> instead of \<foo>, which is now deprecated
+
+ if ch == 'k' {
+ if p.charsRight() >= 2 {
+ p.moveRight(1)
+ ch = p.moveRightGetChar()
+
+ if ch == '<' || ch == '\'' {
+ angled = true
+ if ch == '\'' {
+ close = '\''
+ } else {
+ close = '>'
+ }
+ }
+ }
+
+ if !angled || p.charsRight() <= 0 { // \k must be followed by a delimiter and at least one more char
+ return nil, p.getErr(ErrMalformedNameRef)
+ }
+
+ ch = p.rightChar(0)
+
+ } else if (ch == '<' || ch == '\'') && p.charsRight() > 1 { // delimiter without a leading k (the deprecated form)
+ angled = true
+ if ch == '\'' {
+ close = '\''
+ } else {
+ close = '>'
+ }
+
+ p.moveRight(1)
+ ch = p.rightChar(0)
+ }
+
+ // Try to parse backreference: \<1> or \<cap>
+
+ if angled && ch >= '0' && ch <= '9' {
+ capnum, err := p.scanDecimal()
+ if err != nil {
+ return nil, err
+ }
+
+ if p.charsRight() > 0 && p.moveRightGetChar() == close {
+ if p.isCaptureSlot(capnum) {
+ return newRegexNodeM(ntRef, p.options, capnum), nil
+ }
+ return nil, p.getErr(ErrUndefinedBackRef, capnum)
+ }
+ } else if !angled && ch >= '1' && ch <= '9' { // Try to parse backreference or octal: \1
+ capnum, err := p.scanDecimal()
+ if err != nil {
+ return nil, err
+ }
+
+ if scanOnly { // scan-only callers get no node; the digits have been consumed
+ return nil, nil
+ }
+
+ if p.useOptionE() || p.isCaptureSlot(capnum) { // with the ECMAScript option the slot is not checked here
+ return newRegexNodeM(ntRef, p.options, capnum), nil
+ }
+ if capnum <= 9 { // a single-digit reference must exist; larger numbers fall through and are rescanned as a char escape
+ return nil, p.getErr(ErrUndefinedBackRef, capnum)
+ }
+
+ } else if angled && IsWordChar(ch) {
+ capname := p.scanCapname()
+
+ if p.charsRight() > 0 && p.moveRightGetChar() == close {
+ if p.isCaptureName(capname) {
+ return newRegexNodeM(ntRef, p.options, p.captureSlotFromName(capname)), nil
+ }
+ return nil, p.getErr(ErrUndefinedNameRef, capname)
+ }
+ }
+
+ // Not backreference: must be char code
+
+ p.textto(backpos)
+ ch, err := p.scanCharEscape()
+ if err != nil {
+ return nil, err
+ }
+
+ if p.useOptionI() {
+ ch = unicode.ToLower(ch)
+ }
+
+ return newRegexNodeCh(ntOne, p.options, ch), nil
+}
+
+// parseProperty reads the "{X}" that follows \p or \P and returns X.
+// X may contain word characters and '-'. Errors: ErrIncompleteSlashP when
+// fewer than three characters remain or the closing '}' is missing,
+// ErrMalformedSlashP when the next character is not '{', and ErrUnknownSlashP
+// when X is not accepted by isValidUnicodeCat.
+func (p *parser) parseProperty() (string, error) {
+	if p.charsRight() < 3 {
+		return "", p.getErr(ErrIncompleteSlashP)
+	}
+	if p.moveRightGetChar() != '{' {
+		return "", p.getErr(ErrMalformedSlashP)
+	}
+
+	nameStart := p.textpos()
+	for p.charsRight() > 0 {
+		// peek, and stop in front of the first char that can't be part of a name
+		if c := p.rightChar(0); !IsWordChar(c) && c != '-' {
+			break
+		}
+		p.moveRight(1)
+	}
+	name := string(p.pattern[nameStart:p.textpos()])
+
+	if p.charsRight() == 0 || p.moveRightGetChar() != '}' {
+		return "", p.getErr(ErrIncompleteSlashP)
+	}
+	if !isValidUnicodeCat(name) {
+		return "", p.getErr(ErrUnknownSlashP, name)
+	}
+
+	return name, nil
+}
+
+// typeFromCode maps the letter of a zero-width assertion escape (\b \B \A \G
+// \Z \z) to its node type. \b and \B have separate ECMAScript variants; any
+// other letter yields ntNothing.
+func (p *parser) typeFromCode(ch rune) nodeType {
+	ecma := p.useOptionE()
+
+	switch {
+	case ch == 'b' && ecma:
+		return ntECMABoundary
+	case ch == 'b':
+		return ntBoundary
+	case ch == 'B' && ecma:
+		return ntNonECMABoundary
+	case ch == 'B':
+		return ntNonboundary
+	case ch == 'A':
+		return ntBeginning
+	case ch == 'G':
+		return ntStart
+	case ch == 'Z':
+		return ntEndZ
+	case ch == 'z':
+		return ntEnd
+	}
+
+	return ntNothing
+}
+
+// scanBlank skips input that carries no meaning for the pattern: inline
+// "(?#...)" comments always, plus whitespace and "#" end-of-line comments when
+// the x (IgnorePatternWhitespace) option is on. An inline comment without a
+// closing ')' yields ErrUnterminatedComment.
+func (p *parser) scanBlank() error {
+	extended := p.useOptionX()
+
+	for {
+		if extended {
+			for p.charsRight() > 0 && isSpace(p.rightChar(0)) {
+				p.moveRight(1)
+			}
+
+			if p.charsRight() > 0 && p.rightChar(0) == '#' {
+				// "#" comment: runs up to (not including) the next newline
+				for p.charsRight() > 0 && p.rightChar(0) != '\n' {
+					p.moveRight(1)
+				}
+				continue
+			}
+		}
+
+		startsComment := p.charsRight() >= 3 &&
+			p.rightChar(0) == '(' && p.rightChar(1) == '?' && p.rightChar(2) == '#'
+		if !startsComment {
+			return nil
+		}
+
+		// "(?#" comment: runs up to and including the next ')'
+		for p.charsRight() > 0 && p.rightChar(0) != ')' {
+			p.moveRight(1)
+		}
+		if p.charsRight() == 0 {
+			return p.getErr(ErrUnterminatedComment)
+		}
+		p.moveRight(1)
+	}
+}
+
+// scanCapname consumes the run of word characters at the current position and
+// returns it; the result is empty when the next character is not a word char.
+func (p *parser) scanCapname() string {
+	begin := p.textpos()
+
+	for p.charsRight() > 0 && IsWordChar(p.rightChar(0)) {
+		p.moveRight(1)
+	}
+
+	return string(p.pattern[begin:p.textpos()])
+}
+
+// Scans contents of [] (not including the opening '[', but consuming the closing ']'), and converts to a set. With scanOnly the input is consumed but no set is built and nil is returned.
+func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
+ ch := '\x00'
+ chPrev := '\x00' // lower bound of a pending range (valid while inRange)
+ inRange := false // true after "x-" has been read and the upper bound is still expected
+ firstChar := true // true only for the first element after the optional '^'
+ closed := false // set when the terminating ']' was found
+
+ var cc *CharSet
+ if !scanOnly {
+ cc = &CharSet{}
+ }
+
+ if p.charsRight() > 0 && p.rightChar(0) == '^' {
+ p.moveRight(1)
+ if !scanOnly {
+ cc.negate = true
+ }
+ }
+
+ for ; p.charsRight() > 0; firstChar = false {
+ fTranslatedChar := false // true when ch came from an escape and must be taken literally
+ ch = p.moveRightGetChar()
+ if ch == ']' {
+ if !firstChar {
+ closed = true
+ break
+ } else if p.useOptionE() { // with the ECMAScript option a leading ']' closes the (empty) class
+ if !scanOnly {
+ cc.addRanges(NoneClass().ranges)
+ }
+ closed = true
+ break
+ }
+ // otherwise a leading ']' falls through and is treated as a literal
+
+ } else if ch == '\\' && p.charsRight() > 0 {
+ switch ch = p.moveRightGetChar(); ch {
+ case 'D', 'd':
+ if !scanOnly {
+ if inRange {
+ return nil, p.getErr(ErrBadClassInCharRange, ch)
+ }
+ cc.addDigit(p.useOptionE(), ch == 'D', p.patternRaw)
+ }
+ continue
+
+ case 'S', 's':
+ if !scanOnly {
+ if inRange {
+ return nil, p.getErr(ErrBadClassInCharRange, ch)
+ }
+ cc.addSpace(p.useOptionE(), ch == 'S')
+ }
+ continue
+
+ case 'W', 'w':
+ if !scanOnly {
+ if inRange {
+ return nil, p.getErr(ErrBadClassInCharRange, ch)
+ }
+
+ cc.addWord(p.useOptionE(), ch == 'W')
+ }
+ continue
+
+ case 'p', 'P':
+ if !scanOnly {
+ if inRange {
+ return nil, p.getErr(ErrBadClassInCharRange, ch)
+ }
+ prop, err := p.parseProperty()
+ if err != nil {
+ return nil, err
+ }
+ cc.addCategory(prop, (ch != 'p'), caseInsensitive, p.patternRaw)
+ } else {
+ p.parseProperty() // scan-only: result and error are discarded here
+ }
+
+ continue
+
+ case '-':
+ if !scanOnly {
+ cc.addRange(ch, ch)
+ }
+ continue
+
+ default:
+ p.moveLeft()
+ var err error
+ ch, err = p.scanCharEscape() // non-literal character
+ if err != nil {
+ return nil, err
+ }
+ fTranslatedChar = true
+ break // this break will only break out of the switch
+ }
+ } else if ch == '[' {
+ // This is code for Posix style properties - [:Ll:] or [:IsTibetan:].
+ // Outside RE2 mode the name is only scanned over; in RE2 mode it is looked up via addNamedASCII and added.
+ if p.charsRight() > 0 && p.rightChar(0) == ':' && !inRange {
+ savePos := p.textpos()
+
+ p.moveRight(1)
+ negate := false
+ if p.charsRight() > 1 && p.rightChar(0) == '^' {
+ negate = true
+ p.moveRight(1)
+ }
+
+ nm := p.scanCapname() // snag the name
+ if !scanOnly && p.useRE2() {
+ // look up the name since these are valid for RE2
+ // add the group based on the name
+ if ok := cc.addNamedASCII(nm, negate); !ok {
+ return nil, p.getErr(ErrInvalidCharRange)
+ }
+ }
+ if p.charsRight() < 2 || p.moveRightGetChar() != ':' || p.moveRightGetChar() != ']' {
+ p.textto(savePos) // not terminated by ":]": rewind to just after the '['
+ } else if p.useRE2() {
+ // move on
+ continue
+ }
+ }
+ }
+
+ if inRange {
+ inRange = false
+ if !scanOnly {
+ if ch == '[' && !fTranslatedChar && !firstChar {
+ // We thought we were in a range, but we're actually starting a subtraction.
+ // In that case, we'll add chPrev to our char class, skip the opening [, and
+ // scan the new character class recursively.
+ cc.addChar(chPrev)
+ sub, err := p.scanCharSet(caseInsensitive, false)
+ if err != nil {
+ return nil, err
+ }
+ cc.addSubtraction(sub)
+
+ if p.charsRight() > 0 && p.rightChar(0) != ']' {
+ return nil, p.getErr(ErrSubtractionMustBeLast)
+ }
+ } else {
+ // a regular range, like a-z
+ if chPrev > ch {
+ return nil, p.getErr(ErrReversedCharRange)
+ }
+ cc.addRange(chPrev, ch)
+ }
+ }
+ } else if p.charsRight() >= 2 && p.rightChar(0) == '-' && p.rightChar(1) != ']' {
+ // this could be the start of a range
+ chPrev = ch
+ inRange = true
+ p.moveRight(1)
+ } else if p.charsRight() >= 1 && ch == '-' && !fTranslatedChar && p.rightChar(0) == '[' && !firstChar {
+ // we aren't in a range, and now there is a subtraction. Usually this happens
+ // only when a subtraction follows a range, like [a-z-[b]]
+ if !scanOnly {
+ p.moveRight(1)
+ sub, err := p.scanCharSet(caseInsensitive, false)
+ if err != nil {
+ return nil, err
+ }
+ cc.addSubtraction(sub)
+
+ if p.charsRight() > 0 && p.rightChar(0) != ']' {
+ return nil, p.getErr(ErrSubtractionMustBeLast)
+ }
+ } else {
+ p.moveRight(1)
+ p.scanCharSet(caseInsensitive, true) // scan-only: result and error are discarded here
+ }
+ } else {
+ if !scanOnly {
+ cc.addRange(ch, ch)
+ }
+ }
+ }
+
+ if !closed {
+ return nil, p.getErr(ErrUnterminatedBracket)
+ }
+
+ if !scanOnly && caseInsensitive {
+ cc.addLowercase()
+ }
+
+ return cc, nil
+}
+
+// Scans any number of decimal digits (pegs value at 2^31-1 if too large)
+func (p *parser) scanDecimal() (int, error) {
+ i := 0
+ var d int
+
+ for p.charsRight() > 0 {
+ d = int(p.rightChar(0) - '0')
+ if d < 0 || d > 9 {
+ break
+ }
+ p.moveRight(1)
+
+ if i > maxValueDiv10 || (i == maxValueDiv10 && d > maxValueMod10) {
+ return 0, p.getErr(ErrCaptureGroupOutOfRange)
+ }
+
+ i *= 10
+ i += d
+ }
+
+ return int(i), nil
+}
+
+// isOnlyTopOption reports whether option is one of the flags that may only be
+// set for the whole pattern (RightToLeft, ECMAScript, RE2).
+func isOnlyTopOption(option RegexOptions) bool {
+	switch option {
+	case RightToLeft, ECMAScript, RE2:
+		return true
+	}
+	return false
+}
+
+// scanOptions reads an inline option string such as "imsx-imsx" and applies it
+// to p.options. Letters before a '-' switch options on, letters after it
+// switch them off, and '+' switches back to "on". Scanning stops, without
+// consuming, at the first character that is not an option letter or that
+// names a top-level-only option.
+func (p *parser) scanOptions() {
+	turnOff := false
+
+	for ; p.charsRight() > 0; p.moveRight(1) {
+		switch ch := p.rightChar(0); ch {
+		case '-':
+			turnOff = true
+		case '+':
+			turnOff = false
+		default:
+			opt := optionFromCode(ch)
+			if opt == 0 || isOnlyTopOption(opt) {
+				return
+			}
+
+			if turnOff {
+				p.options &^= opt
+			} else {
+				p.options |= opt
+			}
+		}
+	}
+}
+
+// scanCharEscape decodes the escape that follows a '\' when it stands for one
+// character: octal (\0..\7...), hex (\xHH, \x{H...}, \uHHHH), control (\cX)
+// and the named escapes \a \b \e \f \n \r \t \v. Any other character stands
+// for itself, except that an unknown word character is rejected with
+// ErrUnrecognizedEscape unless the ECMAScript option is on.
+func (p *parser) scanCharEscape() (rune, error) {
+	ch := p.moveRightGetChar()
+
+	switch {
+	case ch >= '0' && ch <= '7':
+		// put the digit back and let scanOctal read the whole run
+		p.moveLeft()
+		return p.scanOctal(), nil
+	case ch == 'x':
+		// support for \x{HEX} syntax from Perl and PCRE
+		if p.charsRight() > 0 && p.rightChar(0) == '{' {
+			p.moveRight(1)
+			return p.scanHexUntilBrace()
+		}
+		return p.scanHex(2)
+	case ch == 'u':
+		return p.scanHex(4)
+	case ch == 'c':
+		return p.scanControl()
+	}
+
+	switch ch {
+	case 'a':
+		return '\a', nil
+	case 'b':
+		return '\b', nil
+	case 'e':
+		return '\x1b', nil
+	case 'f':
+		return '\f', nil
+	case 'n':
+		return '\n', nil
+	case 'r':
+		return '\r', nil
+	case 't':
+		return '\t', nil
+	case 'v':
+		return '\v', nil
+	}
+
+	if !p.useOptionE() && IsWordChar(ch) {
+		return 0, p.getErr(ErrUnrecognizedEscape, string(ch))
+	}
+	return ch, nil
+}
+
+// scanControl reads the letter after \c and returns the matching ASCII
+// control character (\cA and \ca are both 0x01). It fails with
+// ErrMissingControl at end of input and ErrUnrecognizedControl when the
+// character does not map into the range 0x00-0x1F.
+func (p *parser) scanControl() (rune, error) {
+	if p.charsRight() <= 0 {
+		return 0, p.getErr(ErrMissingControl)
+	}
+
+	ctl := p.moveRightGetChar()
+
+	// fold lower case onto upper case so \ca is read as \cA
+	if ctl >= 'a' && ctl <= 'z' {
+		ctl -= 'a' - 'A'
+	}
+
+	if ctl -= '@'; ctl < 0 || ctl >= ' ' {
+		return 0, p.getErr(ErrUnrecognizedControl)
+	}
+
+	return ctl, nil
+}
+
+// scanHexUntilBrace reads the hex digits of a \x{...} escape up to and
+// including the closing '}' and returns the rune they denote.
+// Errors: ErrTooFewHex for an empty "\x{}", ErrMissingBrace for a non-hex
+// character or when input ends before '}', and ErrInvalidHex once the value
+// exceeds unicode.MaxRune.
+func (p *parser) scanHexUntilBrace() (rune, error) {
+	// PCRE allows any number of digits; the unicode range bounds the value
+	value := 0
+
+	for seen := 0; p.charsRight() > 0; seen++ {
+		ch := p.moveRightGetChar()
+
+		if ch == '}' {
+			if seen == 0 {
+				// prevent \x{}
+				return 0, p.getErr(ErrTooFewHex)
+			}
+			return rune(value), nil
+		}
+
+		// anything that isn't the brace has to be a hex digit
+		d := hexDigit(ch)
+		if d < 0 {
+			return 0, p.getErr(ErrMissingBrace)
+		}
+
+		value = value*0x10 + d
+		if value > unicode.MaxRune {
+			return 0, p.getErr(ErrInvalidHex)
+		}
+	}
+
+	// ran out of input without seeing the closing brace
+	return 0, p.getErr(ErrMissingBrace)
+}
+
+// scanHex reads exactly c hex digits (c=2 for \xFF, c=4 for \uFFFF) and
+// returns their value as a rune. It fails with ErrTooFewHex when fewer than c
+// characters remain or a non-hex character is met first.
+func (p *parser) scanHex(c int) (rune, error) {
+	if p.charsRight() < c {
+		return 0, p.getErr(ErrTooFewHex)
+	}
+
+	value := 0
+	for n := 0; n < c; n++ {
+		d := hexDigit(p.moveRightGetChar())
+		if d < 0 {
+			return 0, p.getErr(ErrTooFewHex)
+		}
+		value = value*0x10 + d
+	}
+
+	return rune(value), nil
+}
+
+// hexDigit returns the value (0-15) of the hex digit ch, accepting both upper
+// and lower case letters, or -1 when ch is not a hex digit.
+func hexDigit(ch rune) int {
+	switch {
+	case ch >= '0' && ch <= '9':
+		return int(ch - '0')
+	case ch >= 'a' && ch <= 'f':
+		return int(ch-'a') + 0xa
+	case ch >= 'A' && ch <= 'F':
+		return int(ch-'A') + 0xa
+	}
+
+	return -1
+}
+
+// scanOctal reads up to three octal digits at the current position and returns
+// the value they denote, truncated to 8 bits (the behavior Perl follows for
+// codes above 0377). With the ECMAScript option the scan stops as soon as the
+// value reaches 0x20; the digit that took it there is counted but not consumed.
+//
+// The caller must have checked that the first character is an octal digit.
+func (p *parser) scanOctal() rune {
+	// Consume octal chars only up to 3 digits and value 0377
+
+	c := 3
+
+	if c > p.charsRight() {
+		c = p.charsRight()
+	}
+
+	//we know the first char is good because the caller had to check
+	i := 0
+	d := int(p.rightChar(0) - '0')
+	// d must be range-checked on both sides: a following character below '0'
+	// (space, '(', '-', ...) makes d negative, and testing only d <= 7 would
+	// swallow it as a digit and corrupt the value.
+	for c > 0 && d >= 0 && d <= 7 {
+		i *= 8
+		i += d
+		if p.useOptionE() && i >= 0x20 {
+			break
+		}
+		c--
+
+		p.moveRight(1)
+		if !p.rightMost() {
+			d = int(p.rightChar(0) - '0')
+		}
+	}
+
+	// Octal codes only go up to 255. Any larger and the behavior that Perl follows
+	// is simply to truncate the high bits.
+	i &= 0xFF
+
+	return rune(i)
+}
+
+// Returns the current parsing position (p.currentPos, an index into p.pattern).
+func (p *parser) textpos() int {
+ return p.currentPos
+}
+
+// Jumps to the given parsing position; pos is stored as-is, without any bounds check.
+func (p *parser) textto(pos int) {
+ p.currentPos = pos
+}
+
+// Returns the char at the current parsing position and advances one to the right. Indexes p.pattern directly, so callers must make sure charsRight() > 0 first.
+func (p *parser) moveRightGetChar() rune {
+ ch := p.pattern[p.currentPos]
+ p.currentPos++
+ return ch
+}
+
+// Moves the current position i chars to the right (no bounds check).
+func (p *parser) moveRight(i int) {
+ // callers in this file always pass 1
+ p.currentPos += i
+}
+
+// Moves the current parsing position one to the left (no bounds check).
+func (p *parser) moveLeft() {
+ p.currentPos--
+}
+
+// Returns the char at absolute index i of the pattern, independent of the current parsing position.
+func (p *parser) charAt(i int) rune {
+ return p.pattern[i]
+}
+
+// Returns the char i chars right of the current parsing position without moving; rightChar(0) is the next char to be read.
+func (p *parser) rightChar(i int) rune {
+ // indexes p.pattern directly, so i must be < charsRight()
+ return p.pattern[p.currentPos+i]
+}
+
+// Number of characters from the current parsing position to the end of the pattern.
+func (p *parser) charsRight() int {
+ return len(p.pattern) - p.currentPos
+}
+
+// rightMost reports whether the parsing position sits exactly at the end of
+// the pattern.
+func (p *parser) rightMost() bool {
+	return len(p.pattern) == p.currentPos
+}
+
+// Looks up the slot number for a given name in p.capnames; an unknown name yields the map's zero value, so check isCaptureName first.
+func (p *parser) captureSlotFromName(capname string) int {
+ return p.capnames[capname]
+}
+
+// isCaptureSlot reports whether capture slot i is known. When the p.caps map
+// has been built it is consulted; otherwise every slot in [0, p.capsize) is
+// accepted.
+func (p *parser) isCaptureSlot(i int) bool {
+	if p.caps == nil {
+		return i >= 0 && i < p.capsize
+	}
+
+	_, known := p.caps[i]
+	return known
+}
+
+// isCaptureName reports whether capname is a known capture group name.
+func (p *parser) isCaptureName(capname string) bool {
+	// a lookup in a nil map is legal and simply reports "absent"
+	_, known := p.capnames[capname]
+	return known
+}
+
+// option shortcuts
+
+// useOptionN reports whether the ExplicitCapture option (no automatic capture
+// for a plain '(') is set.
+func (p *parser) useOptionN() bool {
+	return p.options&ExplicitCapture != 0
+}
+
+// useOptionI reports whether the IgnoreCase option is set.
+func (p *parser) useOptionI() bool {
+	return p.options&IgnoreCase != 0
+}
+
+// useOptionM reports whether the Multiline option (changes the meaning of
+// $ and ^) is set.
+func (p *parser) useOptionM() bool {
+	return p.options&Multiline != 0
+}
+
+// useOptionS reports whether the Singleline option (changes the meaning of
+// '.') is set.
+func (p *parser) useOptionS() bool {
+	return p.options&Singleline != 0
+}
+
+// useOptionX reports whether the IgnorePatternWhitespace option
+// (whitespace/comment mode) is set.
+func (p *parser) useOptionX() bool {
+	return p.options&IgnorePatternWhitespace != 0
+}
+
+// useOptionE reports whether the ECMAScript option is set.
+func (p *parser) useOptionE() bool {
+	return p.options&ECMAScript != 0
+}
+
+// useRE2 reports whether the RE2 compatibility option is set.
+func (p *parser) useRE2() bool {
+	return p.options&RE2 != 0
+}
+
+// True if the options stack is empty, i.e. popOptions/popKeepOptions must not be called.
+func (p *parser) emptyOptionsStack() bool {
+ return len(p.optionsStack) == 0
+}
+
+// Finish the current quantifiable (when a quantifier is not found or is not possible)
+func (p *parser) addConcatenate() {
+ // appends the pending unit, unchanged, to the current concatenation and clears it
+ p.concatenation.addChild(p.unit)
+ p.unit = nil
+}
+
+// addConcatenate3 finishes the pending unit when a quantifier was found: the
+// unit is wrapped via makeQuantifier(lazy, min, max), appended to the current
+// concatenation, and cleared.
+func (p *parser) addConcatenate3(lazy bool, min, max int) {
+	quantified := p.unit.makeQuantifier(lazy, min, max)
+	p.concatenation.addChild(quantified)
+	p.unit = nil
+}
+
+// addUnitOne makes the pending unit a node matching the single char ch, which
+// is lowercased first when the IgnoreCase option is on.
+func (p *parser) addUnitOne(ch rune) {
+	c := ch
+	if p.useOptionI() {
+		c = unicode.ToLower(c)
+	}
+
+	p.unit = newRegexNodeCh(ntOne, p.options, c)
+}
+
+// addUnitNotone makes the pending unit a node matching any char except ch,
+// which is lowercased first when the IgnoreCase option is on.
+func (p *parser) addUnitNotone(ch rune) {
+	c := ch
+	if p.useOptionI() {
+		c = unicode.ToLower(c)
+	}
+
+	p.unit = newRegexNodeCh(ntNotone, p.options, c)
+}
+
+// Sets the current unit to a single set node (ntSet) built from set with the current options
+func (p *parser) addUnitSet(set *CharSet) {
+ p.unit = newRegexNodeSet(ntSet, p.options, set)
+}
+
+// Sets the current unit to an already-built subtree; node is stored as-is
+func (p *parser) addUnitNode(node *regexNode) {
+ p.unit = node
+}
+
+// Sets the current unit to a new childless node of the specified type (e.g. an assertion), carrying the current options
+func (p *parser) addUnitType(t nodeType) {
+ p.unit = newRegexNode(t, p.options)
+}
+
+// addGroup closes the current group (in response to a ')' or end of input)
+// and makes it the pending unit. For conditional groups (ntTestgroup,
+// ntTestref) the finished concatenation becomes a direct child of the group,
+// and ErrTooManyAlternates is returned if that exceeds the allowed child
+// count (2 for ntTestref, 3 for ntTestgroup). For every other group the
+// concatenation is added to the alternation, which is added to the group.
+func (p *parser) addGroup() error {
+	grp := p.group
+	body := p.concatenation.reverseLeft()
+
+	switch grp.t {
+	case ntTestgroup, ntTestref:
+		grp.addChild(body)
+
+		limit := 3
+		if grp.t == ntTestref {
+			limit = 2
+		}
+		if len(grp.children) > limit {
+			return p.getErr(ErrTooManyAlternates)
+		}
+	default:
+		p.alternation.addChild(body)
+		grp.addChild(p.alternation)
+	}
+
+	p.unit = grp
+	return nil
+}
+
+// popKeepOptions discards the top entry of the options stack and leaves
+// p.options as it is. The stack must not be empty.
+func (p *parser) popKeepOptions() {
+	p.optionsStack = p.optionsStack[:len(p.optionsStack)-1]
+}
+
+// popOptions restores p.options from the top of the options stack and removes
+// that entry. The stack must not be empty.
+func (p *parser) popOptions() {
+	top := len(p.optionsStack) - 1
+	p.options, p.optionsStack = p.optionsStack[top], p.optionsStack[:top]
+}
+
+// Saves the current options on the options stack; undone by popOptions (restore) or popKeepOptions (discard).
+func (p *parser) pushOptions() {
+ p.optionsStack = append(p.optionsStack, p.options)
+}
+
+// addToConcatenate appends the cch pattern characters starting at index pos to
+// the current concatenation: as one ntMulti node when cch > 1, as an ntOne
+// node otherwise. Nothing is added when cch is 0. When the IgnoreCase option
+// is on and isReplacement is false, the text is lowercased first.
+func (p *parser) addToConcatenate(pos, cch int, isReplacement bool) {
+	if cch == 0 {
+		return
+	}
+
+	foldCase := p.useOptionI() && !isReplacement
+
+	var node *regexNode
+	if cch > 1 {
+		// Work on a copy: a sub-slice of p.pattern shares its backing array,
+		// so lowercasing it in place would rewrite the parser's own pattern
+		// and leave the node aliasing parser storage.
+		str := make([]rune, cch)
+		copy(str, p.pattern[pos:pos+cch])
+
+		if foldCase {
+			// We do the ToLower character by character for consistency. With surrogate chars, doing
+			// a ToLower on the entire string could actually change the surrogate pair. This is more correct
+			// linguistically, but since Regex doesn't support surrogates, it's more important to be
+			// consistent.
+			for i := range str {
+				str[i] = unicode.ToLower(str[i])
+			}
+		}
+
+		node = newRegexNodeStr(ntMulti, p.options, str)
+	} else {
+		ch := p.charAt(pos)
+
+		if foldCase {
+			ch = unicode.ToLower(ch)
+		}
+
+		node = newRegexNodeCh(ntOne, p.options, ch)
+	}
+
+	p.concatenation.addChild(node)
+}
+
+// Push the parser state (in response to an open paren): group, alternation and concatenation are chained through their next pointers onto p.stack, in the order popGroup unwinds them.
+func (p *parser) pushGroup() {
+ p.group.next = p.stack
+ p.alternation.next = p.group
+ p.concatenation.next = p.alternation
+ p.stack = p.concatenation // the concatenation becomes the new top of the stack
+}
+
+// popGroup restores the parser state saved by pushGroup (in response to a
+// ')'). If the restored group is an ntTestgroup that has no children yet, the
+// pending unit becomes its first child (the condition); a missing unit is
+// reported as ErrConditionalExpression.
+func (p *parser) popGroup() error {
+	concat := p.stack
+	alt := concat.next
+	grp := alt.next
+
+	p.concatenation, p.alternation, p.group, p.stack = concat, alt, grp, grp.next
+
+	// The first () inside a Testgroup group goes directly to the group
+	if grp.t != ntTestgroup || len(grp.children) != 0 {
+		return nil
+	}
+	if p.unit == nil {
+		return p.getErr(ErrConditionalExpression)
+	}
+
+	grp.addChild(p.unit)
+	p.unit = nil
+	return nil
+}
+
+// True if the group stack is empty, i.e. there is no saved state for popGroup to restore.
+func (p *parser) emptyStack() bool {
+ return p.stack == nil
+}
+
+// Start a new round for the parser state (in response to an open paren or string start): openGroup becomes the current group and gets a fresh, empty alternation and concatenation.
+func (p *parser) startGroup(openGroup *regexNode) {
+ p.group = openGroup
+ p.alternation = newRegexNode(ntAlternate, p.options)
+ p.concatenation = newRegexNode(ntConcatenate, p.options)
+}
+
+// addAlternate closes the current concatenation (in response to a '|') and
+// starts a new, empty one. The finished branch goes to the alternation node,
+// except inside conditional groups (ntTestgroup, ntTestref) where branches are
+// direct children of the group.
+func (p *parser) addAlternate() {
+	branch := p.concatenation.reverseLeft()
+
+	target := p.alternation
+	if t := p.group.t; t == ntTestgroup || t == ntTestref {
+		target = p.group
+	}
+	target.addChild(branch)
+
+	p.concatenation = newRegexNode(ntConcatenate, p.options)
+}
+
+// Categories for ASCII characters, stored in the _category table. The values are ordered so that the is* helpers can test a whole group with >= (e.g. ">= S" means stopper or quantifier).
+
+const (
+ Q byte = 5 // quantifier
+ S = 4 // ordinary stopper
+ Z = 3 // ScanBlank stopper
+ X = 2 // whitespace
+ E = 1 // should be escaped
+)
+
+var _category = []byte{ // category per ASCII code point (index = char code, 0 = ordinary); 32 entries per row below
+ //01 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, X, X, X, X, X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+ X, 0, 0, Z, S, 0, 0, 0, S, S, Q, Q, 0, 0, S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Q,
+ //@A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, S, 0,
+ //'a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Q, S, 0, 0, 0,
+}
+
+// isSpace reports whether ch is one of the ASCII characters categorized as
+// whitespace (X) in _category.
+func isSpace(ch rune) bool {
+	return ch <= ' ' && _category[ch] == X
+}
+
+// isSpecial reports whether ch terminates a run of ordinary characters, i.e.
+// it is categorized as a stopper (S) or quantifier (Q).
+func isSpecial(ch rune) bool {
+	return ch <= '|' && _category[ch] >= S
+}
+
+// isStopperX is the variant of isSpecial that additionally stops at
+// whitespace (X) and at the ScanBlank stopper (Z).
+func isStopperX(ch rune) bool {
+	return ch <= '|' && _category[ch] >= X
+}
+
+// isQuantifier reports whether ch can begin a quantifier, i.e. it is
+// categorized as Q in _category.
+func isQuantifier(ch rune) bool {
+	return ch <= '{' && _category[ch] >= Q
+}
+
+func (p *parser) isTrueQuantifier() bool { // reports whether a real quantifier starts at the current position; a '{' only counts when it begins {n}, {n,} or {n,m}. Does not move the parsing position.
+ nChars := p.charsRight() // chars left to inspect, counted down while scanning
+ if nChars == 0 {
+ return false
+ }
+
+ startpos := p.textpos()
+ ch := p.charAt(startpos)
+ if ch != '{' {
+ return ch <= '{' && _category[ch] >= Q // any other char: plain category lookup
+ }
+
+ //UGLY: this is ugly -- the original code was ugly too
+ pos := startpos
+ for { // skip the digits of the first number
+ nChars--
+ if nChars <= 0 {
+ break
+ }
+ pos++
+ ch = p.charAt(pos)
+ if ch < '0' || ch > '9' {
+ break
+ }
+ }
+
+ if nChars == 0 || pos-startpos == 1 { // ran out of input, or no digit directly after '{'
+ return false
+ }
+ if ch == '}' { // {n}
+ return true
+ }
+ if ch != ',' {
+ return false
+ }
+ for { // skip the (optional) digits of the second number
+ nChars--
+ if nChars <= 0 {
+ break
+ }
+ pos++
+ ch = p.charAt(pos)
+ if ch < '0' || ch > '9' {
+ break
+ }
+ }
+
+ return nChars > 0 && ch == '}' // {n,} or {n,m}
+}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/prefix.go b/vendor/github.com/dlclark/regexp2/syntax/prefix.go
new file mode 100644
index 0000000000..011ef0b416
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/prefix.go
@@ -0,0 +1,896 @@
+package syntax
+
+import (
+ "bytes"
+ "fmt"
+ "strconv"
+ "unicode"
+ "unicode/utf8"
+)
+
+// Prefix describes what a match has to begin with: either a literal run of
+// runes (PrefixStr) or a set of possible first characters (PrefixSet).
+type Prefix struct {
+ PrefixStr []rune // literal leading runes; filled in by getPrefix
+ PrefixSet CharSet // possible first characters; filled in by getFirstCharsPrefix
+ CaseInsensitive bool // true when the prefix was taken from case-insensitive nodes
+}
+
+// getFirstCharsPrefix walks a RegexTree and works out the set of characters
+// that can begin a match. It returns nil when the analysis fails, when the
+// expression is nullable, or when the resulting set is empty.
+func getFirstCharsPrefix(tree *RegexTree) *Prefix {
+	walker := regexFcd{
+		fcStack:  make([]regexFc, 32),
+		intStack: make([]int, 32),
+	}
+
+	first := walker.regexFCFromRegexTree(tree)
+	if first == nil {
+		return nil
+	}
+	if first.nullable || first.cc.IsEmpty() {
+		return nil
+	}
+
+	firstChars := first.getFirstChars()
+	return &Prefix{
+		PrefixSet:       firstChars,
+		CaseInsensitive: first.caseInsensitive,
+	}
+}
+
+// regexFcd holds the working state of the first-character analysis: an
+// explicit stack of child indexes (used instead of recursion), a stack of
+// partial regexFc results, and the flags calculateFC uses to steer the walk.
+type regexFcd struct {
+ intStack []int // child-index stack that drives the depth-first walk
+ intDepth int // number of live entries in intStack
+ fcStack []regexFc // stack of partial first-character results
+ fcDepth int // number of live entries in fcStack
+ skipAllChildren bool // don't process any more children at the current level
+ skipchild bool // don't process the current child.
+ failed bool // set when a merge of two results was refused; aborts the walk
+}
+
+// regexFCFromRegexTree is the main first-character (FC) computation. It does
+// a short-circuited depth-first walk through the tree and calls calculateFC
+// before and after each child of an interior node, and once at each leaf.
+//
+// It returns the final regexFc left on the FC stack, or nil when a merge
+// failed along the way (s.failed) or the FC stack ended up empty.
+func (s *regexFcd) regexFCFromRegexTree(tree *RegexTree) *regexFc {
+ curNode := tree.root
+ curChild := 0
+
+ for {
+ if len(curNode.children) == 0 {
+ // This is a leaf node
+ s.calculateFC(curNode.t, curNode, 0)
+ } else if curChild < len(curNode.children) && !s.skipAllChildren {
+ // This is an interior node, and we have more children to analyze
+ s.calculateFC(curNode.t|beforeChild, curNode, curChild)
+
+ if !s.skipchild {
+ curNode = curNode.children[curChild]
+ // this stack is how we get a depth first walk of the tree.
+ s.pushInt(curChild)
+ curChild = 0
+ } else {
+ // calculateFC asked us to step over this child
+ curChild++
+ s.skipchild = false
+ }
+ continue
+ }
+
+ // This is an interior node where we've finished analyzing all the children, or
+ // the end of a leaf node.
+ s.skipAllChildren = false
+
+ // An empty index stack means we are back at the root: the walk is done.
+ if s.intIsEmpty() {
+ break
+ }
+
+ // Return to the parent (the next link) and resume after the child we just left.
+ curChild = s.popInt()
+ curNode = curNode.next
+
+ s.calculateFC(curNode.t|afterChild, curNode, curChild)
+ if s.failed {
+ return nil
+ }
+
+ curChild++
+ }
+
+ if s.fcIsEmpty() {
+ return nil
+ }
+
+ return s.popFC()
+}
+
+// pushInt pushes I onto the integer stack that stands in for recursion
+// during the tree walk. The backing slice doubles whenever it is full.
+func (s *regexFcd) pushInt(I int) {
+	if len(s.intStack) <= s.intDepth {
+		grown := make([]int, 2*s.intDepth)
+		copy(grown, s.intStack)
+		s.intStack = grown
+	}
+
+	slot := s.intDepth
+	s.intStack[slot] = I
+	s.intDepth = slot + 1
+}
+
+// intIsEmpty reports whether the integer stack holds no entries.
+func (s *regexFcd) intIsEmpty() bool {
+	if s.intDepth == 0 {
+		return true
+	}
+	return false
+}
+
+// popInt removes the top entry of the integer stack and returns it.
+func (s *regexFcd) popInt() int {
+	top := s.intDepth - 1
+	s.intDepth = top
+	return s.intStack[top]
+}
+
+// pushFC pushes fc onto the stack of partial first-character results. The
+// backing slice doubles whenever it is full.
+func (s *regexFcd) pushFC(fc regexFc) {
+	if len(s.fcStack) <= s.fcDepth {
+		grown := make([]regexFc, 2*s.fcDepth)
+		copy(grown, s.fcStack)
+		s.fcStack = grown
+	}
+
+	slot := s.fcDepth
+	s.fcStack[slot] = fc
+	s.fcDepth = slot + 1
+}
+
+// fcIsEmpty reports whether the regexFc stack holds no entries.
+func (s *regexFcd) fcIsEmpty() bool {
+	if s.fcDepth == 0 {
+		return true
+	}
+	return false
+}
+
+// popFC removes the top regexFc from the stack and returns a pointer to it.
+// The pointer refers to the stack slot itself, so the next pushFC will
+// overwrite the value it points at.
+func (s *regexFcd) popFC() *regexFc {
+	top := s.fcDepth - 1
+	s.fcDepth = top
+	return &s.fcStack[top]
+}
+
+// topFC returns a pointer to the regexFc on top of the stack without
+// removing it, so callers can update the entry in place.
+func (s *regexFcd) topFC() *regexFc {
+	top := s.fcDepth - 1
+	return &s.fcStack[top]
+}
+
+// skipChild is called from a beforeChild step to tell the tree walk not to
+// descend into the current child. The walk clears the flag once it has
+// stepped over that child.
+func (s *regexFcd) skipChild() {
+	s.skipchild = true
+}
+
+// calculateFC performs one step of the first-character computation. nt is
+// the node type, OR-ed with beforeChild or afterChild when the step belongs
+// to an interior node; CurIndex is the index of the child being entered or
+// left. Leaves push a fresh regexFc; afterChild steps fold the child's
+// result into the one below it on the stack. It panics on a node type it
+// does not know.
+func (s *regexFcd) calculateFC(nt nodeType, node *regexNode, CurIndex int) {
+	// Option flags only matter for the leaf types (everything up to ntRef).
+	isLeaf := nt <= ntRef
+	ci := isLeaf && node.options&IgnoreCase != 0
+	rtl := isLeaf && node.options&RightToLeft != 0
+
+	// foldChild pops the child's result and merges it into the new stack top.
+	foldChild := func(concatenate bool) {
+		child := s.popFC()
+		s.failed = !s.topFC().addFC(*child, concatenate)
+	}
+
+	switch nt {
+	case ntConcatenate | beforeChild, ntAlternate | beforeChild, ntTestref | beforeChild,
+		ntLoop | beforeChild, ntLazyloop | beforeChild,
+		ntGroup | beforeChild, ntGroup | afterChild,
+		ntCapture | beforeChild, ntCapture | afterChild,
+		ntGreedy | beforeChild, ntGreedy | afterChild,
+		ntRequire | afterChild, ntPrevent | afterChild:
+		// nothing to do at these steps
+
+	case ntTestgroup | beforeChild:
+		// the first child is the test expression, not part of the match
+		if CurIndex == 0 {
+			s.skipChild()
+		}
+
+	case ntEmpty,
+		ntNothing, ntBol, ntEol, ntBoundary, ntNonboundary, ntECMABoundary,
+		ntNonECMABoundary, ntBeginning, ntStart, ntEndZ, ntEnd:
+		s.pushFC(regexFc{nullable: true})
+
+	case ntConcatenate | afterChild:
+		if CurIndex != 0 {
+			foldChild(true)
+		}
+		// once the sequence can no longer be empty, later children are irrelevant
+		if !s.topFC().nullable {
+			s.skipAllChildren = true
+		}
+
+	case ntTestgroup | afterChild:
+		if CurIndex > 1 {
+			foldChild(false)
+		}
+
+	case ntAlternate | afterChild, ntTestref | afterChild:
+		if CurIndex != 0 {
+			foldChild(false)
+		}
+
+	case ntLoop | afterChild, ntLazyloop | afterChild:
+		if node.m == 0 {
+			s.topFC().nullable = true
+		}
+
+	case ntRequire | beforeChild, ntPrevent | beforeChild:
+		s.skipChild()
+		s.pushFC(regexFc{nullable: true})
+
+	case ntOne, ntNotone:
+		s.pushFC(newRegexFc(node.ch, nt == ntNotone, false, ci))
+
+	case ntOneloop, ntOnelazy:
+		s.pushFC(newRegexFc(node.ch, false, node.m == 0, ci))
+
+	case ntNotoneloop, ntNotonelazy:
+		s.pushFC(newRegexFc(node.ch, true, node.m == 0, ci))
+
+	case ntMulti:
+		switch {
+		case len(node.str) == 0:
+			s.pushFC(regexFc{nullable: true})
+		case rtl:
+			// scanning right to left, the string's last rune is met first
+			s.pushFC(newRegexFc(node.str[len(node.str)-1], false, false, ci))
+		default:
+			s.pushFC(newRegexFc(node.str[0], false, false, ci))
+		}
+
+	case ntSet:
+		s.pushFC(regexFc{cc: node.set.Copy(), nullable: false, caseInsensitive: ci})
+
+	case ntSetloop, ntSetlazy:
+		s.pushFC(regexFc{cc: node.set.Copy(), nullable: node.m == 0, caseInsensitive: ci})
+
+	case ntRef:
+		s.pushFC(regexFc{cc: *AnyClass(), nullable: true})
+
+	default:
+		panic(fmt.Sprintf("unexpected op code: %v", nt))
+	}
+}
+
+// regexFc is one partial result of the first-character analysis.
+type regexFc struct {
+ cc CharSet // characters that may come first
+ nullable bool // set for empty/zero-width nodes and for loops whose minimum count is 0
+ caseInsensitive bool // true if any contributing node had IgnoreCase set
+}
+
+// newRegexFc builds the regexFc for a single-character node.
+//
+// When not is false the set holds just ch. When not is true the set holds
+// every rune except ch: the range below ch (if any) plus the range above ch
+// (if any). nullable and caseInsensitive are stored unchanged.
+func newRegexFc(ch rune, not, nullable, caseInsensitive bool) regexFc {
+	r := regexFc{
+		caseInsensitive: caseInsensitive,
+		nullable:        nullable,
+	}
+	if not {
+		if ch > 0 {
+			r.cc.addRange('\x00', ch-1)
+		}
+		// The upper range runs to utf8.MaxRune, so the guard must use the same
+		// bound. A 16-bit limit (0xFFFF) would drop every rune above ch when ch
+		// is at or above U+FFFF, leaving the negated set incomplete.
+		if ch < utf8.MaxRune {
+			r.cc.addRange(ch+1, utf8.MaxRune)
+		}
+	} else {
+		r.cc.addRange(ch, ch)
+	}
+	return r
+}
+
+// getFirstChars returns the computed first-character set. For a
+// case-insensitive result, addLowercase is applied to the set first; this
+// modifies r.cc in place before it is returned.
+func (r *regexFc) getFirstChars() CharSet {
+	if !r.caseInsensitive {
+		return r.cc
+	}
+
+	r.cc.addLowercase()
+	return r.cc
+}
+
+// addFC folds fc into r. With concatenate set, fc follows r in sequence, so
+// it only contributes while r is still nullable. Otherwise fc is an
+// alternative to r and always contributes. It returns false, leaving r
+// untouched, when either character set is not mergeable.
+func (r *regexFc) addFC(fc regexFc, concatenate bool) bool {
+	if !(r.cc.IsMergeable() && fc.cc.IsMergeable()) {
+		return false
+	}
+
+	switch {
+	case !concatenate:
+		// alternation: nullable if either side is
+		if fc.nullable {
+			r.nullable = true
+		}
+	case !r.nullable:
+		// r already consumes a character; what follows cannot come first
+		return true
+	case !fc.nullable:
+		r.nullable = false
+	}
+
+	r.caseInsensitive = r.caseInsensitive || fc.caseInsensitive
+	r.cc.addSet(fc.cc)
+
+	return true
+}
+
+// getPrefix is a companion to the first-character analysis: it looks for a
+// literal string that every match must begin with. It is deliberately
+// simple. It steps through capture and greedy wrappers and over zero-width
+// nodes inside a concatenation, and gives up (returns nil) at the first
+// node that is neither of those nor a literal.
+func getPrefix(tree *RegexTree) *Prefix {
+	var (
+		concat *regexNode // concatenation whose children are being stepped through
+		idx    int        // next child of concat to look at
+	)
+
+	node := tree.root
+	for {
+		switch node.t {
+		case ntConcatenate:
+			if len(node.children) > 0 {
+				concat, idx = node, 0
+			}
+
+		case ntGreedy, ntCapture:
+			node = node.children[0]
+			concat = nil
+			continue
+
+		case ntOneloop, ntOnelazy:
+			if node.m <= 0 {
+				return nil
+			}
+			return &Prefix{
+				PrefixStr:       repeat(node.ch, node.m),
+				CaseInsensitive: node.options&IgnoreCase != 0,
+			}
+
+		case ntOne:
+			return &Prefix{
+				PrefixStr:       []rune{node.ch},
+				CaseInsensitive: node.options&IgnoreCase != 0,
+			}
+
+		case ntMulti:
+			return &Prefix{
+				PrefixStr:       node.str,
+				CaseInsensitive: node.options&IgnoreCase != 0,
+			}
+
+		case ntBol, ntEol, ntBoundary, ntECMABoundary, ntBeginning, ntStart,
+			ntEndZ, ntEnd, ntEmpty, ntRequire, ntPrevent:
+			// zero-width: step over it and look at the next sibling
+
+		default:
+			return nil
+		}
+
+		if concat == nil || idx >= len(concat.children) {
+			return nil
+		}
+
+		node = concat.children[idx]
+		idx++
+	}
+}
+
+// repeat returns a slice holding the rune r repeated c times, capped at
+// MaxPrefixSize entries. A count that is zero or negative (after capping)
+// yields a nil slice; previously it panicked.
+func repeat(r rune, c int) []rune {
+	if c > MaxPrefixSize {
+		c = MaxPrefixSize
+	}
+	if c <= 0 {
+		return nil
+	}
+
+	ret := make([]rune, c)
+
+	// Seed one element, then double the filled region with copy until the
+	// slice is full. copy stops at the end of ret, so overshoot is harmless.
+	ret[0] = r
+	for filled := 1; filled < len(ret); filled *= 2 {
+		copy(ret[filled:], ret[:filled])
+	}
+
+	return ret
+}
+
+// BmPrefix precomputes the Boyer-Moore
+// tables for fast string scanning. These tables allow
+// you to scan for the first occurrence of a string within
+// a large body of text without examining every character.
+// The performance of the heuristic depends on the actual
+// string and the text being searched, but usually, the longer
+// the string that is being searched for, the fewer characters
+// need to be examined.
+type BmPrefix struct {
+ positive []int // good-suffix shifts, indexed by pattern position
+ negativeASCII []int // bad-character shifts for runes below 128
+ negativeUnicode [][]int // bad-character shifts for runes up to 0xFFFF, in 256-entry pages keyed by the high byte; pages are allocated lazily
+ pattern []rune // the pattern being searched for (lower-cased when caseInsensitive)
+ lowASCII rune // smallest ASCII rune that occurs in pattern
+ highASCII rune // largest ASCII rune that occurs in pattern
+ rightToLeft bool // scan and match from the end towards the start
+ caseInsensitive bool // compare text runes after unicode.ToLower
+}
+
+// newBmPrefix builds the Boyer-Moore shift tables for pattern.
+//
+// When caseInsensitive is set, the pattern slice is lower-cased in place, so
+// the caller's slice is modified. rightToLeft mirrors every direction used
+// while building the tables. The function returns nil when the pattern
+// contains a rune above 0xFFFF, which the tables cannot represent.
+//
+// NOTE(review): the pattern is indexed without a length check, so an empty
+// pattern panics; callers are presumably expected to pass a non-empty one.
+func newBmPrefix(pattern []rune, caseInsensitive, rightToLeft bool) *BmPrefix {
+
+ b := &BmPrefix{
+ rightToLeft: rightToLeft,
+ caseInsensitive: caseInsensitive,
+ pattern: pattern,
+ }
+
+ if caseInsensitive {
+ for i := 0; i < len(b.pattern); i++ {
+ // We do the ToLower character by character for consistency. With surrogate chars, doing
+ // a ToLower on the entire string could actually change the surrogate pair. This is more correct
+ // linguistically, but since Regex doesn't support surrogates, it's more important to be
+ // consistent.
+
+ b.pattern[i] = unicode.ToLower(b.pattern[i])
+ }
+ }
+
+ var beforefirst, last, bump int
+ var scan, match int
+
+ // last is the index of the pattern's tail in scan direction, beforefirst is
+ // one step past its head, and bump is the scan direction (+1 or -1).
+ if !rightToLeft {
+ beforefirst = -1
+ last = len(b.pattern) - 1
+ bump = 1
+ } else {
+ beforefirst = len(b.pattern)
+ last = 0
+ bump = -1
+ }
+
+ // PART I - the good-suffix shift table
+ //
+ // compute the positive requirement:
+ // if char "i" is the first one from the right that doesn't match,
+ // then we know the matcher can advance by _positive[i].
+ //
+ // This algorithm is a simplified variant of the standard
+ // Boyer-Moore good suffix calculation.
+
+ b.positive = make([]int, len(b.pattern))
+
+ examine := last
+ ch := b.pattern[examine]
+ b.positive[examine] = bump
+ examine -= bump
+
+Outerloop:
+ for {
+ // find an internal char (examine) that matches the tail
+
+ for {
+ if examine == beforefirst {
+ break Outerloop
+ }
+ if b.pattern[examine] == ch {
+ break
+ }
+ examine -= bump
+ }
+
+ match = last
+ scan = examine
+
+ // find the length of the match
+ for {
+ if scan == beforefirst || b.pattern[match] != b.pattern[scan] {
+ // at the end of the match, note the difference in _positive
+ // this is not the length of the match, but the distance from the internal match
+ // to the tail suffix.
+ if b.positive[match] == 0 {
+ b.positive[match] = match - scan
+ }
+
+ // System.Diagnostics.Debug.WriteLine("Set positive[" + match + "] to " + (match - scan));
+
+ break
+ }
+
+ scan -= bump
+ match -= bump
+ }
+
+ examine -= bump
+ }
+
+ match = last - bump
+
+ // scan for the chars for which there are no shifts that yield a different candidate
+
+ // The inside of the if statement used to say
+ // "_positive[match] = last - beforefirst;"
+ // This is slightly less aggressive in how much we skip, but at worst it
+ // should mean a little more work rather than skipping a potential match.
+ for match != beforefirst {
+ if b.positive[match] == 0 {
+ b.positive[match] = bump
+ }
+
+ match -= bump
+ }
+
+ // PART II - the bad-character shift table
+ //
+ // compute the negative requirement:
+ // if char "ch" is the reject character when testing position "i",
+ // we can slide up by _negative[ch];
+ // (_negative[ch] = str.Length - 1 - str.LastIndexOf(ch))
+ //
+ // the lookup table is divided into ASCII and Unicode portions;
+ // only those parts of the Unicode 16-bit code set that actually
+ // appear in the string are in the table. (Maximum size with
+ // Unicode is 65K; ASCII only case is 512 bytes.)
+
+ b.negativeASCII = make([]int, 128)
+
+ // last-beforefirst is the default entry: the full pattern length, signed by direction.
+ for i := 0; i < len(b.negativeASCII); i++ {
+ b.negativeASCII[i] = last - beforefirst
+ }
+
+ b.lowASCII = 127
+ b.highASCII = 0
+
+ for examine = last; examine != beforefirst; examine -= bump {
+ ch = b.pattern[examine]
+
+ switch {
+ case ch < 128:
+ if b.lowASCII > ch {
+ b.lowASCII = ch
+ }
+
+ if b.highASCII < ch {
+ b.highASCII = ch
+ }
+
+ // only the first hit (closest to the tail) is recorded
+ if b.negativeASCII[ch] == last-beforefirst {
+ b.negativeASCII[ch] = last - examine
+ }
+ case ch <= 0xffff:
+ // i selects the 256-entry page (high byte), j the slot inside it (low byte)
+ i, j := ch>>8, ch&0xFF
+
+ if b.negativeUnicode == nil {
+ b.negativeUnicode = make([][]int, 256)
+ }
+
+ if b.negativeUnicode[i] == nil {
+ newarray := make([]int, 256)
+
+ for k := 0; k < len(newarray); k++ {
+ newarray[k] = last - beforefirst
+ }
+
+ // page 0 overlaps the ASCII table: carry over what was recorded so
+ // far and let both names share the same 256-entry slice
+ if i == 0 {
+ copy(newarray, b.negativeASCII)
+ //TODO: this line needed?
+ b.negativeASCII = newarray
+ }
+
+ b.negativeUnicode[i] = newarray
+ }
+
+ if b.negativeUnicode[i][j] == last-beforefirst {
+ b.negativeUnicode[i][j] = last - examine
+ }
+ default:
+ // we can't do the filter because this algo doesn't support
+ // unicode chars >0xffff
+ return nil
+ }
+ }
+
+ return b
+}
+
+// String returns the pattern the tables were built for, as a string.
+func (b *BmPrefix) String() string {
+	pattern := b.pattern
+	return string(pattern)
+}
+
+// Dump returns the contents of the filter as a human readable string
+func (b *BmPrefix) Dump(indent string) string {
+ buf := &bytes.Buffer{}
+
+ fmt.Fprintf(buf, "%sBM Pattern: %s\n%sPositive: ", indent, string(b.pattern), indent)
+ for i := 0; i < len(b.positive); i++ {
+ buf.WriteString(strconv.Itoa(b.positive[i]))
+ buf.WriteRune(' ')
+ }
+ buf.WriteRune('\n')
+
+ if b.negativeASCII != nil {
+ buf.WriteString(indent)
+ buf.WriteString("Negative table\n")
+ for i := 0; i < len(b.negativeASCII); i++ {
+ if b.negativeASCII[i] != len(b.pattern) {
+ fmt.Fprintf(buf, "%s %s %s\n", indent, Escape(string(rune(i))), strconv.Itoa(b.negativeASCII[i]))
+ }
+ }
+ }
+
+ return buf.String()
+}
+
+// Scan uses the Boyer-Moore algorithm to find the first occurrence
+// of the pattern within text, beginning at index, and
+// constrained within beglimit and endlimit.
+//
+// The direction and case-sensitivity of the match are the ones given to
+// newBmPrefix. Scan returns the position of the match (its start for a
+// left-to-right scan, one past its end for a right-to-left scan), or -1 if
+// the scan position leaves [beglimit, endlimit) before a match is found.
+func (b *BmPrefix) Scan(text []rune, index, beglimit, endlimit int) int {
+	var (
+		defadv, test, test2         int
+		match, startmatch, endmatch int
+		bump, advance               int
+		chTest                      rune
+	)
+
+	// startmatch is the pattern position compared first (its tail in scan
+	// direction), endmatch the one compared last, and defadv the largest
+	// possible skip: the whole pattern length, signed by direction.
+	if !b.rightToLeft {
+		defadv = len(b.pattern)
+		startmatch = len(b.pattern) - 1
+		endmatch = 0
+		test = index + defadv - 1
+		bump = 1
+	} else {
+		defadv = -len(b.pattern)
+		startmatch = 0
+		endmatch = -defadv - 1
+		test = index + defadv
+		bump = -1
+	}
+
+	chMatch := b.pattern[startmatch]
+
+	for {
+		if test >= endlimit || test < beglimit {
+			return -1
+		}
+
+		chTest = text[test]
+		if b.caseInsensitive {
+			chTest = unicode.ToLower(chTest)
+		}
+
+		if chTest != chMatch {
+			// Tail mismatch: slide by the bad-character shift, or by the whole
+			// pattern when no table covers this rune.
+			shift, ok := b.badCharShift(chTest)
+			if !ok {
+				shift = defadv
+			}
+			test += shift
+			continue
+		}
+
+		// The tail matched; compare the rest of the pattern backwards.
+		test2 = test
+		match = startmatch
+
+		for {
+			if match == endmatch {
+				if b.rightToLeft {
+					return test2 + 1
+				}
+				return test2
+			}
+
+			match -= bump
+			test2 -= bump
+
+			chTest = text[test2]
+			if b.caseInsensitive {
+				chTest = unicode.ToLower(chTest)
+			}
+
+			if chTest != b.pattern[match] {
+				// Take the larger (in scan direction) of the good-suffix shift
+				// and the bad-character shift, when the latter is known.
+				advance = b.positive[match]
+				if shift, ok := b.badCharShift(chTest); ok {
+					test2 = (match - startmatch) + shift
+
+					if b.rightToLeft {
+						if test2 < advance {
+							advance = test2
+						}
+					} else if test2 > advance {
+						advance = test2
+					}
+				}
+
+				test += advance
+				break
+			}
+		}
+	}
+}
+
+// badCharShift looks up the bad-character shift for ch. ok is false when no
+// table covers ch: runes above 0xFFFF, or runes whose 256-entry page was
+// never allocated because the pattern has no rune in it.
+//
+// Both lookups in Scan go through here so they agree with the tables built
+// by newBmPrefix. Runes are 32 bits wide, so the ASCII test must be a plain
+// range check: masking with 0xFF80 lets U+10000 through and indexes
+// negativeASCII out of range. The upper bound is inclusive because the
+// tables cover runes up to and including 0xFFFF.
+func (b *BmPrefix) badCharShift(ch rune) (shift int, ok bool) {
+	switch {
+	case ch < 128:
+		return b.negativeASCII[ch], true
+	case ch <= 0xffff && len(b.negativeUnicode) > 0:
+		if page := b.negativeUnicode[ch>>8]; len(page) > 0 {
+			return page[ch&0xFF], true
+		}
+	}
+	return 0, false
+}
+
+// IsMatch is the quick test used for anchored regexes in place of a Scan: it
+// checks whether the pattern occurs exactly at index. Left to right the
+// pattern must start at index; right to left it must end just before index.
+// In both cases it has to fit between beglimit and endlimit.
+func (b *BmPrefix) IsMatch(text []rune, index, beglimit, endlimit int) bool {
+	size := len(b.pattern)
+
+	if b.rightToLeft {
+		if index > endlimit || index-beglimit < size {
+			return false
+		}
+		return b.matchPattern(text, index-size)
+	}
+
+	if index < beglimit || endlimit-index < size {
+		return false
+	}
+	return b.matchPattern(text, index)
+}
+
+// matchPattern reports whether the pattern occurs in text starting at index.
+// For a case-insensitive prefix each text rune is lower-cased before the
+// comparison; the pattern itself was lower-cased when the prefix was built.
+func (b *BmPrefix) matchPattern(text []rune, index int) bool {
+	if len(text)-index < len(b.pattern) {
+		return false
+	}
+
+	for i, want := range b.pattern {
+		got := text[index+i]
+		if b.caseInsensitive {
+			got = unicode.ToLower(got)
+		}
+		if got != want {
+			return false
+		}
+	}
+	return true
+}
+
+// AnchorLoc is a bit set describing where a regex can be pegged.
+type AnchorLoc int16
+
+// where the regex can be pegged
+//
+// Each constant is a distinct bit so values can be OR-ed together. Only
+// AnchorBeginning is declared with the AnchorLoc type; the others are
+// untyped integer constants that convert to AnchorLoc where one is expected.
+const (
+ AnchorBeginning AnchorLoc = 0x0001
+ AnchorBol = 0x0002
+ AnchorStart = 0x0004
+ AnchorEol = 0x0008
+ AnchorEndZ = 0x0010
+ AnchorEnd = 0x0020
+ AnchorBoundary = 0x0040
+ AnchorECMABoundary = 0x0080
+)
+
+// getAnchors reports the anchor, if any, that the regex tree starts with. It
+// steps through capture and greedy wrappers and over empty and lookaround
+// nodes inside a concatenation, and stops at the first anchor node or at
+// the first node of any other kind.
+func getAnchors(tree *RegexTree) AnchorLoc {
+	var (
+		concat *regexNode // concatenation whose children are being stepped through
+		next   int        // next child of concat to look at
+		found  AnchorLoc
+	)
+
+	node := tree.root
+	for {
+		switch node.t {
+		case ntConcatenate:
+			if len(node.children) > 0 {
+				concat, next = node, 0
+			}
+
+		case ntGreedy, ntCapture:
+			node = node.children[0]
+			concat = nil
+			continue
+
+		case ntBol, ntEol, ntBoundary, ntECMABoundary, ntBeginning,
+			ntStart, ntEndZ, ntEnd:
+			return found | anchorFromType(node.t)
+
+		case ntEmpty, ntRequire, ntPrevent:
+			// step over it and look at the next sibling
+
+		default:
+			return found
+		}
+
+		if concat == nil || next >= len(concat.children) {
+			return found
+		}
+
+		node = concat.children[next]
+		next++
+	}
+}
+
+func anchorFromType(t nodeType) AnchorLoc {
+ switch t {
+ case ntBol:
+ return AnchorBol
+ case ntEol:
+ return AnchorEol
+ case ntBoundary:
+ return AnchorBoundary
+ case ntECMABoundary:
+ return AnchorECMABoundary
+ case ntBeginning:
+ return AnchorBeginning
+ case ntStart:
+ return AnchorStart
+ case ntEndZ:
+ return AnchorEndZ
+ case ntEnd:
+ return AnchorEnd
+ default:
+ return 0
+ }
+}
+
+// anchorDescription returns a human-readable description of the anchors
+func (anchors AnchorLoc) String() string {
+ buf := &bytes.Buffer{}
+
+ if 0 != (anchors & AnchorBeginning) {
+ buf.WriteString(", Beginning")
+ }
+ if 0 != (anchors & AnchorStart) {
+ buf.WriteString(", Start")
+ }
+ if 0 != (anchors & AnchorBol) {
+ buf.WriteString(", Bol")
+ }
+ if 0 != (anchors & AnchorBoundary) {
+ buf.WriteString(", Boundary")
+ }
+ if 0 != (anchors & AnchorECMABoundary) {
+ buf.WriteString(", ECMABoundary")
+ }
+ if 0 != (anchors & AnchorEol) {
+ buf.WriteString(", Eol")
+ }
+ if 0 != (anchors & AnchorEnd) {
+ buf.WriteString(", End")
+ }
+ if 0 != (anchors & AnchorEndZ) {
+ buf.WriteString(", EndZ")
+ }
+
+ // trim off comma
+ if buf.Len() >= 2 {
+ return buf.String()[2:]
+ }
+ return "None"
+}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/replacerdata.go b/vendor/github.com/dlclark/regexp2/syntax/replacerdata.go
new file mode 100644
index 0000000000..bcf4d3f257
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/replacerdata.go
@@ -0,0 +1,87 @@
+package syntax
+
+import (
+ "bytes"
+ "errors"
+)
+
+// ReplacerData is the pre-parsed form of a replacement string, as built by
+// NewReplacerData.
+type ReplacerData struct {
+ Rep string // the original replacement text
+ Strings []string // literal pieces of the replacement, in order of appearance
+ Rules []int // one entry per step: an index into Strings, or a negative code for a group reference
+}
+
+// Codes used in ReplacerData.Rules. NewReplacerData encodes a group
+// reference as -replaceSpecials-1-slot, which keeps references clear of the
+// negative values named here.
+// NOTE(review): the four named values are presumably emitted for special
+// substitutions handled by the replacement parser; confirm there.
+const (
+ replaceSpecials = 4
+ replaceLeftPortion = -1
+ replaceRightPortion = -2
+ replaceLastGroup = -3
+ replaceWholeString = -4
+)
+
+// ErrReplacementError is a general error raised while parsing the replacement text.
+var ErrReplacementError = errors.New("Replacement pattern error.")
+
+// NewReplacerData populates a reusable ReplacerData from the replacement
+// string rep and the capture group data of a regexp (caps, capsize,
+// capnames), parsed with options op.
+//
+// The replacement is parsed into a concatenation of literals and group
+// references. Runs of literals are joined into one entry of Strings, and
+// Rules records, in order, either the index of a literal in Strings or the
+// code -replaceSpecials-1-slot for a group reference.
+//
+// It returns the parser's error if the replacement cannot be parsed, and
+// ErrReplacementError if the parsed tree has an unexpected shape. The
+// latter case used to panic even though the function reports errors.
+func NewReplacerData(rep string, caps map[int]int, capsize int, capnames map[string]int, op RegexOptions) (*ReplacerData, error) {
+	p := parser{
+		options:  op,
+		caps:     caps,
+		capsize:  capsize,
+		capnames: capnames,
+	}
+	p.setPattern(rep)
+	concat, err := p.scanReplacement()
+	if err != nil {
+		return nil, err
+	}
+
+	if concat.t != ntConcatenate {
+		return nil, ErrReplacementError
+	}
+
+	sb := &bytes.Buffer{}
+	var (
+		strings []string
+		rules   []int
+	)
+
+	// flushLiteral turns the literal text gathered so far into one rule.
+	flushLiteral := func() {
+		if sb.Len() == 0 {
+			return
+		}
+		rules = append(rules, len(strings))
+		strings = append(strings, sb.String())
+		sb.Reset()
+	}
+
+	for _, child := range concat.children {
+		switch child.t {
+		case ntMulti:
+			child.writeStrToBuf(sb)
+
+		case ntOne:
+			sb.WriteRune(child.ch)
+
+		case ntRef:
+			flushLiteral()
+
+			// translate the group number through caps when one was supplied
+			slot := child.m
+			if len(caps) > 0 && slot >= 0 {
+				slot = caps[slot]
+			}
+
+			rules = append(rules, -replaceSpecials-1-slot)
+
+		default:
+			return nil, ErrReplacementError
+		}
+	}
+
+	flushLiteral()
+
+	return &ReplacerData{
+		Rep:     rep,
+		Strings: strings,
+		Rules:   rules,
+	}, nil
+}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/tree.go b/vendor/github.com/dlclark/regexp2/syntax/tree.go
new file mode 100644
index 0000000000..ea28829319
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/tree.go
@@ -0,0 +1,654 @@
+package syntax
+
+import (
+ "bytes"
+ "fmt"
+ "math"
+ "strconv"
+)
+
+// RegexTree wraps the root of a parsed node tree together with the capture
+// bookkeeping and options that belong to the whole expression.
+type RegexTree struct {
+ root *regexNode // root of the parsed node tree
+ caps map[int]int // presumably capture number -> slot; confirm against the parser
+ capnumlist []int
+ captop int
+ Capnames map[string]int // capture name -> number
+ Caplist []string
+ options RegexOptions
+}
+
+// regexNode is one node of the parsed tree for a regular expression.
+
+// Implementation notes:
+//
+// Since the node tree is a temporary data structure only used
+// during compilation of the regexp to integer codes, it's
+// designed for clarity and convenience rather than
+// space efficiency.
+//
+// RegexNodes are built into a tree, linked by the n.children list.
+// Each node also has a n.parent and n.ichild member indicating
+// its parent and which child # it is in its parent's list.
+//
+// RegexNodes come in as many types as there are constructs in
+// a regular expression, for example, "concatenate", "alternate",
+// "one", "rept", "group". There are also node types for basic
+// peephole optimizations, e.g., "onerep", "notsetrep", etc.
+//
+// Because perl 5 allows "lookback" groups that scan backwards,
+// each node also gets a "direction". Normally the value of
+// boolean n.backward = false.
+//
+// During parsing, top-level nodes are also stacked onto a parse
+// stack (a stack of trees). For this purpose we have a n.next
+// pointer. [Note that to save a few bytes, we could overload the
+// n.parent pointer instead.]
+//
+// On the parse stack, each tree has a "role" - basically, the
+// nonterminal in the grammar that the parser has currently
+// assigned to the tree. That code is stored in n.role.
+//
+// Finally, some of the different kinds of nodes have data.
+// Two integers (for the looping constructs) are stored in
+// n.operands, and an object (either a string or a set)
+// is stored in n.data
+//
+// NOTE(review): these notes mention members (n.parent, n.ichild, n.backward,
+// n.role, n.operands, n.data) that the struct below does not declare. Here
+// the parent link is n.next (set by addChild), the two integers are n.m and
+// n.n, and the data lives in n.str, n.set and n.ch.
+type regexNode struct {
+ t nodeType // kind of node
+ children []*regexNode // child nodes, in order
+ str []rune // literal text (ntMulti)
+ set *CharSet // character class (set node types)
+ ch rune // single character (one/notone node types)
+ m int // minimum repeat count for loops; group number for ntRef
+ n int // maximum repeat count for loops
+ options RegexOptions // options in effect for this node
+ next *regexNode // parent link, set by addChild
+}
+
+// nodeType identifies the kind of a regexNode.
+type nodeType int32
+
+// Only ntOnerep is declared with the nodeType type; the remaining names are
+// untyped integer constants that convert to nodeType where one is expected.
+const (
+ // The following are leaves, and correspond to primitive operations
+
+ ntOnerep nodeType = 0 // lef,back char,min,max a {n}
+ ntNotonerep = 1 // lef,back char,min,max .{n}
+ ntSetrep = 2 // lef,back set,min,max [\d]{n}
+ ntOneloop = 3 // lef,back char,min,max a {,n}
+ ntNotoneloop = 4 // lef,back char,min,max .{,n}
+ ntSetloop = 5 // lef,back set,min,max [\d]{,n}
+ ntOnelazy = 6 // lef,back char,min,max a {,n}?
+ ntNotonelazy = 7 // lef,back char,min,max .{,n}?
+ ntSetlazy = 8 // lef,back set,min,max [\d]{,n}?
+ ntOne = 9 // lef char a
+ ntNotone = 10 // lef char [^a]
+ ntSet = 11 // lef set [a-z\s] \w \s \d
+ ntMulti = 12 // lef string abcd
+ ntRef = 13 // lef group \#
+ ntBol = 14 // ^
+ ntEol = 15 // $
+ ntBoundary = 16 // \b
+ ntNonboundary = 17 // \B
+ ntBeginning = 18 // \A
+ ntStart = 19 // \G
+ ntEndZ = 20 // \Z
+ ntEnd = 21 // \z (presumably; this comment previously repeated \Z)
+
+ // Interior nodes do not correspond to primitive operations, but
+ // control structures compositing other operations
+
+ // Concat and alternate take n children, and can run forward or backwards
+
+ ntNothing = 22 // []
+ ntEmpty = 23 // ()
+ ntAlternate = 24 // a|b
+ ntConcatenate = 25 // ab
+ ntLoop = 26 // m,x * + ? {,}
+ ntLazyloop = 27 // m,x *? +? ?? {,}?
+ ntCapture = 28 // n ()
+ ntGroup = 29 // (?:)
+ ntRequire = 30 // (?=) (?<=)
+ ntPrevent = 31 // (?!) (?<!)
+ ntGreedy = 32 // (?>) (?<)
+ ntTestref = 33 // (?(n) | )
+ ntTestgroup = 34 // (?(...) | )
+
+ ntECMABoundary = 41 // \b
+ ntNonECMABoundary = 42 // \B
+)
+
+// newRegexNode creates a node of type t with options opt and no payload.
+func newRegexNode(t nodeType, opt RegexOptions) *regexNode {
+	node := new(regexNode)
+	node.t = t
+	node.options = opt
+	return node
+}
+
+// newRegexNodeCh creates a node of type t with options opt that carries the
+// single character ch.
+func newRegexNodeCh(t nodeType, opt RegexOptions, ch rune) *regexNode {
+	node := &regexNode{t: t, options: opt}
+	node.ch = ch
+	return node
+}
+
+// newRegexNodeStr creates a node of type t with options opt that carries the
+// literal text str. The slice is stored as given, not copied.
+func newRegexNodeStr(t nodeType, opt RegexOptions, str []rune) *regexNode {
+	node := &regexNode{t: t, options: opt}
+	node.str = str
+	return node
+}
+
+// newRegexNodeSet creates a node of type t with options opt that carries the
+// character class set. The pointer is stored as given.
+func newRegexNodeSet(t nodeType, opt RegexOptions, set *CharSet) *regexNode {
+	node := &regexNode{t: t, options: opt}
+	node.set = set
+	return node
+}
+
+// newRegexNodeM creates a node of type t with options opt and the integer
+// operand m.
+func newRegexNodeM(t nodeType, opt RegexOptions, m int) *regexNode {
+	node := &regexNode{t: t, options: opt}
+	node.m = m
+	return node
+}
+
+// newRegexNodeMN creates a node of type t with options opt and the two
+// integer operands m and n.
+func newRegexNodeMN(t nodeType, opt RegexOptions, m, n int) *regexNode {
+	node := &regexNode{t: t, options: opt}
+	node.m, node.n = m, n
+	return node
+}
+
+// writeStrToBuf appends the node's literal text to buf, rune by rune.
+func (n *regexNode) writeStrToBuf(buf *bytes.Buffer) {
+	for _, r := range n.str {
+		buf.WriteRune(r)
+	}
+}
+
+// addChild reduces child and appends the result to n's children. The
+// appended node's next pointer is set to n, making n its parent link.
+func (n *regexNode) addChild(child *regexNode) {
+	simplified := child.reduce()
+	simplified.next = n
+	n.children = append(n.children, simplified)
+}
+
+// insertChildren splices nodes into n's children so that the first inserted
+// node lands at position afterIndex. The result is built in a new slice.
+func (n *regexNode) insertChildren(afterIndex int, nodes []*regexNode) {
+	merged := make([]*regexNode, 0, len(n.children)+len(nodes))
+	merged = append(merged, n.children[:afterIndex]...)
+	merged = append(merged, nodes...)
+	merged = append(merged, n.children[afterIndex:]...)
+	n.children = merged
+}
+
+// removeChildren drops the children in [startIndex, endIndex): the start
+// index is removed, the end index is kept. The remaining tail is shifted
+// down within the existing slice.
+func (n *regexNode) removeChildren(startIndex, endIndex int) {
+	kept := n.children[:startIndex]
+	kept = append(kept, n.children[endIndex:]...)
+	n.children = kept
+}
+
+// makeRep turns a one/notone/set node into the matching repeater by moving
+// its type by the distance between t and ntOne, and stores the repeat
+// bounds. Pass the one-lazy or one-loop type as t.
+func (n *regexNode) makeRep(t nodeType, min, max int) {
+	offset := t - ntOne
+	n.t += offset
+	n.m, n.n = min, max
+}
+
+// reduce applies the simplification that fits the node's type and returns
+// the node that should take its place. Types without a simplification are
+// returned unchanged.
+func (n *regexNode) reduce() *regexNode {
+	result := n
+
+	switch n.t {
+	case ntAlternate:
+		result = n.reduceAlternation()
+	case ntConcatenate:
+		result = n.reduceConcatenation()
+	case ntLoop, ntLazyloop:
+		result = n.reduceRep()
+	case ntGroup:
+		result = n.reduceGroup()
+	case ntSet, ntSetloop:
+		result = n.reduceSet()
+	}
+
+	return result
+}
+
+// Basic optimization. Single-letter alternations can be replaced
+// by faster set specifications, and nested alternations with no
+// intervening operators can be flattened:
+//
+// a|b|c|def|g|h -> [a-c]|def|[gh]
+// apple|(?:orange|pear)|grape -> apple|orange|pear|grape
+//
+// The children are compacted in place: i reads each child, j is where the
+// next surviving child is written, and the leftover tail is removed at the
+// end. An alternation left with no children becomes ntNothing; one left
+// with a single child is replaced by that child.
+func (n *regexNode) reduceAlternation() *regexNode {
+ if len(n.children) == 0 {
+ return newRegexNode(ntNothing, n.options)
+ }
+
+ wasLastSet := false
+ lastNodeCannotMerge := false
+ var optionsLast RegexOptions
+ var i, j int
+
+ for i, j = 0, 0; i < len(n.children); i, j = i+1, j+1 {
+ at := n.children[i]
+
+ if j < i {
+ n.children[j] = at
+ }
+
+ // single-pass loop: it exists only so the branches below can break out early
+ for {
+ if at.t == ntAlternate {
+ // flatten: adopt the nested alternation's children right after it,
+ // then drop the nested node itself (j--)
+ for k := 0; k < len(at.children); k++ {
+ at.children[k].next = n
+ }
+ n.insertChildren(i+1, at.children)
+
+ j--
+ } else if at.t == ntSet || at.t == ntOne {
+ // Cannot merge sets if L or I options differ, or if either are negated.
+ optionsAt := at.options & (RightToLeft | IgnoreCase)
+
+ if at.t == ntSet {
+ if !wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge || !at.set.IsMergeable() {
+ wasLastSet = true
+ lastNodeCannotMerge = !at.set.IsMergeable()
+ optionsLast = optionsAt
+ break
+ }
+ } else if !wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge {
+ wasLastSet = true
+ lastNodeCannotMerge = false
+ optionsLast = optionsAt
+ break
+ }
+
+ // The last node was a Set or a One, we're a Set or One and our options are the same.
+ // Merge the two nodes.
+ j--
+ prev := n.children[j]
+
+ var prevCharClass *CharSet
+ if prev.t == ntOne {
+ prevCharClass = &CharSet{}
+ prevCharClass.addChar(prev.ch)
+ } else {
+ prevCharClass = prev.set
+ }
+
+ if at.t == ntOne {
+ prevCharClass.addChar(at.ch)
+ } else {
+ prevCharClass.addSet(*at.set)
+ }
+
+ prev.t = ntSet
+ prev.set = prevCharClass
+ } else if at.t == ntNothing {
+ // an alternative that can never match is simply dropped
+ j--
+ } else {
+ wasLastSet = false
+ lastNodeCannotMerge = false
+ }
+ break
+ }
+ }
+
+ if j < i {
+ n.removeChildren(j, i)
+ }
+
+ return n.stripEnation(ntNothing)
+}
+
+// Basic optimization. Adjacent strings can be concatenated.
+//
+// (?:abc)(?:def) -> abcdef
+//
+// The children are compacted in place: i reads each child, j is where the
+// next surviving child is written, and the leftover tail is removed at the
+// end. A concatenation left with no children becomes ntEmpty; one left with
+// a single child is replaced by that child.
+func (n *regexNode) reduceConcatenation() *regexNode {
+ // Eliminate empties and concat adjacent strings/chars
+
+ var optionsLast RegexOptions
+ var optionsAt RegexOptions
+ var i, j int
+
+ if len(n.children) == 0 {
+ return newRegexNode(ntEmpty, n.options)
+ }
+
+ wasLastString := false
+
+ for i, j = 0, 0; i < len(n.children); i, j = i+1, j+1 {
+ var at, prev *regexNode
+
+ at = n.children[i]
+
+ if j < i {
+ n.children[j] = at
+ }
+
+ if at.t == ntConcatenate &&
+ ((at.options & RightToLeft) == (n.options & RightToLeft)) {
+ // flatten a nested concatenation that runs in the same direction
+ for k := 0; k < len(at.children); k++ {
+ at.children[k].next = n
+ }
+
+ //insert at.children at i+1 index in n.children
+ n.insertChildren(i+1, at.children)
+
+ j--
+ } else if at.t == ntMulti || at.t == ntOne {
+ // Cannot merge strings if L or I options differ
+ optionsAt = at.options & (RightToLeft | IgnoreCase)
+
+ if !wasLastString || optionsLast != optionsAt {
+ wasLastString = true
+ optionsLast = optionsAt
+ continue
+ }
+
+ // merge this literal into the previous surviving child
+ j--
+ prev = n.children[j]
+
+ if prev.t == ntOne {
+ prev.t = ntMulti
+ prev.str = []rune{prev.ch}
+ }
+
+ if (optionsAt & RightToLeft) == 0 {
+ if at.t == ntOne {
+ prev.str = append(prev.str, at.ch)
+ } else {
+ prev.str = append(prev.str, at.str...)
+ }
+ } else {
+ if at.t == ntOne {
+ // insert at the front by expanding our slice, copying the data over, and then setting the value
+ prev.str = append(prev.str, 0)
+ copy(prev.str[1:], prev.str)
+ prev.str[0] = at.ch
+ } else {
+ //insert at the front...this one we'll make a new slice and copy both into it
+ merge := make([]rune, len(prev.str)+len(at.str))
+ copy(merge, at.str)
+ copy(merge[len(at.str):], prev.str)
+ prev.str = merge
+ }
+ }
+ } else if at.t == ntEmpty {
+ // empties contribute nothing to a sequence
+ j--
+ } else {
+ wasLastString = false
+ }
+ }
+
+ if j < i {
+ // remove indices j through i from the children
+ n.removeChildren(j, i)
+ }
+
+ return n.stripEnation(ntEmpty)
+}
+
+// Nested repeaters just get multiplied with each other if they're not
+// too lumpy
+//
+// Starting at n, the loop keeps descending into the first child for as long
+// as that child is a repeater of a compatible kind, multiplying the child's
+// bounds by n's bounds. Products that would overflow are clamped to
+// math.MaxInt32. The innermost node reached is returned in place of n.
+func (n *regexNode) reduceRep() *regexNode {
+
+ u := n
+ t := n.t
+ min := n.m
+ max := n.n
+
+ for {
+ if len(u.children) == 0 {
+ break
+ }
+
+ child := u.children[0]
+
+ // multiply reps of the same type only
+ if child.t != t {
+ childType := child.t
+
+ // a greedy loop may absorb the *loop leaf types, a lazy loop the *lazy ones
+ if !(childType >= ntOneloop && childType <= ntSetloop && t == ntLoop ||
+ childType >= ntOnelazy && childType <= ntSetlazy && t == ntLazyloop) {
+ break
+ }
+ }
+
+ // child can be too lumpy to blur, e.g., (a {100,105}) {3} or (a {2,})?
+ // [but things like (a {2,})+ are not too lumpy...]
+ if u.m == 0 && child.m > 1 || child.n < child.m*2 {
+ break
+ }
+
+ u = child
+ if u.m > 0 {
+ // the division tests for overflow before multiplying
+ if (math.MaxInt32-1)/u.m < min {
+ u.m = math.MaxInt32
+ } else {
+ u.m = u.m * min
+ }
+ }
+ if u.n > 0 {
+ if (math.MaxInt32-1)/u.n < max {
+ u.n = math.MaxInt32
+ } else {
+ u.n = u.n * max
+ }
+ }
+ }
+
+ // a minimum of MaxInt32 on the original node is replaced by ntNothing
+ if math.MaxInt32 == min {
+ return newRegexNode(ntNothing, n.options)
+ }
+ return u
+
+}
+
+// stripEnation removes a needless concatenation or alternation wrapper.
+// With no children it returns a fresh node of emptyType carrying n's options,
+// with exactly one child it returns that child, and otherwise it returns n
+// unchanged.
+func (n *regexNode) stripEnation(emptyType nodeType) *regexNode {
+	if len(n.children) == 0 {
+		return newRegexNode(emptyType, n.options)
+	}
+	if len(n.children) == 1 {
+		return n.children[0]
+	}
+	return n
+}
+
+// reduceGroup skips over nested ntGroup nodes by following the first child
+// until a node of any other type is reached, and returns that node.
+func (n *regexNode) reduceGroup() *regexNode {
+	cur := n
+	for {
+		if cur.t != ntGroup {
+			return cur
+		}
+		cur = cur.children[0]
+	}
+}
+
+// reduceSet simplifies a set node in place and returns it.
+//
+//   - a nil set turns the node into ntNothing;
+//   - a singleton set stores its character in n.ch, clears the set, and
+//     shifts the node type by (ntOne - ntSet);
+//   - an inverse singleton does the same with a shift of (ntNotone - ntSet).
+//
+// Any other set is left untouched.
+func (n *regexNode) reduceSet() *regexNode {
+	switch {
+	case n.set == nil:
+		n.t = ntNothing
+	case n.set.IsSingleton():
+		n.ch = n.set.SingletonChar()
+		n.set = nil
+		n.t += ntOne - ntSet
+	case n.set.IsSingletonInverse():
+		n.ch = n.set.SingletonChar()
+		n.set = nil
+		n.t += ntNotone - ntSet
+	}
+
+	return n
+}
+
+// reverseLeft reverses the order of n's children in place when n is a
+// non-empty concatenation with the RightToLeft option set. It always returns n.
+func (n *regexNode) reverseLeft() *regexNode {
+	if n.options&RightToLeft == 0 || n.t != ntConcatenate || len(n.children) == 0 {
+		return n
+	}
+
+	kids := n.children
+	for lo, hi := 0, len(kids)-1; lo < hi; lo, hi = lo+1, hi-1 {
+		kids[lo], kids[hi] = kids[hi], kids[lo]
+	}
+
+	return n
+}
+
+// makeQuantifier applies a {min,max} quantifier to n and returns the
+// resulting node.
+//
+// {0,0} yields a new ntEmpty node and {1,1} yields n itself. For ntOne,
+// ntNotone and ntSet nodes the repetition is folded into n via makeRep;
+// every other node is wrapped as the single child of a new ntLoop (or
+// ntLazyloop when lazy is true) node.
+func (n *regexNode) makeQuantifier(lazy bool, min, max int) *regexNode {
+	switch {
+	case min == 0 && max == 0:
+		return newRegexNode(ntEmpty, n.options)
+	case min == 1 && max == 1:
+		return n
+	}
+
+	if n.t == ntOne || n.t == ntNotone || n.t == ntSet {
+		if lazy {
+			n.makeRep(Onelazy, min, max)
+		} else {
+			n.makeRep(Oneloop, min, max)
+		}
+		return n
+	}
+
+	var loopType nodeType = ntLoop
+	if lazy {
+		loopType = ntLazyloop
+	}
+	wrapped := newRegexNodeMN(loopType, n.options, min, max)
+	wrapped.addChild(n)
+	return wrapped
+}
+
+// debug functions
+
+// typeStr holds the display name of each node type. description indexes it
+// with n.t, so the position of every entry has to match the numeric value of
+// the node type it names.
+var typeStr = []string{
+ "Onerep", "Notonerep", "Setrep",
+ "Oneloop", "Notoneloop", "Setloop",
+ "Onelazy", "Notonelazy", "Setlazy",
+ "One", "Notone", "Set",
+ "Multi", "Ref",
+ "Bol", "Eol", "Boundary", "Nonboundary",
+ "Beginning", "Start", "EndZ", "End",
+ "Nothing", "Empty",
+ "Alternate", "Concatenate",
+ "Loop", "Lazyloop",
+ "Capture", "Group", "Require", "Prevent", "Greedy",
+ "Testref", "Testgroup",
+ "Unknown", "Unknown", "Unknown",
+ "Unknown", "Unknown", "Unknown",
+ "ECMABoundary", "NonECMABoundary",
+}
+
+func (n *regexNode) description() string {
+ buf := &bytes.Buffer{}
+
+ buf.WriteString(typeStr[n.t])
+
+ if (n.options & ExplicitCapture) != 0 {
+ buf.WriteString("-C")
+ }
+ if (n.options & IgnoreCase) != 0 {
+ buf.WriteString("-I")
+ }
+ if (n.options & RightToLeft) != 0 {
+ buf.WriteString("-L")
+ }
+ if (n.options & Multiline) != 0 {
+ buf.WriteString("-M")
+ }
+ if (n.options & Singleline) != 0 {
+ buf.WriteString("-S")
+ }
+ if (n.options & IgnorePatternWhitespace) != 0 {
+ buf.WriteString("-X")
+ }
+ if (n.options & ECMAScript) != 0 {
+ buf.WriteString("-E")
+ }
+
+ switch n.t {
+ case ntOneloop, ntNotoneloop, ntOnelazy, ntNotonelazy, ntOne, ntNotone:
+ buf.WriteString("(Ch = " + CharDescription(n.ch) + ")")
+ break
+ case ntCapture:
+ buf.WriteString("(index = " + strconv.Itoa(n.m) + ", unindex = " + strconv.Itoa(n.n) + ")")
+ break
+ case ntRef, ntTestref:
+ buf.WriteString("(index = " + strconv.Itoa(n.m) + ")")
+ break
+ case ntMulti:
+ fmt.Fprintf(buf, "(String = %s)", string(n.str))
+ break
+ case ntSet, ntSetloop, ntSetlazy:
+ buf.WriteString("(Set = " + n.set.String() + ")")
+ break
+ }
+
+ switch n.t {
+ case ntOneloop, ntNotoneloop, ntOnelazy, ntNotonelazy, ntSetloop, ntSetlazy, ntLoop, ntLazyloop:
+ buf.WriteString("(Min = ")
+ buf.WriteString(strconv.Itoa(n.m))
+ buf.WriteString(", Max = ")
+ if n.n == math.MaxInt32 {
+ buf.WriteString("inf")
+ } else {
+ buf.WriteString(strconv.Itoa(n.n))
+ }
+ buf.WriteString(")")
+
+ break
+ }
+
+ return buf.String()
+}
+
+// padSpace supplies the indentation written by dump. dump slices it as
+// padSpace[:depth] with depth capped at 32, so it must hold 32 bytes.
+var padSpace = bytes.Repeat([]byte(" "), 32)
+
+// Dump returns a textual dump of the tree, produced by dumping its root node.
+func (t *RegexTree) Dump() string {
+ return t.root.dump()
+}
+
+// dump renders n and all of its descendants, one description per line, with
+// each descendant indented by one byte of padSpace per level (at most 32).
+//
+// The walk is iterative: pending records, for every ancestor on the current
+// path, the index of the next child to visit, and the next links are
+// followed to climb back up.
+func (n *regexNode) dump() string {
+	var pending []int
+	node := n
+	childIdx := 0
+
+	buf := bytes.NewBufferString(node.description())
+	buf.WriteRune('\n')
+
+	for {
+		if childIdx < len(node.children) {
+			// Descend into the next unvisited child.
+			pending = append(pending, childIdx+1)
+			node = node.children[childIdx]
+			childIdx = 0
+
+			depth := len(pending)
+			if depth > 32 {
+				depth = 32
+			}
+			buf.Write(padSpace[:depth])
+			buf.WriteString(node.description())
+			buf.WriteRune('\n')
+			continue
+		}
+
+		if len(pending) == 0 {
+			break
+		}
+
+		// All children done: resume the parent where it left off.
+		last := len(pending) - 1
+		childIdx = pending[last]
+		pending = pending[:last]
+		node = node.next
+	}
+	return buf.String()
+}
diff --git a/vendor/github.com/dlclark/regexp2/syntax/writer.go b/vendor/github.com/dlclark/regexp2/syntax/writer.go
new file mode 100644
index 0000000000..a5aa11ca06
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/syntax/writer.go
@@ -0,0 +1,500 @@
+package syntax
+
+import (
+ "bytes"
+ "fmt"
+ "math"
+ "os"
+)
+
+// Write generates the Code for tree using a fresh writer.
+//
+// When the tree's Debug option is set and code was produced, the code's dump
+// followed by a newline is written to standard output. The code and error
+// from code generation are returned unchanged.
+func Write(tree *RegexTree) (*Code, error) {
+	w := writer{
+		emitted:    make([]int, 2),
+		intStack:   make([]int, 0, 32),
+		stringhash: make(map[string]int),
+		sethash:    make(map[string]int),
+	}
+
+	code, err := w.codeFromTree(tree)
+	if code != nil && tree.options&Debug > 0 {
+		os.Stdout.WriteString(code.Dump() + "\n")
+	}
+
+	return code, err
+}
+
+// writer holds the state used while turning a RegexTree into a Code value.
+type writer struct {
+ emitted []int // opcodes and operands stored by emit, emit1 and emit2
+
+ intStack []int // integer stack used through pushInt/popInt
+ curpos int // index of the next free slot in emitted
+ stringhash map[string]int // string -> index into stringtable
+ stringtable [][]rune // distinct strings, in order of first use
+ sethash map[string]int // set hash -> index into settable
+ settable []*CharSet // distinct sets, in order of first use
+ counting bool // true while only sizing the output; nothing is stored
+ count int // number of ints needed, accumulated while counting
+ trackcount int // ops for which opcodeBacktracks is true, accumulated while counting
+ caps map[int]int // capture number mapping consulted by mapCapnum; nil when unused
+}
+
+const (
+ // beforeChild and afterChild are OR'ed into a node type to tell
+ // emitFragment whether it is called before or after visiting a child.
+ beforeChild nodeType = 64
+ afterChild = 128
+ // MaxPrefixSize is the largest number of runes we'll use for a Boyer-Moore prefix
+ MaxPrefixSize = 50
+)
+
+// codeFromTree is the top level code generator. It does a depth-first walk
+// through the tree and calls emitFragment to emit code before and after each
+// child of an interior node, and at each leaf.
+//
+// It runs two passes, first to count the size of the generated
+// code, and second to generate the code.
+//
+// We should time it against the alternative, which is
+// to just generate the code and grow the array as we go.
+//
+// It returns the assembled Code, or a nil Code and the error reported by
+// emitFragment when the tree contains a node type that cannot be emitted.
+func (w *writer) codeFromTree(tree *RegexTree) (*Code, error) {
+	var (
+		curNode  *regexNode
+		curChild int
+		capsize  int
+	)
+
+	// construct sparse capnum mapping if some numbers are unused
+	if tree.capnumlist == nil || tree.captop == len(tree.capnumlist) {
+		capsize = tree.captop
+		w.caps = nil
+	} else {
+		capsize = len(tree.capnumlist)
+		w.caps = tree.caps
+		for i := 0; i < len(tree.capnumlist); i++ {
+			w.caps[tree.capnumlist[i]] = i
+		}
+	}
+
+	w.counting = true
+
+	for {
+		if !w.counting {
+			// Second pass: the first pass told us how much room is needed.
+			w.emitted = make([]int, w.count)
+		}
+
+		curNode = tree.root
+		curChild = 0
+
+		w.emit1(Lazybranch, 0)
+
+		for {
+			if len(curNode.children) == 0 {
+				if err := w.emitFragment(curNode.t, curNode, 0); err != nil {
+					return nil, err
+				}
+			} else if curChild < len(curNode.children) {
+				if err := w.emitFragment(curNode.t|beforeChild, curNode, curChild); err != nil {
+					return nil, err
+				}
+
+				curNode = curNode.children[curChild]
+
+				w.pushInt(curChild)
+				curChild = 0
+				continue
+			}
+
+			if w.emptyStack() {
+				break
+			}
+
+			curChild = w.popInt()
+			curNode = curNode.next
+
+			if err := w.emitFragment(curNode.t|afterChild, curNode, curChild); err != nil {
+				return nil, err
+			}
+			curChild++
+		}
+
+		// Point the leading Lazybranch at the final Stop.
+		w.patchJump(0, w.curPos())
+		w.emit(Stop)
+
+		if !w.counting {
+			break
+		}
+
+		w.counting = false
+	}
+
+	fcPrefix := getFirstCharsPrefix(tree)
+	prefix := getPrefix(tree)
+	rtl := (tree.options & RightToLeft) != 0
+
+	var bmPrefix *BmPrefix
+	//TODO: benchmark string prefixes
+	if prefix != nil && len(prefix.PrefixStr) > 0 && MaxPrefixSize > 0 {
+		if len(prefix.PrefixStr) > MaxPrefixSize {
+			// cap the prefix at MaxPrefixSize entries
+			prefix.PrefixStr = prefix.PrefixStr[:MaxPrefixSize]
+		}
+		bmPrefix = newBmPrefix(prefix.PrefixStr, prefix.CaseInsensitive, rtl)
+	}
+
+	return &Code{
+		Codes:       w.emitted,
+		Strings:     w.stringtable,
+		Sets:        w.settable,
+		TrackCount:  w.trackcount,
+		Caps:        w.caps,
+		Capsize:     capsize,
+		FcPrefix:    fcPrefix,
+		BmPrefix:    bmPrefix,
+		Anchors:     getAnchors(tree),
+		RightToLeft: rtl,
+	}, nil
+}
+
+// emitFragment emits the code for one step of the tree walk driven by
+// codeFromTree. nodetype is the node's type, OR'ed with beforeChild or
+// afterChild when the call surrounds a child of an interior node; curIndex
+// is the index of that child (0 for leaves).
+//
+// Jump targets that are not yet known are emitted as 0, their position is
+// pushed on the int stack, and a later call pops it and patches the jump.
+//
+// It returns an error when nodetype is not one of the cases handled below.
+func (w *writer) emitFragment(nodetype nodeType, node *regexNode, curIndex int) error {
+ bits := InstOp(0)
+
+ // Only node types up to ntRef get the Rtl/Ci modifier bits.
+ if nodetype <= ntRef {
+ if (node.options & RightToLeft) != 0 {
+ bits |= Rtl
+ }
+ if (node.options & IgnoreCase) != 0 {
+ bits |= Ci
+ }
+ }
+ ntBits := nodeType(bits)
+
+ switch nodetype {
+ case ntConcatenate | beforeChild, ntConcatenate | afterChild, ntEmpty:
+ break
+
+ case ntAlternate | beforeChild:
+ // Every branch but the last starts with a Lazybranch patched later.
+ if curIndex < len(node.children)-1 {
+ w.pushInt(w.curPos())
+ w.emit1(Lazybranch, 0)
+ }
+
+ case ntAlternate | afterChild:
+ if curIndex < len(node.children)-1 {
+ lbPos := w.popInt()
+ w.pushInt(w.curPos())
+ w.emit1(Goto, 0)
+ w.patchJump(lbPos, w.curPos())
+ } else {
+ // Last branch: patch the Goto pushed after each earlier branch.
+ for i := 0; i < curIndex; i++ {
+ w.patchJump(w.popInt(), w.curPos())
+ }
+ }
+ break
+
+ case ntTestref | beforeChild:
+ if curIndex == 0 {
+ w.emit(Setjump)
+ w.pushInt(w.curPos())
+ w.emit1(Lazybranch, 0)
+ w.emit1(Testref, w.mapCapnum(node.m))
+ w.emit(Forejump)
+ }
+
+ case ntTestref | afterChild:
+ if curIndex == 0 {
+ branchpos := w.popInt()
+ w.pushInt(w.curPos())
+ w.emit1(Goto, 0)
+ w.patchJump(branchpos, w.curPos())
+ w.emit(Forejump)
+ if len(node.children) <= 1 {
+ w.patchJump(w.popInt(), w.curPos())
+ }
+ } else if curIndex == 1 {
+ w.patchJump(w.popInt(), w.curPos())
+ }
+
+ case ntTestgroup | beforeChild:
+ if curIndex == 0 {
+ w.emit(Setjump)
+ w.emit(Setmark)
+ w.pushInt(w.curPos())
+ w.emit1(Lazybranch, 0)
+ }
+
+ case ntTestgroup | afterChild:
+ if curIndex == 0 {
+ w.emit(Getmark)
+ w.emit(Forejump)
+ } else if curIndex == 1 {
+ Branchpos := w.popInt()
+ w.pushInt(w.curPos())
+ w.emit1(Goto, 0)
+ w.patchJump(Branchpos, w.curPos())
+ w.emit(Getmark)
+ w.emit(Forejump)
+ if len(node.children) <= 2 {
+ w.patchJump(w.popInt(), w.curPos())
+ }
+ } else if curIndex == 2 {
+ w.patchJump(w.popInt(), w.curPos())
+ }
+
+ case ntLoop | beforeChild, ntLazyloop | beforeChild:
+
+ // Loops with a finite maximum or a minimum above 1 use the counting
+ // opcodes; the remaining loops use the mark opcodes.
+ if node.n < math.MaxInt32 || node.m > 1 {
+ if node.m == 0 {
+ w.emit1(Nullcount, 0)
+ } else {
+ w.emit1(Setcount, 1-node.m)
+ }
+ } else if node.m == 0 {
+ w.emit(Nullmark)
+ } else {
+ w.emit(Setmark)
+ }
+
+ if node.m == 0 {
+ w.pushInt(w.curPos())
+ w.emit1(Goto, 0)
+ }
+ w.pushInt(w.curPos())
+
+ case ntLoop | afterChild, ntLazyloop | afterChild:
+
+ startJumpPos := w.curPos()
+ // lazy is 0 for ntLoop and (ntLazyloop - ntLoop) for ntLazyloop; it is
+ // added to the branch opcode below (presumably selecting the lazy
+ // variant of that opcode - confirm against the opcode table).
+ lazy := (nodetype - (ntLoop | afterChild))
+
+ if node.n < math.MaxInt32 || node.m > 1 {
+ if node.n == math.MaxInt32 {
+ w.emit2(InstOp(Branchcount+lazy), w.popInt(), math.MaxInt32)
+ } else {
+ w.emit2(InstOp(Branchcount+lazy), w.popInt(), node.n-node.m)
+ }
+ } else {
+ w.emit1(InstOp(Branchmark+lazy), w.popInt())
+ }
+
+ if node.m == 0 {
+ w.patchJump(w.popInt(), startJumpPos)
+ }
+
+ case ntGroup | beforeChild, ntGroup | afterChild:
+
+ case ntCapture | beforeChild:
+ w.emit(Setmark)
+
+ case ntCapture | afterChild:
+ w.emit2(Capturemark, w.mapCapnum(node.m), w.mapCapnum(node.n))
+
+ case ntRequire | beforeChild:
+ // NOTE: the following line causes lookahead/lookbehind to be
+ // NON-BACKTRACKING. It can be commented out with (*)
+ w.emit(Setjump)
+
+ w.emit(Setmark)
+
+ case ntRequire | afterChild:
+ w.emit(Getmark)
+
+ // NOTE: the following line causes lookahead/lookbehind to be
+ // NON-BACKTRACKING. It can be commented out with (*)
+ w.emit(Forejump)
+
+ case ntPrevent | beforeChild:
+ w.emit(Setjump)
+ w.pushInt(w.curPos())
+ w.emit1(Lazybranch, 0)
+
+ case ntPrevent | afterChild:
+ w.emit(Backjump)
+ w.patchJump(w.popInt(), w.curPos())
+ w.emit(Forejump)
+
+ case ntGreedy | beforeChild:
+ w.emit(Setjump)
+
+ case ntGreedy | afterChild:
+ w.emit(Forejump)
+
+ case ntOne, ntNotone:
+ w.emit1(InstOp(node.t|ntBits), int(node.ch))
+
+ case ntNotoneloop, ntNotonelazy, ntOneloop, ntOnelazy:
+ // The mandatory node.m repetitions are emitted as a fixed rep; the
+ // optional remainder, if any, as the loop/lazy opcode itself.
+ if node.m > 0 {
+ if node.t == ntOneloop || node.t == ntOnelazy {
+ w.emit2(Onerep|bits, int(node.ch), node.m)
+ } else {
+ w.emit2(Notonerep|bits, int(node.ch), node.m)
+ }
+ }
+ if node.n > node.m {
+ if node.n == math.MaxInt32 {
+ w.emit2(InstOp(node.t|ntBits), int(node.ch), math.MaxInt32)
+ } else {
+ w.emit2(InstOp(node.t|ntBits), int(node.ch), node.n-node.m)
+ }
+ }
+
+ case ntSetloop, ntSetlazy:
+ // Same split as above, for sets.
+ if node.m > 0 {
+ w.emit2(Setrep|bits, w.setCode(node.set), node.m)
+ }
+ if node.n > node.m {
+ if node.n == math.MaxInt32 {
+ w.emit2(InstOp(node.t|ntBits), w.setCode(node.set), math.MaxInt32)
+ } else {
+ w.emit2(InstOp(node.t|ntBits), w.setCode(node.set), node.n-node.m)
+ }
+ }
+
+ case ntMulti:
+ w.emit1(InstOp(node.t|ntBits), w.stringCode(node.str))
+
+ case ntSet:
+ w.emit1(InstOp(node.t|ntBits), w.setCode(node.set))
+
+ case ntRef:
+ w.emit1(InstOp(node.t|ntBits), w.mapCapnum(node.m))
+
+ case ntNothing, ntBol, ntEol, ntBoundary, ntNonboundary, ntECMABoundary, ntNonECMABoundary, ntBeginning, ntStart, ntEndZ, ntEnd:
+ // These node types are emitted as the opcode with the same value.
+ w.emit(InstOp(node.t))
+
+ default:
+ return fmt.Errorf("unexpected opcode in regular expression generation: %v", nodetype)
+ }
+
+ return nil
+}
+
+// pushInt pushes i onto the writer's integer stack. The stack is what lets
+// the tree walk and jump patching run without recursion.
+func (w *writer) pushInt(i int) {
+ w.intStack = append(w.intStack, i)
+}
+
+// emptyStack reports whether the writer's integer stack holds no values.
+func (w *writer) emptyStack() bool {
+ return len(w.intStack) == 0
+}
+
+// popInt removes the most recently pushed value from the writer's integer
+// stack and returns it. Calling it on an empty stack panics on the index
+// expression.
+func (w *writer) popInt() int {
+	last := len(w.intStack) - 1
+	top := w.intStack[last]
+	w.intStack = w.intStack[:last]
+	return top
+}
+
+// curPos returns the current position in the emitted code, i.e. the index
+// at which the next emitted value will be stored.
+func (w *writer) curPos() int {
+ return w.curpos
+}
+
+// patchJump fixes up the jump instruction at the specified offset so that
+// it jumps to jumpDest. The destination is stored in the operand slot that
+// follows the opcode, at offset+1.
+func (w *writer) patchJump(offset, jumpDest int) {
+ w.emitted[offset+1] = jumpDest
+}
+
+// Returns an index in the set table for a charset
+// uses a map to eliminate duplicates.
+func (w *writer) setCode(set *CharSet) int {
+ if w.counting {
+ return 0
+ }
+
+ buf := &bytes.Buffer{}
+
+ set.mapHashFill(buf)
+ hash := buf.String()
+ i, ok := w.sethash[hash]
+ if !ok {
+ i = len(w.sethash)
+ w.sethash[hash] = i
+ w.settable = append(w.settable, set)
+ }
+ return i
+}
+
+// stringCode returns the index of str in the writer's string table,
+// appending it when the same text has not been recorded yet. The string
+// form of str is the de-duplication key.
+//
+// While the writer is only counting, nothing is recorded and 0 is returned.
+func (w *writer) stringCode(str []rune) int {
+	if w.counting {
+		return 0
+	}
+
+	key := string(str)
+	if idx, seen := w.stringhash[key]; seen {
+		return idx
+	}
+
+	idx := len(w.stringhash)
+	w.stringhash[key] = idx
+	w.stringtable = append(w.stringtable, str)
+	return idx
+}
+
+// mapCapnum translates a capture number into the slot index used by the
+// generated code. When a regex uses a sparse set of capture numbers they are
+// mapped to a dense set of indices at compile time, here, instead of at
+// match time.
+//
+// -1 is passed through unchanged; without a caps map the capture number is
+// already the slot index.
+func (w *writer) mapCapnum(capnum int) int {
+	switch {
+	case capnum == -1:
+		return -1
+	case w.caps != nil:
+		return w.caps[capnum]
+	default:
+		return capnum
+	}
+}
+
+// emit emits a zero-argument operation. Like all emit functions it runs in
+// two modes: while counting it only adds the size of the instruction (1) to
+// count and bumps trackcount for opcodes that backtrack; otherwise it stores
+// the opcode at the current position and advances.
+func (w *writer) emit(op InstOp) {
+	if !w.counting {
+		w.emitted[w.curpos] = int(op)
+		w.curpos++
+		return
+	}
+
+	w.count++
+	if opcodeBacktracks(op) {
+		w.trackcount++
+	}
+}
+
+// emit1 emits a one-argument operation. While counting it only adds the
+// size of the instruction (2) to count and bumps trackcount for opcodes that
+// backtrack; otherwise it stores the opcode followed by opd1 and advances
+// the current position past both.
+func (w *writer) emit1(op InstOp, opd1 int) {
+	if !w.counting {
+		pos := w.curpos
+		w.emitted[pos] = int(op)
+		w.emitted[pos+1] = opd1
+		w.curpos = pos + 2
+		return
+	}
+
+	w.count += 2
+	if opcodeBacktracks(op) {
+		w.trackcount++
+	}
+}
+
+// emit2 emits a two-argument operation. While counting it only adds the
+// size of the instruction (3) to count and bumps trackcount for opcodes that
+// backtrack; otherwise it stores the opcode followed by opd1 and opd2 and
+// advances the current position past all three.
+func (w *writer) emit2(op InstOp, opd1, opd2 int) {
+	if !w.counting {
+		pos := w.curpos
+		w.emitted[pos] = int(op)
+		w.emitted[pos+1] = opd1
+		w.emitted[pos+2] = opd2
+		w.curpos = pos + 3
+		return
+	}
+
+	w.count += 3
+	if opcodeBacktracks(op) {
+		w.trackcount++
+	}
+}
diff --git a/vendor/github.com/dlclark/regexp2/testoutput1 b/vendor/github.com/dlclark/regexp2/testoutput1
new file mode 100644
index 0000000000..fbf63fdf2f
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/testoutput1
@@ -0,0 +1,7061 @@
+# This set of tests is for features that are compatible with all versions of
+# Perl >= 5.10, in non-UTF mode. It should run clean for the 8-bit, 16-bit, and
+# 32-bit PCRE libraries, and also using the perltest.pl script.
+
+#forbid_utf
+#newline_default lf any anycrlf
+#perltest
+
+/the quick brown fox/
+ the quick brown fox
+ 0: the quick brown fox
+ What do you know about the quick brown fox?
+ 0: the quick brown fox
+\= Expect no match
+ The quick brown FOX
+No match
+ What do you know about THE QUICK BROWN FOX?
+No match
+
+/The quick brown fox/i
+ the quick brown fox
+ 0: the quick brown fox
+ The quick brown FOX
+ 0: The quick brown FOX
+ What do you know about the quick brown fox?
+ 0: the quick brown fox
+ What do you know about THE QUICK BROWN FOX?
+ 0: THE QUICK BROWN FOX
+
+/abcd\t\n\r\f\a\e\071\x3b\$\\\?caxyz/
+ abcd\t\n\r\f\a\e9;\$\\?caxyz
+ 0: abcd\x09\x0a\x0d\x0c\x07\x1b9;$\?caxyz
+
+/a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz/
+ abxyzpqrrrabbxyyyypqAzz
+ 0: abxyzpqrrrabbxyyyypqAzz
+ abxyzpqrrrabbxyyyypqAzz
+ 0: abxyzpqrrrabbxyyyypqAzz
+ aabxyzpqrrrabbxyyyypqAzz
+ 0: aabxyzpqrrrabbxyyyypqAzz
+ aaabxyzpqrrrabbxyyyypqAzz
+ 0: aaabxyzpqrrrabbxyyyypqAzz
+ aaaabxyzpqrrrabbxyyyypqAzz
+ 0: aaaabxyzpqrrrabbxyyyypqAzz
+ abcxyzpqrrrabbxyyyypqAzz
+ 0: abcxyzpqrrrabbxyyyypqAzz
+ aabcxyzpqrrrabbxyyyypqAzz
+ 0: aabcxyzpqrrrabbxyyyypqAzz
+ aaabcxyzpqrrrabbxyyyypAzz
+ 0: aaabcxyzpqrrrabbxyyyypAzz
+ aaabcxyzpqrrrabbxyyyypqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqAzz
+ aaabcxyzpqrrrabbxyyyypqqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqqAzz
+ aaabcxyzpqrrrabbxyyyypqqqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqqqAzz
+ aaabcxyzpqrrrabbxyyyypqqqqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqqqqAzz
+ aaabcxyzpqrrrabbxyyyypqqqqqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqqqqqAzz
+ aaabcxyzpqrrrabbxyyyypqqqqqqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqqqqqqAzz
+ aaaabcxyzpqrrrabbxyyyypqAzz
+ 0: aaaabcxyzpqrrrabbxyyyypqAzz
+ abxyzzpqrrrabbxyyyypqAzz
+ 0: abxyzzpqrrrabbxyyyypqAzz
+ aabxyzzzpqrrrabbxyyyypqAzz
+ 0: aabxyzzzpqrrrabbxyyyypqAzz
+ aaabxyzzzzpqrrrabbxyyyypqAzz
+ 0: aaabxyzzzzpqrrrabbxyyyypqAzz
+ aaaabxyzzzzpqrrrabbxyyyypqAzz
+ 0: aaaabxyzzzzpqrrrabbxyyyypqAzz
+ abcxyzzpqrrrabbxyyyypqAzz
+ 0: abcxyzzpqrrrabbxyyyypqAzz
+ aabcxyzzzpqrrrabbxyyyypqAzz
+ 0: aabcxyzzzpqrrrabbxyyyypqAzz
+ aaabcxyzzzzpqrrrabbxyyyypqAzz
+ 0: aaabcxyzzzzpqrrrabbxyyyypqAzz
+ aaaabcxyzzzzpqrrrabbxyyyypqAzz
+ 0: aaaabcxyzzzzpqrrrabbxyyyypqAzz
+ aaaabcxyzzzzpqrrrabbbxyyyypqAzz
+ 0: aaaabcxyzzzzpqrrrabbbxyyyypqAzz
+ aaaabcxyzzzzpqrrrabbbxyyyyypqAzz
+ 0: aaaabcxyzzzzpqrrrabbbxyyyyypqAzz
+ aaabcxyzpqrrrabbxyyyypABzz
+ 0: aaabcxyzpqrrrabbxyyyypABzz
+ aaabcxyzpqrrrabbxyyyypABBzz
+ 0: aaabcxyzpqrrrabbxyyyypABBzz
+ >>>aaabxyzpqrrrabbxyyyypqAzz
+ 0: aaabxyzpqrrrabbxyyyypqAzz
+ >aaaabxyzpqrrrabbxyyyypqAzz
+ 0: aaaabxyzpqrrrabbxyyyypqAzz
+ >>>>abcxyzpqrrrabbxyyyypqAzz
+ 0: abcxyzpqrrrabbxyyyypqAzz
+\= Expect no match
+ abxyzpqrrabbxyyyypqAzz
+No match
+ abxyzpqrrrrabbxyyyypqAzz
+No match
+ abxyzpqrrrabxyyyypqAzz
+No match
+ aaaabcxyzzzzpqrrrabbbxyyyyyypqAzz
+No match
+ aaaabcxyzzzzpqrrrabbbxyyypqAzz
+No match
+ aaabcxyzpqrrrabbxyyyypqqqqqqqAzz
+No match
+
+/^(abc){1,2}zz/
+ abczz
+ 0: abczz
+ 1: abc
+ abcabczz
+ 0: abcabczz
+ 1: abc
+\= Expect no match
+ zz
+No match
+ abcabcabczz
+No match
+ >>abczz
+No match
+
+/^(b+?|a){1,2}?c/
+ bc
+ 0: bc
+ 1: b
+ bbc
+ 0: bbc
+ 1: b
+ bbbc
+ 0: bbbc
+ 1: bb
+ bac
+ 0: bac
+ 1: a
+ bbac
+ 0: bbac
+ 1: a
+ aac
+ 0: aac
+ 1: a
+ abbbbbbbbbbbc
+ 0: abbbbbbbbbbbc
+ 1: bbbbbbbbbbb
+ bbbbbbbbbbbac
+ 0: bbbbbbbbbbbac
+ 1: a
+\= Expect no match
+ aaac
+No match
+ abbbbbbbbbbbac
+No match
+
+/^(b+|a){1,2}c/
+ bc
+ 0: bc
+ 1: b
+ bbc
+ 0: bbc
+ 1: bb
+ bbbc
+ 0: bbbc
+ 1: bbb
+ bac
+ 0: bac
+ 1: a
+ bbac
+ 0: bbac
+ 1: a
+ aac
+ 0: aac
+ 1: a
+ abbbbbbbbbbbc
+ 0: abbbbbbbbbbbc
+ 1: bbbbbbbbbbb
+ bbbbbbbbbbbac
+ 0: bbbbbbbbbbbac
+ 1: a
+\= Expect no match
+ aaac
+No match
+ abbbbbbbbbbbac
+No match
+
+/^(b+|a){1,2}?bc/
+ bbc
+ 0: bbc
+ 1: b
+
+/^(b*|ba){1,2}?bc/
+ babc
+ 0: babc
+ 1: ba
+ bbabc
+ 0: bbabc
+ 1: ba
+ bababc
+ 0: bababc
+ 1: ba
+\= Expect no match
+ bababbc
+No match
+ babababc
+No match
+
+/^(ba|b*){1,2}?bc/
+ babc
+ 0: babc
+ 1: ba
+ bbabc
+ 0: bbabc
+ 1: ba
+ bababc
+ 0: bababc
+ 1: ba
+\= Expect no match
+ bababbc
+No match
+ babababc
+No match
+
+#/^\ca\cA\c[;\c:/
+# \x01\x01\e;z
+# 0: \x01\x01\x1b;z
+
+/^[ab\]cde]/
+ athing
+ 0: a
+ bthing
+ 0: b
+ ]thing
+ 0: ]
+ cthing
+ 0: c
+ dthing
+ 0: d
+ ething
+ 0: e
+\= Expect no match
+ fthing
+No match
+ [thing
+No match
+ \\thing
+No match
+
+/^[]cde]/
+ ]thing
+ 0: ]
+ cthing
+ 0: c
+ dthing
+ 0: d
+ ething
+ 0: e
+\= Expect no match
+ athing
+No match
+ fthing
+No match
+
+/^[^ab\]cde]/
+ fthing
+ 0: f
+ [thing
+ 0: [
+ \\thing
+ 0: \
+\= Expect no match
+ athing
+No match
+ bthing
+No match
+ ]thing
+No match
+ cthing
+No match
+ dthing
+No match
+ ething
+No match
+
+/^[^]cde]/
+ athing
+ 0: a
+ fthing
+ 0: f
+\= Expect no match
+ ]thing
+No match
+ cthing
+No match
+ dthing
+No match
+ ething
+No match
+
+# DLC - I don't get this one
+#/^\/
+# 
+# 0: \x81
+
+#updated to handle 16-bits utf8
+/^ÿ/
+ ÿ
+ 0: \xc3\xbf
+
+/^[0-9]+$/
+ 0
+ 0: 0
+ 1
+ 0: 1
+ 2
+ 0: 2
+ 3
+ 0: 3
+ 4
+ 0: 4
+ 5
+ 0: 5
+ 6
+ 0: 6
+ 7
+ 0: 7
+ 8
+ 0: 8
+ 9
+ 0: 9
+ 10
+ 0: 10
+ 100
+ 0: 100
+\= Expect no match
+ abc
+No match
+
+/^.*nter/
+ enter
+ 0: enter
+ inter
+ 0: inter
+ uponter
+ 0: uponter
+
+/^xxx[0-9]+$/
+ xxx0
+ 0: xxx0
+ xxx1234
+ 0: xxx1234
+\= Expect no match
+ xxx
+No match
+
+/^.+[0-9][0-9][0-9]$/
+ x123
+ 0: x123
+ x1234
+ 0: x1234
+ xx123
+ 0: xx123
+ 123456
+ 0: 123456
+\= Expect no match
+ 123
+No match
+
+/^.+?[0-9][0-9][0-9]$/
+ x123
+ 0: x123
+ x1234
+ 0: x1234
+ xx123
+ 0: xx123
+ 123456
+ 0: 123456
+\= Expect no match
+ 123
+No match
+
+/^([^!]+)!(.+)=apquxz\.ixr\.zzz\.ac\.uk$/
+ abc!pqr=apquxz.ixr.zzz.ac.uk
+ 0: abc!pqr=apquxz.ixr.zzz.ac.uk
+ 1: abc
+ 2: pqr
+\= Expect no match
+ !pqr=apquxz.ixr.zzz.ac.uk
+No match
+ abc!=apquxz.ixr.zzz.ac.uk
+No match
+ abc!pqr=apquxz:ixr.zzz.ac.uk
+No match
+ abc!pqr=apquxz.ixr.zzz.ac.ukk
+No match
+
+/:/
+ Well, we need a colon: somewhere
+ 0: :
+\= Expect no match
+ Fail without a colon
+No match
+
+/([\da-f:]+)$/i
+ 0abc
+ 0: 0abc
+ 1: 0abc
+ abc
+ 0: abc
+ 1: abc
+ fed
+ 0: fed
+ 1: fed
+ E
+ 0: E
+ 1: E
+ ::
+ 0: ::
+ 1: ::
+ 5f03:12C0::932e
+ 0: 5f03:12C0::932e
+ 1: 5f03:12C0::932e
+ fed def
+ 0: def
+ 1: def
+ Any old stuff
+ 0: ff
+ 1: ff
+\= Expect no match
+ 0zzz
+No match
+ gzzz
+No match
+ fed\x20
+No match
+ Any old rubbish
+No match
+
+/^.*\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/
+ .1.2.3
+ 0: .1.2.3
+ 1: 1
+ 2: 2
+ 3: 3
+ A.12.123.0
+ 0: A.12.123.0
+ 1: 12
+ 2: 123
+ 3: 0
+\= Expect no match
+ .1.2.3333
+No match
+ 1.2.3
+No match
+ 1234.2.3
+No match
+
+/^(\d+)\s+IN\s+SOA\s+(\S+)\s+(\S+)\s*\(\s*$/
+ 1 IN SOA non-sp1 non-sp2(
+ 0: 1 IN SOA non-sp1 non-sp2(
+ 1: 1
+ 2: non-sp1
+ 3: non-sp2
+ 1 IN SOA non-sp1 non-sp2 (
+ 0: 1 IN SOA non-sp1 non-sp2 (
+ 1: 1
+ 2: non-sp1
+ 3: non-sp2
+\= Expect no match
+ 1IN SOA non-sp1 non-sp2(
+No match
+
+/^[a-zA-Z\d][a-zA-Z\d\-]*(\.[a-zA-Z\d][a-zA-z\d\-]*)*\.$/
+ a.
+ 0: a.
+ Z.
+ 0: Z.
+ 2.
+ 0: 2.
+ ab-c.pq-r.
+ 0: ab-c.pq-r.
+ 1: .pq-r
+ sxk.zzz.ac.uk.
+ 0: sxk.zzz.ac.uk.
+ 1: .uk
+ x-.y-.
+ 0: x-.y-.
+ 1: .y-
+\= Expect no match
+ -abc.peq.
+No match
+
+/^\*\.[a-z]([a-z\-\d]*[a-z\d]+)?(\.[a-z]([a-z\-\d]*[a-z\d]+)?)*$/
+ *.a
+ 0: *.a
+ *.b0-a
+ 0: *.b0-a
+ 1: 0-a
+ *.c3-b.c
+ 0: *.c3-b.c
+ 1: 3-b
+ 2: .c
+ *.c-a.b-c
+ 0: *.c-a.b-c
+ 1: -a
+ 2: .b-c
+ 3: -c
+\= Expect no match
+ *.0
+No match
+ *.a-
+No match
+ *.a-b.c-
+No match
+ *.c-a.0-c
+No match
+
+/^(?=ab(de))(abd)(e)/
+ abde
+ 0: abde
+ 1: de
+ 2: abd
+ 3: e
+
+/^(?!(ab)de|x)(abd)(f)/
+ abdf
+ 0: abdf
+ 1: <unset>
+ 2: abd
+ 3: f
+
+/^(?=(ab(cd)))(ab)/
+ abcd
+ 0: ab
+ 1: abcd
+ 2: cd
+ 3: ab
+
+/^[\da-f](\.[\da-f])*$/i
+ a.b.c.d
+ 0: a.b.c.d
+ 1: .d
+ A.B.C.D
+ 0: A.B.C.D
+ 1: .D
+ a.b.c.1.2.3.C
+ 0: a.b.c.1.2.3.C
+ 1: .C
+
+/^\".*\"\s*(;.*)?$/
+ \"1234\"
+ 0: "1234"
+ \"abcd\" ;
+ 0: "abcd" ;
+ 1: ;
+ \"\" ; rhubarb
+ 0: "" ; rhubarb
+ 1: ; rhubarb
+\= Expect no match
+ \"1234\" : things
+No match
+
+/^$/
+ \
+ 0:
+\= Expect no match
+ A non-empty line
+No match
+
+/ ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/x
+ ab c
+ 0: ab c
+\= Expect no match
+ abc
+No match
+ ab cde
+No match
+
+/(?x) ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/
+ ab c
+ 0: ab c
+\= Expect no match
+ abc
+No match
+ ab cde
+No match
+
+/^ a\ b[c ]d $/x
+ a bcd
+ 0: a bcd
+ a b d
+ 0: a b d
+\= Expect no match
+ abcd
+No match
+ ab d
+No match
+
+/^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$/
+ abcdefhijklm
+ 0: abcdefhijklm
+ 1: abc
+ 2: bc
+ 3: c
+ 4: def
+ 5: ef
+ 6: f
+ 7: hij
+ 8: ij
+ 9: j
+10: klm
+11: lm
+12: m
+
+/^(?:a(b(c)))(?:d(e(f)))(?:h(i(j)))(?:k(l(m)))$/
+ abcdefhijklm
+ 0: abcdefhijklm
+ 1: bc
+ 2: c
+ 3: ef
+ 4: f
+ 5: ij
+ 6: j
+ 7: lm
+ 8: m
+
+#/^[\w][\W][\s][\S][\d][\D][\b][\n][\c]][\022]/
+# a+ Z0+\x08\n\x1d\x12
+# 0: a+ Z0+\x08\x0a\x1d\x12
+
+/^[.^$|()*+?{,}]+/
+ .^\$(*+)|{?,?}
+ 0: .^$(*+)|{?,?}
+
+/^a*\w/
+ z
+ 0: z
+ az
+ 0: az
+ aaaz
+ 0: aaaz
+ a
+ 0: a
+ aa
+ 0: aa
+ aaaa
+ 0: aaaa
+ a+
+ 0: a
+ aa+
+ 0: aa
+
+/^a*?\w/
+ z
+ 0: z
+ az
+ 0: a
+ aaaz
+ 0: a
+ a
+ 0: a
+ aa
+ 0: a
+ aaaa
+ 0: a
+ a+
+ 0: a
+ aa+
+ 0: a
+
+/^a+\w/
+ az
+ 0: az
+ aaaz
+ 0: aaaz
+ aa
+ 0: aa
+ aaaa
+ 0: aaaa
+ aa+
+ 0: aa
+
+/^a+?\w/
+ az
+ 0: az
+ aaaz
+ 0: aa
+ aa
+ 0: aa
+ aaaa
+ 0: aa
+ aa+
+ 0: aa
+
+/^\d{8}\w{2,}/
+ 1234567890
+ 0: 1234567890
+ 12345678ab
+ 0: 12345678ab
+ 12345678__
+ 0: 12345678__
+\= Expect no match
+ 1234567
+No match
+
+/^[aeiou\d]{4,5}$/
+ uoie
+ 0: uoie
+ 1234
+ 0: 1234
+ 12345
+ 0: 12345
+ aaaaa
+ 0: aaaaa
+\= Expect no match
+ 123456
+No match
+
+/^[aeiou\d]{4,5}?/
+ uoie
+ 0: uoie
+ 1234
+ 0: 1234
+ 12345
+ 0: 1234
+ aaaaa
+ 0: aaaa
+ 123456
+ 0: 1234
+
+/\A(abc|def)=(\1){2,3}\Z/
+ abc=abcabc
+ 0: abc=abcabc
+ 1: abc
+ 2: abc
+ def=defdefdef
+ 0: def=defdefdef
+ 1: def
+ 2: def
+\= Expect no match
+ abc=defdef
+No match
+
+/^(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\11*(\3\4)\1(?#)2$/
+ abcdefghijkcda2
+ 0: abcdefghijkcda2
+ 1: a
+ 2: b
+ 3: c
+ 4: d
+ 5: e
+ 6: f
+ 7: g
+ 8: h
+ 9: i
+10: j
+11: k
+12: cd
+ abcdefghijkkkkcda2
+ 0: abcdefghijkkkkcda2
+ 1: a
+ 2: b
+ 3: c
+ 4: d
+ 5: e
+ 6: f
+ 7: g
+ 8: h
+ 9: i
+10: j
+11: k
+12: cd
+
+/(cat(a(ract|tonic)|erpillar)) \1()2(3)/
+ cataract cataract23
+ 0: cataract cataract23
+ 1: cataract
+ 2: aract
+ 3: ract
+ 4:
+ 5: 3
+ catatonic catatonic23
+ 0: catatonic catatonic23
+ 1: catatonic
+ 2: atonic
+ 3: tonic
+ 4:
+ 5: 3
+ caterpillar caterpillar23
+ 0: caterpillar caterpillar23
+ 1: caterpillar
+ 2: erpillar
+ 3: <unset>
+ 4:
+ 5: 3
+
+
+/^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/
+ From abcd Mon Sep 01 12:33:02 1997
+ 0: From abcd Mon Sep 01 12:33
+ 1: abcd
+
+/^From\s+\S+\s+([a-zA-Z]{3}\s+){2}\d{1,2}\s+\d\d:\d\d/
+ From abcd Mon Sep 01 12:33:02 1997
+ 0: From abcd Mon Sep 01 12:33
+ 1: Sep
+ From abcd Mon Sep 1 12:33:02 1997
+ 0: From abcd Mon Sep 1 12:33
+ 1: Sep
+\= Expect no match
+ From abcd Sep 01 12:33:02 1997
+No match
+
+/^12.34/s
+ 12\n34
+ 0: 12\x0a34
+ 12\r34
+ 0: 12\x0d34
+
+/\w+(?=\t)/
+ the quick brown\t fox
+ 0: brown
+
+/foo(?!bar)(.*)/
+ foobar is foolish see?
+ 0: foolish see?
+ 1: lish see?
+
+/(?:(?!foo)...|^.{0,2})bar(.*)/
+ foobar crowbar etc
+ 0: rowbar etc
+ 1: etc
+ barrel
+ 0: barrel
+ 1: rel
+ 2barrel
+ 0: 2barrel
+ 1: rel
+ A barrel
+ 0: A barrel
+ 1: rel
+
+/^(\D*)(?=\d)(?!123)/
+ abc456
+ 0: abc
+ 1: abc
+\= Expect no match
+ abc123
+No match
+
+/^1234(?# test newlines
+ inside)/
+ 1234
+ 0: 1234
+
+/^1234 #comment in extended re
+ /x
+ 1234
+ 0: 1234
+
+/#rhubarb
+ abcd/x
+ abcd
+ 0: abcd
+
+/^abcd#rhubarb/x
+ abcd
+ 0: abcd
+
+/^(a)\1{2,3}(.)/
+ aaab
+ 0: aaab
+ 1: a
+ 2: b
+ aaaab
+ 0: aaaab
+ 1: a
+ 2: b
+ aaaaab
+ 0: aaaaa
+ 1: a
+ 2: a
+ aaaaaab
+ 0: aaaaa
+ 1: a
+ 2: a
+
+/(?!^)abc/
+ the abc
+ 0: abc
+\= Expect no match
+ abc
+No match
+
+/(?=^)abc/
+ abc
+ 0: abc
+\= Expect no match
+ the abc
+No match
+
+/^[ab]{1,3}(ab*|b)/
+ aabbbbb
+ 0: aabb
+ 1: b
+
+/^[ab]{1,3}?(ab*|b)/
+ aabbbbb
+ 0: aabbbbb
+ 1: abbbbb
+
+/^[ab]{1,3}?(ab*?|b)/
+ aabbbbb
+ 0: aa
+ 1: a
+
+/^[ab]{1,3}(ab*?|b)/
+ aabbbbb
+ 0: aabb
+ 1: b
+
+/ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* # optional leading comment
+(?: (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) # initial word
+(?: (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) )* # further okay, if led by a period
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+# address
+| # or
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) # one word, optionally followed by....
+(?:
+[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
+\(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) | # comments, or...
+
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+# quoted strings
+)*
+< (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* # leading <
+(?: @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+
+(?: (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* , (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+)* # further okay, if led by comma
+: # closing colon
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* )? # optional route
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) # initial word
+(?: (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+" (?: # opening quote...
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote
+| # or
+\\ [^\x80-\xff] # Escaped something (something != CR)
+)* " # closing quote
+) )* # further okay, if led by a period
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* @ (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # initial subdomain
+(?: #
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* \. # if led by a period...
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* (?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+| \[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+) # ...further okay
+)*
+# address spec
+(?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* > # trailing >
+# name and address
+) (?: [\040\t] | \(
+(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
+\) )* # optional trailing comment
+/x
+ Alan Other <user\@dom.ain>
+ 0: Alan Other <user@dom.ain>
+ <user\@dom.ain>
+ 0: user@dom.ain
+ user\@dom.ain
+ 0: user@dom.ain
+ \"A. Other\" <user.1234\@dom.ain> (a comment)
+ 0: "A. Other" <user.1234@dom.ain> (a comment)
+ A. Other <user.1234\@dom.ain> (a comment)
+ 0: Other <user.1234@dom.ain> (a comment)
+ \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay
+ 0: "/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re.lay
+ A missing angle <user\@some.where
+ 0: user@some.where
+\= Expect no match
+ The quick brown fox
+No match
+
+/[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+# optional leading comment
+(?:
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+# Atom
+| # or
+" # "
+[^\\\x80-\xff\n\015"] * # normal
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
+" # "
+# Quoted string
+)
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+(?:
+\.
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+# Atom
+| # or
+" # "
+[^\\\x80-\xff\n\015"] * # normal
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
+" # "
+# Quoted string
+)
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+# additional words
+)*
+@
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+\[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+)
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+# optional trailing comments
+(?:
+\.
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+\[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+)
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+# optional trailing comments
+)*
+# address
+| # or
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+# Atom
+| # or
+" # "
+[^\\\x80-\xff\n\015"] * # normal
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
+" # "
+# Quoted string
+)
+# leading word
+[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] * # "normal" atoms and or spaces
+(?:
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+|
+" # "
+[^\\\x80-\xff\n\015"] * # normal
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
+" # "
+) # "special" comment or quoted string
+[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] * # more "normal"
+)*
+<
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+# <
+(?:
+@
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+\[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+)
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+# optional trailing comments
+(?:
+\.
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+\[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+)
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+# optional trailing comments
+)*
+(?: ,
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+@
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+\[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+)
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+# optional trailing comments
+(?:
+\.
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+\[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+)
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+# optional trailing comments
+)*
+)* # additional domains
+:
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+# optional trailing comments
+)? # optional route
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+# Atom
+| # or
+" # "
+[^\\\x80-\xff\n\015"] * # normal
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
+" # "
+# Quoted string
+)
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+(?:
+\.
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+# Atom
+| # or
+" # "
+[^\\\x80-\xff\n\015"] * # normal
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
+" # "
+# Quoted string
+)
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+# additional words
+)*
+@
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+\[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+)
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+# optional trailing comments
+(?:
+\.
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+(?:
+[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
+|
+\[ # [
+(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
+\] # ]
+)
+[\040\t]* # Nab whitespace.
+(?:
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: # (
+(?: \\ [^\x80-\xff] |
+\( # (
+[^\\\x80-\xff\n\015()] * # normal*
+(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
+\) # )
+) # special
+[^\\\x80-\xff\n\015()] * # normal*
+)* # )*
+\) # )
+[\040\t]* )* # If comment found, allow more spaces.
+# optional trailing comments
+)*
+# address spec
+> # >
+# name and address
+)
+/x
+ Alan Other <user\@dom.ain>
+ 0: Alan Other <user@dom.ain>
+ <user\@dom.ain>
+ 0: user@dom.ain
+ user\@dom.ain
+ 0: user@dom.ain
+ \"A. Other\" <user.1234\@dom.ain> (a comment)
+ 0: "A. Other" <user.1234@dom.ain>
+ A. Other <user.1234\@dom.ain> (a comment)
+ 0: Other <user.1234@dom.ain>
+ \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay
+ 0: "/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re.lay
+ A missing angle <user\@some.where
+ 0: user@some.where
+\= Expect no match
+ The quick brown fox
+No match
+
+/abc\0def\00pqr\000xyz\0000AB/
+ abc\0def\00pqr\000xyz\0000AB
+ 0: abc\x00def\x00pqr\x00xyz\x000AB
+ abc456 abc\0def\00pqr\000xyz\0000ABCDE
+ 0: abc\x00def\x00pqr\x00xyz\x000AB
+
+/abc\x0def\x00pqr\x000xyz\x0000AB/
+ abc\x0def\x00pqr\x000xyz\x0000AB
+ 0: abc\x0def\x00pqr\x000xyz\x0000AB
+ abc456 abc\x0def\x00pqr\x000xyz\x0000ABCDE
+ 0: abc\x0def\x00pqr\x000xyz\x0000AB
+
+/^[\000-\037]/
+ \0A
+ 0: \x00
+ \01B
+ 0: \x01
+ \037C
+ 0: \x1f
+
+#.NET doesn't do octal with 1 number
+
+/^(cow|)\1(bell)/
+ cowcowbell
+ 0: cowcowbell
+ 1: cow
+ 2: bell
+ bell
+ 0: bell
+ 1:
+ 2: bell
+\= Expect no match
+ cowbell
+No match
+
+/^\s/
+ \040abc
+ 0:
+ \x0cabc
+ 0: \x0c
+ \nabc
+ 0: \x0a
+ \rabc
+ 0: \x0d
+ \tabc
+ 0: \x09
+\= Expect no match
+ abc
+No match
+
+/^a b
+ c/x
+ abc
+ 0: abc
+
+/^(a|)\1*b/
+ ab
+ 0: ab
+ 1: a
+ aaaab
+ 0: aaaab
+ 1: a
+ b
+ 0: b
+ 1:
+\= Expect no match
+ acb
+No match
+
+/^(a|)\1+b/
+ aab
+ 0: aab
+ 1: a
+ aaaab
+ 0: aaaab
+ 1: a
+ b
+ 0: b
+ 1:
+\= Expect no match
+ ab
+No match
+
+/^(a|)\1?b/
+ ab
+ 0: ab
+ 1: a
+ aab
+ 0: aab
+ 1: a
+ b
+ 0: b
+ 1:
+\= Expect no match
+ acb
+No match
+
+/^(a|)\1{2}b/
+ aaab
+ 0: aaab
+ 1: a
+ b
+ 0: b
+ 1:
+\= Expect no match
+ ab
+No match
+ aab
+No match
+ aaaab
+No match
+
+/^(a|)\1{2,3}b/
+ aaab
+ 0: aaab
+ 1: a
+ aaaab
+ 0: aaaab
+ 1: a
+ b
+ 0: b
+ 1:
+\= Expect no match
+ ab
+No match
+ aab
+No match
+ aaaaab
+No match
+
+/ab{1,3}bc/
+ abbbbc
+ 0: abbbbc
+ abbbc
+ 0: abbbc
+ abbc
+ 0: abbc
+\= Expect no match
+ abc
+No match
+ abbbbbc
+No match
+
+/([^.]*)\.([^:]*):[T ]+(.*)/
+ track1.title:TBlah blah blah
+ 0: track1.title:TBlah blah blah
+ 1: track1
+ 2: title
+ 3: Blah blah blah
+
+/([^.]*)\.([^:]*):[T ]+(.*)/i
+ track1.title:TBlah blah blah
+ 0: track1.title:TBlah blah blah
+ 1: track1
+ 2: title
+ 3: Blah blah blah
+
+/([^.]*)\.([^:]*):[t ]+(.*)/i
+ track1.title:TBlah blah blah
+ 0: track1.title:TBlah blah blah
+ 1: track1
+ 2: title
+ 3: Blah blah blah
+
+/^[W-c]+$/
+ WXY_^abc
+ 0: WXY_^abc
+\= Expect no match
+ wxy
+No match
+
+/^[W-c]+$/i
+ WXY_^abc
+ 0: WXY_^abc
+ wxy_^ABC
+ 0: wxy_^ABC
+
+/^[\x3f-\x5F]+$/i
+ WXY_^abc
+ 0: WXY_^abc
+ wxy_^ABC
+ 0: wxy_^ABC
+
+/^abc$/m
+ abc
+ 0: abc
+ qqq\nabc
+ 0: abc
+ abc\nzzz
+ 0: abc
+ qqq\nabc\nzzz
+ 0: abc
+
+/^abc$/
+ abc
+ 0: abc
+\= Expect no match
+ qqq\nabc
+No match
+ abc\nzzz
+No match
+ qqq\nabc\nzzz
+No match
+
+/\Aabc\Z/m
+ abc
+ 0: abc
+ abc\n
+ 0: abc
+\= Expect no match
+ qqq\nabc
+No match
+ abc\nzzz
+No match
+ qqq\nabc\nzzz
+No match
+
+/\A(.)*\Z/s
+ abc\ndef
+ 0: abc\x0adef
+ 1: f
+
+/\A(.)*\Z/m
+\= Expect no match
+ abc\ndef
+No match
+
+/(?:b)|(?::+)/
+ b::c
+ 0: b
+ c::b
+ 0: ::
+
+/[-az]+/
+ az-
+ 0: az-
+\= Expect no match
+ b
+No match
+
+/[az-]+/
+ za-
+ 0: za-
+\= Expect no match
+ b
+No match
+
+/[a\-z]+/
+ a-z
+ 0: a-z
+\= Expect no match
+ b
+No match
+
+/[a-z]+/
+ abcdxyz
+ 0: abcdxyz
+
+/[\d-]+/
+ 12-34
+ 0: 12-34
+\= Expect no match
+ aaa
+No match
+
+/[\d-z]+/
+ 12-34z
+ 0: 12-34z
+\= Expect no match
+ aaa
+No match
+
+/\x5c/
+ \\
+ 0: \
+
+/\x20Z/
+ the Zoo
+ 0: Z
+\= Expect no match
+ Zulu
+No match
+
+/(abc)\1/i
+ abcabc
+ 0: abcabc
+ 1: abc
+ ABCabc
+ 0: ABCabc
+ 1: ABC
+ abcABC
+ 0: abcABC
+ 1: abc
+
+/abc$/
+ abc
+ 0: abc
+ abc\n
+ 0: abc
+\= Expect no match
+ abc\ndef
+No match
+
+/(abc)\123/
+ abc\x53
+ 0: abcS
+ 1: abc
+
+/(abc)\100/
+ abc\x40
+ 0: abc@
+ 1: abc
+ abc\100
+ 0: abc@
+ 1: abc
+
+/(abc)\1000/
+ abc\x400
+ 0: abc@0
+ 1: abc
+ abc\x40\x30
+ 0: abc@0
+ 1: abc
+ abc\1000
+ 0: abc@0
+ 1: abc
+ abc\100\x30
+ 0: abc@0
+ 1: abc
+ abc\100\060
+ 0: abc@0
+ 1: abc
+ abc\100\60
+ 0: abc@0
+ 1: abc
+
+/^(A)(B)(C)(D)(E)(F)(G)(H)(I)\8\9$/
+ ABCDEFGHIHI
+ 0: ABCDEFGHIHI
+ 1: A
+ 2: B
+ 3: C
+ 4: D
+ 5: E
+ 6: F
+ 7: G
+ 8: H
+ 9: I
+
+/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\12\123/
+ abcdefghijkllS
+ 0: abcdefghijkllS
+ 1: a
+ 2: b
+ 3: c
+ 4: d
+ 5: e
+ 6: f
+ 7: g
+ 8: h
+ 9: i
+10: j
+11: k
+12: l
+
+/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\12\123/
+ abcdefghijk\12S
+ 0: abcdefghijk\x0aS
+ 1: a
+ 2: b
+ 3: c
+ 4: d
+ 5: e
+ 6: f
+ 7: g
+ 8: h
+ 9: i
+10: j
+11: k
+
+/a{0}bc/
+ bc
+ 0: bc
+
+/(a|(bc)){0,0}?xyz/
+ xyz
+ 0: xyz
+
+/abc[\10]de/
+ abc\010de
+ 0: abc\x08de
+
+/abc[\1]de/
+ abc\1de
+ 0: abc\x01de
+
+/(abc)[\1]de/
+ abc\1de
+ 0: abc\x01de
+ 1: abc
+
+/(?s)a.b/
+ a\nb
+ 0: a\x0ab
+
+/^([^a])([^\b])([^c]*)([^d]{3,4})/
+ baNOTccccd
+ 0: baNOTcccc
+ 1: b
+ 2: a
+ 3: NOT
+ 4: cccc
+ baNOTcccd
+ 0: baNOTccc
+ 1: b
+ 2: a
+ 3: NOT
+ 4: ccc
+ baNOTccd
+ 0: baNOTcc
+ 1: b
+ 2: a
+ 3: NO
+ 4: Tcc
+ bacccd
+ 0: baccc
+ 1: b
+ 2: a
+ 3:
+ 4: ccc
+\= Expect no match
+ anything
+No match
+ b\bc
+No match
+ baccd
+No match
+
+/[^a]/
+ Abc
+ 0: A
+
+/[^a]/i
+ Abc
+ 0: b
+
+/[^a]+/
+ AAAaAbc
+ 0: AAA
+
+/[^a]+/i
+ AAAaAbc
+ 0: bc
+
+/[^a]+/
+ bbb\nccc
+ 0: bbb\x0accc
+
+/[^k]$/
+ abc
+ 0: c
+\= Expect no match
+ abk
+No match
+
+/[^k]{2,3}$/
+ abc
+ 0: abc
+ kbc
+ 0: bc
+ kabc
+ 0: abc
+\= Expect no match
+ abk
+No match
+ akb
+No match
+ akk
+No match
+
+/^\d{8,}\@.+[^k]$/
+ 12345678\@a.b.c.d
+ 0: 12345678@a.b.c.d
+ 123456789\@x.y.z
+ 0: 123456789@x.y.z
+\= Expect no match
+ 12345678\@x.y.uk
+No match
+ 1234567\@a.b.c.d
+No match
+
+/(a)\1{8,}/
+ aaaaaaaaa
+ 0: aaaaaaaaa
+ 1: a
+ aaaaaaaaaa
+ 0: aaaaaaaaaa
+ 1: a
+\= Expect no match
+ aaaaaaa
+No match
+
+/[^a]/
+ aaaabcd
+ 0: b
+ aaAabcd
+ 0: A
+
+/[^a]/i
+ aaaabcd
+ 0: b
+ aaAabcd
+ 0: b
+
+/[^az]/
+ aaaabcd
+ 0: b
+ aaAabcd
+ 0: A
+
+/[^az]/i
+ aaaabcd
+ 0: b
+ aaAabcd
+ 0: b
+
+# trimmed upper ascii since Go is UTF-8
+/\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037\040\041\042\043\044\045\046\047\050\051\052\053\054\055\056\057\060\061\062\063\064\065\066\067\070\071\072\073\074\075\076\077\100\101\102\103\104\105\106\107\110\111\112\113\114\115\116\117\120\121\122\123\124\125\126\127\130\131\132\133\134\135\136\137\140\141\142\143\144\145\146\147\150\151\152\153\154\155\156\157\160\161\162\163\164\165\166\167\170\171\172\173\174\175\176\177/
+ \000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037\040\041\042\043\044\045\046\047\050\051\052\053\054\055\056\057\060\061\062\063\064\065\066\067\070\071\072\073\074\075\076\077\100\101\102\103\104\105\106\107\110\111\112\113\114\115\116\117\120\121\122\123\124\125\126\127\130\131\132\133\134\135\136\137\140\141\142\143\144\145\146\147\150\151\152\153\154\155\156\157\160\161\162\163\164\165\166\167\170\171\172\173\174\175\176\177
+ 0: \x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f
+
+/P[^*]TAIRE[^*]{1,6}?LL/
+ xxxxxxxxxxxPSTAIREISLLxxxxxxxxx
+ 0: PSTAIREISLL
+
+/P[^*]TAIRE[^*]{1,}?LL/
+ xxxxxxxxxxxPSTAIREISLLxxxxxxxxx
+ 0: PSTAIREISLL
+
+/(\.\d\d[1-9]?)\d+/
+ 1.230003938
+ 0: .230003938
+ 1: .23
+ 1.875000282
+ 0: .875000282
+ 1: .875
+ 1.235
+ 0: .235
+ 1: .23
+
+/(\.\d\d((?=0)|\d(?=\d)))/
+ 1.230003938
+ 0: .23
+ 1: .23
+ 2:
+ 1.875000282
+ 0: .875
+ 1: .875
+ 2: 5
+\= Expect no match
+ 1.235
+No match
+
+/\b(foo)\s+(\w+)/i
+ Food is on the foo table
+ 0: foo table
+ 1: foo
+ 2: table
+
+/foo(.*)bar/
+ The food is under the bar in the barn.
+ 0: food is under the bar in the bar
+ 1: d is under the bar in the
+
+/foo(.*?)bar/
+ The food is under the bar in the barn.
+ 0: food is under the bar
+ 1: d is under the
+
+/(.*)(\d*)/
+ I have 2 numbers: 53147
+ 0: I have 2 numbers: 53147
+ 1: I have 2 numbers: 53147
+ 2:
+
+/(.*)(\d+)/
+ I have 2 numbers: 53147
+ 0: I have 2 numbers: 53147
+ 1: I have 2 numbers: 5314
+ 2: 7
+
+/(.*?)(\d*)/
+ I have 2 numbers: 53147
+ 0:
+ 1:
+ 2:
+
+/(.*?)(\d+)/
+ I have 2 numbers: 53147
+ 0: I have 2
+ 1: I have
+ 2: 2
+
+/(.*)(\d+)$/
+ I have 2 numbers: 53147
+ 0: I have 2 numbers: 53147
+ 1: I have 2 numbers: 5314
+ 2: 7
+
+/(.*?)(\d+)$/
+ I have 2 numbers: 53147
+ 0: I have 2 numbers: 53147
+ 1: I have 2 numbers:
+ 2: 53147
+
+/(.*)\b(\d+)$/
+ I have 2 numbers: 53147
+ 0: I have 2 numbers: 53147
+ 1: I have 2 numbers:
+ 2: 53147
+
+/(.*\D)(\d+)$/
+ I have 2 numbers: 53147
+ 0: I have 2 numbers: 53147
+ 1: I have 2 numbers:
+ 2: 53147
+
+/^\D*(?!123)/
+ ABC123
+ 0: AB
+
+/^(\D*)(?=\d)(?!123)/
+ ABC445
+ 0: ABC
+ 1: ABC
+\= Expect no match
+ ABC123
+No match
+
+/^[W-]46]/
+ W46]789
+ 0: W46]
+ -46]789
+ 0: -46]
+\= Expect no match
+ Wall
+No match
+ Zebra
+No match
+ 42
+No match
+ [abcd]
+No match
+ ]abcd[
+No match
+
+/^[W-\]46]/
+ W46]789
+ 0: W
+ Wall
+ 0: W
+ Zebra
+ 0: Z
+ Xylophone
+ 0: X
+ 42
+ 0: 4
+ [abcd]
+ 0: [
+ ]abcd[
+ 0: ]
+ \\backslash
+ 0: \
+\= Expect no match
+ -46]789
+No match
+ well
+No match
+
+/\d\d\/\d\d\/\d\d\d\d/
+ 01/01/2000
+ 0: 01/01/2000
+
+/word (?:[a-zA-Z0-9]+ ){0,10}otherword/
+ word cat dog elephant mussel cow horse canary baboon snake shark otherword
+ 0: word cat dog elephant mussel cow horse canary baboon snake shark otherword
+\= Expect no match
+ word cat dog elephant mussel cow horse canary baboon snake shark
+No match
+
+/word (?:[a-zA-Z0-9]+ ){0,300}otherword/
+\= Expect no match
+ word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope
+No match
+
+/^(a){0,0}/
+ bcd
+ 0:
+ abc
+ 0:
+ aab
+ 0:
+
+/^(a){0,1}/
+ bcd
+ 0:
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: a
+ 1: a
+
+/^(a){0,2}/
+ bcd
+ 0:
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+
+/^(a){0,3}/
+ bcd
+ 0:
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+ aaa
+ 0: aaa
+ 1: a
+
+/^(a){0,}/
+ bcd
+ 0:
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+ aaa
+ 0: aaa
+ 1: a
+ aaaaaaaa
+ 0: aaaaaaaa
+ 1: a
+
+/^(a){1,1}/
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: a
+ 1: a
+\= Expect no match
+ bcd
+No match
+
+/^(a){1,2}/
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+\= Expect no match
+ bcd
+No match
+
+/^(a){1,3}/
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+ aaa
+ 0: aaa
+ 1: a
+\= Expect no match
+ bcd
+No match
+
+/^(a){1,}/
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+ aaa
+ 0: aaa
+ 1: a
+ aaaaaaaa
+ 0: aaaaaaaa
+ 1: a
+\= Expect no match
+ bcd
+No match
+
+/.*\.gif/
+ borfle\nbib.gif\nno
+ 0: bib.gif
+
+/.{0,}\.gif/
+ borfle\nbib.gif\nno
+ 0: bib.gif
+
+/.*\.gif/m
+ borfle\nbib.gif\nno
+ 0: bib.gif
+
+/.*\.gif/s
+ borfle\nbib.gif\nno
+ 0: borfle\x0abib.gif
+
+/.*\.gif/ms
+ borfle\nbib.gif\nno
+ 0: borfle\x0abib.gif
+
+/.*$/
+ borfle\nbib.gif\nno
+ 0: no
+
+/.*$/m
+ borfle\nbib.gif\nno
+ 0: borfle
+
+/.*$/s
+ borfle\nbib.gif\nno
+ 0: borfle\x0abib.gif\x0ano
+
+/.*$/ms
+ borfle\nbib.gif\nno
+ 0: borfle\x0abib.gif\x0ano
+
+/.*$/
+ borfle\nbib.gif\nno\n
+ 0: no
+
+/.*$/m
+ borfle\nbib.gif\nno\n
+ 0: borfle
+
+/.*$/s
+ borfle\nbib.gif\nno\n
+ 0: borfle\x0abib.gif\x0ano\x0a
+
+/.*$/ms
+ borfle\nbib.gif\nno\n
+ 0: borfle\x0abib.gif\x0ano\x0a
+
+/(.*X|^B)/
+ abcde\n1234Xyz
+ 0: 1234X
+ 1: 1234X
+ BarFoo
+ 0: B
+ 1: B
+\= Expect no match
+ abcde\nBar
+No match
+
+/(.*X|^B)/m
+ abcde\n1234Xyz
+ 0: 1234X
+ 1: 1234X
+ BarFoo
+ 0: B
+ 1: B
+ abcde\nBar
+ 0: B
+ 1: B
+
+/(.*X|^B)/s
+ abcde\n1234Xyz
+ 0: abcde\x0a1234X
+ 1: abcde\x0a1234X
+ BarFoo
+ 0: B
+ 1: B
+\= Expect no match
+ abcde\nBar
+No match
+
+/(.*X|^B)/ms
+ abcde\n1234Xyz
+ 0: abcde\x0a1234X
+ 1: abcde\x0a1234X
+ BarFoo
+ 0: B
+ 1: B
+ abcde\nBar
+ 0: B
+ 1: B
+
+/(?s)(.*X|^B)/
+ abcde\n1234Xyz
+ 0: abcde\x0a1234X
+ 1: abcde\x0a1234X
+ BarFoo
+ 0: B
+ 1: B
+\= Expect no match
+ abcde\nBar
+No match
+
+/(?s:.*X|^B)/
+ abcde\n1234Xyz
+ 0: abcde\x0a1234X
+ BarFoo
+ 0: B
+\= Expect no match
+ abcde\nBar
+No match
+
+/^.*B/
+\= Expect no match
+ abc\nB
+No match
+
+/(?s)^.*B/
+ abc\nB
+ 0: abc\x0aB
+
+/(?m)^.*B/
+ abc\nB
+ 0: B
+
+/(?ms)^.*B/
+ abc\nB
+ 0: abc\x0aB
+
+/(?ms)^B/
+ abc\nB
+ 0: B
+
+/(?s)B$/
+ B\n
+ 0: B
+
+/^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]/
+ 123456654321
+ 0: 123456654321
+
+/^\d\d\d\d\d\d\d\d\d\d\d\d/
+ 123456654321
+ 0: 123456654321
+
+/^[\d][\d][\d][\d][\d][\d][\d][\d][\d][\d][\d][\d]/
+ 123456654321
+ 0: 123456654321
+
+/^[abc]{12}/
+ abcabcabcabc
+ 0: abcabcabcabc
+
+/^[a-c]{12}/
+ abcabcabcabc
+ 0: abcabcabcabc
+
+/^(a|b|c){12}/
+ abcabcabcabc
+ 0: abcabcabcabc
+ 1: c
+
+/^[abcdefghijklmnopqrstuvwxy0123456789]/
+ n
+ 0: n
+\= Expect no match
+ z
+No match
+
+/abcde{0,0}/
+ abcd
+ 0: abcd
+\= Expect no match
+ abce
+No match
+
+/ab[cd]{0,0}e/
+ abe
+ 0: abe
+\= Expect no match
+ abcde
+No match
+
+/ab(c){0,0}d/
+ abd
+ 0: abd
+\= Expect no match
+ abcd
+No match
+
+/a(b*)/
+ a
+ 0: a
+ 1:
+ ab
+ 0: ab
+ 1: b
+ abbbb
+ 0: abbbb
+ 1: bbbb
+\= Expect no match
+ bbbbb
+No match
+
+/ab\d{0}e/
+ abe
+ 0: abe
+\= Expect no match
+ ab1e
+No match
+
+/"([^\\"]+|\\.)*"/
+ the \"quick\" brown fox
+ 0: "quick"
+ 1: quick
+ \"the \\\"quick\\\" brown fox\"
+ 0: "the \"quick\" brown fox"
+ 1: brown fox
+
+/<tr([\w\W\s\d][^<>]{0,})><TD([\w\W\s\d][^<>]{0,})>([\d]{0,}\.)(.*)((<BR>([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/is
+ <TR BGCOLOR='#DBE9E9'><TD align=left valign=top>43.<a href='joblist.cfm?JobID=94 6735&Keyword='>Word Processor<BR>(N-1286)</a></TD><TD align=left valign=top>Lega lstaff.com</TD><TD align=left valign=top>CA - Statewide</TD></TR>
+ 0: <TR BGCOLOR='#DBE9E9'><TD align=left valign=top>43.<a href='joblist.cfm?JobID=94 6735&Keyword='>Word Processor<BR>(N-1286)</a></TD><TD align=left valign=top>Lega lstaff.com</TD><TD align=left valign=top>CA - Statewide</TD></TR>
+ 1: BGCOLOR='#DBE9E9'
+ 2: align=left valign=top
+ 3: 43.
+ 4: <a href='joblist.cfm?JobID=94 6735&Keyword='>Word Processor<BR>(N-1286)
+ 5:
+ 6:
+ 7: <unset>
+ 8: align=left valign=top
+ 9: Lega lstaff.com
+10: align=left valign=top
+11: CA - Statewide
+
+/a[^a]b/
+ acb
+ 0: acb
+ a\nb
+ 0: a\x0ab
+
+/a.b/
+ acb
+ 0: acb
+\= Expect no match
+ a\nb
+No match
+
+/a[^a]b/s
+ acb
+ 0: acb
+ a\nb
+ 0: a\x0ab
+
+/a.b/s
+ acb
+ 0: acb
+ a\nb
+ 0: a\x0ab
+
+/^(b+?|a){1,2}?c/
+ bac
+ 0: bac
+ 1: a
+ bbac
+ 0: bbac
+ 1: a
+ bbbac
+ 0: bbbac
+ 1: a
+ bbbbac
+ 0: bbbbac
+ 1: a
+ bbbbbac
+ 0: bbbbbac
+ 1: a
+
+/^(b+|a){1,2}?c/
+ bac
+ 0: bac
+ 1: a
+ bbac
+ 0: bbac
+ 1: a
+ bbbac
+ 0: bbbac
+ 1: a
+ bbbbac
+ 0: bbbbac
+ 1: a
+ bbbbbac
+ 0: bbbbbac
+ 1: a
+
+/(?!\A)x/m
+ a\bx\n
+ 0: x
+ a\nx\n
+ 0: x
+\= Expect no match
+ x\nb\n
+No match
+
+/(A|B)*?CD/
+ CD
+ 0: CD
+
+/(A|B)*CD/
+ CD
+ 0: CD
+
+/(AB)*?\1/
+ ABABAB
+ 0: ABAB
+ 1: AB
+
+/(AB)*\1/
+ ABABAB
+ 0: ABABAB
+ 1: AB
+
+/(?<!bar)foo/
+ foo
+ 0: foo
+ catfood
+ 0: foo
+ arfootle
+ 0: foo
+ rfoosh
+ 0: foo
+\= Expect no match
+ barfoo
+No match
+ towbarfoo
+No match
+
+/\w{3}(?<!bar)foo/
+ catfood
+ 0: catfoo
+\= Expect no match
+ foo
+No match
+ barfoo
+No match
+ towbarfoo
+No match
+
+/(?<=(foo)a)bar/
+ fooabar
+ 0: bar
+ 1: foo
+\= Expect no match
+ bar
+No match
+ foobbar
+No match
+
+/\Aabc\z/m
+ abc
+ 0: abc
+\= Expect no match
+ abc\n
+No match
+ qqq\nabc
+No match
+ abc\nzzz
+No match
+ qqq\nabc\nzzz
+No match
+
+"(?>.*/)foo"
+ /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo
+ 0: /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo
+\= Expect no match
+ /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/
+No match
+
+/(?>(\.\d\d[1-9]?))\d+/
+ 1.230003938
+ 0: .230003938
+ 1: .23
+ 1.875000282
+ 0: .875000282
+ 1: .875
+\= Expect no match
+ 1.235
+No match
+
+/^((?>\w+)|(?>\s+))*$/
+ now is the time for all good men to come to the aid of the party
+ 0: now is the time for all good men to come to the aid of the party
+ 1: party
+\= Expect no match
+ this is not a line with only words and spaces!
+No match
+
+/(\d+)(\w)/
+ 12345a
+ 0: 12345a
+ 1: 12345
+ 2: a
+ 12345+
+ 0: 12345
+ 1: 1234
+ 2: 5
+
+/((?>\d+))(\w)/
+ 12345a
+ 0: 12345a
+ 1: 12345
+ 2: a
+\= Expect no match
+ 12345+
+No match
+
+/(?>a+)b/
+ aaab
+ 0: aaab
+
+/((?>a+)b)/
+ aaab
+ 0: aaab
+ 1: aaab
+
+/(?>(a+))b/
+ aaab
+ 0: aaab
+ 1: aaa
+
+/(?>b)+/
+ aaabbbccc
+ 0: bbb
+
+/(?>a+|b+|c+)*c/
+ aaabbbbccccd
+ 0: aaabbbbc
+
+/((?>[^()]+)|\([^()]*\))+/
+ ((abc(ade)ufh()()x
+ 0: abc(ade)ufh()()x
+ 1: x
+
+/\(((?>[^()]+)|\([^()]+\))+\)/
+ (abc)
+ 0: (abc)
+ 1: abc
+ (abc(def)xyz)
+ 0: (abc(def)xyz)
+ 1: xyz
+\= Expect no match
+ ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+No match
+
+/a(?-i)b/i
+ ab
+ 0: ab
+ Ab
+ 0: Ab
+\= Expect no match
+ aB
+No match
+ AB
+No match
+
+/(a (?x)b c)d e/
+ a bcd e
+ 0: a bcd e
+ 1: a bc
+\= Expect no match
+ a b cd e
+No match
+ abcd e
+No match
+ a bcde
+No match
+
+/(a b(?x)c d (?-x)e f)/
+ a bcde f
+ 0: a bcde f
+ 1: a bcde f
+\= Expect no match
+ abcdef
+No match
+
+/(a(?i)b)c/
+ abc
+ 0: abc
+ 1: ab
+ aBc
+ 0: aBc
+ 1: aB
+\= Expect no match
+ abC
+No match
+ aBC
+No match
+ Abc
+No match
+ ABc
+No match
+ ABC
+No match
+ AbC
+No match
+
+/a(?i:b)c/
+ abc
+ 0: abc
+ aBc
+ 0: aBc
+\= Expect no match
+ ABC
+No match
+ abC
+No match
+ aBC
+No match
+
+/a(?i:b)*c/
+ aBc
+ 0: aBc
+ aBBc
+ 0: aBBc
+\= Expect no match
+ aBC
+No match
+ aBBC
+No match
+
+/a(?=b(?i)c)\w\wd/
+ abcd
+ 0: abcd
+ abCd
+ 0: abCd
+\= Expect no match
+ aBCd
+No match
+ abcD
+No match
+
+/(?s-i:more.*than).*million/i
+ more than million
+ 0: more than million
+ more than MILLION
+ 0: more than MILLION
+ more \n than Million
+ 0: more \x0a than Million
+\= Expect no match
+ MORE THAN MILLION
+No match
+ more \n than \n million
+No match
+
+/(?:(?s-i)more.*than).*million/i
+ more than million
+ 0: more than million
+ more than MILLION
+ 0: more than MILLION
+ more \n than Million
+ 0: more \x0a than Million
+\= Expect no match
+ MORE THAN MILLION
+No match
+ more \n than \n million
+No match
+
+/(?>a(?i)b+)+c/
+ abc
+ 0: abc
+ aBbc
+ 0: aBbc
+ aBBc
+ 0: aBBc
+\= Expect no match
+ Abc
+No match
+ abAb
+No match
+ abbC
+No match
+
+/(?=a(?i)b)\w\wc/
+ abc
+ 0: abc
+ aBc
+ 0: aBc
+\= Expect no match
+ Ab
+No match
+ abC
+No match
+ aBC
+No match
+
+/(?<=a(?i)b)(\w\w)c/
+ abxxc
+ 0: xxc
+ 1: xx
+ aBxxc
+ 0: xxc
+ 1: xx
+\= Expect no match
+ Abxxc
+No match
+ ABxxc
+No match
+ abxxC
+No match
+
+/(?:(a)|b)(?(1)A|B)/
+ aA
+ 0: aA
+ 1: a
+ bB
+ 0: bB
+\= Expect no match
+ aB
+No match
+ bA
+No match
+
+/^(a)?(?(1)a|b)+$/
+ aa
+ 0: aa
+ 1: a
+ b
+ 0: b
+ bb
+ 0: bb
+\= Expect no match
+ ab
+No match
+
+# Perl gets this next one wrong if the pattern ends with $; in that case it
+# fails to match "12".
+
+/^(?(?=abc)\w{3}:|\d\d)/
+ abc:
+ 0: abc:
+ 12
+ 0: 12
+ 123
+ 0: 12
+\= Expect no match
+ xyz
+No match
+
+/^(?(?!abc)\d\d|\w{3}:)$/
+ abc:
+ 0: abc:
+ 12
+ 0: 12
+\= Expect no match
+ 123
+No match
+ xyz
+No match
+
+/(?(?<=foo)bar|cat)/
+ foobar
+ 0: bar
+ cat
+ 0: cat
+ fcat
+ 0: cat
+ focat
+ 0: cat
+\= Expect no match
+ foocat
+No match
+
+/(?(?<!foo)cat|bar)/
+ foobar
+ 0: bar
+ cat
+ 0: cat
+ fcat
+ 0: cat
+ focat
+ 0: cat
+\= Expect no match
+ foocat
+No match
+
+/( \( )? [^()]+ (?(1) \) |) /x
+ abcd
+ 0: abcd
+ (abcd)
+ 0: (abcd)
+ 1: (
+ the quick (abcd) fox
+ 0: the quick
+ (abcd
+ 0: abcd
+
+/( \( )? [^()]+ (?(1) \) ) /x
+ abcd
+ 0: abcd
+ (abcd)
+ 0: (abcd)
+ 1: (
+ the quick (abcd) fox
+ 0: the quick
+ (abcd
+ 0: abcd
+
+/^(?(2)a|(1)(2))+$/
+ 12
+ 0: 12
+ 1: 1
+ 2: 2
+ 12a
+ 0: 12a
+ 1: 1
+ 2: 2
+ 12aa
+ 0: 12aa
+ 1: 1
+ 2: 2
+\= Expect no match
+ 1234
+No match
+
+/((?i)blah)\s+\1/
+ blah blah
+ 0: blah blah
+ 1: blah
+ BLAH BLAH
+ 0: BLAH BLAH
+ 1: BLAH
+ Blah Blah
+ 0: Blah Blah
+ 1: Blah
+ blaH blaH
+ 0: blaH blaH
+ 1: blaH
+\= Expect no match
+ blah BLAH
+No match
+ Blah blah
+No match
+ blaH blah
+No match
+
+/((?i)blah)\s+(?i:\1)/
+ blah blah
+ 0: blah blah
+ 1: blah
+ BLAH BLAH
+ 0: BLAH BLAH
+ 1: BLAH
+ Blah Blah
+ 0: Blah Blah
+ 1: Blah
+ blaH blaH
+ 0: blaH blaH
+ 1: blaH
+ blah BLAH
+ 0: blah BLAH
+ 1: blah
+ Blah blah
+ 0: Blah blah
+ 1: Blah
+ blaH blah
+ 0: blaH blah
+ 1: blaH
+
+/(?>a*)*/
+ a
+ 0: a
+ aa
+ 0: aa
+ aaaa
+ 0: aaaa
+
+/(abc|)+/
+ abc
+ 0: abc
+ 1:
+ abcabc
+ 0: abcabc
+ 1:
+ abcabcabc
+ 0: abcabcabc
+ 1:
+ xyz
+ 0:
+ 1:
+
+/([a]*)*/
+ a
+ 0: a
+ 1:
+ aaaaa
+ 0: aaaaa
+ 1:
+
+/([ab]*)*/
+ a
+ 0: a
+ 1:
+ b
+ 0: b
+ 1:
+ ababab
+ 0: ababab
+ 1:
+ aaaabcde
+ 0: aaaab
+ 1:
+ bbbb
+ 0: bbbb
+ 1:
+
+/([^a]*)*/
+ b
+ 0: b
+ 1:
+ bbbb
+ 0: bbbb
+ 1:
+ aaa
+ 0:
+ 1:
+
+/([^ab]*)*/
+ cccc
+ 0: cccc
+ 1:
+ abab
+ 0:
+ 1:
+
+/([a]*?)*/
+ a
+ 0:
+ 1:
+ aaaa
+ 0:
+ 1:
+
+/([ab]*?)*/
+ a
+ 0:
+ 1:
+ b
+ 0:
+ 1:
+ abab
+ 0:
+ 1:
+ baba
+ 0:
+ 1:
+
+/([^a]*?)*/
+ b
+ 0:
+ 1:
+ bbbb
+ 0:
+ 1:
+ aaa
+ 0:
+ 1:
+
+/([^ab]*?)*/
+ c
+ 0:
+ 1:
+ cccc
+ 0:
+ 1:
+ baba
+ 0:
+ 1:
+
+/(?>a*)*/
+ a
+ 0: a
+ aaabcde
+ 0: aaa
+
+/((?>a*))*/
+ aaaaa
+ 0: aaaaa
+ 1:
+ aabbaa
+ 0: aa
+ 1:
+
+/((?>a*?))*/
+ aaaaa
+ 0:
+ 1:
+ aabbaa
+ 0:
+ 1:
+
+/(?(?=[^a-z]+[a-z]) \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} ) /x
+ 12-sep-98
+ 0: 12-sep-98
+ 12-09-98
+ 0: 12-09-98
+\= Expect no match
+ sep-12-98
+No match
+
+/(?<=(foo))bar\1/
+ foobarfoo
+ 0: barfoo
+ 1: foo
+ foobarfootling
+ 0: barfoo
+ 1: foo
+\= Expect no match
+ foobar
+No match
+ barfoo
+No match
+
+/(?i:saturday|sunday)/
+ saturday
+ 0: saturday
+ sunday
+ 0: sunday
+ Saturday
+ 0: Saturday
+ Sunday
+ 0: Sunday
+ SATURDAY
+ 0: SATURDAY
+ SUNDAY
+ 0: SUNDAY
+ SunDay
+ 0: SunDay
+
+/(a(?i)bc|BB)x/
+ abcx
+ 0: abcx
+ 1: abc
+ aBCx
+ 0: aBCx
+ 1: aBC
+ bbx
+ 0: bbx
+ 1: bb
+ BBx
+ 0: BBx
+ 1: BB
+\= Expect no match
+ abcX
+No match
+ aBCX
+No match
+ bbX
+No match
+ BBX
+No match
+
+/^([ab](?i)[cd]|[ef])/
+ ac
+ 0: ac
+ 1: ac
+ aC
+ 0: aC
+ 1: aC
+ bD
+ 0: bD
+ 1: bD
+ elephant
+ 0: e
+ 1: e
+ Europe
+ 0: E
+ 1: E
+ frog
+ 0: f
+ 1: f
+ France
+ 0: F
+ 1: F
+\= Expect no match
+ Africa
+No match
+
+/^(ab|a(?i)[b-c](?m-i)d|x(?i)y|z)/
+ ab
+ 0: ab
+ 1: ab
+ aBd
+ 0: aBd
+ 1: aBd
+ xy
+ 0: xy
+ 1: xy
+ xY
+ 0: xY
+ 1: xY
+ zebra
+ 0: z
+ 1: z
+ Zambesi
+ 0: Z
+ 1: Z
+\= Expect no match
+ aCD
+No match
+ XY
+No match
+
+/(?<=foo\n)^bar/m
+ foo\nbar
+ 0: bar
+\= Expect no match
+ bar
+No match
+ baz\nbar
+No match
+
+/(?<=(?<!foo)bar)baz/
+ barbaz
+ 0: baz
+ barbarbaz
+ 0: baz
+ koobarbaz
+ 0: baz
+\= Expect no match
+ baz
+No match
+ foobarbaz
+No match
+
+# The cases of aaaa and aaaaaa are missed out below because Perl does things
+# differently. We know that odd, and maybe incorrect, things happen with
+# recursive references in Perl, as far as 5.11.3 - see some stuff in test #2.
+
+/^(a\1?){4}$/
+ aaaaa
+ 0: aaaaa
+ 1: a
+ aaaaaaa
+ 0: aaaaaaa
+ 1: a
+ aaaaaaaaaa
+ 0: aaaaaaaaaa
+ 1: aaaa
+\= Expect no match
+ a
+No match
+ aa
+No match
+ aaa
+No match
+ aaaaaaaa
+No match
+ aaaaaaaaa
+No match
+ aaaaaaaaaaa
+No match
+ aaaaaaaaaaaa
+No match
+ aaaaaaaaaaaaa
+No match
+ aaaaaaaaaaaaaa
+No match
+ aaaaaaaaaaaaaaa
+No match
+ aaaaaaaaaaaaaaaa
+No match
+
+/^(a\1?)(a\1?)(a\2?)(a\3?)$/
+ aaaa
+ 0: aaaa
+ 1: a
+ 2: a
+ 3: a
+ 4: a
+ aaaaa
+ 0: aaaaa
+ 1: a
+ 2: aa
+ 3: a
+ 4: a
+ aaaaaa
+ 0: aaaaaa
+ 1: a
+ 2: aa
+ 3: a
+ 4: aa
+ aaaaaaa
+ 0: aaaaaaa
+ 1: a
+ 2: aa
+ 3: aaa
+ 4: a
+ aaaaaaaaaa
+ 0: aaaaaaaaaa
+ 1: a
+ 2: aa
+ 3: aaa
+ 4: aaaa
+\= Expect no match
+ a
+No match
+ aa
+No match
+ aaa
+No match
+ aaaaaaaa
+No match
+ aaaaaaaaa
+No match
+ aaaaaaaaaaa
+No match
+ aaaaaaaaaaaa
+No match
+ aaaaaaaaaaaaa
+No match
+ aaaaaaaaaaaaaa
+No match
+ aaaaaaaaaaaaaaa
+No match
+ aaaaaaaaaaaaaaaa
+No match
+
+# The following tests are taken from the Perl 5.005 test suite; some of them
+# are compatible with 5.004, but I'd rather not have to sort them out.
+
+/abc/
+ abc
+ 0: abc
+ xabcy
+ 0: abc
+ ababc
+ 0: abc
+\= Expect no match
+ xbc
+No match
+ axc
+No match
+ abx
+No match
+
+/ab*c/
+ abc
+ 0: abc
+
+/ab*bc/
+ abc
+ 0: abc
+ abbc
+ 0: abbc
+ abbbbc
+ 0: abbbbc
+
+/.{1}/
+ abbbbc
+ 0: a
+
+/.{3,4}/
+ abbbbc
+ 0: abbb
+
+/ab{0,}bc/
+ abbbbc
+ 0: abbbbc
+
+/ab+bc/
+ abbc
+ 0: abbc
+\= Expect no match
+ abc
+No match
+ abq
+No match
+
+/ab{1,}bc/
+
+/ab+bc/
+ abbbbc
+ 0: abbbbc
+
+/ab{1,}bc/
+ abbbbc
+ 0: abbbbc
+
+/ab{1,3}bc/
+ abbbbc
+ 0: abbbbc
+
+/ab{3,4}bc/
+ abbbbc
+ 0: abbbbc
+
+/ab{4,5}bc/
+\= Expect no match
+ abq
+No match
+ abbbbc
+No match
+
+/ab?bc/
+ abbc
+ 0: abbc
+ abc
+ 0: abc
+
+/ab{0,1}bc/
+ abc
+ 0: abc
+
+/ab?bc/
+
+/ab?c/
+ abc
+ 0: abc
+
+/ab{0,1}c/
+ abc
+ 0: abc
+
+/^abc$/
+ abc
+ 0: abc
+\= Expect no match
+ abbbbc
+No match
+ abcc
+No match
+
+/^abc/
+ abcc
+ 0: abc
+
+/^abc$/
+
+/abc$/
+ aabc
+ 0: abc
+\= Expect no match
+ aabcd
+No match
+
+/^/
+ abc
+ 0:
+
+/$/
+ abc
+ 0:
+
+/a.c/
+ abc
+ 0: abc
+ axc
+ 0: axc
+
+/a.*c/
+ axyzc
+ 0: axyzc
+
+/a[bc]d/
+ abd
+ 0: abd
+\= Expect no match
+ axyzd
+No match
+ abc
+No match
+
+/a[b-d]e/
+ ace
+ 0: ace
+
+/a[b-d]/
+ aac
+ 0: ac
+
+/a[-b]/
+ a-
+ 0: a-
+
+/a[b-]/
+ a-
+ 0: a-
+
+/a]/
+ a]
+ 0: a]
+
+/a[]]b/
+ a]b
+ 0: a]b
+
+/a[^bc]d/
+ aed
+ 0: aed
+\= Expect no match
+ abd
+No match
+ abd
+No match
+
+/a[^-b]c/
+ adc
+ 0: adc
+
+/a[^]b]c/
+ adc
+ 0: adc
+ a-c
+ 0: a-c
+\= Expect no match
+ a]c
+No match
+
+/\ba\b/
+ a-
+ 0: a
+ -a
+ 0: a
+ -a-
+ 0: a
+
+/\by\b/
+\= Expect no match
+ xy
+No match
+ yz
+No match
+ xyz
+No match
+
+/\Ba\B/
+\= Expect no match
+ a-
+No match
+ -a
+No match
+ -a-
+No match
+
+/\By\b/
+ xy
+ 0: y
+
+/\by\B/
+ yz
+ 0: y
+
+/\By\B/
+ xyz
+ 0: y
+
+/\w/
+ a
+ 0: a
+
+/\W/
+ -
+ 0: -
+\= Expect no match
+ a
+No match
+
+/a\sb/
+ a b
+ 0: a b
+
+/a\Sb/
+ a-b
+ 0: a-b
+\= Expect no match
+ a b
+No match
+
+/\d/
+ 1
+ 0: 1
+
+/\D/
+ -
+ 0: -
+\= Expect no match
+ 1
+No match
+
+/[\w]/
+ a
+ 0: a
+
+/[\W]/
+ -
+ 0: -
+\= Expect no match
+ a
+No match
+
+/a[\s]b/
+ a b
+ 0: a b
+
+/a[\S]b/
+ a-b
+ 0: a-b
+\= Expect no match
+ a b
+No match
+
+/[\d]/
+ 1
+ 0: 1
+
+/[\D]/
+ -
+ 0: -
+\= Expect no match
+ 1
+No match
+
+/ab|cd/
+ abc
+ 0: ab
+ abcd
+ 0: ab
+
+/()ef/
+ def
+ 0: ef
+ 1:
+
+/$b/
+
+/a\(b/
+ a(b
+ 0: a(b
+
+/a\(*b/
+ ab
+ 0: ab
+ a((b
+ 0: a((b
+
+/a\\b/
+ a\\b
+ 0: a\b
+
+/((a))/
+ abc
+ 0: a
+ 1: a
+ 2: a
+
+/(a)b(c)/
+ abc
+ 0: abc
+ 1: a
+ 2: c
+
+/a+b+c/
+ aabbabc
+ 0: abc
+
+/a{1,}b{1,}c/
+ aabbabc
+ 0: abc
+
+/a.+?c/
+ abcabc
+ 0: abc
+
+/(a+|b)*/
+ ab
+ 0: ab
+ 1: b
+
+/(a+|b){0,}/
+ ab
+ 0: ab
+ 1: b
+
+/(a+|b)+/
+ ab
+ 0: ab
+ 1: b
+
+/(a+|b){1,}/
+ ab
+ 0: ab
+ 1: b
+
+/(a+|b)?/
+ ab
+ 0: a
+ 1: a
+
+/(a+|b){0,1}/
+ ab
+ 0: a
+ 1: a
+
+/[^ab]*/
+ cde
+ 0: cde
+
+/abc/
+\= Expect no match
+ b
+No match
+
+/a*/
+ \
+ 0:
+
+/([abc])*d/
+ abbbcd
+ 0: abbbcd
+ 1: c
+
+/([abc])*bcd/
+ abcd
+ 0: abcd
+ 1: a
+
+/a|b|c|d|e/
+ e
+ 0: e
+
+/(a|b|c|d|e)f/
+ ef
+ 0: ef
+ 1: e
+
+/abcd*efg/
+ abcdefg
+ 0: abcdefg
+
+/ab*/
+ xabyabbbz
+ 0: ab
+ xayabbbz
+ 0: a
+
+/(ab|cd)e/
+ abcde
+ 0: cde
+ 1: cd
+
+/[abhgefdc]ij/
+ hij
+ 0: hij
+
+/^(ab|cd)e/
+
+/(abc|)ef/
+ abcdef
+ 0: ef
+ 1:
+
+/(a|b)c*d/
+ abcd
+ 0: bcd
+ 1: b
+
+/(ab|ab*)bc/
+ abc
+ 0: abc
+ 1: a
+
+/a([bc]*)c*/
+ abc
+ 0: abc
+ 1: bc
+
+/a([bc]*)(c*d)/
+ abcd
+ 0: abcd
+ 1: bc
+ 2: d
+
+/a([bc]+)(c*d)/
+ abcd
+ 0: abcd
+ 1: bc
+ 2: d
+
+/a([bc]*)(c+d)/
+ abcd
+ 0: abcd
+ 1: b
+ 2: cd
+
+/a[bcd]*dcdcde/
+ adcdcde
+ 0: adcdcde
+
+/a[bcd]+dcdcde/
+\= Expect no match
+ abcde
+No match
+ adcdcde
+No match
+
+/(ab|a)b*c/
+ abc
+ 0: abc
+ 1: ab
+
+/((a)(b)c)(d)/
+ abcd
+ 0: abcd
+ 1: abc
+ 2: a
+ 3: b
+ 4: d
+
+/[a-zA-Z_][a-zA-Z0-9_]*/
+ alpha
+ 0: alpha
+
+/^a(bc+|b[eh])g|.h$/
+ abh
+ 0: bh
+
+/(bc+d$|ef*g.|h?i(j|k))/
+ effgz
+ 0: effgz
+ 1: effgz
+ ij
+ 0: ij
+ 1: ij
+ 2: j
+ reffgz
+ 0: effgz
+ 1: effgz
+\= Expect no match
+ effg
+No match
+ bcdd
+No match
+
+/((((((((((a))))))))))/
+ a
+ 0: a
+ 1: a
+ 2: a
+ 3: a
+ 4: a
+ 5: a
+ 6: a
+ 7: a
+ 8: a
+ 9: a
+10: a
+
+/((((((((((a))))))))))\10/
+ aa
+ 0: aa
+ 1: a
+ 2: a
+ 3: a
+ 4: a
+ 5: a
+ 6: a
+ 7: a
+ 8: a
+ 9: a
+10: a
+
+/(((((((((a)))))))))/
+ a
+ 0: a
+ 1: a
+ 2: a
+ 3: a
+ 4: a
+ 5: a
+ 6: a
+ 7: a
+ 8: a
+ 9: a
+
+/multiple words of text/
+\= Expect no match
+ aa
+No match
+ uh-uh
+No match
+
+/multiple words/
+ multiple words, yeah
+ 0: multiple words
+
+/(.*)c(.*)/
+ abcde
+ 0: abcde
+ 1: ab
+ 2: de
+
+/\((.*), (.*)\)/
+ (a, b)
+ 0: (a, b)
+ 1: a
+ 2: b
+
+/[k]/
+
+/abcd/
+ abcd
+ 0: abcd
+
+/a(bc)d/
+ abcd
+ 0: abcd
+ 1: bc
+
+/a[-]?c/
+ ac
+ 0: ac
+
+/(abc)\1/
+ abcabc
+ 0: abcabc
+ 1: abc
+
+/([a-c]*)\1/
+ abcabc
+ 0: abcabc
+ 1: abc
+
+/(a)|\1/
+ a
+ 0: a
+ 1: a
+ ab
+ 0: a
+ 1: a
+\= Expect no match
+ x
+No match
+
+/(([a-c])b*?\2)*/
+ ababbbcbc
+ 0: ababb
+ 1: bb
+ 2: b
+
+/(([a-c])b*?\2){3}/
+ ababbbcbc
+ 0: ababbbcbc
+ 1: cbc
+ 2: c
+
+/((\3|b)\2(a)x)+/
+ aaaxabaxbaaxbbax
+ 0: bbax
+ 1: bbax
+ 2: b
+ 3: a
+
+/((\3|b)\2(a)){2,}/
+ bbaababbabaaaaabbaaaabba
+ 0: bbaaaabba
+ 1: bba
+ 2: b
+ 3: a
+
+/abc/i
+ ABC
+ 0: ABC
+ XABCY
+ 0: ABC
+ ABABC
+ 0: ABC
+\= Expect no match
+ aaxabxbaxbbx
+No match
+ XBC
+No match
+ AXC
+No match
+ ABX
+No match
+
+/ab*c/i
+ ABC
+ 0: ABC
+
+/ab*bc/i
+ ABC
+ 0: ABC
+ ABBC
+ 0: ABBC
+
+/ab*?bc/i
+ ABBBBC
+ 0: ABBBBC
+
+/ab{0,}?bc/i
+ ABBBBC
+ 0: ABBBBC
+
+/ab+?bc/i
+ ABBC
+ 0: ABBC
+
+/ab+bc/i
+\= Expect no match
+ ABC
+No match
+ ABQ
+No match
+
+/ab{1,}bc/i
+
+/ab+bc/i
+ ABBBBC
+ 0: ABBBBC
+
+/ab{1,}?bc/i
+ ABBBBC
+ 0: ABBBBC
+
+/ab{1,3}?bc/i
+ ABBBBC
+ 0: ABBBBC
+
+/ab{3,4}?bc/i
+ ABBBBC
+ 0: ABBBBC
+
+/ab{4,5}?bc/i
+\= Expect no match
+ ABQ
+No match
+ ABBBBC
+No match
+
+/ab??bc/i
+ ABBC
+ 0: ABBC
+ ABC
+ 0: ABC
+
+/ab{0,1}?bc/i
+ ABC
+ 0: ABC
+
+/ab??bc/i
+
+/ab??c/i
+ ABC
+ 0: ABC
+
+/ab{0,1}?c/i
+ ABC
+ 0: ABC
+
+/^abc$/i
+ ABC
+ 0: ABC
+\= Expect no match
+ ABBBBC
+No match
+ ABCC
+No match
+
+/^abc/i
+ ABCC
+ 0: ABC
+
+/^abc$/i
+
+/abc$/i
+ AABC
+ 0: ABC
+
+/^/i
+ ABC
+ 0:
+
+/$/i
+ ABC
+ 0:
+
+/a.c/i
+ ABC
+ 0: ABC
+ AXC
+ 0: AXC
+
+/a.*?c/i
+ AXYZC
+ 0: AXYZC
+
+/a.*c/i
+ AABC
+ 0: AABC
+\= Expect no match
+ AXYZD
+No match
+
+/a[bc]d/i
+ ABD
+ 0: ABD
+
+/a[b-d]e/i
+ ACE
+ 0: ACE
+\= Expect no match
+ ABC
+No match
+ ABD
+No match
+
+/a[b-d]/i
+ AAC
+ 0: AC
+
+/a[-b]/i
+ A-
+ 0: A-
+
+/a[b-]/i
+ A-
+ 0: A-
+
+/a]/i
+ A]
+ 0: A]
+
+/a[]]b/i
+ A]B
+ 0: A]B
+
+/a[^bc]d/i
+ AED
+ 0: AED
+
+/a[^-b]c/i
+ ADC
+ 0: ADC
+\= Expect no match
+ ABD
+No match
+ A-C
+No match
+
+/a[^]b]c/i
+ ADC
+ 0: ADC
+
+/ab|cd/i
+ ABC
+ 0: AB
+ ABCD
+ 0: AB
+
+/()ef/i
+ DEF
+ 0: EF
+ 1:
+
+/$b/i
+\= Expect no match
+ A]C
+No match
+ B
+No match
+
+/a\(b/i
+ A(B
+ 0: A(B
+
+/a\(*b/i
+ AB
+ 0: AB
+ A((B
+ 0: A((B
+
+/a\\b/i
+ A\\b
+ 0: A\b
+ a\\B
+ 0: a\B
+
+/((a))/i
+ ABC
+ 0: A
+ 1: A
+ 2: A
+
+/(a)b(c)/i
+ ABC
+ 0: ABC
+ 1: A
+ 2: C
+
+/a+b+c/i
+ AABBABC
+ 0: ABC
+
+/a{1,}b{1,}c/i
+ AABBABC
+ 0: ABC
+
+/a.+?c/i
+ ABCABC
+ 0: ABC
+
+/a.*?c/i
+ ABCABC
+ 0: ABC
+
+/a.{0,5}?c/i
+ ABCABC
+ 0: ABC
+
+/(a+|b)*/i
+ AB
+ 0: AB
+ 1: B
+
+/(a+|b){0,}/i
+ AB
+ 0: AB
+ 1: B
+
+/(a+|b)+/i
+ AB
+ 0: AB
+ 1: B
+
+/(a+|b){1,}/i
+ AB
+ 0: AB
+ 1: B
+
+/(a+|b)?/i
+ AB
+ 0: A
+ 1: A
+
+/(a+|b){0,1}/i
+ AB
+ 0: A
+ 1: A
+
+/(a+|b){0,1}?/i
+ AB
+ 0:
+
+/[^ab]*/i
+ CDE
+ 0: CDE
+
+/([abc])*d/i
+ ABBBCD
+ 0: ABBBCD
+ 1: C
+
+/([abc])*bcd/i
+ ABCD
+ 0: ABCD
+ 1: A
+
+/a|b|c|d|e/i
+ E
+ 0: E
+
+/(a|b|c|d|e)f/i
+ EF
+ 0: EF
+ 1: E
+
+/abcd*efg/i
+ ABCDEFG
+ 0: ABCDEFG
+
+/ab*/i
+ XABYABBBZ
+ 0: AB
+ XAYABBBZ
+ 0: A
+
+/(ab|cd)e/i
+ ABCDE
+ 0: CDE
+ 1: CD
+
+/[abhgefdc]ij/i
+ HIJ
+ 0: HIJ
+
+/^(ab|cd)e/i
+\= Expect no match
+ ABCDE
+No match
+
+/(abc|)ef/i
+ ABCDEF
+ 0: EF
+ 1:
+
+/(a|b)c*d/i
+ ABCD
+ 0: BCD
+ 1: B
+
+/(ab|ab*)bc/i
+ ABC
+ 0: ABC
+ 1: A
+
+/a([bc]*)c*/i
+ ABC
+ 0: ABC
+ 1: BC
+
+/a([bc]*)(c*d)/i
+ ABCD
+ 0: ABCD
+ 1: BC
+ 2: D
+
+/a([bc]+)(c*d)/i
+ ABCD
+ 0: ABCD
+ 1: BC
+ 2: D
+
+/a([bc]*)(c+d)/i
+ ABCD
+ 0: ABCD
+ 1: B
+ 2: CD
+
+/a[bcd]*dcdcde/i
+ ADCDCDE
+ 0: ADCDCDE
+
+/a[bcd]+dcdcde/i
+
+/(ab|a)b*c/i
+ ABC
+ 0: ABC
+ 1: AB
+
+/((a)(b)c)(d)/i
+ ABCD
+ 0: ABCD
+ 1: ABC
+ 2: A
+ 3: B
+ 4: D
+
+/[a-zA-Z_][a-zA-Z0-9_]*/i
+ ALPHA
+ 0: ALPHA
+
+/^a(bc+|b[eh])g|.h$/i
+ ABH
+ 0: BH
+
+/(bc+d$|ef*g.|h?i(j|k))/i
+ EFFGZ
+ 0: EFFGZ
+ 1: EFFGZ
+ IJ
+ 0: IJ
+ 1: IJ
+ 2: J
+ REFFGZ
+ 0: EFFGZ
+ 1: EFFGZ
+\= Expect no match
+ ADCDCDE
+No match
+ EFFG
+No match
+ BCDD
+No match
+
+/((((((((((a))))))))))/i
+ A
+ 0: A
+ 1: A
+ 2: A
+ 3: A
+ 4: A
+ 5: A
+ 6: A
+ 7: A
+ 8: A
+ 9: A
+10: A
+
+/((((((((((a))))))))))\10/i
+ AA
+ 0: AA
+ 1: A
+ 2: A
+ 3: A
+ 4: A
+ 5: A
+ 6: A
+ 7: A
+ 8: A
+ 9: A
+10: A
+
+/(((((((((a)))))))))/i
+ A
+ 0: A
+ 1: A
+ 2: A
+ 3: A
+ 4: A
+ 5: A
+ 6: A
+ 7: A
+ 8: A
+ 9: A
+
+/(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))/i
+ A
+ 0: A
+ 1: A
+
+/(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))/i
+ C
+ 0: C
+ 1: C
+
+/multiple words of text/i
+\= Expect no match
+ AA
+No match
+ UH-UH
+No match
+
+/multiple words/i
+ MULTIPLE WORDS, YEAH
+ 0: MULTIPLE WORDS
+
+/(.*)c(.*)/i
+ ABCDE
+ 0: ABCDE
+ 1: AB
+ 2: DE
+
+/\((.*), (.*)\)/i
+ (A, B)
+ 0: (A, B)
+ 1: A
+ 2: B
+
+/[k]/i
+
+/abcd/i
+ ABCD
+ 0: ABCD
+
+/a(bc)d/i
+ ABCD
+ 0: ABCD
+ 1: BC
+
+/a[-]?c/i
+ AC
+ 0: AC
+
+/(abc)\1/i
+ ABCABC
+ 0: ABCABC
+ 1: ABC
+
+/([a-c]*)\1/i
+ ABCABC
+ 0: ABCABC
+ 1: ABC
+
+/a(?!b)./
+ abad
+ 0: ad
+
+/a(?=d)./
+ abad
+ 0: ad
+
+/a(?=c|d)./
+ abad
+ 0: ad
+
+/a(?:b|c|d)(.)/
+ ace
+ 0: ace
+ 1: e
+
+/a(?:b|c|d)*(.)/
+ ace
+ 0: ace
+ 1: e
+
+/a(?:b|c|d)+?(.)/
+ ace
+ 0: ace
+ 1: e
+ acdbcdbe
+ 0: acd
+ 1: d
+
+/a(?:b|c|d)+(.)/
+ acdbcdbe
+ 0: acdbcdbe
+ 1: e
+
+/a(?:b|c|d){2}(.)/
+ acdbcdbe
+ 0: acdb
+ 1: b
+
+/a(?:b|c|d){4,5}(.)/
+ acdbcdbe
+ 0: acdbcdb
+ 1: b
+
+/a(?:b|c|d){4,5}?(.)/
+ acdbcdbe
+ 0: acdbcd
+ 1: d
+
+/((foo)|(bar))*/
+ foobar
+ 0: foobar
+ 1: bar
+ 2: foo
+ 3: bar
+
+/a(?:b|c|d){6,7}(.)/
+ acdbcdbe
+ 0: acdbcdbe
+ 1: e
+
+/a(?:b|c|d){6,7}?(.)/
+ acdbcdbe
+ 0: acdbcdbe
+ 1: e
+
+/a(?:b|c|d){5,6}(.)/
+ acdbcdbe
+ 0: acdbcdbe
+ 1: e
+
+/a(?:b|c|d){5,6}?(.)/
+ acdbcdbe
+ 0: acdbcdb
+ 1: b
+
+/a(?:b|c|d){5,7}(.)/
+ acdbcdbe
+ 0: acdbcdbe
+ 1: e
+
+/a(?:b|c|d){5,7}?(.)/
+ acdbcdbe
+ 0: acdbcdb
+ 1: b
+
+/a(?:b|(c|e){1,2}?|d)+?(.)/
+ ace
+ 0: ace
+ 1: c
+ 2: e
+
+/^(.+)?B/
+ AB
+ 0: AB
+ 1: A
+
+/^([^a-z])|(\^)$/
+ .
+ 0: .
+ 1: .
+
+/^[<>]&/
+ <&OUT
+ 0: <&
+
+/^(a\1?){4}$/
+ aaaaaaaaaa
+ 0: aaaaaaaaaa
+ 1: aaaa
+\= Expect no match
+ AB
+No match
+ aaaaaaaaa
+No match
+ aaaaaaaaaaa
+No match
+
+/^(a(?(1)\1)){4}$/
+ aaaaaaaaaa
+ 0: aaaaaaaaaa
+ 1: aaaa
+\= Expect no match
+ aaaaaaaaa
+No match
+ aaaaaaaaaaa
+No match
+
+/(?:(f)(o)(o)|(b)(a)(r))*/
+ foobar
+ 0: foobar
+ 1: f
+ 2: o
+ 3: o
+ 4: b
+ 5: a
+ 6: r
+
+/(?<=a)b/
+ ab
+ 0: b
+\= Expect no match
+ cb
+No match
+ b
+No match
+
+/(?<!c)b/
+ ab
+ 0: b
+ b
+ 0: b
+ b
+ 0: b
+
+/(?:..)*a/
+ aba
+ 0: aba
+
+/(?:..)*?a/
+ aba
+ 0: a
+
+/^(?:b|a(?=(.)))*\1/
+ abc
+ 0: ab
+ 1: b
+
+/^(){3,5}/
+ abc
+ 0:
+ 1:
+
+/^(a+)*ax/
+ aax
+ 0: aax
+ 1: a
+
+/^((a|b)+)*ax/
+ aax
+ 0: aax
+ 1: a
+ 2: a
+
+/^((a|bc)+)*ax/
+ aax
+ 0: aax
+ 1: a
+ 2: a
+
+/(a|x)*ab/
+ cab
+ 0: ab
+
+/(a)*ab/
+ cab
+ 0: ab
+
+/(?:(?i)a)b/
+ ab
+ 0: ab
+
+/((?i)a)b/
+ ab
+ 0: ab
+ 1: a
+
+/(?:(?i)a)b/
+ Ab
+ 0: Ab
+
+/((?i)a)b/
+ Ab
+ 0: Ab
+ 1: A
+
+/(?:(?i)a)b/
+\= Expect no match
+ cb
+No match
+ aB
+No match
+
+/((?i)a)b/
+
+/(?i:a)b/
+ ab
+ 0: ab
+
+/((?i:a))b/
+ ab
+ 0: ab
+ 1: a
+
+/(?i:a)b/
+ Ab
+ 0: Ab
+
+/((?i:a))b/
+ Ab
+ 0: Ab
+ 1: A
+
+/(?i:a)b/
+\= Expect no match
+ aB
+No match
+ aB
+No match
+
+/((?i:a))b/
+
+/(?:(?-i)a)b/i
+ ab
+ 0: ab
+
+/((?-i)a)b/i
+ ab
+ 0: ab
+ 1: a
+
+/(?:(?-i)a)b/i
+ aB
+ 0: aB
+
+/((?-i)a)b/i
+ aB
+ 0: aB
+ 1: a
+
+/(?:(?-i)a)b/i
+ aB
+ 0: aB
+\= Expect no match
+ Ab
+No match
+ AB
+No match
+
+/(?-i:a)b/i
+ ab
+ 0: ab
+
+/((?-i:a))b/i
+ ab
+ 0: ab
+ 1: a
+
+/(?-i:a)b/i
+ aB
+ 0: aB
+
+/((?-i:a))b/i
+ aB
+ 0: aB
+ 1: a
+
+/(?-i:a)b/i
+\= Expect no match
+ AB
+No match
+ Ab
+No match
+
+/((?-i:a))b/i
+
+/(?-i:a)b/i
+ aB
+ 0: aB
+
+/((?-i:a))b/i
+ aB
+ 0: aB
+ 1: a
+
+/(?-i:a)b/i
+\= Expect no match
+ Ab
+No match
+ AB
+No match
+
+/((?-i:a))b/i
+
+/((?-i:a.))b/i
+\= Expect no match
+ AB
+No match
+ a\nB
+No match
+
+/((?s-i:a.))b/i
+ a\nB
+ 0: a\x0aB
+ 1: a\x0a
+
+/(?:c|d)(?:)(?:a(?:)(?:b)(?:b(?:))(?:b(?:)(?:b)))/
+ cabbbb
+ 0: cabbbb
+
+/(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb)))/
+ caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+ 0: caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+
+/(ab)\d\1/i
+ Ab4ab
+ 0: Ab4ab
+ 1: Ab
+ ab4Ab
+ 0: ab4Ab
+ 1: ab
+
+/foo\w*\d{4}baz/
+ foobar1234baz
+ 0: foobar1234baz
+
+/x(~~)*(?:(?:F)?)?/
+ x~~
+ 0: x~~
+ 1: ~~
+
+/^a(?#xxx){3}c/
+ aaac
+ 0: aaac
+
+/^a (?#xxx) (?#yyy) {3}c/x
+ aaac
+ 0: aaac
+
+/(?<![cd])b/
+\= Expect no match
+ B\nB
+No match
+ dbcb
+No match
+
+/(?<![cd])[ab]/
+ dbaacb
+ 0: a
+
+/(?<!(c|d))b/
+
+/(?<!(c|d))[ab]/
+ dbaacb
+ 0: a
+
+/(?<!cd)[ab]/
+ cdaccb
+ 0: b
+
+/^(?:a?b?)*$/
+ \
+ 0:
+ a
+ 0: a
+ ab
+ 0: ab
+ aaa
+ 0: aaa
+\= Expect no match
+ dbcb
+No match
+ a--
+No match
+ aa--
+No match
+
+/((?s)^a(.))((?m)^b$)/
+ a\nb\nc\n
+ 0: a\x0ab
+ 1: a\x0a
+ 2: \x0a
+ 3: b
+
+/((?m)^b$)/
+ a\nb\nc\n
+ 0: b
+ 1: b
+
+/(?m)^b/
+ a\nb\n
+ 0: b
+
+/(?m)^(b)/
+ a\nb\n
+ 0: b
+ 1: b
+
+/((?m)^b)/
+ a\nb\n
+ 0: b
+ 1: b
+
+/\n((?m)^b)/
+ a\nb\n
+ 0: \x0ab
+ 1: b
+
+/((?s).)c(?!.)/
+ a\nb\nc\n
+ 0: \x0ac
+ 1: \x0a
+ a\nb\nc\n
+ 0: \x0ac
+ 1: \x0a
+
+/((?s)b.)c(?!.)/
+ a\nb\nc\n
+ 0: b\x0ac
+ 1: b\x0a
+ a\nb\nc\n
+ 0: b\x0ac
+ 1: b\x0a
+
+/^b/
+
+/()^b/
+\= Expect no match
+ a\nb\nc\n
+No match
+ a\nb\nc\n
+No match
+
+/((?m)^b)/
+ a\nb\nc\n
+ 0: b
+ 1: b
+
+/(x)?(?(1)a|b)/
+\= Expect no match
+ a
+No match
+ a
+No match
+
+/(x)?(?(1)b|a)/
+ a
+ 0: a
+
+/()?(?(1)b|a)/
+ a
+ 0: a
+
+/()(?(1)b|a)/
+
+/()?(?(1)a|b)/
+ a
+ 0: a
+ 1:
+
+/^(\()?blah(?(1)(\)))$/
+ (blah)
+ 0: (blah)
+ 1: (
+ 2: )
+ blah
+ 0: blah
+\= Expect no match
+ a
+No match
+ blah)
+No match
+ (blah
+No match
+
+/^(\(+)?blah(?(1)(\)))$/
+ (blah)
+ 0: (blah)
+ 1: (
+ 2: )
+ blah
+ 0: blah
+\= Expect no match
+ blah)
+No match
+ (blah
+No match
+
+/(?(?!a)a|b)/
+
+/(?(?!a)b|a)/
+ a
+ 0: a
+
+/(?(?=a)b|a)/
+\= Expect no match
+ a
+No match
+ a
+No match
+
+/(?(?=a)a|b)/
+ a
+ 0: a
+
+/(?=(a+?))(\1ab)/
+ aaab
+ 0: aab
+ 1: a
+ 2: aab
+
+/^(?=(a+?))\1ab/
+
+/(\w+:)+/
+ one:
+ 0: one:
+ 1: one:
+
+/$(?<=^(a))/
+ a
+ 0:
+ 1: a
+
+/(?=(a+?))(\1ab)/
+ aaab
+ 0: aab
+ 1: a
+ 2: aab
+
+/^(?=(a+?))\1ab/
+\= Expect no match
+ aaab
+No match
+ aaab
+No match
+
+/([\w:]+::)?(\w+)$/
+ abcd
+ 0: abcd
+ 1: <unset>
+ 2: abcd
+ xy:z:::abcd
+ 0: xy:z:::abcd
+ 1: xy:z:::
+ 2: abcd
+
+/^[^bcd]*(c+)/
+ aexycd
+ 0: aexyc
+ 1: c
+
+/(a*)b+/
+ caab
+ 0: aab
+ 1: aa
+
+/([\w:]+::)?(\w+)$/
+ abcd
+ 0: abcd
+ 1: <unset>
+ 2: abcd
+ xy:z:::abcd
+ 0: xy:z:::abcd
+ 1: xy:z:::
+ 2: abcd
+\= Expect no match
+ abcd:
+No match
+ abcd:
+No match
+
+/^[^bcd]*(c+)/
+ aexycd
+ 0: aexyc
+ 1: c
+
+/(>a+)ab/
+
+/(?>a+)b/
+ aaab
+ 0: aaab
+
+/([[:]+)/
+ a:[b]:
+ 0: :[
+ 1: :[
+
+/([[=]+)/
+ a=[b]=
+ 0: =[
+ 1: =[
+
+/([[.]+)/
+ a.[b].
+ 0: .[
+ 1: .[
+
+/((?>a+)b)/
+ aaab
+ 0: aaab
+ 1: aaab
+
+/(?>(a+))b/
+ aaab
+ 0: aaab
+ 1: aaa
+
+/((?>[^()]+)|\([^()]*\))+/
+ ((abc(ade)ufh()()x
+ 0: abc(ade)ufh()()x
+ 1: x
+
+/a\Z/
+\= Expect no match
+ aaab
+No match
+ a\nb\n
+No match
+
+/b\Z/
+ a\nb\n
+ 0: b
+
+/b\z/
+
+/b\Z/
+ a\nb
+ 0: b
+
+/b\z/
+ a\nb
+ 0: b
+
+/^(?>(?(1)\.|())[^\W_](?>[a-z0-9-]*[^\W_])?)+$/
+ a
+ 0: a
+ 1:
+ abc
+ 0: abc
+ 1:
+ a-b
+ 0: a-b
+ 1:
+ 0-9
+ 0: 0-9
+ 1:
+ a.b
+ 0: a.b
+ 1:
+ 5.6.7
+ 0: 5.6.7
+ 1:
+ the.quick.brown.fox
+ 0: the.quick.brown.fox
+ 1:
+ a100.b200.300c
+ 0: a100.b200.300c
+ 1:
+ 12-ab.1245
+ 0: 12-ab.1245
+ 1:
+\= Expect no match
+ \
+No match
+ .a
+No match
+ -a
+No match
+ a-
+No match
+ a.
+No match
+ a_b
+No match
+ a.-
+No match
+ a..
+No match
+ ab..bc
+No match
+ the.quick.brown.fox-
+No match
+ the.quick.brown.fox.
+No match
+ the.quick.brown.fox_
+No match
+ the.quick.brown.fox+
+No match
+
+/(?>.*)(?<=(abcd|wxyz))/
+ alphabetabcd
+ 0: alphabetabcd
+ 1: abcd
+ endingwxyz
+ 0: endingwxyz
+ 1: wxyz
+\= Expect no match
+ a rather long string that doesn't end with one of them
+No match
+
+/word (?>(?:(?!otherword)[a-zA-Z0-9]+ ){0,30})otherword/
+ word cat dog elephant mussel cow horse canary baboon snake shark otherword
+ 0: word cat dog elephant mussel cow horse canary baboon snake shark otherword
+\= Expect no match
+ word cat dog elephant mussel cow horse canary baboon snake shark
+No match
+
+/word (?>[a-zA-Z0-9]+ ){0,30}otherword/
+\= Expect no match
+ word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope
+No match
+
+/(?<=\d{3}(?!999))foo/
+ 999foo
+ 0: foo
+ 123999foo
+ 0: foo
+\= Expect no match
+ 123abcfoo
+No match
+
+/(?<=(?!...999)\d{3})foo/
+ 999foo
+ 0: foo
+ 123999foo
+ 0: foo
+\= Expect no match
+ 123abcfoo
+No match
+
+/(?<=\d{3}(?!999)...)foo/
+ 123abcfoo
+ 0: foo
+ 123456foo
+ 0: foo
+\= Expect no match
+ 123999foo
+No match
+
+/(?<=\d{3}...)(?<!999)foo/
+ 123abcfoo
+ 0: foo
+ 123456foo
+ 0: foo
+\= Expect no match
+ 123999foo
+No match
+
+/<a[\s]+href[\s]*=[\s]* # find <a href=
+ ([\"\'])? # find single or double quote
+ (?(1) (.*?)\1 | ([^\s]+)) # if quote found, match up to next matching
+ # quote, otherwise match up to next space
+/isx
+ <a href=abcd xyz
+ 0: <a href=abcd
+ 1: <unset>
+ 2: <unset>
+ 3: abcd
+ <a href=\"abcd xyz pqr\" cats
+ 0: <a href="abcd xyz pqr"
+ 1: "
+ 2: abcd xyz pqr
+ <a href=\'abcd xyz pqr\' cats
+ 0: <a href='abcd xyz pqr'
+ 1: '
+ 2: abcd xyz pqr
+
+/<a\s+href\s*=\s* # find <a href=
+ (["'])? # find single or double quote
+ (?(1) (.*?)\1 | (\S+)) # if quote found, match up to next matching
+ # quote, otherwise match up to next space
+/isx
+ <a href=abcd xyz
+ 0: <a href=abcd
+ 1: <unset>
+ 2: <unset>
+ 3: abcd
+ <a href=\"abcd xyz pqr\" cats
+ 0: <a href="abcd xyz pqr"
+ 1: "
+ 2: abcd xyz pqr
+ <a href = \'abcd xyz pqr\' cats
+ 0: <a href = 'abcd xyz pqr'
+ 1: '
+ 2: abcd xyz pqr
+
+/<a\s+href(?>\s*)=(?>\s*) # find <a href=
+ (["'])? # find single or double quote
+ (?(1) (.*?)\1 | (\S+)) # if quote found, match up to next matching
+ # quote, otherwise match up to next space
+/isx
+ <a href=abcd xyz
+ 0: <a href=abcd
+ 1: <unset>
+ 2: <unset>
+ 3: abcd
+ <a href=\"abcd xyz pqr\" cats
+ 0: <a href="abcd xyz pqr"
+ 1: "
+ 2: abcd xyz pqr
+ <a href = \'abcd xyz pqr\' cats
+ 0: <a href = 'abcd xyz pqr'
+ 1: '
+ 2: abcd xyz pqr
+
+/((Z)+|A)*/
+ ZABCDEFG
+ 0: ZA
+ 1: A
+ 2: Z
+
+/(Z()|A)*/
+ ZABCDEFG
+ 0: ZA
+ 1: A
+ 2:
+
+/(Z(())|A)*/
+ ZABCDEFG
+ 0: ZA
+ 1: A
+ 2:
+ 3:
+
+/((?>Z)+|A)*/
+ ZABCDEFG
+ 0: ZA
+ 1: A
+
+/((?>)+|A)*/
+ ZABCDEFG
+ 0:
+ 1:
+
+/^[\d-a]/
+ abcde
+ 0: a
+ -things
+ 0: -
+ 0digit
+ 0: 0
+\= Expect no match
+ bcdef
+No match
+
+/[\s]+/
+ > \x09\x0a\x0c\x0d\x0b<
+ 0: \x09\x0a\x0c\x0d\x0b
+
+/\s+/
+ > \x09\x0a\x0c\x0d\x0b<
+ 0: \x09\x0a\x0c\x0d\x0b
+
+/a b/x
+ ab
+ 0: ab
+
+/(?!\A)x/m
+ a\nxb\n
+ 0: x
+
+/(?!^)x/m
+\= Expect no match
+ a\nxb\n
+No match
+
+#/abc\Qabc\Eabc/
+# abcabcabc
+# 0: abcabcabc
+
+#/abc\Q(*+|\Eabc/
+# abc(*+|abc
+# 0: abc(*+|abc
+
+#/ abc\Q abc\Eabc/x
+# abc abcabc
+# 0: abc abcabc
+#\= Expect no match
+# abcabcabc
+#No match
+
+#/abc#comment
+# \Q#not comment
+# literal\E/x
+# abc#not comment\n literal
+# 0: abc#not comment\x0a literal
+
+#/abc#comment
+# \Q#not comment
+# literal/x
+# abc#not comment\n literal
+# 0: abc#not comment\x0a literal
+
+#/abc#comment
+# \Q#not comment
+# literal\E #more comment
+# /x
+# abc#not comment\n literal
+# 0: abc#not comment\x0a literal
+
+#/abc#comment
+# \Q#not comment
+# literal\E #more comment/x
+# abc#not comment\n literal
+# 0: abc#not comment\x0a literal
+
+#/\Qabc\$xyz\E/
+# abc\\\$xyz
+# 0: abc\$xyz
+
+#/\Qabc\E\$\Qxyz\E/
+# abc\$xyz
+# 0: abc$xyz
+
+/\Gabc/
+ abc
+ 0: abc
+\= Expect no match
+ xyzabc
+No match
+
+/a(?x: b c )d/
+ XabcdY
+ 0: abcd
+\= Expect no match
+ Xa b c d Y
+No match
+
+/((?x)x y z | a b c)/
+ XabcY
+ 0: abc
+ 1: abc
+ AxyzB
+ 0: xyz
+ 1: xyz
+
+/(?i)AB(?-i)C/
+ XabCY
+ 0: abC
+\= Expect no match
+ XabcY
+No match
+
+/((?i)AB(?-i)C|D)E/
+ abCE
+ 0: abCE
+ 1: abC
+ DE
+ 0: DE
+ 1: D
+\= Expect no match
+ abcE
+No match
+ abCe
+No match
+ dE
+No match
+ De
+No match
+
+/(.*)\d+\1/
+ abc123abc
+ 0: abc123abc
+ 1: abc
+ abc123bc
+ 0: bc123bc
+ 1: bc
+
+/(.*)\d+\1/s
+ abc123abc
+ 0: abc123abc
+ 1: abc
+ abc123bc
+ 0: bc123bc
+ 1: bc
+
+/((.*))\d+\1/
+ abc123abc
+ 0: abc123abc
+ 1: abc
+ 2: abc
+ abc123bc
+ 0: bc123bc
+ 1: bc
+ 2: bc
+
+# This tests for an IPv6 address in the form where it can have up to
+# eight components, one and only one of which is empty. This must be
+# an internal component.
+
+/^(?!:) # colon disallowed at start
+ (?: # start of item
+ (?: [0-9a-f]{1,4} | # 1-4 hex digits or
+ (?(1)0 | () ) ) # if null previously matched, fail; else null
+ : # followed by colon
+ ){1,7} # end item; 1-7 of them required
+ [0-9a-f]{1,4} $ # final hex number at end of string
+ (?(1)|.) # check that there was an empty component
+ /ix
+ a123::a123
+ 0: a123::a123
+ 1:
+ a123:b342::abcd
+ 0: a123:b342::abcd
+ 1:
+ a123:b342::324e:abcd
+ 0: a123:b342::324e:abcd
+ 1:
+ a123:ddde:b342::324e:abcd
+ 0: a123:ddde:b342::324e:abcd
+ 1:
+ a123:ddde:b342::324e:dcba:abcd
+ 0: a123:ddde:b342::324e:dcba:abcd
+ 1:
+ a123:ddde:9999:b342::324e:dcba:abcd
+ 0: a123:ddde:9999:b342::324e:dcba:abcd
+ 1:
+\= Expect no match
+ 1:2:3:4:5:6:7:8
+No match
+ a123:bce:ddde:9999:b342::324e:dcba:abcd
+No match
+ a123::9999:b342::324e:dcba:abcd
+No match
+ abcde:2:3:4:5:6:7:8
+No match
+ ::1
+No match
+ abcd:fee0:123::
+No match
+ :1
+No match
+ 1:
+No match
+
+#/[z\Qa-d]\E]/
+# z
+# 0: z
+# a
+# 0: a
+# -
+# 0: -
+# d
+# 0: d
+# ]
+# 0: ]
+#\= Expect no match
+# b
+#No match
+
+#TODO: PCRE has an optimization to make this workable, .NET does not
+#/(a+)*b/
+#\= Expect no match
+# aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+#No match
+
+# All these had to be updated because we understand unicode
+# and this looks like it's expecting single byte matches
+
+# .NET generates \xe4...not sure what's up, might just be different code pages
+/(?i)reg(?:ul(?:[aä]|ae)r|ex)/
+ REGular
+ 0: REGular
+ regulaer
+ 0: regulaer
+ Regex
+ 0: Regex
+ regulär
+ 0: regul\xc3\xa4r
+
+#/Åæåä[à-ÿÀ-ß]+/
+# Åæåäà
+# 0: \xc5\xe6\xe5\xe4\xe0
+# Åæåäÿ
+# 0: \xc5\xe6\xe5\xe4\xff
+# ÅæåäÀ
+# 0: \xc5\xe6\xe5\xe4\xc0
+# Åæåäß
+# 0: \xc5\xe6\xe5\xe4\xdf
+
+/(?<=Z)X./
+ \x84XAZXB
+ 0: XB
+
+/ab cd (?x) de fg/
+ ab cd defg
+ 0: ab cd defg
+
+/ab cd(?x) de fg/
+ ab cddefg
+ 0: ab cddefg
+\= Expect no match
+ abcddefg
+No match
+
+/(?<![^f]oo)(bar)/
+ foobarX
+ 0: bar
+ 1: bar
+\= Expect no match
+ boobarX
+No match
+
+/(?<![^f])X/
+ offX
+ 0: X
+\= Expect no match
+ onyX
+No match
+
+/(?<=[^f])X/
+ onyX
+ 0: X
+\= Expect no match
+ offX
+No match
+
+/(?:(?(1)a|b)(X))+/
+ bXaX
+ 0: bXaX
+ 1: X
+
+/(?:(?(1)\1a|b)(X|Y))+/
+ bXXaYYaY
+ 0: bXXaYYaY
+ 1: Y
+ bXYaXXaX
+ 0: bX
+ 1: X
+
+# TODO: I think this is a difference caused by the
+# collision of group numbers, but not sure
+#/()()()()()()()()()(?:(?(10)\10a|b)(X|Y))+/
+# bXXaYYaY
+# 0: bX
+# 1:
+# 2:
+# 3:
+# 4:
+# 5:
+# 6:
+# 7:
+# 8:
+# 9:
+#10: X
+
+/[[,abc,]+]/
+ abc]
+ 0: abc]
+ a,b]
+ 0: a,b]
+ [a,b,c]
+ 0: [a,b,c]
+
+/(?-x: )/x
+ A\x20B
+ 0:
+
+"(?x)(?-x: \s*#\s*)"
+ A # B
+ 0: #
+\= Expect no match
+ #
+No match
+
+"(?x-is)(?:(?-ixs) \s*#\s*) include"
+ A #include
+ 0: #include
+\= Expect no match
+ A#include
+No match
+ A #Include
+No match
+
+/a*b*\w/
+ aaabbbb
+ 0: aaabbbb
+ aaaa
+ 0: aaaa
+ a
+ 0: a
+
+/a*b?\w/
+ aaabbbb
+ 0: aaabb
+ aaaa
+ 0: aaaa
+ a
+ 0: a
+
+/a*b{0,4}\w/
+ aaabbbb
+ 0: aaabbbb
+ aaaa
+ 0: aaaa
+ a
+ 0: a
+
+/a*b{0,}\w/
+ aaabbbb
+ 0: aaabbbb
+ aaaa
+ 0: aaaa
+ a
+ 0: a
+
+/a*\d*\w/
+ 0a
+ 0: 0a
+ a
+ 0: a
+
+/a*b *\w/x
+ a
+ 0: a
+
+/a*b#comment
+ *\w/x
+ a
+ 0: a
+
+/a* b *\w/x
+ a
+ 0: a
+
+/^\w+=.*(\\\n.*)*/
+ abc=xyz\\\npqr
+ 0: abc=xyz\
+
+/(?=(\w+))\1:/
+ abcd:
+ 0: abcd:
+ 1: abcd
+
+/^(?=(\w+))\1:/
+ abcd:
+ 0: abcd:
+ 1: abcd
+
+#/^\Eabc/
+# abc
+# 0: abc
+
+#/^[\Eabc]/
+# a
+# 0: a
+#\= Expect no match
+# E
+#No match
+
+#/^[a-\Ec]/
+# b
+# 0: b
+#\= Expect no match
+# -
+#No match
+# E
+#No match
+
+#/^[a\E\E-\Ec]/
+# b
+# 0: b
+#\= Expect no match
+# -
+#No match
+# E
+#No match
+
+#/^[\E\Qa\E-\Qz\E]+/
+# b
+# 0: b
+#\= Expect no match
+# -
+#No match
+
+#/^[a\Q]bc\E]/
+# a
+# 0: a
+# ]
+# 0: ]
+# c
+# 0: c
+
+#/^[a-\Q\E]/
+# a
+# 0: a
+# -
+# 0: -
+
+/^(a()*)*/
+ aaaa
+ 0: aaaa
+ 1: a
+ 2:
+
+/^(?:a(?:(?:))*)*/
+ aaaa
+ 0: aaaa
+
+/^(a()+)+/
+ aaaa
+ 0: aaaa
+ 1: a
+ 2:
+
+/^(?:a(?:(?:))+)+/
+ aaaa
+ 0: aaaa
+
+/(a){0,3}(?(1)b|(c|))*D/
+ abbD
+ 0: abbD
+ 1: a
+ ccccD
+ 0: ccccD
+ 1: <unset>
+ 2:
+ D
+ 0: D
+ 1: <unset>
+ 2:
+
+# this is really long with debug -- removing for now
+#/(a|)*\d/
+# aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+# 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+# 1:
+#\= Expect no match
+# aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+#No match
+
+/(?>a|)*\d/
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+ 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+\= Expect no match
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+No match
+
+/(?:a|)*\d/
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+ 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+\= Expect no match
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+No match
+
+/^(?s)(?>.*)(?<!\n)/
+ abc
+ 0: abc
+\= Expect no match
+ abc\n
+No match
+
+/^(?![^\n]*\n\z)/
+ abc
+ 0:
+\= Expect no match
+ abc\n
+No match
+
+/\z(?<!\n)/
+ abc
+ 0:
+\= Expect no match
+ abc\n
+No match
+
+/(.*(.)?)*/
+ abcd
+ 0: abcd
+ 1:
+
+/( (A | (?(1)0|) )* )/x
+ abcd
+ 0:
+ 1:
+ 2:
+
+/( ( (?(1)0|) )* )/x
+ abcd
+ 0:
+ 1:
+ 2:
+
+/( (?(1)0|)* )/x
+ abcd
+ 0:
+ 1:
+
+/[[:abcd:xyz]]/
+ a]
+ 0: a]
+ :]
+ 0: :]
+
+/[abc[:x\]pqr]/
+ a
+ 0: a
+ [
+ 0: [
+ :
+ 0: :
+ ]
+ 0: ]
+ p
+ 0: p
+
+/.*[op][xyz]/
+\= Expect no match
+ fooabcfoo
+No match
+
+/(?(?=.*b)b|^)/
+ adc
+ 0:
+ abc
+ 0: b
+
+/(?(?=^.*b)b|^)/
+ adc
+ 0:
+\= Expect no match
+ abc
+No match
+
+/(?(?=.*b)b|^)*/
+ adc
+ 0:
+ abc
+ 0:
+
+/(?(?=.*b)b|^)+/
+ adc
+ 0:
+ abc
+ 0: b
+
+/(?(?=b).*b|^d)/
+ abc
+ 0: b
+
+/(?(?=.*b).*b|^d)/
+ abc
+ 0: ab
+
+/^%((?(?=[a])[^%])|b)*%$/
+ %ab%
+ 0: %ab%
+ 1:
+
+/(?i)a(?-i)b|c/
+ XabX
+ 0: ab
+ XAbX
+ 0: Ab
+ CcC
+ 0: c
+\= Expect no match
+ XABX
+No match
+
+/[\x00-\xff\s]+/
+ \x0a\x0b\x0c\x0d
+ 0: \x0a\x0b\x0c\x0d
+
+/(abc)\1/i
+\= Expect no match
+ abc
+No match
+
+/(abc)\1/
+\= Expect no match
+ abc
+No match
+
+/[^a]*/i
+ 12abc
+ 0: 12
+ 12ABC
+ 0: 12
+
+#Posses
+/[^a]*/i
+ 12abc
+ 0: 12
+ 12ABC
+ 0: 12
+
+/[^a]*?X/i
+\= Expect no match
+ 12abc
+No match
+ 12ABC
+No match
+
+/[^a]+?X/i
+\= Expect no match
+ 12abc
+No match
+ 12ABC
+No match
+
+/[^a]?X/i
+ 12aXbcX
+ 0: X
+ 12AXBCX
+ 0: X
+ BCX
+ 0: CX
+
+/[^a]??X/i
+ 12aXbcX
+ 0: X
+ 12AXBCX
+ 0: X
+ BCX
+ 0: CX
+
+/[^a]{2,3}/i
+ abcdef
+ 0: bcd
+ ABCDEF
+ 0: BCD
+
+/[^a]{2,3}?/i
+ abcdef
+ 0: bc
+ ABCDEF
+ 0: BC
+
+/((a|)+)+Z/
+ Z
+ 0: Z
+ 1:
+ 2:
+
+/(a)b|(a)c/
+ ac
+ 0: ac
+ 1: <unset>
+ 2: a
+
+/(?>(a))b|(a)c/
+ ac
+ 0: ac
+ 1: <unset>
+ 2: a
+
+/(?=(a))ab|(a)c/
+ ac
+ 0: ac
+ 1: <unset>
+ 2: a
+
+/((?>(a))b|(a)c)/
+ ac
+ 0: ac
+ 1: ac
+ 2: <unset>
+ 3: a
+
+/(?=(?>(a))b|(a)c)(..)/
+ ac
+ 0: ac
+ 1: <unset>
+ 2: a
+ 3: ac
+
+/(?>(?>(a))b|(a)c)/
+ ac
+ 0: ac
+ 1: <unset>
+ 2: a
+
+/((?>(a+)b)+(aabab))/
+ aaaabaaabaabab
+ 0: aaaabaaabaabab
+ 1: aaaabaaabaabab
+ 2: aaa
+ 3: aabab
+
+/(?>a+|ab)+?c/
+\= Expect no match
+ aabc
+No match
+
+/(?>a+|ab)+c/
+\= Expect no match
+ aabc
+No match
+
+/(?:a+|ab)+c/
+ aabc
+ 0: aabc
+
+/^(?:a|ab)+c/
+ aaaabc
+ 0: aaaabc
+
+/(?=abc){0}xyz/
+ xyz
+ 0: xyz
+
+/(?=abc){1}xyz/
+\= Expect no match
+ xyz
+No match
+
+/(?=(a))?./
+ ab
+ 0: a
+ 1: a
+ bc
+ 0: b
+
+/(?=(a))??./
+ ab
+ 0: a
+ bc
+ 0: b
+
+/^(?!a){0}\w+/
+ aaaaa
+ 0: aaaaa
+
+/(?<=(abc))?xyz/
+ abcxyz
+ 0: xyz
+ 1: abc
+ pqrxyz
+ 0: xyz
+
+/^[g<a>]+/
+ ggg<<<aaa>>>
+ 0: ggg<<<aaa>>>
+\= Expect no match
+ \\ga
+No match
+
+/^[ga]+/
+ gggagagaxyz
+ 0: gggagaga
+
+/[:a]xxx[b:]/
+ :xxx:
+ 0: :xxx:
+
+/(?<=a{2})b/i
+ xaabc
+ 0: b
+\= Expect no match
+ xabc
+No match
+
+/(?<!a{2})b/i
+ xabc
+ 0: b
+\= Expect no match
+ xaabc
+No match
+
+/(?<=[^a]{2})b/
+ axxbc
+ 0: b
+ aAAbc
+ 0: b
+\= Expect no match
+ xaabc
+No match
+
+/(?<=[^a]{2})b/i
+ axxbc
+ 0: b
+\= Expect no match
+ aAAbc
+No match
+ xaabc
+No match
+
+#/(?|(abc)|(xyz))\1/
+# abcabc
+# 0: abcabc
+# 1: abc
+# xyzxyz
+# 0: xyzxyz
+# 1: xyz
+#\= Expect no match
+# abcxyz
+#No match
+# xyzabc
+#No match
+
+#/(?|(abc)|(xyz))(?1)/
+# abcabc
+# 0: abcabc
+# 1: abc
+# xyzabc
+# 0: xyzabc
+# 1: xyz
+#\= Expect no match
+# xyzxyz
+#No match
+
+#/^X(?5)(a)(?|(b)|(q))(c)(d)(Y)/
+# XYabcdY
+# 0: XYabcdY
+# 1: a
+# 2: b
+# 3: c
+# 4: d
+# 5: Y
+
+#/^X(?7)(a)(?|(b|(r)(s))|(q))(c)(d)(Y)/
+# XYabcdY
+# 0: XYabcdY
+# 1: a
+# 2: b
+# 3: <unset>
+# 4: <unset>
+# 5: c
+# 6: d
+# 7: Y
+
+#/^X(?7)(a)(?|(b|(?|(r)|(t))(s))|(q))(c)(d)(Y)/
+# XYabcdY
+# 0: XYabcdY
+# 1: a
+# 2: b
+# 3: <unset>
+# 4: <unset>
+# 5: c
+# 6: d
+# 7: Y
+
+/(?'abc'\w+):\k<abc>{2}/
+ a:aaxyz
+ 0: a:aa
+ 1: a
+ ab:ababxyz
+ 0: ab:abab
+ 1: ab
+\= Expect no match
+ a:axyz
+No match
+ ab:abxyz
+No match
+
+/^(?<ab>a)? (?(ab)b|c) (?(ab)d|e)/x
+ abd
+ 0: abd
+ 1: a
+ ce
+ 0: ce
+
+# .NET has more consistent grouping numbers with these dupe groups for the two options
+/(?:a(?<quote> (?<apostrophe>')|(?<realquote>")) |b(?<quote> (?<apostrophe>')|(?<realquote>")) ) (?(quote)[a-z]+|[0-9]+)/x,dupnames
+ a\"aaaaa
+ 0: a"aaaaa
+ 1: "
+ 2: <unset>
+ 3: "
+ b\"aaaaa
+ 0: b"aaaaa
+ 1: "
+ 2: <unset>
+ 3: "
+\= Expect no match
+ b\"11111
+No match
+
+#/(?P<L1>(?P<L2>0)(?P>L1)|(?P>L2))/
+# 0
+# 0: 0
+# 1: 0
+# 00
+# 0: 00
+# 1: 00
+# 2: 0
+# 0000
+# 0: 0000
+# 1: 0000
+# 2: 0
+
+#/(?P<L1>(?P<L2>0)|(?P>L2)(?P>L1))/
+# 0
+# 0: 0
+# 1: 0
+# 2: 0
+# 00
+# 0: 0
+# 1: 0
+# 2: 0
+# 0000
+# 0: 0
+# 1: 0
+# 2: 0
+
+# Check the use of names for failure
+
+# Check opening parens in comment when seeking forward reference.
+
+#/(?P<abn>(?P=abn)xxx|)+/
+# xxx
+# 0:
+# 1:
+
+#Posses
+/^(a)?(\w)/
+ aaaaX
+ 0: aa
+ 1: a
+ 2: a
+ YZ
+ 0: Y
+ 1: <unset>
+ 2: Y
+
+#Posses
+/^(?:a)?(\w)/
+ aaaaX
+ 0: aa
+ 1: a
+ YZ
+ 0: Y
+ 1: Y
+
+/\A.*?(a|bc)/
+ ba
+ 0: ba
+ 1: a
+
+/\A.*?(?:a|bc|d)/
+ ba
+ 0: ba
+
+# --------------------------
+
+/(another)?(\1?)test/
+ hello world test
+ 0: test
+ 1: <unset>
+ 2:
+
+/(another)?(\1+)test/
+\= Expect no match
+ hello world test
+No match
+
+/((?:a?)*)*c/
+ aac
+ 0: aac
+ 1:
+
+/((?>a?)*)*c/
+ aac
+ 0: aac
+ 1:
+
+/(?>.*?a)(?<=ba)/
+ aba
+ 0: ba
+
+/(?:.*?a)(?<=ba)/
+ aba
+ 0: aba
+
+/(?>.*?a)b/s
+ aab
+ 0: ab
+
+/(?>.*?a)b/
+ aab
+ 0: ab
+
+/(?>^a)b/s
+\= Expect no match
+ aab
+No match
+
+/(?>.*?)(?<=(abcd)|(wxyz))/
+ alphabetabcd
+ 0:
+ 1: abcd
+ endingwxyz
+ 0:
+ 1: <unset>
+ 2: wxyz
+
+/(?>.*)(?<=(abcd)|(wxyz))/
+ alphabetabcd
+ 0: alphabetabcd
+ 1: abcd
+ endingwxyz
+ 0: endingwxyz
+ 1: <unset>
+ 2: wxyz
+
+"(?>.*)foo"
+\= Expect no match
+ abcdfooxyz
+No match
+
+"(?>.*?)foo"
+ abcdfooxyz
+ 0: foo
+
+# Tests that try to figure out how Perl works. My hypothesis is that the first
+# verb that is backtracked onto is the one that acts. This seems to be the case
+# almost all the time, but there is one exception that is perhaps a bug.
+
+/a(?=bc).|abd/
+ abd
+ 0: abd
+ abc
+ 0: ab
+
+/a(?>bc)d|abd/
+ abceabd
+ 0: abd
+
+# These tests were formerly in test 2, but changes in PCRE and Perl have
+# made them compatible.
+
+/^(a)?(?(1)a|b)+$/
+\= Expect no match
+ a
+No match
+
+# ----
+
+/^\d*\w{4}/
+ 1234
+ 0: 1234
+\= Expect no match
+ 123
+No match
+
+/^[^b]*\w{4}/
+ aaaa
+ 0: aaaa
+\= Expect no match
+ aaa
+No match
+
+/^[^b]*\w{4}/i
+ aaaa
+ 0: aaaa
+\= Expect no match
+ aaa
+No match
+
+/^a*\w{4}/
+ aaaa
+ 0: aaaa
+\= Expect no match
+ aaa
+No match
+
+/^a*\w{4}/i
+ aaaa
+ 0: aaaa
+\= Expect no match
+ aaa
+No match
+
+/(?:(?<n>foo)|(?<n>bar))\k<n>/dupnames
+ foofoo
+ 0: foofoo
+ 1: foo
+ barbar
+ 0: barbar
+ 1: bar
+
+# A notable difference between PCRE and .NET. According to
+# the PCRE docs:
+# If you make a subroutine call to a non-unique named
+# subpattern, the one that corresponds to the first
+# occurrence of the name is used. In the absence of
+# duplicate numbers (see the previous section) this is
+# the one with the lowest number.
+# .NET takes the most recently captured number according to MSDN:
+# A backreference refers to the most recent definition of
+# a group (the definition most immediately to the left,
+# when matching left to right). When a group makes multiple
+# captures, a backreference refers to the most recent capture.
+
+#/(?<n>A)(?:(?<n>foo)|(?<n>bar))\k<n>/dupnames
+# AfooA
+# 0: AfooA
+# 1: A
+# 2: foo
+# AbarA
+# 0: AbarA
+# 1: A
+# 2: <unset>
+# 3: bar
+#\= Expect no match
+# Afoofoo
+#No match
+# Abarbar
+#No match
+
+/^(\d+)\s+IN\s+SOA\s+(\S+)\s+(\S+)\s*\(\s*$/
+ 1 IN SOA non-sp1 non-sp2(
+ 0: 1 IN SOA non-sp1 non-sp2(
+ 1: 1
+ 2: non-sp1
+ 3: non-sp2
+
+# TODO: .NET's group number ordering here in the second example is a bit odd
+/^ (?:(?<A>A)|(?'B'B)(?<A>A)) (?(A)x) (?(B)y)$/x,dupnames
+ Ax
+ 0: Ax
+ 1: A
+ BAxy
+ 0: BAxy
+ 1: A
+ 2: B
+
+/ ^ a + b $ /x
+ aaaab
+ 0: aaaab
+
+/ ^ a + #comment
+ b $ /x
+ aaaab
+ 0: aaaab
+
+/ ^ a + #comment
+ #comment
+ b $ /x
+ aaaab
+ 0: aaaab
+
+/ ^ (?> a + ) b $ /x
+ aaaab
+ 0: aaaab
+
+/ ^ ( a + ) + \w $ /x
+ aaaab
+ 0: aaaab
+ 1: aaaa
+
+/(?:x|(?:(xx|yy)+|x|x|x|x|x)|a|a|a)bc/
+\= Expect no match
+ acb
+No match
+
+#Posses
+#/\A(?:[^\"]+|\"(?:[^\"]*|\"\")*\")+/
+# NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
+# 0: NON QUOTED "QUOT""ED" AFTER
+
+#Posses
+#/\A(?:[^\"]+|\"(?:[^\"]+|\"\")*\")+/
+# NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
+# 0: NON QUOTED "QUOT""ED" AFTER
+
+#Posses
+#/\A(?:[^\"]+|\"(?:[^\"]+|\"\")+\")+/
+# NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
+# 0: NON QUOTED "QUOT""ED" AFTER
+
+#Posses
+#/\A([^\"1]+|[\"2]([^\"3]*|[\"4][\"5])*[\"6])+/
+# NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
+# 0: NON QUOTED "QUOT""ED" AFTER
+# 1: AFTER
+# 2:
+
+/^\w+(?>\s*)(?<=\w)/
+ test test
+ 0: tes
+
+#/(?P<Name>a)?(?P<Name2>b)?(?(<Name>)c|d)*l/
+# acl
+# 0: acl
+# 1: a
+# bdl
+# 0: bdl
+# 1: <unset>
+# 2: b
+# adl
+# 0: dl
+# bcl
+# 0: l
+
+/\sabc/
+ \x0babc
+ 0: \x0babc
+
+#/[\Qa]\E]+/
+# aa]]
+# 0: aa]]
+
+#/[\Q]a\E]+/
+# aa]]
+# 0: aa]]
+
+/A((((((((a))))))))\8B/
+ AaaB
+ 0: AaaB
+ 1: a
+ 2: a
+ 3: a
+ 4: a
+ 5: a
+ 6: a
+ 7: a
+ 8: a
+
+/A(((((((((a)))))))))\9B/
+ AaaB
+ 0: AaaB
+ 1: a
+ 2: a
+ 3: a
+ 4: a
+ 5: a
+ 6: a
+ 7: a
+ 8: a
+ 9: a
+
+/(|ab)*?d/
+ abd
+ 0: abd
+ 1: ab
+ xyd
+ 0: d
+
+/(\2|a)(\1)/
+ aaa
+ 0: aa
+ 1: a
+ 2: a
+
+/(\2)(\1)/
+
+"Z*(|d*){216}"
+
+/((((((((((((x))))))))))))\12/
+ xx
+ 0: xx
+ 1: x
+ 2: x
+ 3: x
+ 4: x
+ 5: x
+ 6: x
+ 7: x
+ 8: x
+ 9: x
+10: x
+11: x
+12: x
+
+#"(?|(\k'Pm')|(?'Pm'))"
+# abcd
+# 0:
+# 1:
+
+#/(?|(aaa)|(b))\g{1}/
+# aaaaaa
+# 0: aaaaaa
+# 1: aaa
+# bb
+# 0: bb
+# 1: b
+
+#/(?|(aaa)|(b))(?1)/
+# aaaaaa
+# 0: aaaaaa
+# 1: aaa
+# baaa
+# 0: baaa
+# 1: b
+#\= Expect no match
+# bb
+#No match
+
+#/(?|(aaa)|(b))/
+# xaaa
+# 0: aaa
+# 1: aaa
+# xbc
+# 0: b
+# 1: b
+
+#/(?|(?'a'aaa)|(?'a'b))\k'a'/
+# aaaaaa
+# 0: aaaaaa
+# 1: aaa
+# bb
+# 0: bb
+# 1: b
+
+#/(?|(?'a'aaa)|(?'a'b))(?'a'cccc)\k'a'/dupnames
+# aaaccccaaa
+# 0: aaaccccaaa
+# 1: aaa
+# 2: cccc
+# bccccb
+# 0: bccccb
+# 1: b
+# 2: cccc
+
+# End of testinput1