summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/dlclark/regexp2/runner.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/dlclark/regexp2/runner.go')
-rw-r--r--vendor/github.com/dlclark/regexp2/runner.go1621
1 files changed, 1621 insertions, 0 deletions
diff --git a/vendor/github.com/dlclark/regexp2/runner.go b/vendor/github.com/dlclark/regexp2/runner.go
new file mode 100644
index 0000000000..2d84a934b0
--- /dev/null
+++ b/vendor/github.com/dlclark/regexp2/runner.go
@@ -0,0 +1,1621 @@
+package regexp2
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "math"
+ "strconv"
+ "strings"
+ "time"
+ "unicode"
+
+ "github.com/dlclark/regexp2/syntax"
+)
+
+type runner struct {
+ re *Regexp
+ code *syntax.Code
+
+ runtextstart int // starting point for search
+
+ runtext []rune // text to search
+ runtextpos int // current position in text
+ runtextend int
+
+ // The backtracking stack. Opcodes use this to store data regarding
+ // what they have matched and where to backtrack to. Each "frame" on
+ // the stack takes the form of [CodePosition Data1 Data2...], where
+ // CodePosition is the position of the current opcode and
+ // the data values are all optional. The CodePosition can be negative, and
+ // these values (also called "back2") are used by the BranchMark family of opcodes
+ // to indicate whether they are backtracking after a successful or failed
+ // match.
+ // When we backtrack, we pop the CodePosition off the stack, set the current
+ // instruction pointer to that code position, and mark the opcode
+ // with a backtracking flag ("Back"). Each opcode then knows how to
+ // handle its own data.
+ runtrack []int
+ runtrackpos int
+
+ // This stack is used to track text positions across different opcodes.
+ // For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark
+ // pair. SetMark records the text position before we match a*b. Then
+ // CaptureMark uses that position to figure out where the capture starts.
+ // Opcodes which push onto this stack are always paired with other opcodes
+ // which will pop the value from it later. A successful match should mean
+ // that this stack is empty.
+ runstack []int
+ runstackpos int
+
+ // The crawl stack is used to keep track of captures. Every time a group
+ // has a capture, we push its group number onto the runcrawl stack. In
+ // the case of a balanced match, we push BOTH groups onto the stack.
+ runcrawl []int
+ runcrawlpos int
+
+ runtrackcount int // count of states that may do backtracking
+
+ runmatch *Match // result object
+
+ ignoreTimeout bool
+ timeout time.Duration // timeout in milliseconds (needed for actual)
+ timeoutChecksToSkip int
+ timeoutAt time.Time
+
+ operator syntax.InstOp
+ codepos int
+ rightToLeft bool
+ caseInsensitive bool
+}
+
+// run searches for matches and can continue from the previous match
+//
+// quick is usually false, but can be true to not return matches, just put it in caches
+// textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input
+// input is the string to search for our regex pattern
+func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) {
+
+ // get a cached runner
+ runner := re.getRunner()
+ defer re.putRunner(runner)
+
+ if textstart < 0 {
+ if re.RightToLeft() {
+ textstart = len(input)
+ } else {
+ textstart = 0
+ }
+ }
+
+ return runner.scan(input, textstart, quick, re.MatchTimeout)
+}
+
+// Scans the string to find the first match. Uses the Match object
+// both to feed text in and as a place to store matches that come out.
+//
+// All the action is in the Go() method. Our
+// responsibility is to load up the class members before
+// calling Go.
+//
+// The optimizer can compute a set of candidate starting characters,
+// and we could use a separate method Skip() that will quickly scan past
+// any characters that we know can't match.
+func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) {
+ r.timeout = timeout
+ r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout)
+ r.runtextstart = textstart
+ r.runtext = rt
+ r.runtextend = len(rt)
+
+ stoppos := r.runtextend
+ bump := 1
+
+ if r.re.RightToLeft() {
+ bump = -1
+ stoppos = 0
+ }
+
+ r.runtextpos = textstart
+ initted := false
+
+ r.startTimeoutWatch()
+ for {
+ if r.re.Debug() {
+ //fmt.Printf("\nSearch content: %v\n", string(r.runtext))
+ fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend)
+ fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos)
+ }
+
+ if r.findFirstChar() {
+ if err := r.checkTimeout(); err != nil {
+ return nil, err
+ }
+
+ if !initted {
+ r.initMatch()
+ initted = true
+ }
+
+ if r.re.Debug() {
+ fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos)
+ }
+
+ if err := r.execute(); err != nil {
+ return nil, err
+ }
+
+ if r.runmatch.matchcount[0] > 0 {
+ // We'll return a match even if it touches a previous empty match
+ return r.tidyMatch(quick), nil
+ }
+
+ // reset state for another go
+ r.runtrackpos = len(r.runtrack)
+ r.runstackpos = len(r.runstack)
+ r.runcrawlpos = len(r.runcrawl)
+ }
+
+ // failure!
+
+ if r.runtextpos == stoppos {
+ r.tidyMatch(true)
+ return nil, nil
+ }
+
+ // Recognize leading []* and various anchors, and bump on failure accordingly
+
+ // r.bump by one and start again
+
+ r.runtextpos += bump
+ }
+ // We never get here
+}
+
+func (r *runner) execute() error {
+
+ r.goTo(0)
+
+ for {
+
+ if r.re.Debug() {
+ r.dumpState()
+ }
+
+ if err := r.checkTimeout(); err != nil {
+ return err
+ }
+
+ switch r.operator {
+ case syntax.Stop:
+ return nil
+
+ case syntax.Nothing:
+ break
+
+ case syntax.Goto:
+ r.goTo(r.operand(0))
+ continue
+
+ case syntax.Testref:
+ if !r.runmatch.isMatched(r.operand(0)) {
+ break
+ }
+ r.advance(1)
+ continue
+
+ case syntax.Lazybranch:
+ r.trackPush1(r.textPos())
+ r.advance(1)
+ continue
+
+ case syntax.Lazybranch | syntax.Back:
+ r.trackPop()
+ r.textto(r.trackPeek())
+ r.goTo(r.operand(0))
+ continue
+
+ case syntax.Setmark:
+ r.stackPush(r.textPos())
+ r.trackPush()
+ r.advance(0)
+ continue
+
+ case syntax.Nullmark:
+ r.stackPush(-1)
+ r.trackPush()
+ r.advance(0)
+ continue
+
+ case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back:
+ r.stackPop()
+ break
+
+ case syntax.Getmark:
+ r.stackPop()
+ r.trackPush1(r.stackPeek())
+ r.textto(r.stackPeek())
+ r.advance(0)
+ continue
+
+ case syntax.Getmark | syntax.Back:
+ r.trackPop()
+ r.stackPush(r.trackPeek())
+ break
+
+ case syntax.Capturemark:
+ if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) {
+ break
+ }
+ r.stackPop()
+ if r.operand(1) != -1 {
+ r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos())
+ } else {
+ r.capture(r.operand(0), r.stackPeek(), r.textPos())
+ }
+ r.trackPush1(r.stackPeek())
+
+ r.advance(2)
+
+ continue
+
+ case syntax.Capturemark | syntax.Back:
+ r.trackPop()
+ r.stackPush(r.trackPeek())
+ r.uncapture()
+ if r.operand(0) != -1 && r.operand(1) != -1 {
+ r.uncapture()
+ }
+
+ break
+
+ case syntax.Branchmark:
+ r.stackPop()
+
+ matched := r.textPos() - r.stackPeek()
+
+ if matched != 0 { // Nonempty match -> loop now
+ r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos
+ r.stackPush(r.textPos()) // Make new mark
+ r.goTo(r.operand(0)) // Loop
+ } else { // Empty match -> straight now
+ r.trackPushNeg1(r.stackPeek()) // Save old mark
+ r.advance(1) // Straight
+ }
+ continue
+
+ case syntax.Branchmark | syntax.Back:
+ r.trackPopN(2)
+ r.stackPop()
+ r.textto(r.trackPeekN(1)) // Recall position
+ r.trackPushNeg1(r.trackPeek()) // Save old mark
+ r.advance(1) // Straight
+ continue
+
+ case syntax.Branchmark | syntax.Back2:
+ r.trackPop()
+ r.stackPush(r.trackPeek()) // Recall old mark
+ break // Backtrack
+
+ case syntax.Lazybranchmark:
+ {
+ // We hit this the first time through a lazy loop and after each
+ // successful match of the inner expression. It simply continues
+ // on and doesn't loop.
+ r.stackPop()
+
+ oldMarkPos := r.stackPeek()
+
+ if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state
+ if oldMarkPos != -1 {
+ r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos
+ } else {
+ r.trackPush2(r.textPos(), r.textPos())
+ }
+ } else {
+ // The inner expression found an empty match, so we'll go directly to 'back2' if we
+ // backtrack. In this case, we need to push something on the stack, since back2 pops.
+ // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
+ // position associated with that empty match.
+ r.stackPush(oldMarkPos)
+
+ r.trackPushNeg1(r.stackPeek()) // Save old mark
+ }
+ r.advance(1)
+ continue
+ }
+
+ case syntax.Lazybranchmark | syntax.Back:
+
+ // After the first time, Lazybranchmark | syntax.Back occurs
+ // with each iteration of the loop, and therefore with every attempted
+ // match of the inner expression. We'll try to match the inner expression,
+ // then go back to Lazybranchmark if successful. If the inner expression
+ // fails, we go to Lazybranchmark | syntax.Back2
+
+ r.trackPopN(2)
+ pos := r.trackPeekN(1)
+ r.trackPushNeg1(r.trackPeek()) // Save old mark
+ r.stackPush(pos) // Make new mark
+ r.textto(pos) // Recall position
+ r.goTo(r.operand(0)) // Loop
+ continue
+
+ case syntax.Lazybranchmark | syntax.Back2:
+ // The lazy loop has failed. We'll do a true backtrack and
+ // start over before the lazy loop.
+ r.stackPop()
+ r.trackPop()
+ r.stackPush(r.trackPeek()) // Recall old mark
+ break
+
+ case syntax.Setcount:
+ r.stackPush2(r.textPos(), r.operand(0))
+ r.trackPush()
+ r.advance(1)
+ continue
+
+ case syntax.Nullcount:
+ r.stackPush2(-1, r.operand(0))
+ r.trackPush()
+ r.advance(1)
+ continue
+
+ case syntax.Setcount | syntax.Back:
+ r.stackPopN(2)
+ break
+
+ case syntax.Nullcount | syntax.Back:
+ r.stackPopN(2)
+ break
+
+ case syntax.Branchcount:
+ // r.stackPush:
+ // 0: Mark
+ // 1: Count
+
+ r.stackPopN(2)
+ mark := r.stackPeek()
+ count := r.stackPeekN(1)
+ matched := r.textPos() - mark
+
+ if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now
+ r.trackPushNeg2(mark, count) // Save old mark, count
+ r.advance(2) // Straight
+ } else { // Nonempty match -> count+loop now
+ r.trackPush1(mark) // remember mark
+ r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
+ r.goTo(r.operand(0)) // Loop
+ }
+ continue
+
+ case syntax.Branchcount | syntax.Back:
+ // r.trackPush:
+ // 0: Previous mark
+ // r.stackPush:
+ // 0: Mark (= current pos, discarded)
+ // 1: Count
+ r.trackPop()
+ r.stackPopN(2)
+ if r.stackPeekN(1) > 0 { // Positive -> can go straight
+ r.textto(r.stackPeek()) // Zap to mark
+ r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count
+ r.advance(2) // Straight
+ continue
+ }
+ r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count
+ break
+
+ case syntax.Branchcount | syntax.Back2:
+ // r.trackPush:
+ // 0: Previous mark
+ // 1: Previous count
+ r.trackPopN(2)
+ r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count
+ break // Backtrack
+
+ case syntax.Lazybranchcount:
+ // r.stackPush:
+ // 0: Mark
+ // 1: Count
+
+ r.stackPopN(2)
+ mark := r.stackPeek()
+ count := r.stackPeekN(1)
+
+ if count < 0 { // Negative count -> loop now
+ r.trackPushNeg1(mark) // Save old mark
+ r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
+ r.goTo(r.operand(0)) // Loop
+ } else { // Nonneg count -> straight now
+ r.trackPush3(mark, count, r.textPos()) // Save mark, count, position
+ r.advance(2) // Straight
+ }
+ continue
+
+ case syntax.Lazybranchcount | syntax.Back:
+ // r.trackPush:
+ // 0: Mark
+ // 1: Count
+ // 2: r.textPos
+
+ r.trackPopN(3)
+ mark := r.trackPeek()
+ textpos := r.trackPeekN(2)
+
+ if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop
+ r.textto(textpos) // Recall position
+ r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count
+ r.trackPushNeg1(mark) // Save old mark
+ r.goTo(r.operand(0)) // Loop
+ continue
+ } else { // Max loops or empty match -> backtrack
+ r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count
+ break // backtrack
+ }
+
+ case syntax.Lazybranchcount | syntax.Back2:
+ // r.trackPush:
+ // 0: Previous mark
+ // r.stackPush:
+ // 0: Mark (== current pos, discarded)
+ // 1: Count
+ r.trackPop()
+ r.stackPopN(2)
+ r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count
+ break // Backtrack
+
+ case syntax.Setjump:
+ r.stackPush2(r.trackpos(), r.crawlpos())
+ r.trackPush()
+ r.advance(0)
+ continue
+
+ case syntax.Setjump | syntax.Back:
+ r.stackPopN(2)
+ break
+
+ case syntax.Backjump:
+ // r.stackPush:
+ // 0: Saved trackpos
+ // 1: r.crawlpos
+ r.stackPopN(2)
+ r.trackto(r.stackPeek())
+
+ for r.crawlpos() != r.stackPeekN(1) {
+ r.uncapture()
+ }
+
+ break
+
+ case syntax.Forejump:
+ // r.stackPush:
+ // 0: Saved trackpos
+ // 1: r.crawlpos
+ r.stackPopN(2)
+ r.trackto(r.stackPeek())
+ r.trackPush1(r.stackPeekN(1))
+ r.advance(0)
+ continue
+
+ case syntax.Forejump | syntax.Back:
+ // r.trackPush:
+ // 0: r.crawlpos
+ r.trackPop()
+
+ for r.crawlpos() != r.trackPeek() {
+ r.uncapture()
+ }
+
+ break
+
+ case syntax.Bol:
+ if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.Eol:
+ if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.Boundary:
+ if !r.isBoundary(r.textPos(), 0, r.runtextend) {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.Nonboundary:
+ if r.isBoundary(r.textPos(), 0, r.runtextend) {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.ECMABoundary:
+ if !r.isECMABoundary(r.textPos(), 0, r.runtextend) {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.NonECMABoundary:
+ if r.isECMABoundary(r.textPos(), 0, r.runtextend) {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.Beginning:
+ if r.leftchars() > 0 {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.Start:
+ if r.textPos() != r.textstart() {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.EndZ:
+ if r.rightchars() > 1 || r.rightchars() == 1 && r.charAt(r.textPos()) != '\n' {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.End:
+ if r.rightchars() > 0 {
+ break
+ }
+ r.advance(0)
+ continue
+
+ case syntax.One:
+ if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) {
+ break
+ }
+
+ r.advance(1)
+ continue
+
+ case syntax.Notone:
+ if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) {
+ break
+ }
+
+ r.advance(1)
+ continue
+
+ case syntax.Set:
+
+ if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
+ break
+ }
+
+ r.advance(1)
+ continue
+
+ case syntax.Multi:
+ if !r.runematch(r.code.Strings[r.operand(0)]) {
+ break
+ }
+
+ r.advance(1)
+ continue
+
+ case syntax.Ref:
+
+ capnum := r.operand(0)
+
+ if r.runmatch.isMatched(capnum) {
+ if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) {
+ break
+ }
+ } else {
+ if (r.re.options & ECMAScript) == 0 {
+ break
+ }
+ }
+
+ r.advance(1)
+ continue
+
+ case syntax.Onerep:
+
+ c := r.operand(1)
+
+ if r.forwardchars() < c {
+ break
+ }
+
+ ch := rune(r.operand(0))
+
+ for c > 0 {
+ if r.forwardcharnext() != ch {
+ goto BreakBackward
+ }
+ c--
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Notonerep:
+
+ c := r.operand(1)
+
+ if r.forwardchars() < c {
+ break
+ }
+ ch := rune(r.operand(0))
+
+ for c > 0 {
+ if r.forwardcharnext() == ch {
+ goto BreakBackward
+ }
+ c--
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Setrep:
+
+ c := r.operand(1)
+
+ if r.forwardchars() < c {
+ break
+ }
+
+ set := r.code.Sets[r.operand(0)]
+
+ for c > 0 {
+ if !set.CharIn(r.forwardcharnext()) {
+ goto BreakBackward
+ }
+ c--
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Oneloop:
+
+ c := r.operand(1)
+
+ if c > r.forwardchars() {
+ c = r.forwardchars()
+ }
+
+ ch := rune(r.operand(0))
+ i := c
+
+ for ; i > 0; i-- {
+ if r.forwardcharnext() != ch {
+ r.backwardnext()
+ break
+ }
+ }
+
+ if c > i {
+ r.trackPush2(c-i-1, r.textPos()-r.bump())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Notoneloop:
+
+ c := r.operand(1)
+
+ if c > r.forwardchars() {
+ c = r.forwardchars()
+ }
+
+ ch := rune(r.operand(0))
+ i := c
+
+ for ; i > 0; i-- {
+ if r.forwardcharnext() == ch {
+ r.backwardnext()
+ break
+ }
+ }
+
+ if c > i {
+ r.trackPush2(c-i-1, r.textPos()-r.bump())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Setloop:
+
+ c := r.operand(1)
+
+ if c > r.forwardchars() {
+ c = r.forwardchars()
+ }
+
+ set := r.code.Sets[r.operand(0)]
+ i := c
+
+ for ; i > 0; i-- {
+ if !set.CharIn(r.forwardcharnext()) {
+ r.backwardnext()
+ break
+ }
+ }
+
+ if c > i {
+ r.trackPush2(c-i-1, r.textPos()-r.bump())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back:
+
+ r.trackPopN(2)
+ i := r.trackPeek()
+ pos := r.trackPeekN(1)
+
+ r.textto(pos)
+
+ if i > 0 {
+ r.trackPush2(i-1, pos-r.bump())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Setloop | syntax.Back:
+
+ r.trackPopN(2)
+ i := r.trackPeek()
+ pos := r.trackPeekN(1)
+
+ r.textto(pos)
+
+ if i > 0 {
+ r.trackPush2(i-1, pos-r.bump())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Onelazy, syntax.Notonelazy:
+
+ c := r.operand(1)
+
+ if c > r.forwardchars() {
+ c = r.forwardchars()
+ }
+
+ if c > 0 {
+ r.trackPush2(c-1, r.textPos())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Setlazy:
+
+ c := r.operand(1)
+
+ if c > r.forwardchars() {
+ c = r.forwardchars()
+ }
+
+ if c > 0 {
+ r.trackPush2(c-1, r.textPos())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Onelazy | syntax.Back:
+
+ r.trackPopN(2)
+ pos := r.trackPeekN(1)
+ r.textto(pos)
+
+ if r.forwardcharnext() != rune(r.operand(0)) {
+ break
+ }
+
+ i := r.trackPeek()
+
+ if i > 0 {
+ r.trackPush2(i-1, pos+r.bump())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Notonelazy | syntax.Back:
+
+ r.trackPopN(2)
+ pos := r.trackPeekN(1)
+ r.textto(pos)
+
+ if r.forwardcharnext() == rune(r.operand(0)) {
+ break
+ }
+
+ i := r.trackPeek()
+
+ if i > 0 {
+ r.trackPush2(i-1, pos+r.bump())
+ }
+
+ r.advance(2)
+ continue
+
+ case syntax.Setlazy | syntax.Back:
+
+ r.trackPopN(2)
+ pos := r.trackPeekN(1)
+ r.textto(pos)
+
+ if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
+ break
+ }
+
+ i := r.trackPeek()
+
+ if i > 0 {
+ r.trackPush2(i-1, pos+r.bump())
+ }
+
+ r.advance(2)
+ continue
+
+ default:
+ return errors.New("unknown state in regex runner")
+ }
+
+ BreakBackward:
+ ;
+
+ // "break Backward" comes here:
+ r.backtrack()
+ }
+}
+
+// increase the size of stack and track storage
+func (r *runner) ensureStorage() {
+ if r.runstackpos < r.runtrackcount*4 {
+ doubleIntSlice(&r.runstack, &r.runstackpos)
+ }
+ if r.runtrackpos < r.runtrackcount*4 {
+ doubleIntSlice(&r.runtrack, &r.runtrackpos)
+ }
+}
+
+func doubleIntSlice(s *[]int, pos *int) {
+ oldLen := len(*s)
+ newS := make([]int, oldLen*2)
+
+ copy(newS[oldLen:], *s)
+ *pos += oldLen
+ *s = newS
+}
+
+// Save a number on the longjump unrolling stack
+func (r *runner) crawl(i int) {
+ if r.runcrawlpos == 0 {
+ doubleIntSlice(&r.runcrawl, &r.runcrawlpos)
+ }
+ r.runcrawlpos--
+ r.runcrawl[r.runcrawlpos] = i
+}
+
+// Remove a number from the longjump unrolling stack
+func (r *runner) popcrawl() int {
+ val := r.runcrawl[r.runcrawlpos]
+ r.runcrawlpos++
+ return val
+}
+
+// Get the height of the stack
+func (r *runner) crawlpos() int {
+ return len(r.runcrawl) - r.runcrawlpos
+}
+
+func (r *runner) advance(i int) {
+ r.codepos += (i + 1)
+ r.setOperator(r.code.Codes[r.codepos])
+}
+
+func (r *runner) goTo(newpos int) {
+ // when branching backward, ensure storage
+ if newpos < r.codepos {
+ r.ensureStorage()
+ }
+
+ r.setOperator(r.code.Codes[newpos])
+ r.codepos = newpos
+}
+
+func (r *runner) textto(newpos int) {
+ r.runtextpos = newpos
+}
+
+func (r *runner) trackto(newpos int) {
+ r.runtrackpos = len(r.runtrack) - newpos
+}
+
+func (r *runner) textstart() int {
+ return r.runtextstart
+}
+
+func (r *runner) textPos() int {
+ return r.runtextpos
+}
+
+// push onto the backtracking stack
+func (r *runner) trackpos() int {
+ return len(r.runtrack) - r.runtrackpos
+}
+
+func (r *runner) trackPush() {
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = r.codepos
+}
+
+func (r *runner) trackPush1(I1 int) {
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I1
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = r.codepos
+}
+
+func (r *runner) trackPush2(I1, I2 int) {
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I1
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I2
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = r.codepos
+}
+
+func (r *runner) trackPush3(I1, I2, I3 int) {
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I1
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I2
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I3
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = r.codepos
+}
+
+func (r *runner) trackPushNeg1(I1 int) {
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I1
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = -r.codepos
+}
+
+func (r *runner) trackPushNeg2(I1, I2 int) {
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I1
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = I2
+ r.runtrackpos--
+ r.runtrack[r.runtrackpos] = -r.codepos
+}
+
+func (r *runner) backtrack() {
+ newpos := r.runtrack[r.runtrackpos]
+ r.runtrackpos++
+
+ if r.re.Debug() {
+ if newpos < 0 {
+ fmt.Printf(" Backtracking (back2) to code position %v\n", -newpos)
+ } else {
+ fmt.Printf(" Backtracking to code position %v\n", newpos)
+ }
+ }
+
+ if newpos < 0 {
+ newpos = -newpos
+ r.setOperator(r.code.Codes[newpos] | syntax.Back2)
+ } else {
+ r.setOperator(r.code.Codes[newpos] | syntax.Back)
+ }
+
+ // When branching backward, ensure storage
+ if newpos < r.codepos {
+ r.ensureStorage()
+ }
+
+ r.codepos = newpos
+}
+
+func (r *runner) setOperator(op int) {
+ r.caseInsensitive = (0 != (op & syntax.Ci))
+ r.rightToLeft = (0 != (op & syntax.Rtl))
+ r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci))
+}
+
+func (r *runner) trackPop() {
+ r.runtrackpos++
+}
+
+// pop framesize items from the backtracking stack
+func (r *runner) trackPopN(framesize int) {
+ r.runtrackpos += framesize
+}
+
+// Technically we are actually peeking at items already popped. So if you want to
+// get and pop the top item from the stack, you do
+// r.trackPop();
+// r.trackPeek();
+func (r *runner) trackPeek() int {
+ return r.runtrack[r.runtrackpos-1]
+}
+
+// get the ith element down on the backtracking stack
+func (r *runner) trackPeekN(i int) int {
+ return r.runtrack[r.runtrackpos-i-1]
+}
+
+// Push onto the grouping stack
+func (r *runner) stackPush(I1 int) {
+ r.runstackpos--
+ r.runstack[r.runstackpos] = I1
+}
+
+func (r *runner) stackPush2(I1, I2 int) {
+ r.runstackpos--
+ r.runstack[r.runstackpos] = I1
+ r.runstackpos--
+ r.runstack[r.runstackpos] = I2
+}
+
+func (r *runner) stackPop() {
+ r.runstackpos++
+}
+
+// pop framesize items from the grouping stack
+func (r *runner) stackPopN(framesize int) {
+ r.runstackpos += framesize
+}
+
+// Technically we are actually peeking at items already popped. So if you want to
+// get and pop the top item from the stack, you do
+// r.stackPop();
+// r.stackPeek();
+func (r *runner) stackPeek() int {
+ return r.runstack[r.runstackpos-1]
+}
+
+// get the ith element down on the grouping stack
+func (r *runner) stackPeekN(i int) int {
+ return r.runstack[r.runstackpos-i-1]
+}
+
+func (r *runner) operand(i int) int {
+ return r.code.Codes[r.codepos+i+1]
+}
+
+func (r *runner) leftchars() int {
+ return r.runtextpos
+}
+
+func (r *runner) rightchars() int {
+ return r.runtextend - r.runtextpos
+}
+
+func (r *runner) bump() int {
+ if r.rightToLeft {
+ return -1
+ }
+ return 1
+}
+
+func (r *runner) forwardchars() int {
+ if r.rightToLeft {
+ return r.runtextpos
+ }
+ return r.runtextend - r.runtextpos
+}
+
+func (r *runner) forwardcharnext() rune {
+ var ch rune
+ if r.rightToLeft {
+ r.runtextpos--
+ ch = r.runtext[r.runtextpos]
+ } else {
+ ch = r.runtext[r.runtextpos]
+ r.runtextpos++
+ }
+
+ if r.caseInsensitive {
+ return unicode.ToLower(ch)
+ }
+ return ch
+}
+
+func (r *runner) runematch(str []rune) bool {
+ var pos int
+
+ c := len(str)
+ if !r.rightToLeft {
+ if r.runtextend-r.runtextpos < c {
+ return false
+ }
+
+ pos = r.runtextpos + c
+ } else {
+ if r.runtextpos-0 < c {
+ return false
+ }
+
+ pos = r.runtextpos
+ }
+
+ if !r.caseInsensitive {
+ for c != 0 {
+ c--
+ pos--
+ if str[c] != r.runtext[pos] {
+ return false
+ }
+ }
+ } else {
+ for c != 0 {
+ c--
+ pos--
+ if str[c] != unicode.ToLower(r.runtext[pos]) {
+ return false
+ }
+ }
+ }
+
+ if !r.rightToLeft {
+ pos += len(str)
+ }
+
+ r.runtextpos = pos
+
+ return true
+}
+
+func (r *runner) refmatch(index, len int) bool {
+ var c, pos, cmpos int
+
+ if !r.rightToLeft {
+ if r.runtextend-r.runtextpos < len {
+ return false
+ }
+
+ pos = r.runtextpos + len
+ } else {
+ if r.runtextpos-0 < len {
+ return false
+ }
+
+ pos = r.runtextpos
+ }
+ cmpos = index + len
+
+ c = len
+
+ if !r.caseInsensitive {
+ for c != 0 {
+ c--
+ cmpos--
+ pos--
+ if r.runtext[cmpos] != r.runtext[pos] {
+ return false
+ }
+
+ }
+ } else {
+ for c != 0 {
+ c--
+ cmpos--
+ pos--
+
+ if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) {
+ return false
+ }
+ }
+ }
+
+ if !r.rightToLeft {
+ pos += len
+ }
+
+ r.runtextpos = pos
+
+ return true
+}
+
+func (r *runner) backwardnext() {
+ if r.rightToLeft {
+ r.runtextpos++
+ } else {
+ r.runtextpos--
+ }
+}
+
+func (r *runner) charAt(j int) rune {
+ return r.runtext[j]
+}
+
+func (r *runner) findFirstChar() bool {
+
+ if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) {
+ if !r.code.RightToLeft {
+ if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) ||
+ (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) {
+ r.runtextpos = r.runtextend
+ return false
+ }
+ if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 {
+ r.runtextpos = r.runtextend - 1
+ } else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend {
+ r.runtextpos = r.runtextend
+ }
+ } else {
+ if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) ||
+ (0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 ||
+ (r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) ||
+ (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) {
+ r.runtextpos = 0
+ return false
+ }
+ if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 {
+ r.runtextpos = 0
+ }
+ }
+
+ if r.code.BmPrefix != nil {
+ return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend)
+ }
+
+ return true // found a valid start or end anchor
+ } else if r.code.BmPrefix != nil {
+ r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend)
+
+ if r.runtextpos == -1 {
+ if r.code.RightToLeft {
+ r.runtextpos = 0
+ } else {
+ r.runtextpos = r.runtextend
+ }
+ return false
+ }
+
+ return true
+ } else if r.code.FcPrefix == nil {
+ return true
+ }
+
+ r.rightToLeft = r.code.RightToLeft
+ r.caseInsensitive = r.code.FcPrefix.CaseInsensitive
+
+ set := r.code.FcPrefix.PrefixSet
+ if set.IsSingleton() {
+ ch := set.SingletonChar()
+ for i := r.forwardchars(); i > 0; i-- {
+ if ch == r.forwardcharnext() {
+ r.backwardnext()
+ return true
+ }
+ }
+ } else {
+ for i := r.forwardchars(); i > 0; i-- {
+ n := r.forwardcharnext()
+ //fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n))
+ if set.CharIn(n) {
+ r.backwardnext()
+ return true
+ }
+ }
+ }
+
+ return false
+}
+
+func (r *runner) initMatch() {
+ // Use a hashtable'ed Match object if the capture numbers are sparse
+
+ if r.runmatch == nil {
+ if r.re.caps != nil {
+ r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart)
+ } else {
+ r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart)
+ }
+ } else {
+ r.runmatch.reset(r.runtext, r.runtextstart)
+ }
+
+ // note we test runcrawl, because it is the last one to be allocated
+ // If there is an alloc failure in the middle of the three allocations,
+ // we may still return to reuse this instance, and we want to behave
+ // as if the allocations didn't occur. (we used to test _trackcount != 0)
+
+ if r.runcrawl != nil {
+ r.runtrackpos = len(r.runtrack)
+ r.runstackpos = len(r.runstack)
+ r.runcrawlpos = len(r.runcrawl)
+ return
+ }
+
+ r.initTrackCount()
+
+ tracksize := r.runtrackcount * 8
+ stacksize := r.runtrackcount * 8
+
+ if tracksize < 32 {
+ tracksize = 32
+ }
+ if stacksize < 16 {
+ stacksize = 16
+ }
+
+ r.runtrack = make([]int, tracksize)
+ r.runtrackpos = tracksize
+
+ r.runstack = make([]int, stacksize)
+ r.runstackpos = stacksize
+
+ r.runcrawl = make([]int, 32)
+ r.runcrawlpos = 32
+}
+
+func (r *runner) tidyMatch(quick bool) *Match {
+ if !quick {
+ match := r.runmatch
+
+ r.runmatch = nil
+
+ match.tidy(r.runtextpos)
+ return match
+ } else {
+ // send back our match -- it's not leaving the package, so it's safe to not clean it up
+ // this reduces allocs for frequent calls to the "IsMatch" bool-only functions
+ return r.runmatch
+ }
+}
+
+// capture captures a subexpression. Note that the
+// capnum used here has already been mapped to a non-sparse
+// index (by the code generator RegexWriter).
+func (r *runner) capture(capnum, start, end int) {
+ if end < start {
+ T := end
+ end = start
+ start = T
+ }
+
+ r.crawl(capnum)
+ r.runmatch.addMatch(capnum, start, end-start)
+}
+
+// transferCapture captures a subexpression. Note that the
+// capnum used here has already been mapped to a non-sparse
+// index (by the code generator RegexWriter).
+func (r *runner) transferCapture(capnum, uncapnum, start, end int) {
+ var start2, end2 int
+
+ // these are the two intervals that are cancelling each other
+
+ if end < start {
+ T := end
+ end = start
+ start = T
+ }
+
+ start2 = r.runmatch.matchIndex(uncapnum)
+ end2 = start2 + r.runmatch.matchLength(uncapnum)
+
+ // The new capture gets the innermost defined interval
+
+ if start >= end2 {
+ end = start
+ start = end2
+ } else if end <= start2 {
+ start = start2
+ } else {
+ if end > end2 {
+ end = end2
+ }
+ if start2 > start {
+ start = start2
+ }
+ }
+
+ r.crawl(uncapnum)
+ r.runmatch.balanceMatch(uncapnum)
+
+ if capnum != -1 {
+ r.crawl(capnum)
+ r.runmatch.addMatch(capnum, start, end-start)
+ }
+}
+
+// revert the last capture
+func (r *runner) uncapture() {
+ capnum := r.popcrawl()
+ r.runmatch.removeMatch(capnum)
+}
+
+//debug
+
+func (r *runner) dumpState() {
+ back := ""
+ if r.operator&syntax.Back != 0 {
+ back = " Back"
+ }
+ if r.operator&syntax.Back2 != 0 {
+ back += " Back2"
+ }
+ fmt.Printf("Text: %v\nTrack: %v\nStack: %v\n %s%s\n\n",
+ r.textposDescription(),
+ r.stackDescription(r.runtrack, r.runtrackpos),
+ r.stackDescription(r.runstack, r.runstackpos),
+ r.code.OpcodeDescription(r.codepos),
+ back)
+}
+
+func (r *runner) stackDescription(a []int, index int) string {
+ buf := &bytes.Buffer{}
+
+ fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a))
+ if buf.Len() < 8 {
+ buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
+ }
+
+ buf.WriteRune('(')
+ for i := index; i < len(a); i++ {
+ if i > index {
+ buf.WriteRune(' ')
+ }
+
+ buf.WriteString(strconv.Itoa(a[i]))
+ }
+
+ buf.WriteRune(')')
+
+ return buf.String()
+}
+
+func (r *runner) textposDescription() string {
+ buf := &bytes.Buffer{}
+
+ buf.WriteString(strconv.Itoa(r.runtextpos))
+
+ if buf.Len() < 8 {
+ buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
+ }
+
+ if r.runtextpos > 0 {
+ buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1]))
+ } else {
+ buf.WriteRune('^')
+ }
+
+ buf.WriteRune('>')
+
+ for i := r.runtextpos; i < r.runtextend; i++ {
+ buf.WriteString(syntax.CharDescription(r.runtext[i]))
+ }
+ if buf.Len() >= 64 {
+ buf.Truncate(61)
+ buf.WriteString("...")
+ } else {
+ buf.WriteRune('$')
+ }
+
+ return buf.String()
+}
+
+// decide whether the pos
+// at the specified index is a boundary or not. It's just not worth
+// emitting inline code for this logic.
+func (r *runner) isBoundary(index, startpos, endpos int) bool {
+ return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) !=
+ (index < endpos && syntax.IsWordChar(r.runtext[index]))
+}
+
+func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
+ return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) !=
+ (index < endpos && syntax.IsECMAWordChar(r.runtext[index]))
+}
+
+// this seems like a comment to justify randomly picking 1000 :-P
+// We have determined this value in a series of experiments where x86 retail
+// builds (ono-lab-optimized) were run on different pattern/input pairs. Larger values
+// of TimeoutCheckFrequency did not tend to increase performance; smaller values
+// of TimeoutCheckFrequency tended to slow down the execution.
+const timeoutCheckFrequency int = 1000
+
+func (r *runner) startTimeoutWatch() {
+ if r.ignoreTimeout {
+ return
+ }
+
+ r.timeoutChecksToSkip = timeoutCheckFrequency
+ r.timeoutAt = time.Now().Add(r.timeout)
+}
+
+func (r *runner) checkTimeout() error {
+ if r.ignoreTimeout {
+ return nil
+ }
+ r.timeoutChecksToSkip--
+ if r.timeoutChecksToSkip != 0 {
+ return nil
+ }
+
+ r.timeoutChecksToSkip = timeoutCheckFrequency
+ return r.doCheckTimeout()
+}
+
+func (r *runner) doCheckTimeout() error {
+ current := time.Now()
+
+ if current.Before(r.timeoutAt) {
+ return nil
+ }
+
+ if r.re.Debug() {
+ //Debug.WriteLine("")
+ //Debug.WriteLine("RegEx match timeout occurred!")
+ //Debug.WriteLine("Specified timeout: " + TimeSpan.FromMilliseconds(_timeout).ToString())
+ //Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency)
+ //Debug.WriteLine("Search pattern: " + _runregex._pattern)
+ //Debug.WriteLine("Input: " + r.runtext)
+ //Debug.WriteLine("About to throw RegexMatchTimeoutException.")
+ }
+
+ return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext))
+}
+
+func (r *runner) initTrackCount() {
+ r.runtrackcount = r.code.TrackCount
+}
+
+// getRunner returns a run to use for matching re.
+// It uses the re's runner cache if possible, to avoid
+// unnecessary allocation.
+func (re *Regexp) getRunner() *runner {
+ re.muRun.Lock()
+ if n := len(re.runner); n > 0 {
+ z := re.runner[n-1]
+ re.runner = re.runner[:n-1]
+ re.muRun.Unlock()
+ return z
+ }
+ re.muRun.Unlock()
+ z := &runner{
+ re: re,
+ code: re.code,
+ }
+ return z
+}
+
+// putRunner returns a runner to the re's cache.
+// There is no attempt to limit the size of the cache, so it will
+// grow to the maximum number of simultaneous matches
+// run using re. (The cache empties when re gets garbage collected.)
+func (re *Regexp) putRunner(r *runner) {
+ re.muRun.Lock()
+ re.runner = append(re.runner, r)
+ re.muRun.Unlock()
+}