diff options
Diffstat (limited to 'vendor/github.com/dlclark/regexp2/runner.go')
-rw-r--r-- | vendor/github.com/dlclark/regexp2/runner.go | 1621 |
1 files changed, 1621 insertions, 0 deletions
diff --git a/vendor/github.com/dlclark/regexp2/runner.go b/vendor/github.com/dlclark/regexp2/runner.go new file mode 100644 index 0000000000..2d84a934b0 --- /dev/null +++ b/vendor/github.com/dlclark/regexp2/runner.go @@ -0,0 +1,1621 @@ +package regexp2 + +import ( + "bytes" + "errors" + "fmt" + "math" + "strconv" + "strings" + "time" + "unicode" + + "github.com/dlclark/regexp2/syntax" +) + +type runner struct { + re *Regexp + code *syntax.Code + + runtextstart int // starting point for search + + runtext []rune // text to search + runtextpos int // current position in text + runtextend int + + // The backtracking stack. Opcodes use this to store data regarding + // what they have matched and where to backtrack to. Each "frame" on + // the stack takes the form of [CodePosition Data1 Data2...], where + // CodePosition is the position of the current opcode and + // the data values are all optional. The CodePosition can be negative, and + // these values (also called "back2") are used by the BranchMark family of opcodes + // to indicate whether they are backtracking after a successful or failed + // match. + // When we backtrack, we pop the CodePosition off the stack, set the current + // instruction pointer to that code position, and mark the opcode + // with a backtracking flag ("Back"). Each opcode then knows how to + // handle its own data. + runtrack []int + runtrackpos int + + // This stack is used to track text positions across different opcodes. + // For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark + // pair. SetMark records the text position before we match a*b. Then + // CaptureMark uses that position to figure out where the capture starts. + // Opcodes which push onto this stack are always paired with other opcodes + // which will pop the value from it later. A successful match should mean + // that this stack is empty. + runstack []int + runstackpos int + + // The crawl stack is used to keep track of captures. Every time a group + // has a capture, we push its group number onto the runcrawl stack. In + // the case of a balanced match, we push BOTH groups onto the stack. + runcrawl []int + runcrawlpos int + + runtrackcount int // count of states that may do backtracking + + runmatch *Match // result object + + ignoreTimeout bool + timeout time.Duration // timeout in milliseconds (needed for actual) + timeoutChecksToSkip int + timeoutAt time.Time + + operator syntax.InstOp + codepos int + rightToLeft bool + caseInsensitive bool +} + +// run searches for matches and can continue from the previous match +// +// quick is usually false, but can be true to not return matches, just put it in caches +// textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input +// input is the string to search for our regex pattern +func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) { + + // get a cached runner + runner := re.getRunner() + defer re.putRunner(runner) + + if textstart < 0 { + if re.RightToLeft() { + textstart = len(input) + } else { + textstart = 0 + } + } + + return runner.scan(input, textstart, quick, re.MatchTimeout) +} + +// Scans the string to find the first match. Uses the Match object +// both to feed text in and as a place to store matches that come out. +// +// All the action is in the Go() method. Our +// responsibility is to load up the class members before +// calling Go. +// +// The optimizer can compute a set of candidate starting characters, +// and we could use a separate method Skip() that will quickly scan past +// any characters that we know can't match. +func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) { + r.timeout = timeout + r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout) + r.runtextstart = textstart + r.runtext = rt + r.runtextend = len(rt) + + stoppos := r.runtextend + bump := 1 + + if r.re.RightToLeft() { + bump = -1 + stoppos = 0 + } + + r.runtextpos = textstart + initted := false + + r.startTimeoutWatch() + for { + if r.re.Debug() { + //fmt.Printf("\nSearch content: %v\n", string(r.runtext)) + fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend) + fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos) + } + + if r.findFirstChar() { + if err := r.checkTimeout(); err != nil { + return nil, err + } + + if !initted { + r.initMatch() + initted = true + } + + if r.re.Debug() { + fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos) + } + + if err := r.execute(); err != nil { + return nil, err + } + + if r.runmatch.matchcount[0] > 0 { + // We'll return a match even if it touches a previous empty match + return r.tidyMatch(quick), nil + } + + // reset state for another go + r.runtrackpos = len(r.runtrack) + r.runstackpos = len(r.runstack) + r.runcrawlpos = len(r.runcrawl) + } + + // failure! + + if r.runtextpos == stoppos { + r.tidyMatch(true) + return nil, nil + } + + // Recognize leading []* and various anchors, and bump on failure accordingly + + // r.bump by one and start again + + r.runtextpos += bump + } + // We never get here +} + +func (r *runner) execute() error { + + r.goTo(0) + + for { + + if r.re.Debug() { + r.dumpState() + } + + if err := r.checkTimeout(); err != nil { + return err + } + + switch r.operator { + case syntax.Stop: + return nil + + case syntax.Nothing: + break + + case syntax.Goto: + r.goTo(r.operand(0)) + continue + + case syntax.Testref: + if !r.runmatch.isMatched(r.operand(0)) { + break + } + r.advance(1) + continue + + case syntax.Lazybranch: + r.trackPush1(r.textPos()) + r.advance(1) + continue + + case syntax.Lazybranch | syntax.Back: + r.trackPop() + r.textto(r.trackPeek()) + r.goTo(r.operand(0)) + continue + + case syntax.Setmark: + r.stackPush(r.textPos()) + r.trackPush() + r.advance(0) + continue + + case syntax.Nullmark: + r.stackPush(-1) + r.trackPush() + r.advance(0) + continue + + case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back: + r.stackPop() + break + + case syntax.Getmark: + r.stackPop() + r.trackPush1(r.stackPeek()) + r.textto(r.stackPeek()) + r.advance(0) + continue + + case syntax.Getmark | syntax.Back: + r.trackPop() + r.stackPush(r.trackPeek()) + break + + case syntax.Capturemark: + if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) { + break + } + r.stackPop() + if r.operand(1) != -1 { + r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos()) + } else { + r.capture(r.operand(0), r.stackPeek(), r.textPos()) + } + r.trackPush1(r.stackPeek()) + + r.advance(2) + + continue + + case syntax.Capturemark | syntax.Back: + r.trackPop() + r.stackPush(r.trackPeek()) + r.uncapture() + if r.operand(0) != -1 && r.operand(1) != -1 { + r.uncapture() + } + + break + + case syntax.Branchmark: + r.stackPop() + + matched := r.textPos() - r.stackPeek() + + if matched != 0 { // Nonempty match -> loop now + r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos + r.stackPush(r.textPos()) // Make new mark + r.goTo(r.operand(0)) // Loop + } else { // Empty match -> straight now + r.trackPushNeg1(r.stackPeek()) // Save old mark + r.advance(1) // Straight + } + continue + + case syntax.Branchmark | syntax.Back: + r.trackPopN(2) + r.stackPop() + r.textto(r.trackPeekN(1)) // Recall position + r.trackPushNeg1(r.trackPeek()) // Save old mark + r.advance(1) // Straight + continue + + case syntax.Branchmark | syntax.Back2: + r.trackPop() + r.stackPush(r.trackPeek()) // Recall old mark + break // Backtrack + + case syntax.Lazybranchmark: + { + // We hit this the first time through a lazy loop and after each + // successful match of the inner expression. It simply continues + // on and doesn't loop. + r.stackPop() + + oldMarkPos := r.stackPeek() + + if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state + if oldMarkPos != -1 { + r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos + } else { + r.trackPush2(r.textPos(), r.textPos()) + } + } else { + // The inner expression found an empty match, so we'll go directly to 'back2' if we + // backtrack. In this case, we need to push something on the stack, since back2 pops. + // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text + // position associated with that empty match. + r.stackPush(oldMarkPos) + + r.trackPushNeg1(r.stackPeek()) // Save old mark + } + r.advance(1) + continue + } + + case syntax.Lazybranchmark | syntax.Back: + + // After the first time, Lazybranchmark | syntax.Back occurs + // with each iteration of the loop, and therefore with every attempted + // match of the inner expression. We'll try to match the inner expression, + // then go back to Lazybranchmark if successful. If the inner expression + // fails, we go to Lazybranchmark | syntax.Back2 + + r.trackPopN(2) + pos := r.trackPeekN(1) + r.trackPushNeg1(r.trackPeek()) // Save old mark + r.stackPush(pos) // Make new mark + r.textto(pos) // Recall position + r.goTo(r.operand(0)) // Loop + continue + + case syntax.Lazybranchmark | syntax.Back2: + // The lazy loop has failed. We'll do a true backtrack and + // start over before the lazy loop. + r.stackPop() + r.trackPop() + r.stackPush(r.trackPeek()) // Recall old mark + break + + case syntax.Setcount: + r.stackPush2(r.textPos(), r.operand(0)) + r.trackPush() + r.advance(1) + continue + + case syntax.Nullcount: + r.stackPush2(-1, r.operand(0)) + r.trackPush() + r.advance(1) + continue + + case syntax.Setcount | syntax.Back: + r.stackPopN(2) + break + + case syntax.Nullcount | syntax.Back: + r.stackPopN(2) + break + + case syntax.Branchcount: + // r.stackPush: + // 0: Mark + // 1: Count + + r.stackPopN(2) + mark := r.stackPeek() + count := r.stackPeekN(1) + matched := r.textPos() - mark + + if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now + r.trackPushNeg2(mark, count) // Save old mark, count + r.advance(2) // Straight + } else { // Nonempty match -> count+loop now + r.trackPush1(mark) // remember mark + r.stackPush2(r.textPos(), count+1) // Make new mark, incr count + r.goTo(r.operand(0)) // Loop + } + continue + + case syntax.Branchcount | syntax.Back: + // r.trackPush: + // 0: Previous mark + // r.stackPush: + // 0: Mark (= current pos, discarded) + // 1: Count + r.trackPop() + r.stackPopN(2) + if r.stackPeekN(1) > 0 { // Positive -> can go straight + r.textto(r.stackPeek()) // Zap to mark + r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count + r.advance(2) // Straight + continue + } + r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count + break + + case syntax.Branchcount | syntax.Back2: + // r.trackPush: + // 0: Previous mark + // 1: Previous count + r.trackPopN(2) + r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count + break // Backtrack + + case syntax.Lazybranchcount: + // r.stackPush: + // 0: Mark + // 1: Count + + r.stackPopN(2) + mark := r.stackPeek() + count := r.stackPeekN(1) + + if count < 0 { // Negative count -> loop now + r.trackPushNeg1(mark) // Save old mark + r.stackPush2(r.textPos(), count+1) // Make new mark, incr count + r.goTo(r.operand(0)) // Loop + } else { // Nonneg count -> straight now + r.trackPush3(mark, count, r.textPos()) // Save mark, count, position + r.advance(2) // Straight + } + continue + + case syntax.Lazybranchcount | syntax.Back: + // r.trackPush: + // 0: Mark + // 1: Count + // 2: r.textPos + + r.trackPopN(3) + mark := r.trackPeek() + textpos := r.trackPeekN(2) + + if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop + r.textto(textpos) // Recall position + r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count + r.trackPushNeg1(mark) // Save old mark + r.goTo(r.operand(0)) // Loop + continue + } else { // Max loops or empty match -> backtrack + r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count + break // backtrack + } + + case syntax.Lazybranchcount | syntax.Back2: + // r.trackPush: + // 0: Previous mark + // r.stackPush: + // 0: Mark (== current pos, discarded) + // 1: Count + r.trackPop() + r.stackPopN(2) + r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count + break // Backtrack + + case syntax.Setjump: + r.stackPush2(r.trackpos(), r.crawlpos()) + r.trackPush() + r.advance(0) + continue + + case syntax.Setjump | syntax.Back: + r.stackPopN(2) + break + + case syntax.Backjump: + // r.stackPush: + // 0: Saved trackpos + // 1: r.crawlpos + r.stackPopN(2) + r.trackto(r.stackPeek()) + + for r.crawlpos() != r.stackPeekN(1) { + r.uncapture() + } + + break + + case syntax.Forejump: + // r.stackPush: + // 0: Saved trackpos + // 1: r.crawlpos + r.stackPopN(2) + r.trackto(r.stackPeek()) + r.trackPush1(r.stackPeekN(1)) + r.advance(0) + continue + + case syntax.Forejump | syntax.Back: + // r.trackPush: + // 0: r.crawlpos + r.trackPop() + + for r.crawlpos() != r.trackPeek() { + r.uncapture() + } + + break + + case syntax.Bol: + if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' { + break + } + r.advance(0) + continue + + case syntax.Eol: + if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' { + break + } + r.advance(0) + continue + + case syntax.Boundary: + if !r.isBoundary(r.textPos(), 0, r.runtextend) { + break + } + r.advance(0) + continue + + case syntax.Nonboundary: + if r.isBoundary(r.textPos(), 0, r.runtextend) { + break + } + r.advance(0) + continue + + case syntax.ECMABoundary: + if !r.isECMABoundary(r.textPos(), 0, r.runtextend) { + break + } + r.advance(0) + continue + + case syntax.NonECMABoundary: + if r.isECMABoundary(r.textPos(), 0, r.runtextend) { + break + } + r.advance(0) + continue + + case syntax.Beginning: + if r.leftchars() > 0 { + break + } + r.advance(0) + continue + + case syntax.Start: + if r.textPos() != r.textstart() { + break + } + r.advance(0) + continue + + case syntax.EndZ: + if r.rightchars() > 1 || r.rightchars() == 1 && r.charAt(r.textPos()) != '\n' { + break + } + r.advance(0) + continue + + case syntax.End: + if r.rightchars() > 0 { + break + } + r.advance(0) + continue + + case syntax.One: + if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) { + break + } + + r.advance(1) + continue + + case syntax.Notone: + if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) { + break + } + + r.advance(1) + continue + + case syntax.Set: + + if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) { + break + } + + r.advance(1) + continue + + case syntax.Multi: + if !r.runematch(r.code.Strings[r.operand(0)]) { + break + } + + r.advance(1) + continue + + case syntax.Ref: + + capnum := r.operand(0) + + if r.runmatch.isMatched(capnum) { + if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) { + break + } + } else { + if (r.re.options & ECMAScript) == 0 { + break + } + } + + r.advance(1) + continue + + case syntax.Onerep: + + c := r.operand(1) + + if r.forwardchars() < c { + break + } + + ch := rune(r.operand(0)) + + for c > 0 { + if r.forwardcharnext() != ch { + goto BreakBackward + } + c-- + } + + r.advance(2) + continue + + case syntax.Notonerep: + + c := r.operand(1) + + if r.forwardchars() < c { + break + } + ch := rune(r.operand(0)) + + for c > 0 { + if r.forwardcharnext() == ch { + goto BreakBackward + } + c-- + } + + r.advance(2) + continue + + case syntax.Setrep: + + c := r.operand(1) + + if r.forwardchars() < c { + break + } + + set := r.code.Sets[r.operand(0)] + + for c > 0 { + if !set.CharIn(r.forwardcharnext()) { + goto BreakBackward + } + c-- + } + + r.advance(2) + continue + + case syntax.Oneloop: + + c := r.operand(1) + + if c > r.forwardchars() { + c = r.forwardchars() + } + + ch := rune(r.operand(0)) + i := c + + for ; i > 0; i-- { + if r.forwardcharnext() != ch { + r.backwardnext() + break + } + } + + if c > i { + r.trackPush2(c-i-1, r.textPos()-r.bump()) + } + + r.advance(2) + continue + + case syntax.Notoneloop: + + c := r.operand(1) + + if c > r.forwardchars() { + c = r.forwardchars() + } + + ch := rune(r.operand(0)) + i := c + + for ; i > 0; i-- { + if r.forwardcharnext() == ch { + r.backwardnext() + break + } + } + + if c > i { + r.trackPush2(c-i-1, r.textPos()-r.bump()) + } + + r.advance(2) + continue + + case syntax.Setloop: + + c := r.operand(1) + + if c > r.forwardchars() { + c = r.forwardchars() + } + + set := r.code.Sets[r.operand(0)] + i := c + + for ; i > 0; i-- { + if !set.CharIn(r.forwardcharnext()) { + r.backwardnext() + break + } + } + + if c > i { + r.trackPush2(c-i-1, r.textPos()-r.bump()) + } + + r.advance(2) + continue + + case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back: + + r.trackPopN(2) + i := r.trackPeek() + pos := r.trackPeekN(1) + + r.textto(pos) + + if i > 0 { + r.trackPush2(i-1, pos-r.bump()) + } + + r.advance(2) + continue + + case syntax.Setloop | syntax.Back: + + r.trackPopN(2) + i := r.trackPeek() + pos := r.trackPeekN(1) + + r.textto(pos) + + if i > 0 { + r.trackPush2(i-1, pos-r.bump()) + } + + r.advance(2) + continue + + case syntax.Onelazy, syntax.Notonelazy: + + c := r.operand(1) + + if c > r.forwardchars() { + c = r.forwardchars() + } + + if c > 0 { + r.trackPush2(c-1, r.textPos()) + } + + r.advance(2) + continue + + case syntax.Setlazy: + + c := r.operand(1) + + if c > r.forwardchars() { + c = r.forwardchars() + } + + if c > 0 { + r.trackPush2(c-1, r.textPos()) + } + + r.advance(2) + continue + + case syntax.Onelazy | syntax.Back: + + r.trackPopN(2) + pos := r.trackPeekN(1) + r.textto(pos) + + if r.forwardcharnext() != rune(r.operand(0)) { + break + } + + i := r.trackPeek() + + if i > 0 { + r.trackPush2(i-1, pos+r.bump()) + } + + r.advance(2) + continue + + case syntax.Notonelazy | syntax.Back: + + r.trackPopN(2) + pos := r.trackPeekN(1) + r.textto(pos) + + if r.forwardcharnext() == rune(r.operand(0)) { + break + } + + i := r.trackPeek() + + if i > 0 { + r.trackPush2(i-1, pos+r.bump()) + } + + r.advance(2) + continue + + case syntax.Setlazy | syntax.Back: + + r.trackPopN(2) + pos := r.trackPeekN(1) + r.textto(pos) + + if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) { + break + } + + i := r.trackPeek() + + if i > 0 { + r.trackPush2(i-1, pos+r.bump()) + } + + r.advance(2) + continue + + default: + return errors.New("unknown state in regex runner") + } + + BreakBackward: + ; + + // "break Backward" comes here: + r.backtrack() + } +} + +// increase the size of stack and track storage +func (r *runner) ensureStorage() { + if r.runstackpos < r.runtrackcount*4 { + doubleIntSlice(&r.runstack, &r.runstackpos) + } + if r.runtrackpos < r.runtrackcount*4 { + doubleIntSlice(&r.runtrack, &r.runtrackpos) + } +} + +func doubleIntSlice(s *[]int, pos *int) { + oldLen := len(*s) + newS := make([]int, oldLen*2) + + copy(newS[oldLen:], *s) + *pos += oldLen + *s = newS +} + +// Save a number on the longjump unrolling stack +func (r *runner) crawl(i int) { + if r.runcrawlpos == 0 { + doubleIntSlice(&r.runcrawl, &r.runcrawlpos) + } + r.runcrawlpos-- + r.runcrawl[r.runcrawlpos] = i +} + +// Remove a number from the longjump unrolling stack +func (r *runner) popcrawl() int { + val := r.runcrawl[r.runcrawlpos] + r.runcrawlpos++ + return val +} + +// Get the height of the stack +func (r *runner) crawlpos() int { + return len(r.runcrawl) - r.runcrawlpos +} + +func (r *runner) advance(i int) { + r.codepos += (i + 1) + r.setOperator(r.code.Codes[r.codepos]) +} + +func (r *runner) goTo(newpos int) { + // when branching backward, ensure storage + if newpos < r.codepos { + r.ensureStorage() + } + + r.setOperator(r.code.Codes[newpos]) + r.codepos = newpos +} + +func (r *runner) textto(newpos int) { + r.runtextpos = newpos +} + +func (r *runner) trackto(newpos int) { + r.runtrackpos = len(r.runtrack) - newpos +} + +func (r *runner) textstart() int { + return r.runtextstart +} + +func (r *runner) textPos() int { + return r.runtextpos +} + +// push onto the backtracking stack +func (r *runner) trackpos() int { + return len(r.runtrack) - r.runtrackpos +} + +func (r *runner) trackPush() { + r.runtrackpos-- + r.runtrack[r.runtrackpos] = r.codepos +} + +func (r *runner) trackPush1(I1 int) { + r.runtrackpos-- + r.runtrack[r.runtrackpos] = I1 + r.runtrackpos-- + r.runtrack[r.runtrackpos] = r.codepos +} + +func (r *runner) trackPush2(I1, I2 int) { + r.runtrackpos-- + r.runtrack[r.runtrackpos] = I1 + r.runtrackpos-- + r.runtrack[r.runtrackpos] = I2 + r.runtrackpos-- + r.runtrack[r.runtrackpos] = r.codepos +} + +func (r *runner) trackPush3(I1, I2, I3 int) { + r.runtrackpos-- + r.runtrack[r.runtrackpos] = I1 + r.runtrackpos-- + r.runtrack[r.runtrackpos] = I2 + r.runtrackpos-- + r.runtrack[r.runtrackpos] = I3 + r.runtrackpos-- + r.runtrack[r.runtrackpos] = r.codepos +} + +func (r *runner) trackPushNeg1(I1 int) { + r.runtrackpos-- + r.runtrack[r.runtrackpos] = I1 + r.runtrackpos-- + r.runtrack[r.runtrackpos] = -r.codepos +} + +func (r *runner) trackPushNeg2(I1, I2 int) { + r.runtrackpos-- + r.runtrack[r.runtrackpos] = I1 + r.runtrackpos-- + r.runtrack[r.runtrackpos] = I2 + r.runtrackpos-- + r.runtrack[r.runtrackpos] = -r.codepos +} + +func (r *runner) backtrack() { + newpos := r.runtrack[r.runtrackpos] + r.runtrackpos++ + + if r.re.Debug() { + if newpos < 0 { + fmt.Printf(" Backtracking (back2) to code position %v\n", -newpos) + } else { + fmt.Printf(" Backtracking to code position %v\n", newpos) + } + } + + if newpos < 0 { + newpos = -newpos + r.setOperator(r.code.Codes[newpos] | syntax.Back2) + } else { + r.setOperator(r.code.Codes[newpos] | syntax.Back) + } + + // When branching backward, ensure storage + if newpos < r.codepos { + r.ensureStorage() + } + + r.codepos = newpos +} + +func (r *runner) setOperator(op int) { + r.caseInsensitive = (0 != (op & syntax.Ci)) + r.rightToLeft = (0 != (op & syntax.Rtl)) + r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci)) +} + +func (r *runner) trackPop() { + r.runtrackpos++ +} + +// pop framesize items from the backtracking stack +func (r *runner) trackPopN(framesize int) { + r.runtrackpos += framesize +} + +// Technically we are actually peeking at items already popped. So if you want to +// get and pop the top item from the stack, you do +// r.trackPop(); +// r.trackPeek(); +func (r *runner) trackPeek() int { + return r.runtrack[r.runtrackpos-1] +} + +// get the ith element down on the backtracking stack +func (r *runner) trackPeekN(i int) int { + return r.runtrack[r.runtrackpos-i-1] +} + +// Push onto the grouping stack +func (r *runner) stackPush(I1 int) { + r.runstackpos-- + r.runstack[r.runstackpos] = I1 +} + +func (r *runner) stackPush2(I1, I2 int) { + r.runstackpos-- + r.runstack[r.runstackpos] = I1 + r.runstackpos-- + r.runstack[r.runstackpos] = I2 +} + +func (r *runner) stackPop() { + r.runstackpos++ +} + +// pop framesize items from the grouping stack +func (r *runner) stackPopN(framesize int) { + r.runstackpos += framesize +} + +// Technically we are actually peeking at items already popped. So if you want to +// get and pop the top item from the stack, you do +// r.stackPop(); +// r.stackPeek(); +func (r *runner) stackPeek() int { + return r.runstack[r.runstackpos-1] +} + +// get the ith element down on the grouping stack +func (r *runner) stackPeekN(i int) int { + return r.runstack[r.runstackpos-i-1] +} + +func (r *runner) operand(i int) int { + return r.code.Codes[r.codepos+i+1] +} + +func (r *runner) leftchars() int { + return r.runtextpos +} + +func (r *runner) rightchars() int { + return r.runtextend - r.runtextpos +} + +func (r *runner) bump() int { + if r.rightToLeft { + return -1 + } + return 1 +} + +func (r *runner) forwardchars() int { + if r.rightToLeft { + return r.runtextpos + } + return r.runtextend - r.runtextpos +} + +func (r *runner) forwardcharnext() rune { + var ch rune + if r.rightToLeft { + r.runtextpos-- + ch = r.runtext[r.runtextpos] + } else { + ch = r.runtext[r.runtextpos] + r.runtextpos++ + } + + if r.caseInsensitive { + return unicode.ToLower(ch) + } + return ch +} + +func (r *runner) runematch(str []rune) bool { + var pos int + + c := len(str) + if !r.rightToLeft { + if r.runtextend-r.runtextpos < c { + return false + } + + pos = r.runtextpos + c + } else { + if r.runtextpos-0 < c { + return false + } + + pos = r.runtextpos + } + + if !r.caseInsensitive { + for c != 0 { + c-- + pos-- + if str[c] != r.runtext[pos] { + return false + } + } + } else { + for c != 0 { + c-- + pos-- + if str[c] != unicode.ToLower(r.runtext[pos]) { + return false + } + } + } + + if !r.rightToLeft { + pos += len(str) + } + + r.runtextpos = pos + + return true +} + +func (r *runner) refmatch(index, len int) bool { + var c, pos, cmpos int + + if !r.rightToLeft { + if r.runtextend-r.runtextpos < len { + return false + } + + pos = r.runtextpos + len + } else { + if r.runtextpos-0 < len { + return false + } + + pos = r.runtextpos + } + cmpos = index + len + + c = len + + if !r.caseInsensitive { + for c != 0 { + c-- + cmpos-- + pos-- + if r.runtext[cmpos] != r.runtext[pos] { + return false + } + + } + } else { + for c != 0 { + c-- + cmpos-- + pos-- + + if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) { + return false + } + } + } + + if !r.rightToLeft { + pos += len + } + + r.runtextpos = pos + + return true +} + +func (r *runner) backwardnext() { + if r.rightToLeft { + r.runtextpos++ + } else { + r.runtextpos-- + } +} + +func (r *runner) charAt(j int) rune { + return r.runtext[j] +} + +func (r *runner) findFirstChar() bool { + + if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) { + if !r.code.RightToLeft { + if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) || + (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) { + r.runtextpos = r.runtextend + return false + } + if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 { + r.runtextpos = r.runtextend - 1 + } else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend { + r.runtextpos = r.runtextend + } + } else { + if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) || + (0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 || + (r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) || + (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) { + r.runtextpos = 0 + return false + } + if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 { + r.runtextpos = 0 + } + } + + if r.code.BmPrefix != nil { + return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend) + } + + return true // found a valid start or end anchor + } else if r.code.BmPrefix != nil { + r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend) + + if r.runtextpos == -1 { + if r.code.RightToLeft { + r.runtextpos = 0 + } else { + r.runtextpos = r.runtextend + } + return false + } + + return true + } else if r.code.FcPrefix == nil { + return true + } + + r.rightToLeft = r.code.RightToLeft + r.caseInsensitive = r.code.FcPrefix.CaseInsensitive + + set := r.code.FcPrefix.PrefixSet + if set.IsSingleton() { + ch := set.SingletonChar() + for i := r.forwardchars(); i > 0; i-- { + if ch == r.forwardcharnext() { + r.backwardnext() + return true + } + } + } else { + for i := r.forwardchars(); i > 0; i-- { + n := r.forwardcharnext() + //fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n)) + if set.CharIn(n) { + r.backwardnext() + return true + } + } + } + + return false +} + +func (r *runner) initMatch() { + // Use a hashtable'ed Match object if the capture numbers are sparse + + if r.runmatch == nil { + if r.re.caps != nil { + r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart) + } else { + r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart) + } + } else { + r.runmatch.reset(r.runtext, r.runtextstart) + } + + // note we test runcrawl, because it is the last one to be allocated + // If there is an alloc failure in the middle of the three allocations, + // we may still return to reuse this instance, and we want to behave + // as if the allocations didn't occur. (we used to test _trackcount != 0) + + if r.runcrawl != nil { + r.runtrackpos = len(r.runtrack) + r.runstackpos = len(r.runstack) + r.runcrawlpos = len(r.runcrawl) + return + } + + r.initTrackCount() + + tracksize := r.runtrackcount * 8 + stacksize := r.runtrackcount * 8 + + if tracksize < 32 { + tracksize = 32 + } + if stacksize < 16 { + stacksize = 16 + } + + r.runtrack = make([]int, tracksize) + r.runtrackpos = tracksize + + r.runstack = make([]int, stacksize) + r.runstackpos = stacksize + + r.runcrawl = make([]int, 32) + r.runcrawlpos = 32 +} + +func (r *runner) tidyMatch(quick bool) *Match { + if !quick { + match := r.runmatch + + r.runmatch = nil + + match.tidy(r.runtextpos) + return match + } else { + // send back our match -- it's not leaving the package, so it's safe to not clean it up + // this reduces allocs for frequent calls to the "IsMatch" bool-only functions + return r.runmatch + } +} + +// capture captures a subexpression. Note that the +// capnum used here has already been mapped to a non-sparse +// index (by the code generator RegexWriter). +func (r *runner) capture(capnum, start, end int) { + if end < start { + T := end + end = start + start = T + } + + r.crawl(capnum) + r.runmatch.addMatch(capnum, start, end-start) +} + +// transferCapture captures a subexpression. Note that the +// capnum used here has already been mapped to a non-sparse +// index (by the code generator RegexWriter). +func (r *runner) transferCapture(capnum, uncapnum, start, end int) { + var start2, end2 int + + // these are the two intervals that are cancelling each other + + if end < start { + T := end + end = start + start = T + } + + start2 = r.runmatch.matchIndex(uncapnum) + end2 = start2 + r.runmatch.matchLength(uncapnum) + + // The new capture gets the innermost defined interval + + if start >= end2 { + end = start + start = end2 + } else if end <= start2 { + start = start2 + } else { + if end > end2 { + end = end2 + } + if start2 > start { + start = start2 + } + } + + r.crawl(uncapnum) + r.runmatch.balanceMatch(uncapnum) + + if capnum != -1 { + r.crawl(capnum) + r.runmatch.addMatch(capnum, start, end-start) + } +} + +// revert the last capture +func (r *runner) uncapture() { + capnum := r.popcrawl() + r.runmatch.removeMatch(capnum) +} + +//debug + +func (r *runner) dumpState() { + back := "" + if r.operator&syntax.Back != 0 { + back = " Back" + } + if r.operator&syntax.Back2 != 0 { + back += " Back2" + } + fmt.Printf("Text: %v\nTrack: %v\nStack: %v\n %s%s\n\n", + r.textposDescription(), + r.stackDescription(r.runtrack, r.runtrackpos), + r.stackDescription(r.runstack, r.runstackpos), + r.code.OpcodeDescription(r.codepos), + back) +} + +func (r *runner) stackDescription(a []int, index int) string { + buf := &bytes.Buffer{} + + fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a)) + if buf.Len() < 8 { + buf.WriteString(strings.Repeat(" ", 8-buf.Len())) + } + + buf.WriteRune('(') + for i := index; i < len(a); i++ { + if i > index { + buf.WriteRune(' ') + } + + buf.WriteString(strconv.Itoa(a[i])) + } + + buf.WriteRune(')') + + return buf.String() +} + +func (r *runner) textposDescription() string { + buf := &bytes.Buffer{} + + buf.WriteString(strconv.Itoa(r.runtextpos)) + + if buf.Len() < 8 { + buf.WriteString(strings.Repeat(" ", 8-buf.Len())) + } + + if r.runtextpos > 0 { + buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1])) + } else { + buf.WriteRune('^') + } + + buf.WriteRune('>') + + for i := r.runtextpos; i < r.runtextend; i++ { + buf.WriteString(syntax.CharDescription(r.runtext[i])) + } + if buf.Len() >= 64 { + buf.Truncate(61) + buf.WriteString("...") + } else { + buf.WriteRune('$') + } + + return buf.String() +} + +// decide whether the pos +// at the specified index is a boundary or not. It's just not worth +// emitting inline code for this logic. +func (r *runner) isBoundary(index, startpos, endpos int) bool { + return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) != + (index < endpos && syntax.IsWordChar(r.runtext[index])) +} + +func (r *runner) isECMABoundary(index, startpos, endpos int) bool { + return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) != + (index < endpos && syntax.IsECMAWordChar(r.runtext[index])) +} + +// this seems like a comment to justify randomly picking 1000 :-P +// We have determined this value in a series of experiments where x86 retail +// builds (ono-lab-optimized) were run on different pattern/input pairs. Larger values +// of TimeoutCheckFrequency did not tend to increase performance; smaller values +// of TimeoutCheckFrequency tended to slow down the execution. +const timeoutCheckFrequency int = 1000 + +func (r *runner) startTimeoutWatch() { + if r.ignoreTimeout { + return + } + + r.timeoutChecksToSkip = timeoutCheckFrequency + r.timeoutAt = time.Now().Add(r.timeout) +} + +func (r *runner) checkTimeout() error { + if r.ignoreTimeout { + return nil + } + r.timeoutChecksToSkip-- + if r.timeoutChecksToSkip != 0 { + return nil + } + + r.timeoutChecksToSkip = timeoutCheckFrequency + return r.doCheckTimeout() +} + +func (r *runner) doCheckTimeout() error { + current := time.Now() + + if current.Before(r.timeoutAt) { + return nil + } + + if r.re.Debug() { + //Debug.WriteLine("") + //Debug.WriteLine("RegEx match timeout occurred!") + //Debug.WriteLine("Specified timeout: " + TimeSpan.FromMilliseconds(_timeout).ToString()) + //Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency) + //Debug.WriteLine("Search pattern: " + _runregex._pattern) + //Debug.WriteLine("Input: " + r.runtext) + //Debug.WriteLine("About to throw RegexMatchTimeoutException.") + } + + return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext)) +} + +func (r *runner) initTrackCount() { + r.runtrackcount = r.code.TrackCount +} + +// getRunner returns a run to use for matching re. +// It uses the re's runner cache if possible, to avoid +// unnecessary allocation. +func (re *Regexp) getRunner() *runner { + re.muRun.Lock() + if n := len(re.runner); n > 0 { + z := re.runner[n-1] + re.runner = re.runner[:n-1] + re.muRun.Unlock() + return z + } + re.muRun.Unlock() + z := &runner{ + re: re, + code: re.code, + } + return z +} + +// putRunner returns a runner to the re's cache. +// There is no attempt to limit the size of the cache, so it will +// grow to the maximum number of simultaneous matches +// run using re. (The cache empties when re gets garbage collected.) +func (re *Regexp) putRunner(r *runner) { + re.muRun.Lock() + re.runner = append(re.runner, r) + re.muRun.Unlock() +} |