12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634 |
- package regexp2
-
- import (
- "bytes"
- "errors"
- "fmt"
- "math"
- "strconv"
- "strings"
- "time"
- "unicode"
-
- "github.com/dlclark/regexp2/syntax"
- )
-
- type runner struct {
- re *Regexp
- code *syntax.Code
-
- runtextstart int // starting point for search
-
- runtext []rune // text to search
- runtextpos int // current position in text
- runtextend int
-
- // The backtracking stack. Opcodes use this to store data regarding
- // what they have matched and where to backtrack to. Each "frame" on
- // the stack takes the form of [CodePosition Data1 Data2...], where
- // CodePosition is the position of the current opcode and
- // the data values are all optional. The CodePosition can be negative, and
- // these values (also called "back2") are used by the BranchMark family of opcodes
- // to indicate whether they are backtracking after a successful or failed
- // match.
- // When we backtrack, we pop the CodePosition off the stack, set the current
- // instruction pointer to that code position, and mark the opcode
- // with a backtracking flag ("Back"). Each opcode then knows how to
- // handle its own data.
- runtrack []int
- runtrackpos int
-
- // This stack is used to track text positions across different opcodes.
- // For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark
- // pair. SetMark records the text position before we match a*b. Then
- // CaptureMark uses that position to figure out where the capture starts.
- // Opcodes which push onto this stack are always paired with other opcodes
- // which will pop the value from it later. A successful match should mean
- // that this stack is empty.
- runstack []int
- runstackpos int
-
- // The crawl stack is used to keep track of captures. Every time a group
- // has a capture, we push its group number onto the runcrawl stack. In
- // the case of a balanced match, we push BOTH groups onto the stack.
- runcrawl []int
- runcrawlpos int
-
- runtrackcount int // count of states that may do backtracking
-
- runmatch *Match // result object
-
- ignoreTimeout bool
- timeout time.Duration // timeout in milliseconds (needed for actual)
- timeoutChecksToSkip int
- timeoutAt time.Time
-
- operator syntax.InstOp
- codepos int
- rightToLeft bool
- caseInsensitive bool
- }
-
- // run searches for matches and can continue from the previous match
- //
- // quick is usually false, but can be true to not return matches, just put it in caches
- // textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input
- // input is the string to search for our regex pattern
- func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) {
-
- // get a cached runner
- runner := re.getRunner()
- defer re.putRunner(runner)
-
- if textstart < 0 {
- if re.RightToLeft() {
- textstart = len(input)
- } else {
- textstart = 0
- }
- }
-
- return runner.scan(input, textstart, quick, re.MatchTimeout)
- }
-
- // Scans the string to find the first match. Uses the Match object
- // both to feed text in and as a place to store matches that come out.
- //
- // All the action is in the Go() method. Our
- // responsibility is to load up the class members before
- // calling Go.
- //
- // The optimizer can compute a set of candidate starting characters,
- // and we could use a separate method Skip() that will quickly scan past
- // any characters that we know can't match.
- func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) {
- r.timeout = timeout
- r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout)
- r.runtextstart = textstart
- r.runtext = rt
- r.runtextend = len(rt)
-
- stoppos := r.runtextend
- bump := 1
-
- if r.re.RightToLeft() {
- bump = -1
- stoppos = 0
- }
-
- r.runtextpos = textstart
- initted := false
-
- r.startTimeoutWatch()
- for {
- if r.re.Debug() {
- //fmt.Printf("\nSearch content: %v\n", string(r.runtext))
- fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend)
- fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos)
- }
-
- if r.findFirstChar() {
- if err := r.checkTimeout(); err != nil {
- return nil, err
- }
-
- if !initted {
- r.initMatch()
- initted = true
- }
-
- if r.re.Debug() {
- fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos)
- }
-
- if err := r.execute(); err != nil {
- return nil, err
- }
-
- if r.runmatch.matchcount[0] > 0 {
- // We'll return a match even if it touches a previous empty match
- return r.tidyMatch(quick), nil
- }
-
- // reset state for another go
- r.runtrackpos = len(r.runtrack)
- r.runstackpos = len(r.runstack)
- r.runcrawlpos = len(r.runcrawl)
- }
-
- // failure!
-
- if r.runtextpos == stoppos {
- r.tidyMatch(true)
- return nil, nil
- }
-
- // Recognize leading []* and various anchors, and bump on failure accordingly
-
- // r.bump by one and start again
-
- r.runtextpos += bump
- }
- // We never get here
- }
-
- func (r *runner) execute() error {
-
- r.goTo(0)
-
- for {
-
- if r.re.Debug() {
- r.dumpState()
- }
-
- if err := r.checkTimeout(); err != nil {
- return err
- }
-
- switch r.operator {
- case syntax.Stop:
- return nil
-
- case syntax.Nothing:
- break
-
- case syntax.Goto:
- r.goTo(r.operand(0))
- continue
-
- case syntax.Testref:
- if !r.runmatch.isMatched(r.operand(0)) {
- break
- }
- r.advance(1)
- continue
-
- case syntax.Lazybranch:
- r.trackPush1(r.textPos())
- r.advance(1)
- continue
-
- case syntax.Lazybranch | syntax.Back:
- r.trackPop()
- r.textto(r.trackPeek())
- r.goTo(r.operand(0))
- continue
-
- case syntax.Setmark:
- r.stackPush(r.textPos())
- r.trackPush()
- r.advance(0)
- continue
-
- case syntax.Nullmark:
- r.stackPush(-1)
- r.trackPush()
- r.advance(0)
- continue
-
- case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back:
- r.stackPop()
- break
-
- case syntax.Getmark:
- r.stackPop()
- r.trackPush1(r.stackPeek())
- r.textto(r.stackPeek())
- r.advance(0)
- continue
-
- case syntax.Getmark | syntax.Back:
- r.trackPop()
- r.stackPush(r.trackPeek())
- break
-
- case syntax.Capturemark:
- if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) {
- break
- }
- r.stackPop()
- if r.operand(1) != -1 {
- r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos())
- } else {
- r.capture(r.operand(0), r.stackPeek(), r.textPos())
- }
- r.trackPush1(r.stackPeek())
-
- r.advance(2)
-
- continue
-
- case syntax.Capturemark | syntax.Back:
- r.trackPop()
- r.stackPush(r.trackPeek())
- r.uncapture()
- if r.operand(0) != -1 && r.operand(1) != -1 {
- r.uncapture()
- }
-
- break
-
- case syntax.Branchmark:
- r.stackPop()
-
- matched := r.textPos() - r.stackPeek()
-
- if matched != 0 { // Nonempty match -> loop now
- r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos
- r.stackPush(r.textPos()) // Make new mark
- r.goTo(r.operand(0)) // Loop
- } else { // Empty match -> straight now
- r.trackPushNeg1(r.stackPeek()) // Save old mark
- r.advance(1) // Straight
- }
- continue
-
- case syntax.Branchmark | syntax.Back:
- r.trackPopN(2)
- r.stackPop()
- r.textto(r.trackPeekN(1)) // Recall position
- r.trackPushNeg1(r.trackPeek()) // Save old mark
- r.advance(1) // Straight
- continue
-
- case syntax.Branchmark | syntax.Back2:
- r.trackPop()
- r.stackPush(r.trackPeek()) // Recall old mark
- break // Backtrack
-
- case syntax.Lazybranchmark:
- {
- // We hit this the first time through a lazy loop and after each
- // successful match of the inner expression. It simply continues
- // on and doesn't loop.
- r.stackPop()
-
- oldMarkPos := r.stackPeek()
-
- if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state
- if oldMarkPos != -1 {
- r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos
- } else {
- r.trackPush2(r.textPos(), r.textPos())
- }
- } else {
- // The inner expression found an empty match, so we'll go directly to 'back2' if we
- // backtrack. In this case, we need to push something on the stack, since back2 pops.
- // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
- // position associated with that empty match.
- r.stackPush(oldMarkPos)
-
- r.trackPushNeg1(r.stackPeek()) // Save old mark
- }
- r.advance(1)
- continue
- }
-
- case syntax.Lazybranchmark | syntax.Back:
-
- // After the first time, Lazybranchmark | syntax.Back occurs
- // with each iteration of the loop, and therefore with every attempted
- // match of the inner expression. We'll try to match the inner expression,
- // then go back to Lazybranchmark if successful. If the inner expression
- // fails, we go to Lazybranchmark | syntax.Back2
-
- r.trackPopN(2)
- pos := r.trackPeekN(1)
- r.trackPushNeg1(r.trackPeek()) // Save old mark
- r.stackPush(pos) // Make new mark
- r.textto(pos) // Recall position
- r.goTo(r.operand(0)) // Loop
- continue
-
- case syntax.Lazybranchmark | syntax.Back2:
- // The lazy loop has failed. We'll do a true backtrack and
- // start over before the lazy loop.
- r.stackPop()
- r.trackPop()
- r.stackPush(r.trackPeek()) // Recall old mark
- break
-
- case syntax.Setcount:
- r.stackPush2(r.textPos(), r.operand(0))
- r.trackPush()
- r.advance(1)
- continue
-
- case syntax.Nullcount:
- r.stackPush2(-1, r.operand(0))
- r.trackPush()
- r.advance(1)
- continue
-
- case syntax.Setcount | syntax.Back:
- r.stackPopN(2)
- break
-
- case syntax.Nullcount | syntax.Back:
- r.stackPopN(2)
- break
-
- case syntax.Branchcount:
- // r.stackPush:
- // 0: Mark
- // 1: Count
-
- r.stackPopN(2)
- mark := r.stackPeek()
- count := r.stackPeekN(1)
- matched := r.textPos() - mark
-
- if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now
- r.trackPushNeg2(mark, count) // Save old mark, count
- r.advance(2) // Straight
- } else { // Nonempty match -> count+loop now
- r.trackPush1(mark) // remember mark
- r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
- r.goTo(r.operand(0)) // Loop
- }
- continue
-
- case syntax.Branchcount | syntax.Back:
- // r.trackPush:
- // 0: Previous mark
- // r.stackPush:
- // 0: Mark (= current pos, discarded)
- // 1: Count
- r.trackPop()
- r.stackPopN(2)
- if r.stackPeekN(1) > 0 { // Positive -> can go straight
- r.textto(r.stackPeek()) // Zap to mark
- r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count
- r.advance(2) // Straight
- continue
- }
- r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count
- break
-
- case syntax.Branchcount | syntax.Back2:
- // r.trackPush:
- // 0: Previous mark
- // 1: Previous count
- r.trackPopN(2)
- r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count
- break // Backtrack
-
- case syntax.Lazybranchcount:
- // r.stackPush:
- // 0: Mark
- // 1: Count
-
- r.stackPopN(2)
- mark := r.stackPeek()
- count := r.stackPeekN(1)
-
- if count < 0 { // Negative count -> loop now
- r.trackPushNeg1(mark) // Save old mark
- r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
- r.goTo(r.operand(0)) // Loop
- } else { // Nonneg count -> straight now
- r.trackPush3(mark, count, r.textPos()) // Save mark, count, position
- r.advance(2) // Straight
- }
- continue
-
- case syntax.Lazybranchcount | syntax.Back:
- // r.trackPush:
- // 0: Mark
- // 1: Count
- // 2: r.textPos
-
- r.trackPopN(3)
- mark := r.trackPeek()
- textpos := r.trackPeekN(2)
-
- if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop
- r.textto(textpos) // Recall position
- r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count
- r.trackPushNeg1(mark) // Save old mark
- r.goTo(r.operand(0)) // Loop
- continue
- } else { // Max loops or empty match -> backtrack
- r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count
- break // backtrack
- }
-
- case syntax.Lazybranchcount | syntax.Back2:
- // r.trackPush:
- // 0: Previous mark
- // r.stackPush:
- // 0: Mark (== current pos, discarded)
- // 1: Count
- r.trackPop()
- r.stackPopN(2)
- r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count
- break // Backtrack
-
- case syntax.Setjump:
- r.stackPush2(r.trackpos(), r.crawlpos())
- r.trackPush()
- r.advance(0)
- continue
-
- case syntax.Setjump | syntax.Back:
- r.stackPopN(2)
- break
-
- case syntax.Backjump:
- // r.stackPush:
- // 0: Saved trackpos
- // 1: r.crawlpos
- r.stackPopN(2)
- r.trackto(r.stackPeek())
-
- for r.crawlpos() != r.stackPeekN(1) {
- r.uncapture()
- }
-
- break
-
- case syntax.Forejump:
- // r.stackPush:
- // 0: Saved trackpos
- // 1: r.crawlpos
- r.stackPopN(2)
- r.trackto(r.stackPeek())
- r.trackPush1(r.stackPeekN(1))
- r.advance(0)
- continue
-
- case syntax.Forejump | syntax.Back:
- // r.trackPush:
- // 0: r.crawlpos
- r.trackPop()
-
- for r.crawlpos() != r.trackPeek() {
- r.uncapture()
- }
-
- break
-
- case syntax.Bol:
- if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' {
- break
- }
- r.advance(0)
- continue
-
- case syntax.Eol:
- if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' {
- break
- }
- r.advance(0)
- continue
-
- case syntax.Boundary:
- if !r.isBoundary(r.textPos(), 0, r.runtextend) {
- break
- }
- r.advance(0)
- continue
-
- case syntax.Nonboundary:
- if r.isBoundary(r.textPos(), 0, r.runtextend) {
- break
- }
- r.advance(0)
- continue
-
- case syntax.ECMABoundary:
- if !r.isECMABoundary(r.textPos(), 0, r.runtextend) {
- break
- }
- r.advance(0)
- continue
-
- case syntax.NonECMABoundary:
- if r.isECMABoundary(r.textPos(), 0, r.runtextend) {
- break
- }
- r.advance(0)
- continue
-
- case syntax.Beginning:
- if r.leftchars() > 0 {
- break
- }
- r.advance(0)
- continue
-
- case syntax.Start:
- if r.textPos() != r.textstart() {
- break
- }
- r.advance(0)
- continue
-
- case syntax.EndZ:
- rchars := r.rightchars()
- if rchars > 1 {
- break
- }
- // RE2 and EcmaScript define $ as "asserts position at the end of the string"
- // PCRE/.NET adds "or before the line terminator right at the end of the string (if any)"
- if (r.re.options & (RE2 | ECMAScript)) != 0 {
- // RE2/Ecmascript mode
- if rchars > 0 {
- break
- }
- } else if rchars == 1 && r.charAt(r.textPos()) != '\n' {
- // "regular" mode
- break
- }
-
- r.advance(0)
- continue
-
- case syntax.End:
- if r.rightchars() > 0 {
- break
- }
- r.advance(0)
- continue
-
- case syntax.One:
- if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) {
- break
- }
-
- r.advance(1)
- continue
-
- case syntax.Notone:
- if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) {
- break
- }
-
- r.advance(1)
- continue
-
- case syntax.Set:
-
- if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
- break
- }
-
- r.advance(1)
- continue
-
- case syntax.Multi:
- if !r.runematch(r.code.Strings[r.operand(0)]) {
- break
- }
-
- r.advance(1)
- continue
-
- case syntax.Ref:
-
- capnum := r.operand(0)
-
- if r.runmatch.isMatched(capnum) {
- if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) {
- break
- }
- } else {
- if (r.re.options & ECMAScript) == 0 {
- break
- }
- }
-
- r.advance(1)
- continue
-
- case syntax.Onerep:
-
- c := r.operand(1)
-
- if r.forwardchars() < c {
- break
- }
-
- ch := rune(r.operand(0))
-
- for c > 0 {
- if r.forwardcharnext() != ch {
- goto BreakBackward
- }
- c--
- }
-
- r.advance(2)
- continue
-
- case syntax.Notonerep:
-
- c := r.operand(1)
-
- if r.forwardchars() < c {
- break
- }
- ch := rune(r.operand(0))
-
- for c > 0 {
- if r.forwardcharnext() == ch {
- goto BreakBackward
- }
- c--
- }
-
- r.advance(2)
- continue
-
- case syntax.Setrep:
-
- c := r.operand(1)
-
- if r.forwardchars() < c {
- break
- }
-
- set := r.code.Sets[r.operand(0)]
-
- for c > 0 {
- if !set.CharIn(r.forwardcharnext()) {
- goto BreakBackward
- }
- c--
- }
-
- r.advance(2)
- continue
-
- case syntax.Oneloop:
-
- c := r.operand(1)
-
- if c > r.forwardchars() {
- c = r.forwardchars()
- }
-
- ch := rune(r.operand(0))
- i := c
-
- for ; i > 0; i-- {
- if r.forwardcharnext() != ch {
- r.backwardnext()
- break
- }
- }
-
- if c > i {
- r.trackPush2(c-i-1, r.textPos()-r.bump())
- }
-
- r.advance(2)
- continue
-
- case syntax.Notoneloop:
-
- c := r.operand(1)
-
- if c > r.forwardchars() {
- c = r.forwardchars()
- }
-
- ch := rune(r.operand(0))
- i := c
-
- for ; i > 0; i-- {
- if r.forwardcharnext() == ch {
- r.backwardnext()
- break
- }
- }
-
- if c > i {
- r.trackPush2(c-i-1, r.textPos()-r.bump())
- }
-
- r.advance(2)
- continue
-
- case syntax.Setloop:
-
- c := r.operand(1)
-
- if c > r.forwardchars() {
- c = r.forwardchars()
- }
-
- set := r.code.Sets[r.operand(0)]
- i := c
-
- for ; i > 0; i-- {
- if !set.CharIn(r.forwardcharnext()) {
- r.backwardnext()
- break
- }
- }
-
- if c > i {
- r.trackPush2(c-i-1, r.textPos()-r.bump())
- }
-
- r.advance(2)
- continue
-
- case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back:
-
- r.trackPopN(2)
- i := r.trackPeek()
- pos := r.trackPeekN(1)
-
- r.textto(pos)
-
- if i > 0 {
- r.trackPush2(i-1, pos-r.bump())
- }
-
- r.advance(2)
- continue
-
- case syntax.Setloop | syntax.Back:
-
- r.trackPopN(2)
- i := r.trackPeek()
- pos := r.trackPeekN(1)
-
- r.textto(pos)
-
- if i > 0 {
- r.trackPush2(i-1, pos-r.bump())
- }
-
- r.advance(2)
- continue
-
- case syntax.Onelazy, syntax.Notonelazy:
-
- c := r.operand(1)
-
- if c > r.forwardchars() {
- c = r.forwardchars()
- }
-
- if c > 0 {
- r.trackPush2(c-1, r.textPos())
- }
-
- r.advance(2)
- continue
-
- case syntax.Setlazy:
-
- c := r.operand(1)
-
- if c > r.forwardchars() {
- c = r.forwardchars()
- }
-
- if c > 0 {
- r.trackPush2(c-1, r.textPos())
- }
-
- r.advance(2)
- continue
-
- case syntax.Onelazy | syntax.Back:
-
- r.trackPopN(2)
- pos := r.trackPeekN(1)
- r.textto(pos)
-
- if r.forwardcharnext() != rune(r.operand(0)) {
- break
- }
-
- i := r.trackPeek()
-
- if i > 0 {
- r.trackPush2(i-1, pos+r.bump())
- }
-
- r.advance(2)
- continue
-
- case syntax.Notonelazy | syntax.Back:
-
- r.trackPopN(2)
- pos := r.trackPeekN(1)
- r.textto(pos)
-
- if r.forwardcharnext() == rune(r.operand(0)) {
- break
- }
-
- i := r.trackPeek()
-
- if i > 0 {
- r.trackPush2(i-1, pos+r.bump())
- }
-
- r.advance(2)
- continue
-
- case syntax.Setlazy | syntax.Back:
-
- r.trackPopN(2)
- pos := r.trackPeekN(1)
- r.textto(pos)
-
- if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
- break
- }
-
- i := r.trackPeek()
-
- if i > 0 {
- r.trackPush2(i-1, pos+r.bump())
- }
-
- r.advance(2)
- continue
-
- default:
- return errors.New("unknown state in regex runner")
- }
-
- BreakBackward:
- ;
-
- // "break Backward" comes here:
- r.backtrack()
- }
- }
-
- // increase the size of stack and track storage
- func (r *runner) ensureStorage() {
- if r.runstackpos < r.runtrackcount*4 {
- doubleIntSlice(&r.runstack, &r.runstackpos)
- }
- if r.runtrackpos < r.runtrackcount*4 {
- doubleIntSlice(&r.runtrack, &r.runtrackpos)
- }
- }
-
// doubleIntSlice grows a downward-growing int stack in place.
//
// s points at the backing slice and pos at the index of the stack's top
// element. The slice is replaced by one twice as long, the old contents are
// copied into the upper end of the new slice, and *pos is shifted by the
// number of slots added so it keeps referring to the same element. The new
// free room therefore appears below the top of the stack.
//
// An empty (or nil) slice cannot be doubled, so it is grown to a small fixed
// length instead; otherwise the caller's next push would index at -1.
func doubleIntSlice(s *[]int, pos *int) {
	const minLen = 32

	oldLen := len(*s)
	newLen := oldLen * 2
	if newLen == 0 {
		newLen = minLen
	}

	added := newLen - oldLen
	grown := make([]int, newLen)
	copy(grown[added:], *s)

	*pos += added
	*s = grown
}
-
- // Save a number on the longjump unrolling stack
- func (r *runner) crawl(i int) {
- if r.runcrawlpos == 0 {
- doubleIntSlice(&r.runcrawl, &r.runcrawlpos)
- }
- r.runcrawlpos--
- r.runcrawl[r.runcrawlpos] = i
- }
-
- // Remove a number from the longjump unrolling stack
- func (r *runner) popcrawl() int {
- val := r.runcrawl[r.runcrawlpos]
- r.runcrawlpos++
- return val
- }
-
- // Get the height of the stack
- func (r *runner) crawlpos() int {
- return len(r.runcrawl) - r.runcrawlpos
- }
-
- func (r *runner) advance(i int) {
- r.codepos += (i + 1)
- r.setOperator(r.code.Codes[r.codepos])
- }
-
- func (r *runner) goTo(newpos int) {
- // when branching backward or in place, ensure storage
- if newpos <= r.codepos {
- r.ensureStorage()
- }
-
- r.setOperator(r.code.Codes[newpos])
- r.codepos = newpos
- }
-
- func (r *runner) textto(newpos int) {
- r.runtextpos = newpos
- }
-
- func (r *runner) trackto(newpos int) {
- r.runtrackpos = len(r.runtrack) - newpos
- }
-
- func (r *runner) textstart() int {
- return r.runtextstart
- }
-
- func (r *runner) textPos() int {
- return r.runtextpos
- }
-
- // push onto the backtracking stack
- func (r *runner) trackpos() int {
- return len(r.runtrack) - r.runtrackpos
- }
-
- func (r *runner) trackPush() {
- r.runtrackpos--
- r.runtrack[r.runtrackpos] = r.codepos
- }
-
- func (r *runner) trackPush1(I1 int) {
- r.runtrackpos--
- r.runtrack[r.runtrackpos] = I1
- r.runtrackpos--
- r.runtrack[r.runtrackpos] = r.codepos
- }
-
- func (r *runner) trackPush2(I1, I2 int) {
- r.runtrackpos--
- r.runtrack[r.runtrackpos] = I1
- r.runtrackpos--
- r.runtrack[r.runtrackpos] = I2
- r.runtrackpos--
- r.runtrack[r.runtrackpos] = r.codepos
- }
-
- func (r *runner) trackPush3(I1, I2, I3 int) {
- r.runtrackpos--
- r.runtrack[r.runtrackpos] = I1
- r.runtrackpos--
- r.runtrack[r.runtrackpos] = I2
- r.runtrackpos--
- r.runtrack[r.runtrackpos] = I3
- r.runtrackpos--
- r.runtrack[r.runtrackpos] = r.codepos
- }
-
- func (r *runner) trackPushNeg1(I1 int) {
- r.runtrackpos--
- r.runtrack[r.runtrackpos] = I1
- r.runtrackpos--
- r.runtrack[r.runtrackpos] = -r.codepos
- }
-
- func (r *runner) trackPushNeg2(I1, I2 int) {
- r.runtrackpos--
- r.runtrack[r.runtrackpos] = I1
- r.runtrackpos--
- r.runtrack[r.runtrackpos] = I2
- r.runtrackpos--
- r.runtrack[r.runtrackpos] = -r.codepos
- }
-
- func (r *runner) backtrack() {
- newpos := r.runtrack[r.runtrackpos]
- r.runtrackpos++
-
- if r.re.Debug() {
- if newpos < 0 {
- fmt.Printf(" Backtracking (back2) to code position %v\n", -newpos)
- } else {
- fmt.Printf(" Backtracking to code position %v\n", newpos)
- }
- }
-
- if newpos < 0 {
- newpos = -newpos
- r.setOperator(r.code.Codes[newpos] | syntax.Back2)
- } else {
- r.setOperator(r.code.Codes[newpos] | syntax.Back)
- }
-
- // When branching backward, ensure storage
- if newpos < r.codepos {
- r.ensureStorage()
- }
-
- r.codepos = newpos
- }
-
- func (r *runner) setOperator(op int) {
- r.caseInsensitive = (0 != (op & syntax.Ci))
- r.rightToLeft = (0 != (op & syntax.Rtl))
- r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci))
- }
-
- func (r *runner) trackPop() {
- r.runtrackpos++
- }
-
- // pop framesize items from the backtracking stack
- func (r *runner) trackPopN(framesize int) {
- r.runtrackpos += framesize
- }
-
- // Technically we are actually peeking at items already popped. So if you want to
- // get and pop the top item from the stack, you do
- // r.trackPop();
- // r.trackPeek();
- func (r *runner) trackPeek() int {
- return r.runtrack[r.runtrackpos-1]
- }
-
- // get the ith element down on the backtracking stack
- func (r *runner) trackPeekN(i int) int {
- return r.runtrack[r.runtrackpos-i-1]
- }
-
- // Push onto the grouping stack
- func (r *runner) stackPush(I1 int) {
- r.runstackpos--
- r.runstack[r.runstackpos] = I1
- }
-
- func (r *runner) stackPush2(I1, I2 int) {
- r.runstackpos--
- r.runstack[r.runstackpos] = I1
- r.runstackpos--
- r.runstack[r.runstackpos] = I2
- }
-
- func (r *runner) stackPop() {
- r.runstackpos++
- }
-
- // pop framesize items from the grouping stack
- func (r *runner) stackPopN(framesize int) {
- r.runstackpos += framesize
- }
-
- // Technically we are actually peeking at items already popped. So if you want to
- // get and pop the top item from the stack, you do
- // r.stackPop();
- // r.stackPeek();
- func (r *runner) stackPeek() int {
- return r.runstack[r.runstackpos-1]
- }
-
- // get the ith element down on the grouping stack
- func (r *runner) stackPeekN(i int) int {
- return r.runstack[r.runstackpos-i-1]
- }
-
- func (r *runner) operand(i int) int {
- return r.code.Codes[r.codepos+i+1]
- }
-
- func (r *runner) leftchars() int {
- return r.runtextpos
- }
-
- func (r *runner) rightchars() int {
- return r.runtextend - r.runtextpos
- }
-
- func (r *runner) bump() int {
- if r.rightToLeft {
- return -1
- }
- return 1
- }
-
- func (r *runner) forwardchars() int {
- if r.rightToLeft {
- return r.runtextpos
- }
- return r.runtextend - r.runtextpos
- }
-
- func (r *runner) forwardcharnext() rune {
- var ch rune
- if r.rightToLeft {
- r.runtextpos--
- ch = r.runtext[r.runtextpos]
- } else {
- ch = r.runtext[r.runtextpos]
- r.runtextpos++
- }
-
- if r.caseInsensitive {
- return unicode.ToLower(ch)
- }
- return ch
- }
-
- func (r *runner) runematch(str []rune) bool {
- var pos int
-
- c := len(str)
- if !r.rightToLeft {
- if r.runtextend-r.runtextpos < c {
- return false
- }
-
- pos = r.runtextpos + c
- } else {
- if r.runtextpos-0 < c {
- return false
- }
-
- pos = r.runtextpos
- }
-
- if !r.caseInsensitive {
- for c != 0 {
- c--
- pos--
- if str[c] != r.runtext[pos] {
- return false
- }
- }
- } else {
- for c != 0 {
- c--
- pos--
- if str[c] != unicode.ToLower(r.runtext[pos]) {
- return false
- }
- }
- }
-
- if !r.rightToLeft {
- pos += len(str)
- }
-
- r.runtextpos = pos
-
- return true
- }
-
- func (r *runner) refmatch(index, len int) bool {
- var c, pos, cmpos int
-
- if !r.rightToLeft {
- if r.runtextend-r.runtextpos < len {
- return false
- }
-
- pos = r.runtextpos + len
- } else {
- if r.runtextpos-0 < len {
- return false
- }
-
- pos = r.runtextpos
- }
- cmpos = index + len
-
- c = len
-
- if !r.caseInsensitive {
- for c != 0 {
- c--
- cmpos--
- pos--
- if r.runtext[cmpos] != r.runtext[pos] {
- return false
- }
-
- }
- } else {
- for c != 0 {
- c--
- cmpos--
- pos--
-
- if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) {
- return false
- }
- }
- }
-
- if !r.rightToLeft {
- pos += len
- }
-
- r.runtextpos = pos
-
- return true
- }
-
- func (r *runner) backwardnext() {
- if r.rightToLeft {
- r.runtextpos++
- } else {
- r.runtextpos--
- }
- }
-
- func (r *runner) charAt(j int) rune {
- return r.runtext[j]
- }
-
- func (r *runner) findFirstChar() bool {
-
- if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) {
- if !r.code.RightToLeft {
- if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) ||
- (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) {
- r.runtextpos = r.runtextend
- return false
- }
- if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 {
- r.runtextpos = r.runtextend - 1
- } else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend {
- r.runtextpos = r.runtextend
- }
- } else {
- if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) ||
- (0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 ||
- (r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) ||
- (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) {
- r.runtextpos = 0
- return false
- }
- if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 {
- r.runtextpos = 0
- }
- }
-
- if r.code.BmPrefix != nil {
- return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend)
- }
-
- return true // found a valid start or end anchor
- } else if r.code.BmPrefix != nil {
- r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend)
-
- if r.runtextpos == -1 {
- if r.code.RightToLeft {
- r.runtextpos = 0
- } else {
- r.runtextpos = r.runtextend
- }
- return false
- }
-
- return true
- } else if r.code.FcPrefix == nil {
- return true
- }
-
- r.rightToLeft = r.code.RightToLeft
- r.caseInsensitive = r.code.FcPrefix.CaseInsensitive
-
- set := r.code.FcPrefix.PrefixSet
- if set.IsSingleton() {
- ch := set.SingletonChar()
- for i := r.forwardchars(); i > 0; i-- {
- if ch == r.forwardcharnext() {
- r.backwardnext()
- return true
- }
- }
- } else {
- for i := r.forwardchars(); i > 0; i-- {
- n := r.forwardcharnext()
- //fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n))
- if set.CharIn(n) {
- r.backwardnext()
- return true
- }
- }
- }
-
- return false
- }
-
- func (r *runner) initMatch() {
- // Use a hashtable'ed Match object if the capture numbers are sparse
-
- if r.runmatch == nil {
- if r.re.caps != nil {
- r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart)
- } else {
- r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart)
- }
- } else {
- r.runmatch.reset(r.runtext, r.runtextstart)
- }
-
- // note we test runcrawl, because it is the last one to be allocated
- // If there is an alloc failure in the middle of the three allocations,
- // we may still return to reuse this instance, and we want to behave
- // as if the allocations didn't occur. (we used to test _trackcount != 0)
-
- if r.runcrawl != nil {
- r.runtrackpos = len(r.runtrack)
- r.runstackpos = len(r.runstack)
- r.runcrawlpos = len(r.runcrawl)
- return
- }
-
- r.initTrackCount()
-
- tracksize := r.runtrackcount * 8
- stacksize := r.runtrackcount * 8
-
- if tracksize < 32 {
- tracksize = 32
- }
- if stacksize < 16 {
- stacksize = 16
- }
-
- r.runtrack = make([]int, tracksize)
- r.runtrackpos = tracksize
-
- r.runstack = make([]int, stacksize)
- r.runstackpos = stacksize
-
- r.runcrawl = make([]int, 32)
- r.runcrawlpos = 32
- }
-
- func (r *runner) tidyMatch(quick bool) *Match {
- if !quick {
- match := r.runmatch
-
- r.runmatch = nil
-
- match.tidy(r.runtextpos)
- return match
- } else {
- // send back our match -- it's not leaving the package, so it's safe to not clean it up
- // this reduces allocs for frequent calls to the "IsMatch" bool-only functions
- return r.runmatch
- }
- }
-
- // capture captures a subexpression. Note that the
- // capnum used here has already been mapped to a non-sparse
- // index (by the code generator RegexWriter).
- func (r *runner) capture(capnum, start, end int) {
- if end < start {
- T := end
- end = start
- start = T
- }
-
- r.crawl(capnum)
- r.runmatch.addMatch(capnum, start, end-start)
- }
-
- // transferCapture captures a subexpression. Note that the
- // capnum used here has already been mapped to a non-sparse
- // index (by the code generator RegexWriter).
- func (r *runner) transferCapture(capnum, uncapnum, start, end int) {
- var start2, end2 int
-
- // these are the two intervals that are cancelling each other
-
- if end < start {
- T := end
- end = start
- start = T
- }
-
- start2 = r.runmatch.matchIndex(uncapnum)
- end2 = start2 + r.runmatch.matchLength(uncapnum)
-
- // The new capture gets the innermost defined interval
-
- if start >= end2 {
- end = start
- start = end2
- } else if end <= start2 {
- start = start2
- } else {
- if end > end2 {
- end = end2
- }
- if start2 > start {
- start = start2
- }
- }
-
- r.crawl(uncapnum)
- r.runmatch.balanceMatch(uncapnum)
-
- if capnum != -1 {
- r.crawl(capnum)
- r.runmatch.addMatch(capnum, start, end-start)
- }
- }
-
- // revert the last capture
- func (r *runner) uncapture() {
- capnum := r.popcrawl()
- r.runmatch.removeMatch(capnum)
- }
-
- //debug
-
- func (r *runner) dumpState() {
- back := ""
- if r.operator&syntax.Back != 0 {
- back = " Back"
- }
- if r.operator&syntax.Back2 != 0 {
- back += " Back2"
- }
- fmt.Printf("Text: %v\nTrack: %v\nStack: %v\n %s%s\n\n",
- r.textposDescription(),
- r.stackDescription(r.runtrack, r.runtrackpos),
- r.stackDescription(r.runstack, r.runstackpos),
- r.code.OpcodeDescription(r.codepos),
- back)
- }
-
- func (r *runner) stackDescription(a []int, index int) string {
- buf := &bytes.Buffer{}
-
- fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a))
- if buf.Len() < 8 {
- buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
- }
-
- buf.WriteRune('(')
- for i := index; i < len(a); i++ {
- if i > index {
- buf.WriteRune(' ')
- }
-
- buf.WriteString(strconv.Itoa(a[i]))
- }
-
- buf.WriteRune(')')
-
- return buf.String()
- }
-
- func (r *runner) textposDescription() string {
- buf := &bytes.Buffer{}
-
- buf.WriteString(strconv.Itoa(r.runtextpos))
-
- if buf.Len() < 8 {
- buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
- }
-
- if r.runtextpos > 0 {
- buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1]))
- } else {
- buf.WriteRune('^')
- }
-
- buf.WriteRune('>')
-
- for i := r.runtextpos; i < r.runtextend; i++ {
- buf.WriteString(syntax.CharDescription(r.runtext[i]))
- }
- if buf.Len() >= 64 {
- buf.Truncate(61)
- buf.WriteString("...")
- } else {
- buf.WriteRune('$')
- }
-
- return buf.String()
- }
-
- // decide whether the pos
- // at the specified index is a boundary or not. It's just not worth
- // emitting inline code for this logic.
- func (r *runner) isBoundary(index, startpos, endpos int) bool {
- return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) !=
- (index < endpos && syntax.IsWordChar(r.runtext[index]))
- }
-
- func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
- return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) !=
- (index < endpos && syntax.IsECMAWordChar(r.runtext[index]))
- }
-
// timeoutCheckFrequency is the number of checkTimeout calls that pass
// between two real comparisons against the clock.
//
// The comment below came with the value and reads like a justification for
// picking 1000 more or less at random:
//
// We have determined this value in a series of experiments where x86 retail
// builds (ono-lab-optimized) were run on different pattern/input pairs.
// Larger values of TimeoutCheckFrequency did not tend to increase
// performance; smaller values of TimeoutCheckFrequency tended to slow down
// the execution.
const timeoutCheckFrequency int = 1000
-
- func (r *runner) startTimeoutWatch() {
- if r.ignoreTimeout {
- return
- }
-
- r.timeoutChecksToSkip = timeoutCheckFrequency
- r.timeoutAt = time.Now().Add(r.timeout)
- }
-
- func (r *runner) checkTimeout() error {
- if r.ignoreTimeout {
- return nil
- }
- r.timeoutChecksToSkip--
- if r.timeoutChecksToSkip != 0 {
- return nil
- }
-
- r.timeoutChecksToSkip = timeoutCheckFrequency
- return r.doCheckTimeout()
- }
-
- func (r *runner) doCheckTimeout() error {
- current := time.Now()
-
- if current.Before(r.timeoutAt) {
- return nil
- }
-
- if r.re.Debug() {
- //Debug.WriteLine("")
- //Debug.WriteLine("RegEx match timeout occurred!")
- //Debug.WriteLine("Specified timeout: " + TimeSpan.FromMilliseconds(_timeout).ToString())
- //Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency)
- //Debug.WriteLine("Search pattern: " + _runregex._pattern)
- //Debug.WriteLine("Input: " + r.runtext)
- //Debug.WriteLine("About to throw RegexMatchTimeoutException.")
- }
-
- return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext))
- }
-
- func (r *runner) initTrackCount() {
- r.runtrackcount = r.code.TrackCount
- }
-
- // getRunner returns a run to use for matching re.
- // It uses the re's runner cache if possible, to avoid
- // unnecessary allocation.
- func (re *Regexp) getRunner() *runner {
- re.muRun.Lock()
- if n := len(re.runner); n > 0 {
- z := re.runner[n-1]
- re.runner = re.runner[:n-1]
- re.muRun.Unlock()
- return z
- }
- re.muRun.Unlock()
- z := &runner{
- re: re,
- code: re.code,
- }
- return z
- }
-
- // putRunner returns a runner to the re's cache.
- // There is no attempt to limit the size of the cache, so it will
- // grow to the maximum number of simultaneous matches
- // run using re. (The cache empties when re gets garbage collected.)
- func (re *Regexp) putRunner(r *runner) {
- re.muRun.Lock()
- re.runner = append(re.runner, r)
- re.muRun.Unlock()
- }
|