You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

runner.go 35KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634
  1. package regexp2
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "math"
  7. "strconv"
  8. "strings"
  9. "time"
  10. "unicode"
  11. "github.com/dlclark/regexp2/syntax"
  12. )
  // runner holds the mutable state of one regex search: the input text, the
// three working stacks, the timeout bookkeeping, and the instruction that is
// currently being interpreted.
//
// All three stacks grow downward: a push decrements the position field and
// then stores at that index, and the position is reset to len(slice) to empty
// the stack.
type runner struct {
	re   *Regexp
	code *syntax.Code

	runtextstart int    // starting point for search
	runtext      []rune // text to search
	runtextpos   int    // current position in text
	runtextend   int    // end of the searchable text; scan sets it to len(runtext)

	// The backtracking stack. Opcodes use this to store data regarding
	// what they have matched and where to backtrack to. Each "frame" on
	// the stack takes the form of [CodePosition Data1 Data2...], where
	// CodePosition is the position of the current opcode and
	// the data values are all optional. The CodePosition can be negative, and
	// these values (also called "back2") are used by the BranchMark family of opcodes
	// to indicate whether they are backtracking after a successful or failed
	// match.
	// When we backtrack, we pop the CodePosition off the stack, set the current
	// instruction pointer to that code position, and mark the opcode
	// with a backtracking flag ("Back"). Each opcode then knows how to
	// handle its own data.
	runtrack    []int
	runtrackpos int // index of the top element of runtrack

	// This stack is used to track text positions across different opcodes.
	// For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark
	// pair. SetMark records the text position before we match a*b. Then
	// CaptureMark uses that position to figure out where the capture starts.
	// Opcodes which push onto this stack are always paired with other opcodes
	// which will pop the value from it later. A successful match should mean
	// that this stack is empty.
	runstack    []int
	runstackpos int // index of the top element of runstack

	// The crawl stack is used to keep track of captures. Every time a group
	// has a capture, we push its group number onto the runcrawl stack. In
	// the case of a balanced match, we push BOTH groups onto the stack.
	runcrawl    []int
	runcrawlpos int // index of the top element of runcrawl

	runtrackcount int // count of states that may do backtracking

	runmatch *Match // result object

	// ignoreTimeout is set by scan when timeout equals
	// time.Duration(math.MaxInt64), i.e. when no effective limit was requested.
	ignoreTimeout bool
	// timeout is the match time limit handed to scan. It is a time.Duration,
	// so it carries its own unit rather than being a millisecond count.
	timeout             time.Duration
	timeoutChecksToSkip int       // NOTE(review): maintained by the timeout helpers outside this view
	timeoutAt           time.Time // NOTE(review): presumably the deadline computed by startTimeoutWatch; confirm

	// The fields below describe the instruction being interpreted. operator,
	// rightToLeft and caseInsensitive are decoded by setOperator; codepos is
	// updated by advance, goTo and backtrack.
	operator        syntax.InstOp
	codepos         int
	rightToLeft     bool
	caseInsensitive bool
}
  59. // run searches for matches and can continue from the previous match
  60. //
  61. // quick is usually false, but can be true to not return matches, just put it in caches
  62. // textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input
  63. // input is the string to search for our regex pattern
  64. func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) {
  65. // get a cached runner
  66. runner := re.getRunner()
  67. defer re.putRunner(runner)
  68. if textstart < 0 {
  69. if re.RightToLeft() {
  70. textstart = len(input)
  71. } else {
  72. textstart = 0
  73. }
  74. }
  75. return runner.scan(input, textstart, quick, re.MatchTimeout)
  76. }
  77. // Scans the string to find the first match. Uses the Match object
  78. // both to feed text in and as a place to store matches that come out.
  79. //
  80. // All the action is in the Go() method. Our
  81. // responsibility is to load up the class members before
  82. // calling Go.
  83. //
  84. // The optimizer can compute a set of candidate starting characters,
  85. // and we could use a separate method Skip() that will quickly scan past
  86. // any characters that we know can't match.
  87. func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) {
  88. r.timeout = timeout
  89. r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout)
  90. r.runtextstart = textstart
  91. r.runtext = rt
  92. r.runtextend = len(rt)
  93. stoppos := r.runtextend
  94. bump := 1
  95. if r.re.RightToLeft() {
  96. bump = -1
  97. stoppos = 0
  98. }
  99. r.runtextpos = textstart
  100. initted := false
  101. r.startTimeoutWatch()
  102. for {
  103. if r.re.Debug() {
  104. //fmt.Printf("\nSearch content: %v\n", string(r.runtext))
  105. fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend)
  106. fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos)
  107. }
  108. if r.findFirstChar() {
  109. if err := r.checkTimeout(); err != nil {
  110. return nil, err
  111. }
  112. if !initted {
  113. r.initMatch()
  114. initted = true
  115. }
  116. if r.re.Debug() {
  117. fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos)
  118. }
  119. if err := r.execute(); err != nil {
  120. return nil, err
  121. }
  122. if r.runmatch.matchcount[0] > 0 {
  123. // We'll return a match even if it touches a previous empty match
  124. return r.tidyMatch(quick), nil
  125. }
  126. // reset state for another go
  127. r.runtrackpos = len(r.runtrack)
  128. r.runstackpos = len(r.runstack)
  129. r.runcrawlpos = len(r.runcrawl)
  130. }
  131. // failure!
  132. if r.runtextpos == stoppos {
  133. r.tidyMatch(true)
  134. return nil, nil
  135. }
  136. // Recognize leading []* and various anchors, and bump on failure accordingly
  137. // r.bump by one and start again
  138. r.runtextpos += bump
  139. }
  140. // We never get here
  141. }
  // execute interprets the compiled program in r.code, starting at code
// position 0, until a Stop instruction is reached.
//
// Every loop iteration first checks the timeout and then dispatches on
// r.operator. A case that succeeds moves on with advance or goTo and then
// `continue`s. A case that fails leaves the switch with `break` (or jumps to
// the BreakBackward label from inside a nested loop), which falls through to
// r.backtrack(); backtrack pops the next frame off the backtracking stack and
// re-enters the owning opcode with the Back or Back2 flag set.
//
// Returns nil when Stop is reached, the error from checkTimeout, or an
// "unknown state" error for an operator that has no case.
func (r *runner) execute() error {
	r.goTo(0)

	for {
		if r.re.Debug() {
			r.dumpState()
		}

		if err := r.checkTimeout(); err != nil {
			return err
		}

		switch r.operator {
		case syntax.Stop:
			return nil

		case syntax.Nothing:
			break

		case syntax.Goto:
			r.goTo(r.operand(0))
			continue

		case syntax.Testref:
			if !r.runmatch.isMatched(r.operand(0)) {
				break
			}
			r.advance(1)
			continue

		case syntax.Lazybranch:
			r.trackPush1(r.textPos())
			r.advance(1)
			continue

		case syntax.Lazybranch | syntax.Back:
			r.trackPop()
			r.textto(r.trackPeek())
			r.goTo(r.operand(0))
			continue

		// Mark opcodes: push, read back and restore text positions on the
		// grouping stack.
		case syntax.Setmark:
			r.stackPush(r.textPos())
			r.trackPush()
			r.advance(0)
			continue

		case syntax.Nullmark:
			r.stackPush(-1)
			r.trackPush()
			r.advance(0)
			continue

		case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back:
			r.stackPop()
			break

		case syntax.Getmark:
			r.stackPop()
			r.trackPush1(r.stackPeek())
			r.textto(r.stackPeek())
			r.advance(0)
			continue

		case syntax.Getmark | syntax.Back:
			r.trackPop()
			r.stackPush(r.trackPeek())
			break

		case syntax.Capturemark:
			// A second operand other than -1 names a group that must already
			// be matched; the capture is then recorded via transferCapture.
			if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) {
				break
			}
			r.stackPop()
			if r.operand(1) != -1 {
				r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos())
			} else {
				r.capture(r.operand(0), r.stackPeek(), r.textPos())
			}
			r.trackPush1(r.stackPeek())
			r.advance(2)
			continue

		case syntax.Capturemark | syntax.Back:
			r.trackPop()
			r.stackPush(r.trackPeek())
			r.uncapture()
			if r.operand(0) != -1 && r.operand(1) != -1 {
				r.uncapture()
			}
			break

		case syntax.Branchmark:
			r.stackPop()
			matched := r.textPos() - r.stackPeek()
			if matched != 0 { // Nonempty match -> loop now
				r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos
				r.stackPush(r.textPos())                 // Make new mark
				r.goTo(r.operand(0))                     // Loop
			} else { // Empty match -> straight now
				r.trackPushNeg1(r.stackPeek()) // Save old mark
				r.advance(1)                   // Straight
			}
			continue

		case syntax.Branchmark | syntax.Back:
			r.trackPopN(2)
			r.stackPop()
			r.textto(r.trackPeekN(1))      // Recall position
			r.trackPushNeg1(r.trackPeek()) // Save old mark
			r.advance(1)                   // Straight
			continue

		case syntax.Branchmark | syntax.Back2:
			r.trackPop()
			r.stackPush(r.trackPeek()) // Recall old mark
			break                      // Backtrack

		case syntax.Lazybranchmark:
			{
				// We hit this the first time through a lazy loop and after each
				// successful match of the inner expression. It simply continues
				// on and doesn't loop.
				r.stackPop()

				oldMarkPos := r.stackPeek()

				if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state
					if oldMarkPos != -1 {
						r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos
					} else {
						r.trackPush2(r.textPos(), r.textPos())
					}
				} else {
					// The inner expression found an empty match, so we'll go directly to 'back2' if we
					// backtrack. In this case, we need to push something on the stack, since back2 pops.
					// However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
					// position associated with that empty match.
					r.stackPush(oldMarkPos)

					r.trackPushNeg1(r.stackPeek()) // Save old mark
				}
				r.advance(1)
				continue
			}

		case syntax.Lazybranchmark | syntax.Back:
			// After the first time, Lazybranchmark | syntax.Back occurs
			// with each iteration of the loop, and therefore with every attempted
			// match of the inner expression. We'll try to match the inner expression,
			// then go back to Lazybranchmark if successful. If the inner expression
			// fails, we go to Lazybranchmark | syntax.Back2
			r.trackPopN(2)
			pos := r.trackPeekN(1)
			r.trackPushNeg1(r.trackPeek()) // Save old mark
			r.stackPush(pos)               // Make new mark
			r.textto(pos)                  // Recall position
			r.goTo(r.operand(0))           // Loop
			continue

		case syntax.Lazybranchmark | syntax.Back2:
			// The lazy loop has failed. We'll do a true backtrack and
			// start over before the lazy loop.
			r.stackPop()
			r.trackPop()
			r.stackPush(r.trackPeek()) // Recall old mark
			break

		// Count opcodes: like the mark opcodes, but each grouping-stack frame
		// is a (mark, count) pair.
		case syntax.Setcount:
			r.stackPush2(r.textPos(), r.operand(0))
			r.trackPush()
			r.advance(1)
			continue

		case syntax.Nullcount:
			r.stackPush2(-1, r.operand(0))
			r.trackPush()
			r.advance(1)
			continue

		case syntax.Setcount | syntax.Back:
			r.stackPopN(2)
			break

		case syntax.Nullcount | syntax.Back:
			r.stackPopN(2)
			break

		case syntax.Branchcount:
			// r.stackPush:
			//  0: Mark
			//  1: Count

			r.stackPopN(2)
			mark := r.stackPeek()
			count := r.stackPeekN(1)
			matched := r.textPos() - mark

			if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now
				r.trackPushNeg2(mark, count) // Save old mark, count
				r.advance(2)                 // Straight
			} else { // Nonempty match -> count+loop now
				r.trackPush1(mark)                 // remember mark
				r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
				r.goTo(r.operand(0))               // Loop
			}
			continue

		case syntax.Branchcount | syntax.Back:
			// r.trackPush:
			//  0: Previous mark
			// r.stackPush:
			//  0: Mark (= current pos, discarded)
			//  1: Count
			r.trackPop()
			r.stackPopN(2)
			if r.stackPeekN(1) > 0 { // Positive -> can go straight
				r.textto(r.stackPeek())                           // Zap to mark
				r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count
				r.advance(2)                                      // Straight
				continue
			}
			r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count
			break

		case syntax.Branchcount | syntax.Back2:
			// r.trackPush:
			//  0: Previous mark
			//  1: Previous count
			r.trackPopN(2)
			r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count
			break                                        // Backtrack

		case syntax.Lazybranchcount:
			// r.stackPush:
			//  0: Mark
			//  1: Count

			r.stackPopN(2)
			mark := r.stackPeek()
			count := r.stackPeekN(1)

			if count < 0 { // Negative count -> loop now
				r.trackPushNeg1(mark)              // Save old mark
				r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
				r.goTo(r.operand(0))               // Loop
			} else { // Nonneg count -> straight now
				r.trackPush3(mark, count, r.textPos()) // Save mark, count, position
				r.advance(2)                           // Straight
			}
			continue

		case syntax.Lazybranchcount | syntax.Back:
			// r.trackPush:
			//  0: Mark
			//  1: Count
			//  2: r.textPos

			r.trackPopN(3)
			mark := r.trackPeek()
			textpos := r.trackPeekN(2)

			if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop
				r.textto(textpos)                        // Recall position
				r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count
				r.trackPushNeg1(mark)                    // Save old mark
				r.goTo(r.operand(0))                     // Loop
				continue
			} else { // Max loops or empty match -> backtrack
				r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count
				break                                        // backtrack
			}

		case syntax.Lazybranchcount | syntax.Back2:
			// r.trackPush:
			//  0: Previous mark
			// r.stackPush:
			//  0: Mark (== current pos, discarded)
			//  1: Count
			r.trackPop()
			r.stackPopN(2)
			r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count
			break                                          // Backtrack

		// Jump opcodes: save the heights of the backtracking and crawl stacks
		// on the grouping stack, and later rewind to them.
		case syntax.Setjump:
			r.stackPush2(r.trackpos(), r.crawlpos())
			r.trackPush()
			r.advance(0)
			continue

		case syntax.Setjump | syntax.Back:
			r.stackPopN(2)
			break

		case syntax.Backjump:
			// r.stackPush:
			//  0: Saved trackpos
			//  1: r.crawlpos
			r.stackPopN(2)
			r.trackto(r.stackPeek())

			// Undo captures until the crawl stack is back at its saved height.
			for r.crawlpos() != r.stackPeekN(1) {
				r.uncapture()
			}
			break

		case syntax.Forejump:
			// r.stackPush:
			//  0: Saved trackpos
			//  1: r.crawlpos
			r.stackPopN(2)
			r.trackto(r.stackPeek())
			r.trackPush1(r.stackPeekN(1))
			r.advance(0)
			continue

		case syntax.Forejump | syntax.Back:
			// r.trackPush:
			//  0: r.crawlpos
			r.trackPop()

			for r.crawlpos() != r.trackPeek() {
				r.uncapture()
			}
			break

		// Anchors and boundary assertions: these test the current position
		// and, on success, advance past the opcode without moving the text
		// position.
		case syntax.Bol:
			if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' {
				break
			}
			r.advance(0)
			continue

		case syntax.Eol:
			if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' {
				break
			}
			r.advance(0)
			continue

		case syntax.Boundary:
			if !r.isBoundary(r.textPos(), 0, r.runtextend) {
				break
			}
			r.advance(0)
			continue

		case syntax.Nonboundary:
			if r.isBoundary(r.textPos(), 0, r.runtextend) {
				break
			}
			r.advance(0)
			continue

		case syntax.ECMABoundary:
			if !r.isECMABoundary(r.textPos(), 0, r.runtextend) {
				break
			}
			r.advance(0)
			continue

		case syntax.NonECMABoundary:
			if r.isECMABoundary(r.textPos(), 0, r.runtextend) {
				break
			}
			r.advance(0)
			continue

		case syntax.Beginning:
			if r.leftchars() > 0 {
				break
			}
			r.advance(0)
			continue

		case syntax.Start:
			if r.textPos() != r.textstart() {
				break
			}
			r.advance(0)
			continue

		case syntax.EndZ:
			rchars := r.rightchars()
			if rchars > 1 {
				break
			}
			// RE2 and EcmaScript define $ as "asserts position at the end of the string"
			// PCRE/.NET adds "or before the line terminator right at the end of the string (if any)"
			if (r.re.options & (RE2 | ECMAScript)) != 0 {
				// RE2/Ecmascript mode
				if rchars > 0 {
					break
				}
			} else if rchars == 1 && r.charAt(r.textPos()) != '\n' {
				// "regular" mode
				break
			}

			r.advance(0)
			continue

		case syntax.End:
			if r.rightchars() > 0 {
				break
			}
			r.advance(0)
			continue

		// Single-character and literal matches. forwardcharnext consumes one
		// rune in the current scan direction.
		case syntax.One:
			if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) {
				break
			}

			r.advance(1)
			continue

		case syntax.Notone:
			if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) {
				break
			}
			r.advance(1)
			continue

		case syntax.Set:

			if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
				break
			}

			r.advance(1)
			continue

		case syntax.Multi:
			if !r.runematch(r.code.Strings[r.operand(0)]) {
				break
			}

			r.advance(1)
			continue

		case syntax.Ref:

			capnum := r.operand(0)

			if r.runmatch.isMatched(capnum) {
				if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) {
					break
				}
			} else {
				// An unmatched group fails the reference unless the
				// ECMAScript option is set.
				if (r.re.options & ECMAScript) == 0 {
					break
				}
			}

			r.advance(1)
			continue

		// Fixed-count repetitions: operand(1) runes must all satisfy the test.
		case syntax.Onerep:

			c := r.operand(1)

			if r.forwardchars() < c {
				break
			}

			ch := rune(r.operand(0))

			for c > 0 {
				if r.forwardcharnext() != ch {
					goto BreakBackward
				}
				c--
			}

			r.advance(2)
			continue

		case syntax.Notonerep:

			c := r.operand(1)

			if r.forwardchars() < c {
				break
			}
			ch := rune(r.operand(0))

			for c > 0 {
				if r.forwardcharnext() == ch {
					goto BreakBackward
				}
				c--
			}

			r.advance(2)
			continue

		case syntax.Setrep:

			c := r.operand(1)

			if r.forwardchars() < c {
				break
			}

			set := r.code.Sets[r.operand(0)]

			for c > 0 {
				if !set.CharIn(r.forwardcharnext()) {
					goto BreakBackward
				}
				c--
			}

			r.advance(2)
			continue

		// Greedy loops: consume up to operand(1) runes (capped at what is
		// available), then, if anything was consumed, push a frame holding
		// the number of give-backs left and the position one step back.
		case syntax.Oneloop:

			c := r.operand(1)

			if c > r.forwardchars() {
				c = r.forwardchars()
			}

			ch := rune(r.operand(0))
			i := c

			for ; i > 0; i-- {
				if r.forwardcharnext() != ch {
					// Step back over the rune that failed the test.
					r.backwardnext()
					break
				}
			}

			if c > i {
				r.trackPush2(c-i-1, r.textPos()-r.bump())
			}

			r.advance(2)
			continue

		case syntax.Notoneloop:

			c := r.operand(1)

			if c > r.forwardchars() {
				c = r.forwardchars()
			}

			ch := rune(r.operand(0))
			i := c

			for ; i > 0; i-- {
				if r.forwardcharnext() == ch {
					r.backwardnext()
					break
				}
			}

			if c > i {
				r.trackPush2(c-i-1, r.textPos()-r.bump())
			}

			r.advance(2)
			continue

		case syntax.Setloop:

			c := r.operand(1)

			if c > r.forwardchars() {
				c = r.forwardchars()
			}

			set := r.code.Sets[r.operand(0)]
			i := c

			for ; i > 0; i-- {
				if !set.CharIn(r.forwardcharnext()) {
					r.backwardnext()
					break
				}
			}

			if c > i {
				r.trackPush2(c-i-1, r.textPos()-r.bump())
			}

			r.advance(2)
			continue

		case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back:

			r.trackPopN(2)
			i := r.trackPeek()
			pos := r.trackPeekN(1)

			r.textto(pos)

			if i > 0 {
				r.trackPush2(i-1, pos-r.bump())
			}

			r.advance(2)
			continue

		case syntax.Setloop | syntax.Back:

			r.trackPopN(2)
			i := r.trackPeek()
			pos := r.trackPeekN(1)

			r.textto(pos)

			if i > 0 {
				r.trackPush2(i-1, pos-r.bump())
			}

			r.advance(2)
			continue

		// Lazy loops: consume nothing up front; the pushed frame lets each
		// backtrack try one more rune, up to operand(1).
		case syntax.Onelazy, syntax.Notonelazy:

			c := r.operand(1)

			if c > r.forwardchars() {
				c = r.forwardchars()
			}

			if c > 0 {
				r.trackPush2(c-1, r.textPos())
			}

			r.advance(2)
			continue

		case syntax.Setlazy:

			c := r.operand(1)

			if c > r.forwardchars() {
				c = r.forwardchars()
			}

			if c > 0 {
				r.trackPush2(c-1, r.textPos())
			}

			r.advance(2)
			continue

		case syntax.Onelazy | syntax.Back:

			r.trackPopN(2)
			pos := r.trackPeekN(1)
			r.textto(pos)

			if r.forwardcharnext() != rune(r.operand(0)) {
				break
			}

			i := r.trackPeek()

			if i > 0 {
				r.trackPush2(i-1, pos+r.bump())
			}

			r.advance(2)
			continue

		case syntax.Notonelazy | syntax.Back:

			r.trackPopN(2)
			pos := r.trackPeekN(1)
			r.textto(pos)

			if r.forwardcharnext() == rune(r.operand(0)) {
				break
			}

			i := r.trackPeek()

			if i > 0 {
				r.trackPush2(i-1, pos+r.bump())
			}

			r.advance(2)
			continue

		case syntax.Setlazy | syntax.Back:

			r.trackPopN(2)
			pos := r.trackPeekN(1)
			r.textto(pos)

			if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
				break
			}

			i := r.trackPeek()

			if i > 0 {
				r.trackPush2(i-1, pos+r.bump())
			}

			r.advance(2)
			continue

		default:
			return errors.New("unknown state in regex runner")
		}

		// Every `break` out of the switch lands here, as do the explicit
		// jumps from the inner loops of the *rep opcodes.
	BreakBackward:
		;

		// "break Backward" comes here:
		r.backtrack()
	}
}
  713. // increase the size of stack and track storage
  714. func (r *runner) ensureStorage() {
  715. if r.runstackpos < r.runtrackcount*4 {
  716. doubleIntSlice(&r.runstack, &r.runstackpos)
  717. }
  718. if r.runtrackpos < r.runtrackcount*4 {
  719. doubleIntSlice(&r.runtrack, &r.runtrackpos)
  720. }
  721. }
  // doubleIntSlice replaces *s with a slice of twice the length whose upper half
// holds the old contents, and shifts *pos by the old length so that it keeps
// pointing at the same element. The lower half of the new slice is zeroed.
func doubleIntSlice(s *[]int, pos *int) {
	old := *s
	n := len(old)

	grown := make([]int, 2*n)
	copy(grown[n:], old)

	*s = grown
	*pos += n
}
  729. // Save a number on the longjump unrolling stack
  730. func (r *runner) crawl(i int) {
  731. if r.runcrawlpos == 0 {
  732. doubleIntSlice(&r.runcrawl, &r.runcrawlpos)
  733. }
  734. r.runcrawlpos--
  735. r.runcrawl[r.runcrawlpos] = i
  736. }
  737. // Remove a number from the longjump unrolling stack
  738. func (r *runner) popcrawl() int {
  739. val := r.runcrawl[r.runcrawlpos]
  740. r.runcrawlpos++
  741. return val
  742. }
  743. // Get the height of the stack
  744. func (r *runner) crawlpos() int {
  745. return len(r.runcrawl) - r.runcrawlpos
  746. }
  747. func (r *runner) advance(i int) {
  748. r.codepos += (i + 1)
  749. r.setOperator(r.code.Codes[r.codepos])
  750. }
  751. func (r *runner) goTo(newpos int) {
  752. // when branching backward or in place, ensure storage
  753. if newpos <= r.codepos {
  754. r.ensureStorage()
  755. }
  756. r.setOperator(r.code.Codes[newpos])
  757. r.codepos = newpos
  758. }
  // textto moves the current text position to newpos.
func (r *runner) textto(newpos int) {
	r.runtextpos = newpos
}
  762. func (r *runner) trackto(newpos int) {
  763. r.runtrackpos = len(r.runtrack) - newpos
  764. }
  // textstart returns the position at which the current search started.
func (r *runner) textstart() int {
	return r.runtextstart
}
  // textPos returns the current position in the text being searched.
func (r *runner) textPos() int {
	return r.runtextpos
}
  771. // push onto the backtracking stack
  772. func (r *runner) trackpos() int {
  773. return len(r.runtrack) - r.runtrackpos
  774. }
  775. func (r *runner) trackPush() {
  776. r.runtrackpos--
  777. r.runtrack[r.runtrackpos] = r.codepos
  778. }
  779. func (r *runner) trackPush1(I1 int) {
  780. r.runtrackpos--
  781. r.runtrack[r.runtrackpos] = I1
  782. r.runtrackpos--
  783. r.runtrack[r.runtrackpos] = r.codepos
  784. }
  785. func (r *runner) trackPush2(I1, I2 int) {
  786. r.runtrackpos--
  787. r.runtrack[r.runtrackpos] = I1
  788. r.runtrackpos--
  789. r.runtrack[r.runtrackpos] = I2
  790. r.runtrackpos--
  791. r.runtrack[r.runtrackpos] = r.codepos
  792. }
  793. func (r *runner) trackPush3(I1, I2, I3 int) {
  794. r.runtrackpos--
  795. r.runtrack[r.runtrackpos] = I1
  796. r.runtrackpos--
  797. r.runtrack[r.runtrackpos] = I2
  798. r.runtrackpos--
  799. r.runtrack[r.runtrackpos] = I3
  800. r.runtrackpos--
  801. r.runtrack[r.runtrackpos] = r.codepos
  802. }
  803. func (r *runner) trackPushNeg1(I1 int) {
  804. r.runtrackpos--
  805. r.runtrack[r.runtrackpos] = I1
  806. r.runtrackpos--
  807. r.runtrack[r.runtrackpos] = -r.codepos
  808. }
  809. func (r *runner) trackPushNeg2(I1, I2 int) {
  810. r.runtrackpos--
  811. r.runtrack[r.runtrackpos] = I1
  812. r.runtrackpos--
  813. r.runtrack[r.runtrackpos] = I2
  814. r.runtrackpos--
  815. r.runtrack[r.runtrackpos] = -r.codepos
  816. }
  817. func (r *runner) backtrack() {
  818. newpos := r.runtrack[r.runtrackpos]
  819. r.runtrackpos++
  820. if r.re.Debug() {
  821. if newpos < 0 {
  822. fmt.Printf(" Backtracking (back2) to code position %v\n", -newpos)
  823. } else {
  824. fmt.Printf(" Backtracking to code position %v\n", newpos)
  825. }
  826. }
  827. if newpos < 0 {
  828. newpos = -newpos
  829. r.setOperator(r.code.Codes[newpos] | syntax.Back2)
  830. } else {
  831. r.setOperator(r.code.Codes[newpos] | syntax.Back)
  832. }
  833. // When branching backward, ensure storage
  834. if newpos < r.codepos {
  835. r.ensureStorage()
  836. }
  837. r.codepos = newpos
  838. }
  839. func (r *runner) setOperator(op int) {
  840. r.caseInsensitive = (0 != (op & syntax.Ci))
  841. r.rightToLeft = (0 != (op & syntax.Rtl))
  842. r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci))
  843. }
  844. func (r *runner) trackPop() {
  845. r.runtrackpos++
  846. }
  847. // pop framesize items from the backtracking stack
  848. func (r *runner) trackPopN(framesize int) {
  849. r.runtrackpos += framesize
  850. }
  851. // Technically we are actually peeking at items already popped. So if you want to
  852. // get and pop the top item from the stack, you do
  853. // r.trackPop();
  854. // r.trackPeek();
  855. func (r *runner) trackPeek() int {
  856. return r.runtrack[r.runtrackpos-1]
  857. }
  858. // get the ith element down on the backtracking stack
  859. func (r *runner) trackPeekN(i int) int {
  860. return r.runtrack[r.runtrackpos-i-1]
  861. }
  862. // Push onto the grouping stack
  863. func (r *runner) stackPush(I1 int) {
  864. r.runstackpos--
  865. r.runstack[r.runstackpos] = I1
  866. }
  867. func (r *runner) stackPush2(I1, I2 int) {
  868. r.runstackpos--
  869. r.runstack[r.runstackpos] = I1
  870. r.runstackpos--
  871. r.runstack[r.runstackpos] = I2
  872. }
  873. func (r *runner) stackPop() {
  874. r.runstackpos++
  875. }
  876. // pop framesize items from the grouping stack
  877. func (r *runner) stackPopN(framesize int) {
  878. r.runstackpos += framesize
  879. }
  880. // Technically we are actually peeking at items already popped. So if you want to
  881. // get and pop the top item from the stack, you do
  882. // r.stackPop();
  883. // r.stackPeek();
  884. func (r *runner) stackPeek() int {
  885. return r.runstack[r.runstackpos-1]
  886. }
  887. // get the ith element down on the grouping stack
  888. func (r *runner) stackPeekN(i int) int {
  889. return r.runstack[r.runstackpos-i-1]
  890. }
  891. func (r *runner) operand(i int) int {
  892. return r.code.Codes[r.codepos+i+1]
  893. }
  894. func (r *runner) leftchars() int {
  895. return r.runtextpos
  896. }
  897. func (r *runner) rightchars() int {
  898. return r.runtextend - r.runtextpos
  899. }
  900. func (r *runner) bump() int {
  901. if r.rightToLeft {
  902. return -1
  903. }
  904. return 1
  905. }
  906. func (r *runner) forwardchars() int {
  907. if r.rightToLeft {
  908. return r.runtextpos
  909. }
  910. return r.runtextend - r.runtextpos
  911. }
  912. func (r *runner) forwardcharnext() rune {
  913. var ch rune
  914. if r.rightToLeft {
  915. r.runtextpos--
  916. ch = r.runtext[r.runtextpos]
  917. } else {
  918. ch = r.runtext[r.runtextpos]
  919. r.runtextpos++
  920. }
  921. if r.caseInsensitive {
  922. return unicode.ToLower(ch)
  923. }
  924. return ch
  925. }
  926. func (r *runner) runematch(str []rune) bool {
  927. var pos int
  928. c := len(str)
  929. if !r.rightToLeft {
  930. if r.runtextend-r.runtextpos < c {
  931. return false
  932. }
  933. pos = r.runtextpos + c
  934. } else {
  935. if r.runtextpos-0 < c {
  936. return false
  937. }
  938. pos = r.runtextpos
  939. }
  940. if !r.caseInsensitive {
  941. for c != 0 {
  942. c--
  943. pos--
  944. if str[c] != r.runtext[pos] {
  945. return false
  946. }
  947. }
  948. } else {
  949. for c != 0 {
  950. c--
  951. pos--
  952. if str[c] != unicode.ToLower(r.runtext[pos]) {
  953. return false
  954. }
  955. }
  956. }
  957. if !r.rightToLeft {
  958. pos += len(str)
  959. }
  960. r.runtextpos = pos
  961. return true
  962. }
  963. func (r *runner) refmatch(index, len int) bool {
  964. var c, pos, cmpos int
  965. if !r.rightToLeft {
  966. if r.runtextend-r.runtextpos < len {
  967. return false
  968. }
  969. pos = r.runtextpos + len
  970. } else {
  971. if r.runtextpos-0 < len {
  972. return false
  973. }
  974. pos = r.runtextpos
  975. }
  976. cmpos = index + len
  977. c = len
  978. if !r.caseInsensitive {
  979. for c != 0 {
  980. c--
  981. cmpos--
  982. pos--
  983. if r.runtext[cmpos] != r.runtext[pos] {
  984. return false
  985. }
  986. }
  987. } else {
  988. for c != 0 {
  989. c--
  990. cmpos--
  991. pos--
  992. if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) {
  993. return false
  994. }
  995. }
  996. }
  997. if !r.rightToLeft {
  998. pos += len
  999. }
  1000. r.runtextpos = pos
  1001. return true
  1002. }
  1003. func (r *runner) backwardnext() {
  1004. if r.rightToLeft {
  1005. r.runtextpos++
  1006. } else {
  1007. r.runtextpos--
  1008. }
  1009. }
  1010. func (r *runner) charAt(j int) rune {
  1011. return r.runtext[j]
  1012. }
  1013. func (r *runner) findFirstChar() bool {
  1014. if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) {
  1015. if !r.code.RightToLeft {
  1016. if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) ||
  1017. (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) {
  1018. r.runtextpos = r.runtextend
  1019. return false
  1020. }
  1021. if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 {
  1022. r.runtextpos = r.runtextend - 1
  1023. } else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend {
  1024. r.runtextpos = r.runtextend
  1025. }
  1026. } else {
  1027. if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) ||
  1028. (0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 ||
  1029. (r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) ||
  1030. (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) {
  1031. r.runtextpos = 0
  1032. return false
  1033. }
  1034. if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 {
  1035. r.runtextpos = 0
  1036. }
  1037. }
  1038. if r.code.BmPrefix != nil {
  1039. return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend)
  1040. }
  1041. return true // found a valid start or end anchor
  1042. } else if r.code.BmPrefix != nil {
  1043. r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend)
  1044. if r.runtextpos == -1 {
  1045. if r.code.RightToLeft {
  1046. r.runtextpos = 0
  1047. } else {
  1048. r.runtextpos = r.runtextend
  1049. }
  1050. return false
  1051. }
  1052. return true
  1053. } else if r.code.FcPrefix == nil {
  1054. return true
  1055. }
  1056. r.rightToLeft = r.code.RightToLeft
  1057. r.caseInsensitive = r.code.FcPrefix.CaseInsensitive
  1058. set := r.code.FcPrefix.PrefixSet
  1059. if set.IsSingleton() {
  1060. ch := set.SingletonChar()
  1061. for i := r.forwardchars(); i > 0; i-- {
  1062. if ch == r.forwardcharnext() {
  1063. r.backwardnext()
  1064. return true
  1065. }
  1066. }
  1067. } else {
  1068. for i := r.forwardchars(); i > 0; i-- {
  1069. n := r.forwardcharnext()
  1070. //fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n))
  1071. if set.CharIn(n) {
  1072. r.backwardnext()
  1073. return true
  1074. }
  1075. }
  1076. }
  1077. return false
  1078. }
  1079. func (r *runner) initMatch() {
  1080. // Use a hashtable'ed Match object if the capture numbers are sparse
  1081. if r.runmatch == nil {
  1082. if r.re.caps != nil {
  1083. r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart)
  1084. } else {
  1085. r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart)
  1086. }
  1087. } else {
  1088. r.runmatch.reset(r.runtext, r.runtextstart)
  1089. }
  1090. // note we test runcrawl, because it is the last one to be allocated
  1091. // If there is an alloc failure in the middle of the three allocations,
  1092. // we may still return to reuse this instance, and we want to behave
  1093. // as if the allocations didn't occur. (we used to test _trackcount != 0)
  1094. if r.runcrawl != nil {
  1095. r.runtrackpos = len(r.runtrack)
  1096. r.runstackpos = len(r.runstack)
  1097. r.runcrawlpos = len(r.runcrawl)
  1098. return
  1099. }
  1100. r.initTrackCount()
  1101. tracksize := r.runtrackcount * 8
  1102. stacksize := r.runtrackcount * 8
  1103. if tracksize < 32 {
  1104. tracksize = 32
  1105. }
  1106. if stacksize < 16 {
  1107. stacksize = 16
  1108. }
  1109. r.runtrack = make([]int, tracksize)
  1110. r.runtrackpos = tracksize
  1111. r.runstack = make([]int, stacksize)
  1112. r.runstackpos = stacksize
  1113. r.runcrawl = make([]int, 32)
  1114. r.runcrawlpos = 32
  1115. }
  1116. func (r *runner) tidyMatch(quick bool) *Match {
  1117. if !quick {
  1118. match := r.runmatch
  1119. r.runmatch = nil
  1120. match.tidy(r.runtextpos)
  1121. return match
  1122. } else {
  1123. // send back our match -- it's not leaving the package, so it's safe to not clean it up
  1124. // this reduces allocs for frequent calls to the "IsMatch" bool-only functions
  1125. return r.runmatch
  1126. }
  1127. }
  1128. // capture captures a subexpression. Note that the
  1129. // capnum used here has already been mapped to a non-sparse
  1130. // index (by the code generator RegexWriter).
  1131. func (r *runner) capture(capnum, start, end int) {
  1132. if end < start {
  1133. T := end
  1134. end = start
  1135. start = T
  1136. }
  1137. r.crawl(capnum)
  1138. r.runmatch.addMatch(capnum, start, end-start)
  1139. }
  1140. // transferCapture captures a subexpression. Note that the
  1141. // capnum used here has already been mapped to a non-sparse
  1142. // index (by the code generator RegexWriter).
  1143. func (r *runner) transferCapture(capnum, uncapnum, start, end int) {
  1144. var start2, end2 int
  1145. // these are the two intervals that are cancelling each other
  1146. if end < start {
  1147. T := end
  1148. end = start
  1149. start = T
  1150. }
  1151. start2 = r.runmatch.matchIndex(uncapnum)
  1152. end2 = start2 + r.runmatch.matchLength(uncapnum)
  1153. // The new capture gets the innermost defined interval
  1154. if start >= end2 {
  1155. end = start
  1156. start = end2
  1157. } else if end <= start2 {
  1158. start = start2
  1159. } else {
  1160. if end > end2 {
  1161. end = end2
  1162. }
  1163. if start2 > start {
  1164. start = start2
  1165. }
  1166. }
  1167. r.crawl(uncapnum)
  1168. r.runmatch.balanceMatch(uncapnum)
  1169. if capnum != -1 {
  1170. r.crawl(capnum)
  1171. r.runmatch.addMatch(capnum, start, end-start)
  1172. }
  1173. }
  1174. // revert the last capture
  1175. func (r *runner) uncapture() {
  1176. capnum := r.popcrawl()
  1177. r.runmatch.removeMatch(capnum)
  1178. }
  1179. //debug
  1180. func (r *runner) dumpState() {
  1181. back := ""
  1182. if r.operator&syntax.Back != 0 {
  1183. back = " Back"
  1184. }
  1185. if r.operator&syntax.Back2 != 0 {
  1186. back += " Back2"
  1187. }
  1188. fmt.Printf("Text: %v\nTrack: %v\nStack: %v\n %s%s\n\n",
  1189. r.textposDescription(),
  1190. r.stackDescription(r.runtrack, r.runtrackpos),
  1191. r.stackDescription(r.runstack, r.runstackpos),
  1192. r.code.OpcodeDescription(r.codepos),
  1193. back)
  1194. }
  1195. func (r *runner) stackDescription(a []int, index int) string {
  1196. buf := &bytes.Buffer{}
  1197. fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a))
  1198. if buf.Len() < 8 {
  1199. buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
  1200. }
  1201. buf.WriteRune('(')
  1202. for i := index; i < len(a); i++ {
  1203. if i > index {
  1204. buf.WriteRune(' ')
  1205. }
  1206. buf.WriteString(strconv.Itoa(a[i]))
  1207. }
  1208. buf.WriteRune(')')
  1209. return buf.String()
  1210. }
  1211. func (r *runner) textposDescription() string {
  1212. buf := &bytes.Buffer{}
  1213. buf.WriteString(strconv.Itoa(r.runtextpos))
  1214. if buf.Len() < 8 {
  1215. buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
  1216. }
  1217. if r.runtextpos > 0 {
  1218. buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1]))
  1219. } else {
  1220. buf.WriteRune('^')
  1221. }
  1222. buf.WriteRune('>')
  1223. for i := r.runtextpos; i < r.runtextend; i++ {
  1224. buf.WriteString(syntax.CharDescription(r.runtext[i]))
  1225. }
  1226. if buf.Len() >= 64 {
  1227. buf.Truncate(61)
  1228. buf.WriteString("...")
  1229. } else {
  1230. buf.WriteRune('$')
  1231. }
  1232. return buf.String()
  1233. }
  1234. // decide whether the pos
  1235. // at the specified index is a boundary or not. It's just not worth
  1236. // emitting inline code for this logic.
  1237. func (r *runner) isBoundary(index, startpos, endpos int) bool {
  1238. return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) !=
  1239. (index < endpos && syntax.IsWordChar(r.runtext[index]))
  1240. }
  1241. func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
  1242. return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) !=
  1243. (index < endpos && syntax.IsECMAWordChar(r.runtext[index]))
  1244. }
// timeoutCheckFrequency is the number of checkTimeout calls between actual
// clock reads: checkTimeout counts down from this value and only calls
// doCheckTimeout (which reads time.Now) when the counter reaches zero.
//
// Historical note carried over from the original implementation (it reads
// like a justification for picking 1000): the value was determined in a
// series of experiments where x86 retail builds (ono-lab-optimized) were run
// on different pattern/input pairs. Larger values did not tend to increase
// performance; smaller values tended to slow down the execution.
const timeoutCheckFrequency int = 1000
  1251. func (r *runner) startTimeoutWatch() {
  1252. if r.ignoreTimeout {
  1253. return
  1254. }
  1255. r.timeoutChecksToSkip = timeoutCheckFrequency
  1256. r.timeoutAt = time.Now().Add(r.timeout)
  1257. }
  1258. func (r *runner) checkTimeout() error {
  1259. if r.ignoreTimeout {
  1260. return nil
  1261. }
  1262. r.timeoutChecksToSkip--
  1263. if r.timeoutChecksToSkip != 0 {
  1264. return nil
  1265. }
  1266. r.timeoutChecksToSkip = timeoutCheckFrequency
  1267. return r.doCheckTimeout()
  1268. }
  1269. func (r *runner) doCheckTimeout() error {
  1270. current := time.Now()
  1271. if current.Before(r.timeoutAt) {
  1272. return nil
  1273. }
  1274. if r.re.Debug() {
  1275. //Debug.WriteLine("")
  1276. //Debug.WriteLine("RegEx match timeout occurred!")
  1277. //Debug.WriteLine("Specified timeout: " + TimeSpan.FromMilliseconds(_timeout).ToString())
  1278. //Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency)
  1279. //Debug.WriteLine("Search pattern: " + _runregex._pattern)
  1280. //Debug.WriteLine("Input: " + r.runtext)
  1281. //Debug.WriteLine("About to throw RegexMatchTimeoutException.")
  1282. }
  1283. return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext))
  1284. }
  1285. func (r *runner) initTrackCount() {
  1286. r.runtrackcount = r.code.TrackCount
  1287. }
  1288. // getRunner returns a run to use for matching re.
  1289. // It uses the re's runner cache if possible, to avoid
  1290. // unnecessary allocation.
  1291. func (re *Regexp) getRunner() *runner {
  1292. re.muRun.Lock()
  1293. if n := len(re.runner); n > 0 {
  1294. z := re.runner[n-1]
  1295. re.runner = re.runner[:n-1]
  1296. re.muRun.Unlock()
  1297. return z
  1298. }
  1299. re.muRun.Unlock()
  1300. z := &runner{
  1301. re: re,
  1302. code: re.code,
  1303. }
  1304. return z
  1305. }
  1306. // putRunner returns a runner to the re's cache.
  1307. // There is no attempt to limit the size of the cache, so it will
  1308. // grow to the maximum number of simultaneous matches
  1309. // run using re. (The cache empties when re gets garbage collected.)
  1310. func (re *Regexp) putRunner(r *runner) {
  1311. re.muRun.Lock()
  1312. re.runner = append(re.runner, r)
  1313. re.muRun.Unlock()
  1314. }