You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lpcap.c 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. /*
  2. ** $Id: lpcap.c $
  3. ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
  4. */
  5. #include "lua.h"
  6. #include "lauxlib.h"
  7. #include "lpcap.h"
  8. #include "lptypes.h"
  9. #define captype(cap) ((cap)->kind)
  10. #define isclosecap(cap) (captype(cap) == Cclose)
  11. #define closeaddr(c) ((c)->s + (c)->siz - 1)
  12. #define isfullcap(cap) ((cap)->siz != 0)
  13. #define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v)
  14. #define pushluaval(cs) getfromktable(cs, (cs)->cap->idx)
  15. /*
  16. ** Put at the cache for Lua values the value indexed by 'v' in ktable
  17. ** of the running pattern (if it is not there yet); returns its index.
  18. */
  19. static int updatecache (CapState *cs, int v) {
  20. int idx = cs->ptop + 1; /* stack index of cache for Lua values */
  21. if (v != cs->valuecached) { /* not there? */
  22. getfromktable(cs, v); /* get value from 'ktable' */
  23. lua_replace(cs->L, idx); /* put it at reserved stack position */
  24. cs->valuecached = v; /* keep track of what is there */
  25. }
  26. return idx;
  27. }
  28. static int pushcapture (CapState *cs);
  29. /*
  30. ** Goes back in a list of captures looking for an open capture
  31. ** corresponding to a close
  32. */
  33. static Capture *findopen (Capture *cap) {
  34. int n = 0; /* number of closes waiting an open */
  35. for (;;) {
  36. cap--;
  37. if (isclosecap(cap)) n++; /* one more open to skip */
  38. else if (!isfullcap(cap))
  39. if (n-- == 0) return cap;
  40. }
  41. }
  42. /*
  43. ** Go to the next capture
  44. */
  45. static void nextcap (CapState *cs) {
  46. Capture *cap = cs->cap;
  47. if (!isfullcap(cap)) { /* not a single capture? */
  48. int n = 0; /* number of opens waiting a close */
  49. for (;;) { /* look for corresponding close */
  50. cap++;
  51. if (isclosecap(cap)) {
  52. if (n-- == 0) break;
  53. }
  54. else if (!isfullcap(cap)) n++;
  55. }
  56. }
  57. cs->cap = cap + 1; /* + 1 to skip last close (or entire single capture) */
  58. }
  59. /*
  60. ** Push on the Lua stack all values generated by nested captures inside
  61. ** the current capture. Returns number of values pushed. 'addextra'
  62. ** makes it push the entire match after all captured values. The
  63. ** entire match is pushed also if there are no other nested values,
  64. ** so the function never returns zero.
  65. */
  66. static int pushnestedvalues (CapState *cs, int addextra) {
  67. Capture *co = cs->cap;
  68. if (isfullcap(cs->cap++)) { /* no nested captures? */
  69. lua_pushlstring(cs->L, co->s, co->siz - 1); /* push whole match */
  70. return 1; /* that is it */
  71. }
  72. else {
  73. int n = 0;
  74. while (!isclosecap(cs->cap)) /* repeat for all nested patterns */
  75. n += pushcapture(cs);
  76. if (addextra || n == 0) { /* need extra? */
  77. lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */
  78. n++;
  79. }
  80. cs->cap++; /* skip close entry */
  81. return n;
  82. }
  83. }
  84. /*
  85. ** Push only the first value generated by nested captures
  86. */
  87. static void pushonenestedvalue (CapState *cs) {
  88. int n = pushnestedvalues(cs, 0);
  89. if (n > 1)
  90. lua_pop(cs->L, n - 1); /* pop extra values */
  91. }
  92. /*
  93. ** Try to find a named group capture with the name given at the top of
  94. ** the stack; goes backward from 'cap'.
  95. */
  96. static Capture *findback (CapState *cs, Capture *cap) {
  97. lua_State *L = cs->L;
  98. while (cap-- > cs->ocap) { /* repeat until end of list */
  99. if (isclosecap(cap))
  100. cap = findopen(cap); /* skip nested captures */
  101. else if (!isfullcap(cap))
  102. continue; /* opening an enclosing capture: skip and get previous */
  103. if (captype(cap) == Cgroup) {
  104. getfromktable(cs, cap->idx); /* get group name */
  105. if (lp_equal(L, -2, -1)) { /* right group? */
  106. lua_pop(L, 2); /* remove reference name and group name */
  107. return cap;
  108. }
  109. else lua_pop(L, 1); /* remove group name */
  110. }
  111. }
  112. luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1));
  113. return NULL; /* to avoid warnings */
  114. }
  115. /*
  116. ** Back-reference capture. Return number of values pushed.
  117. */
  118. static int backrefcap (CapState *cs) {
  119. int n;
  120. Capture *curr = cs->cap;
  121. pushluaval(cs); /* reference name */
  122. cs->cap = findback(cs, curr); /* find corresponding group */
  123. n = pushnestedvalues(cs, 0); /* push group's values */
  124. cs->cap = curr + 1;
  125. return n;
  126. }
  127. /*
  128. ** Table capture: creates a new table and populates it with nested
  129. ** captures.
  130. */
  131. static int tablecap (CapState *cs) {
  132. lua_State *L = cs->L;
  133. int n = 0;
  134. lua_newtable(L);
  135. if (isfullcap(cs->cap++))
  136. return 1; /* table is empty */
  137. while (!isclosecap(cs->cap)) {
  138. if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */
  139. pushluaval(cs); /* push group name */
  140. pushonenestedvalue(cs);
  141. lua_settable(L, -3);
  142. }
  143. else { /* not a named group */
  144. int i;
  145. int k = pushcapture(cs);
  146. for (i = k; i > 0; i--) /* store all values into table */
  147. lua_rawseti(L, -(i + 1), n + i);
  148. n += k;
  149. }
  150. }
  151. cs->cap++; /* skip close entry */
  152. return 1; /* number of values pushed (only the table) */
  153. }
  154. /*
  155. ** Table-query capture
  156. */
  157. static int querycap (CapState *cs) {
  158. int idx = cs->cap->idx;
  159. pushonenestedvalue(cs); /* get nested capture */
  160. lua_gettable(cs->L, updatecache(cs, idx)); /* query cap. value at table */
  161. if (!lua_isnil(cs->L, -1))
  162. return 1;
  163. else { /* no value */
  164. lua_pop(cs->L, 1); /* remove nil */
  165. return 0;
  166. }
  167. }
  168. /*
  169. ** Fold capture
  170. */
  171. static int foldcap (CapState *cs) {
  172. int n;
  173. lua_State *L = cs->L;
  174. int idx = cs->cap->idx;
  175. if (isfullcap(cs->cap++) || /* no nested captures? */
  176. isclosecap(cs->cap) || /* no nested captures (large subject)? */
  177. (n = pushcapture(cs)) == 0) /* nested captures with no values? */
  178. return luaL_error(L, "no initial value for fold capture");
  179. if (n > 1)
  180. lua_pop(L, n - 1); /* leave only one result for accumulator */
  181. while (!isclosecap(cs->cap)) {
  182. lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */
  183. lua_insert(L, -2); /* put it before accumulator */
  184. n = pushcapture(cs); /* get next capture's values */
  185. lua_call(L, n + 1, 1); /* call folding function */
  186. }
  187. cs->cap++; /* skip close entry */
  188. return 1; /* only accumulator left on the stack */
  189. }
  190. /*
  191. ** Function capture
  192. */
  193. static int functioncap (CapState *cs) {
  194. int n;
  195. int top = lua_gettop(cs->L);
  196. pushluaval(cs); /* push function */
  197. n = pushnestedvalues(cs, 0); /* push nested captures */
  198. lua_call(cs->L, n, LUA_MULTRET); /* call function */
  199. return lua_gettop(cs->L) - top; /* return function's results */
  200. }
  201. /*
  202. ** Select capture
  203. */
  204. static int numcap (CapState *cs) {
  205. int idx = cs->cap->idx; /* value to select */
  206. if (idx == 0) { /* no values? */
  207. nextcap(cs); /* skip entire capture */
  208. return 0; /* no value produced */
  209. }
  210. else {
  211. int n = pushnestedvalues(cs, 0);
  212. if (n < idx) /* invalid index? */
  213. return luaL_error(cs->L, "no capture '%d'", idx);
  214. else {
  215. lua_pushvalue(cs->L, -(n - idx + 1)); /* get selected capture */
  216. lua_replace(cs->L, -(n + 1)); /* put it in place of 1st capture */
  217. lua_pop(cs->L, n - 1); /* remove other captures */
  218. return 1;
  219. }
  220. }
  221. }
  222. /*
  223. ** Return the stack index of the first runtime capture in the given
  224. ** list of captures (or zero if no runtime captures)
  225. */
  226. int finddyncap (Capture *cap, Capture *last) {
  227. for (; cap < last; cap++) {
  228. if (cap->kind == Cruntime)
  229. return cap->idx; /* stack position of first capture */
  230. }
  231. return 0; /* no dynamic captures in this segment */
  232. }
  233. /*
  234. ** Calls a runtime capture. Returns number of captures "removed" by the
  235. ** call, that is, those inside the group capture. Captures to be added
  236. ** are on the Lua stack.
  237. */
  238. int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) {
  239. int n, id;
  240. lua_State *L = cs->L;
  241. int otop = lua_gettop(L);
  242. Capture *open = findopen(close); /* get open group capture */
  243. assert(captype(open) == Cgroup);
  244. id = finddyncap(open, close); /* get first dynamic capture argument */
  245. close->kind = Cclose; /* closes the group */
  246. close->s = s;
  247. cs->cap = open; cs->valuecached = 0; /* prepare capture state */
  248. luaL_checkstack(L, 4, "too many runtime captures");
  249. pushluaval(cs); /* push function to be called */
  250. lua_pushvalue(L, SUBJIDX); /* push original subject */
  251. lua_pushinteger(L, s - cs->s + 1); /* push current position */
  252. n = pushnestedvalues(cs, 0); /* push nested captures */
  253. lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */
  254. if (id > 0) { /* are there old dynamic captures to be removed? */
  255. int i;
  256. for (i = id; i <= otop; i++)
  257. lua_remove(L, id); /* remove old dynamic captures */
  258. *rem = otop - id + 1; /* total number of dynamic captures removed */
  259. }
  260. else
  261. *rem = 0; /* no dynamic captures removed */
  262. return close - open - 1; /* number of captures to be removed */
  263. }
  264. /*
  265. ** Auxiliary structure for substitution and string captures: keep
  266. ** information about nested captures for future use, avoiding to push
  267. ** string results into Lua
  268. */
  269. typedef struct StrAux {
  270. int isstring; /* whether capture is a string */
  271. union {
  272. Capture *cp; /* if not a string, respective capture */
  273. struct { /* if it is a string... */
  274. const char *s; /* ... starts here */
  275. const char *e; /* ... ends here */
  276. } s;
  277. } u;
  278. } StrAux;
  279. #define MAXSTRCAPS 10
  280. /*
  281. ** Collect values from current capture into array 'cps'. Current
  282. ** capture must be Cstring (first call) or Csimple (recursive calls).
  283. ** (In first call, fills %0 with whole match for Cstring.)
  284. ** Returns number of elements in the array that were filled.
  285. */
  286. static int getstrcaps (CapState *cs, StrAux *cps, int n) {
  287. int k = n++;
  288. cps[k].isstring = 1; /* get string value */
  289. cps[k].u.s.s = cs->cap->s; /* starts here */
  290. if (!isfullcap(cs->cap++)) { /* nested captures? */
  291. while (!isclosecap(cs->cap)) { /* traverse them */
  292. if (n >= MAXSTRCAPS) /* too many captures? */
  293. nextcap(cs); /* skip extra captures (will not need them) */
  294. else if (captype(cs->cap) == Csimple) /* string? */
  295. n = getstrcaps(cs, cps, n); /* put info. into array */
  296. else {
  297. cps[n].isstring = 0; /* not a string */
  298. cps[n].u.cp = cs->cap; /* keep original capture */
  299. nextcap(cs);
  300. n++;
  301. }
  302. }
  303. cs->cap++; /* skip close */
  304. }
  305. cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */
  306. return n;
  307. }
  308. /*
  309. ** add next capture value (which should be a string) to buffer 'b'
  310. */
  311. static int addonestring (luaL_Buffer *b, CapState *cs, const char *what);
  312. /*
  313. ** String capture: add result to buffer 'b' (instead of pushing
  314. ** it into the stack)
  315. */
  316. static void stringcap (luaL_Buffer *b, CapState *cs) {
  317. StrAux cps[MAXSTRCAPS];
  318. int n;
  319. size_t len, i;
  320. const char *fmt; /* format string */
  321. fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len);
  322. n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */
  323. for (i = 0; i < len; i++) { /* traverse them */
  324. if (fmt[i] != '%') /* not an escape? */
  325. luaL_addchar(b, fmt[i]); /* add it to buffer */
  326. else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */
  327. luaL_addchar(b, fmt[i]); /* add to buffer */
  328. else {
  329. int l = fmt[i] - '0'; /* capture index */
  330. if (l > n)
  331. luaL_error(cs->L, "invalid capture index (%d)", l);
  332. else if (cps[l].isstring)
  333. luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s);
  334. else {
  335. Capture *curr = cs->cap;
  336. cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */
  337. if (!addonestring(b, cs, "capture"))
  338. luaL_error(cs->L, "no values in capture index %d", l);
  339. cs->cap = curr; /* continue from where it stopped */
  340. }
  341. }
  342. }
  343. }
  344. /*
  345. ** Substitution capture: add result to buffer 'b'
  346. */
  347. static void substcap (luaL_Buffer *b, CapState *cs) {
  348. const char *curr = cs->cap->s;
  349. if (isfullcap(cs->cap)) /* no nested captures? */
  350. luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */
  351. else {
  352. cs->cap++; /* skip open entry */
  353. while (!isclosecap(cs->cap)) { /* traverse nested captures */
  354. const char *next = cs->cap->s;
  355. luaL_addlstring(b, curr, next - curr); /* add text up to capture */
  356. if (addonestring(b, cs, "replacement"))
  357. curr = closeaddr(cs->cap - 1); /* continue after match */
  358. else /* no capture value */
  359. curr = next; /* keep original text in final result */
  360. }
  361. luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */
  362. }
  363. cs->cap++; /* go to next capture */
  364. }
  365. /*
  366. ** Evaluates a capture and adds its first value to buffer 'b'; returns
  367. ** whether there was a value
  368. */
  369. static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
  370. switch (captype(cs->cap)) {
  371. case Cstring:
  372. stringcap(b, cs); /* add capture directly to buffer */
  373. return 1;
  374. case Csubst:
  375. substcap(b, cs); /* add capture directly to buffer */
  376. return 1;
  377. default: {
  378. lua_State *L = cs->L;
  379. int n = pushcapture(cs);
  380. if (n > 0) {
  381. if (n > 1) lua_pop(L, n - 1); /* only one result */
  382. if (!lua_isstring(L, -1))
  383. luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1));
  384. luaL_addvalue(b);
  385. }
  386. return n;
  387. }
  388. }
  389. }
  390. #if !defined(MAXRECLEVEL)
  391. #define MAXRECLEVEL 200
  392. #endif
  393. /*
  394. ** Push all values of the current capture into the stack; returns
  395. ** number of values pushed
  396. */
  397. static int pushcapture (CapState *cs) {
  398. lua_State *L = cs->L;
  399. int res;
  400. luaL_checkstack(L, 4, "too many captures");
  401. if (cs->reclevel++ > MAXRECLEVEL)
  402. return luaL_error(L, "subcapture nesting too deep");
  403. switch (captype(cs->cap)) {
  404. case Cposition: {
  405. lua_pushinteger(L, cs->cap->s - cs->s + 1);
  406. cs->cap++;
  407. res = 1;
  408. break;
  409. }
  410. case Cconst: {
  411. pushluaval(cs);
  412. cs->cap++;
  413. res = 1;
  414. break;
  415. }
  416. case Carg: {
  417. int arg = (cs->cap++)->idx;
  418. if (arg + FIXEDARGS > cs->ptop)
  419. return luaL_error(L, "reference to absent extra argument #%d", arg);
  420. lua_pushvalue(L, arg + FIXEDARGS);
  421. res = 1;
  422. break;
  423. }
  424. case Csimple: {
  425. int k = pushnestedvalues(cs, 1);
  426. lua_insert(L, -k); /* make whole match be first result */
  427. res = k;
  428. break;
  429. }
  430. case Cruntime: {
  431. lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */
  432. res = 1;
  433. break;
  434. }
  435. case Cstring: {
  436. luaL_Buffer b;
  437. luaL_buffinit(L, &b);
  438. stringcap(&b, cs);
  439. luaL_pushresult(&b);
  440. res = 1;
  441. break;
  442. }
  443. case Csubst: {
  444. luaL_Buffer b;
  445. luaL_buffinit(L, &b);
  446. substcap(&b, cs);
  447. luaL_pushresult(&b);
  448. res = 1;
  449. break;
  450. }
  451. case Cgroup: {
  452. if (cs->cap->idx == 0) /* anonymous group? */
  453. res = pushnestedvalues(cs, 0); /* add all nested values */
  454. else { /* named group: add no values */
  455. nextcap(cs); /* skip capture */
  456. res = 0;
  457. }
  458. break;
  459. }
  460. case Cbackref: res = backrefcap(cs); break;
  461. case Ctable: res = tablecap(cs); break;
  462. case Cfunction: res = functioncap(cs); break;
  463. case Cnum: res = numcap(cs); break;
  464. case Cquery: res = querycap(cs); break;
  465. case Cfold: res = foldcap(cs); break;
  466. default: assert(0); res = 0;
  467. }
  468. cs->reclevel--;
  469. return res;
  470. }
  471. /*
  472. ** Prepare a CapState structure and traverse the entire list of
  473. ** captures in the stack pushing its results. 's' is the subject
  474. ** string, 'r' is the final position of the match, and 'ptop'
  475. ** the index in the stack where some useful values were pushed.
  476. ** Returns the number of results pushed. (If the list produces no
  477. ** results, push the final position of the match.)
  478. */
  479. int getcaptures (lua_State *L, const char *s, const char *r, int ptop) {
  480. Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop));
  481. int n = 0;
  482. if (!isclosecap(capture)) { /* is there any capture? */
  483. CapState cs;
  484. cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0;
  485. cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
  486. do { /* collect their values */
  487. n += pushcapture(&cs);
  488. } while (!isclosecap(cs.cap));
  489. }
  490. if (n == 0) { /* no capture values? */
  491. lua_pushinteger(L, r - s + 1); /* return only end position */
  492. n = 1;
  493. }
  494. return n;
  495. }