/*
-** $Id: lpcap.c,v 1.6 2015/06/15 16:09:57 roberto Exp $
+** $Id: lpcap.c $
** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
*/
/*
-** Calls a runtime capture. Returns number of captures removed by
-** the call, including the initial Cgroup. (Captures to be added are
-** on the Lua stack.)
+** Calls a runtime capture. Returns number of captures "removed" by the
+** call, that is, those inside the group capture. Captures to be added
+** are on the Lua stack.
*/
int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) {
int n, id;
lua_State *L = cs->L;
int otop = lua_gettop(L);
- Capture *open = findopen(close);
+ Capture *open = findopen(close); /* get open group capture */
assert(captype(open) == Cgroup);
id = finddyncap(open, close); /* get first dynamic capture argument */
close->kind = Cclose; /* closes the group */
}
else
*rem = 0; /* no dynamic captures removed */
- return close - open; /* number of captures of all kinds removed */
+ return close - open - 1; /* number of captures to be removed */
}
}
+#if !defined(MAXRECLEVEL) /* may be predefined to override the default */
+#define MAXRECLEVEL 200 /* limit tested against 'cs->reclevel' in 'pushcapture' */
+#endif
+
+
/*
** Push all values of the current capture into the stack; returns
** number of values pushed
+** ('cs->reclevel' is incremented on entry and decremented before the
+** normal return, so it counts how many calls are active at once; when
+** it goes past MAXRECLEVEL the function raises the Lua error
+** "subcapture nesting too deep" instead of recursing further.)
*/
static int pushcapture (CapState *cs) {
lua_State *L = cs->L;
+ int res; /* number of values pushed; returned at the end */
luaL_checkstack(L, 4, "too many captures");
+ if (cs->reclevel++ > MAXRECLEVEL) /* entering one more level */
+ return luaL_error(L, "subcapture nesting too deep");
switch (captype(cs->cap)) {
case Cposition: {
lua_pushinteger(L, cs->cap->s - cs->s + 1);
cs->cap++;
- return 1;
+ res = 1;
+ break;
}
case Cconst: {
pushluaval(cs);
cs->cap++;
- return 1;
+ res = 1;
+ break;
}
case Carg: {
int arg = (cs->cap++)->idx;
if (arg + FIXEDARGS > cs->ptop)
return luaL_error(L, "reference to absent extra argument #%d", arg);
lua_pushvalue(L, arg + FIXEDARGS);
- return 1;
+ res = 1;
+ break;
}
case Csimple: {
int k = pushnestedvalues(cs, 1);
lua_insert(L, -k); /* make whole match be first result */
- return k;
+ res = k;
+ break;
}
case Cruntime: {
lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */
- return 1;
+ res = 1;
+ break;
}
case Cstring: {
luaL_Buffer b;
luaL_buffinit(L, &b);
stringcap(&b, cs);
luaL_pushresult(&b);
- return 1;
+ res = 1;
+ break;
}
case Csubst: {
luaL_Buffer b;
luaL_buffinit(L, &b);
substcap(&b, cs);
luaL_pushresult(&b);
- return 1;
+ res = 1;
+ break;
}
case Cgroup: {
if (cs->cap->idx == 0) /* anonymous group? */
- return pushnestedvalues(cs, 0); /* add all nested values */
+ res = pushnestedvalues(cs, 0); /* add all nested values */
else { /* named group: add no values */
nextcap(cs); /* skip capture */
- return 0;
+ res = 0;
}
+ break;
}
- case Cbackref: return backrefcap(cs);
- case Ctable: return tablecap(cs);
- case Cfunction: return functioncap(cs);
- case Cnum: return numcap(cs);
- case Cquery: return querycap(cs);
- case Cfold: return foldcap(cs);
- default: assert(0); return 0;
+ case Cbackref: res = backrefcap(cs); break;
+ case Ctable: res = tablecap(cs); break;
+ case Cfunction: res = functioncap(cs); break;
+ case Cnum: res = numcap(cs); break;
+ case Cquery: res = querycap(cs); break;
+ case Cfold: res = foldcap(cs); break;
+ default: assert(0); res = 0; /* no other kind is expected here */
}
+ cs->reclevel--; /* leaving this level */
+ return res;
}
int n = 0;
if (!isclosecap(capture)) { /* is there any capture? */
CapState cs;
- cs.ocap = cs.cap = capture; cs.L = L;
+ cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0;
cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
do { /* collect their values */
n += pushcapture(&cs);
/*
-** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $
+** $Id: lpcap.h $
*/
#if !defined(lpcap_h)
/* kinds of captures */
typedef enum CapKind {
- Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction,
- Cquery, Cstring, Cnum, Csubst, Cfold, Cruntime, Cgroup
+ Cclose, /* not used in trees */
+ Cposition,
+ Cconst, /* ktable[key] is Lua constant */
+ Cbackref, /* ktable[key] is "name" of group to get capture */
+ Carg, /* 'key' is arg's number */
+ Csimple, /* next node is pattern */
+ Ctable, /* next node is pattern */
+ Cfunction, /* ktable[key] is function; next node is pattern */
+ Cquery, /* ktable[key] is table; next node is pattern */
+ Cstring, /* ktable[key] is string; next node is pattern */
+ Cnum, /* numbered capture; 'key' is number of value to return */
+ Csubst, /* substitution capture; next node is pattern */
+ Cfold, /* ktable[key] is function; next node is pattern */
+ Cruntime, /* not used in trees (it uses another type for tree) */
+ Cgroup /* ktable[key] is group's "name" */
} CapKind;
int ptop; /* index of last argument to 'match' */
const char *s; /* original string */
int valuecached; /* value stored in cache slot */
+ int reclevel; /* recursion level */
} CapState;
/*
-** $Id: lpcode.c,v 1.23 2015/06/12 18:36:47 roberto Exp $
+** $Id: lpcode.c $
** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
*/
}
+/*
+** Visit a TCall node taking care to stop recursion. If node not yet
+** visited, return 'f(sib2(tree))', otherwise return 'def' (default
+** value)
+** The "visited" mark is 'tree->key == 0': the key is zeroed while 'f'
+** runs on the called rule and restored afterwards, so reaching this
+** same call node again during that traversal yields 'def' instead of
+** recursing without end. 'tree' must be a TCall whose second sibling
+** is a TRule (both are asserted).
+*/
+static int callrecursive (TTree *tree, int f (TTree *t), int def) {
+ int key = tree->key; /* saved so it can be restored after the visit */
+ assert(tree->tag == TCall);
+ assert(sib2(tree)->tag == TRule);
+ if (key == 0) /* node already visited? */
+ return def; /* return default value */
+ else { /* first visit */
+ int result;
+ tree->key = 0; /* mark call as already visited */
+ result = f(sib2(tree)); /* go to called rule */
+ tree->key = key; /* restore tree */
+ return result;
+ }
+}
+
+
/*
** Check whether a pattern tree has captures
*/
case TCapture: case TRunTime:
return 1;
case TCall:
- tree = sib2(tree); goto tailcall; /* return hascaptures(sib2(tree)); */
+ return callrecursive(tree, hascaptures, 0);
+ case TRule: /* do not follow siblings */
+ tree = sib1(tree); goto tailcall;
case TOpenCall: assert(0);
default: {
switch (numsiblings[tree->tag]) {
case 1: /* return hascaptures(sib1(tree)); */
tree = sib1(tree); goto tailcall;
case 2:
- if (hascaptures(sib1(tree))) return 1;
+ if (hascaptures(sib1(tree)))
+ return 1;
/* else return hascaptures(sib2(tree)); */
tree = sib2(tree); goto tailcall;
default: assert(numsiblings[tree->tag] == 0); return 0;
/*
** number of characters to match a pattern (or -1 if variable)
-** ('count' avoids infinite loops for grammars)
*/
-int fixedlenx (TTree *tree, int count, int len) {
+int fixedlen (TTree *tree) {
+ int len = 0; /* to accumulate in tail calls */
tailcall:
switch (tree->tag) {
case TChar: case TSet: case TAny:
case TRep: case TRunTime: case TOpenCall:
return -1;
case TCapture: case TRule: case TGrammar:
- /* return fixedlenx(sib1(tree), count); */
+ /* return fixedlen(sib1(tree)); */
tree = sib1(tree); goto tailcall;
- case TCall:
- if (count++ >= MAXRULES)
- return -1; /* may be a loop */
- /* else return fixedlenx(sib2(tree), count); */
- tree = sib2(tree); goto tailcall;
+ case TCall: {
+ int n1 = callrecursive(tree, fixedlen, -1);
+ if (n1 < 0)
+ return -1;
+ else
+ return len + n1;
+ }
case TSeq: {
- len = fixedlenx(sib1(tree), count, len);
- if (len < 0) return -1;
- /* else return fixedlenx(sib2(tree), count, len); */
- tree = sib2(tree); goto tailcall;
+ int n1 = fixedlen(sib1(tree));
+ if (n1 < 0)
+ return -1;
+ /* else return fixedlen(sib2(tree)) + len; */
+ len += n1; tree = sib2(tree); goto tailcall;
}
case TChoice: {
- int n1, n2;
- n1 = fixedlenx(sib1(tree), count, len);
- if (n1 < 0) return -1;
- n2 = fixedlenx(sib2(tree), count, len);
- if (n1 == n2) return n1;
- else return -1;
+ int n1 = fixedlen(sib1(tree));
+ int n2 = fixedlen(sib2(tree));
+ if (n1 != n2 || n1 < 0)
+ return -1;
+ else
+ return len + n1;
}
default: assert(0); return 0;
};
/*
** Computes the 'first set' of a pattern.
-** The result is a conservative approximation:
+** The result is a conservative approximation:
** match p ax -> x (for some x) ==> a belongs to first(p)
** or
** a not in first(p) ==> match p ax -> fail (for all x)
/*
-** Captures: if pattern has fixed (and not too big) length, use
-** a single IFullCapture instruction after the match; otherwise,
-** enclose the pattern with OpenCapture - CloseCapture.
+** Captures: if pattern has fixed (and not too big) length, and it
+** has no nested captures, use a single IFullCapture instruction
+** after the match; otherwise, enclose the pattern with OpenCapture -
+** CloseCapture.
*/
static void codecapture (CompileState *compst, TTree *tree, int tt,
const Charset *fl) {
/*
-** Repetition; optimizations:
+** Repetition; optimizations:
** When pattern is a charset, can use special instruction ISpan.
** When pattern is head fail, or if it starts with characters that
** are disjoint from what follows the repetions, a simple test
** is enough (a fail inside the repetition would backtrack to fail
** again in the following pattern, so there is no need for a choice).
-** When 'opt' is true, the repetition can reuse the Choice already
+** When 'opt' is true, the repetition can reuse the Choice already
** active in the stack.
*/
static void coderep (CompileState *compst, TTree *tree, int opt,
/*
-** Main code-generation function: dispatch to auxiliary functions
+** Main code-generation function: dispatch to auxiliary functions
** according to kind of tree. ('needfollow' should return true
** only for consructions that use 'fl'.)
*/
/*
-** $Id: lpcode.h,v 1.7 2015/06/12 18:24:45 roberto Exp $
+** $Id: lpcode.h $
*/
#if !defined(lpcode_h)
int tocharset (TTree *tree, Charset *cs);
int checkaux (TTree *tree, int pred);
-int fixedlenx (TTree *tree, int count, int len);
+int fixedlen (TTree *tree);
int hascaptures (TTree *tree);
int lp_gc (lua_State *L);
Instruction *compile (lua_State *L, Pattern *p);
*/
#define nullable(t) checkaux(t, PEnullable)
-#define fixedlen(t) fixedlenx(t, 0, 0)
-
#endif
/*
-** Double the size of the array of captures
+** Ensures the size of array 'capture' (with size '*capsize' and
+** 'captop' elements being used) is enough to accommodate 'n' extra
+** elements plus one. (Because several opcodes add stuff to the capture
+** array, it is simpler to ensure the array always has at least one free
+** slot upfront and check its size later.)
+** Returns the array to use from now on: 'capture' itself when it is
+** already large enough, otherwise a new Lua userdata holding a copy of
+** the first 'captop' entries, with '*capsize' updated to the new size.
+** Raises the Lua error "too many captures" when the needed size
+** reaches INT_MAX / sizeof(Capture).
*/
-static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) {
- Capture *newc;
- if (captop >= INT_MAX/((int)sizeof(Capture) * 2))
- luaL_error(L, "too many captures");
- newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture));
- memcpy(newc, cap, captop * sizeof(Capture));
- lua_replace(L, caplistidx(ptop));
- return newc;
+static Capture *growcap (lua_State *L, Capture *capture, int *capsize,
+ int captop, int n, int ptop) {
+ if (*capsize - captop > n) /* more than 'n' free slots left? */
+ return capture; /* no need to grow array */
+ else { /* must grow */
+ Capture *newc;
+ int newsize = captop + n + 1; /* minimum size needed */
+ if (newsize < INT_MAX/((int)sizeof(Capture) * 2))
+ newsize *= 2; /* twice that size, if not too big */
+ else if (newsize >= INT_MAX/((int)sizeof(Capture)))
+ luaL_error(L, "too many captures");
+ newc = (Capture *)lua_newuserdata(L, newsize * sizeof(Capture));
+ memcpy(newc, capture, captop * sizeof(Capture)); /* copy entries in use */
+ *capsize = newsize;
+ lua_replace(L, caplistidx(ptop)); /* new userdata goes into that stack slot */
+ return newc;
+ }
}
/*
-** Add capture values returned by a dynamic capture to the capture list
-** 'base', nested inside a group capture. 'fd' indexes the first capture
-** value, 'n' is the number of values (at least 1).
+** Add capture values returned by a dynamic capture to the list
+** 'capture', nested inside a group. 'fd' indexes the first capture
+** value, 'n' is the number of values (at least 1). The open group
+** capture is already in 'capture', before the place for the new entries.
+** Writes 'n' Cruntime entries into capture[0..n-1] followed by a Cclose
+** entry in capture[n], all positioned at 's'; no size check is done
+** here, so the caller must have ensured room for these n + 1 entries.
*/
-static void adddyncaptures (const char *s, Capture *base, int n, int fd) {
+static void adddyncaptures (const char *s, Capture *capture, int n, int fd) {
int i;
- /* Cgroup capture is already there */
- assert(base[0].kind == Cgroup && base[0].siz == 0);
- base[0].idx = 0; /* make it an anonymous group */
- for (i = 1; i <= n; i++) { /* add runtime captures */
- base[i].kind = Cruntime;
- base[i].siz = 1; /* mark it as closed */
- base[i].idx = fd + i - 1; /* stack index of capture value */
- base[i].s = s;
+ assert(capture[-1].kind == Cgroup && capture[-1].siz == 0);
+ capture[-1].idx = 0; /* make group capture an anonymous group */
+ for (i = 0; i < n; i++) { /* add runtime captures */
+ capture[i].kind = Cruntime;
+ capture[i].siz = 1; /* mark it as closed */
+ capture[i].idx = fd + i; /* stack index of capture value */
+ capture[i].s = s;
}
- base[i].kind = Cclose; /* close group */
- base[i].siz = 1;
- base[i].s = s;
+ capture[n].kind = Cclose; /* close group */
+ capture[n].siz = 1;
+ capture[n].s = s;
}
for (;;) {
#if defined(DEBUG)
printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ",
- s, stack - getstackbase(L, ptop), ndyncap, captop);
+ s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop);
printinst(op, p);
- printcaplist(capture, capture + captop);
#endif
assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop);
switch ((Opcode)p->i.code) {
CapState cs;
int rem, res, n;
int fr = lua_gettop(L) + 1; /* stack index of first result */
- cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop;
+ cs.reclevel = 0; cs.L = L;
+ cs.s = o; cs.ocap = capture; cs.ptop = ptop;
n = runtimecap(&cs, capture + captop, s, &rem); /* call function */
captop -= n; /* remove nested captures */
+ ndyncap -= rem; /* update number of dynamic captures */
fr -= rem; /* 'rem' items were popped from Lua stack */
res = resdyncaptures(L, fr, s - o, e - o); /* get result */
if (res == -1) /* fail? */
goto fail;
s = o + res; /* else update current position */
n = lua_gettop(L) - fr + 1; /* number of new captures */
- ndyncap += n - rem; /* update number of dynamic captures */
- if (n > 0) { /* any new capture? */
- if ((captop += n + 2) >= capsize) {
- capture = doublecap(L, capture, captop, ptop);
- capsize = 2 * captop;
- }
- /* add new captures to 'capture' list */
- adddyncaptures(s, capture + captop - n - 2, n, fr);
+ ndyncap += n; /* update number of dynamic captures */
+ if (n == 0) /* no new captures? */
+ captop--; /* remove open group */
+ else { /* new captures; keep original open group */
+ if (fr + n >= SHRT_MAX)
+ luaL_error(L, "too many results in match-time capture");
+ /* add new captures + close group to 'capture' list */
+ capture = growcap(L, capture, &capsize, captop, n + 1, ptop);
+ adddyncaptures(s, capture + captop, n, fr);
+ captop += n + 1; /* new captures + close group */
}
p++;
continue;
pushcapture: {
capture[captop].idx = p->i.key;
capture[captop].kind = getkind(p);
- if (++captop >= capsize) {
- capture = doublecap(L, capture, captop, ptop);
- capsize = 2 * captop;
- }
+ captop++;
+ capture = growcap(L, capture, &capsize, captop, 0, ptop);
p++;
continue;
}