@@ -1,5 +1,5 @@ | |||
/* | |||
** $Id: lpcap.c,v 1.6 2015/06/15 16:09:57 roberto Exp $ | |||
** $Id: lpcap.c $ | |||
** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | |||
*/ | |||
@@ -271,15 +271,15 @@ int finddyncap (Capture *cap, Capture *last) { | |||
/* | |||
** Calls a runtime capture. Returns number of captures removed by | |||
** the call, including the initial Cgroup. (Captures to be added are | |||
** on the Lua stack.) | |||
** Calls a runtime capture. Returns number of captures "removed" by the | |||
** call, that is, those inside the group capture. Captures to be added | |||
** are on the Lua stack. | |||
*/ | |||
int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) { | |||
int n, id; | |||
lua_State *L = cs->L; | |||
int otop = lua_gettop(L); | |||
Capture *open = findopen(close); | |||
Capture *open = findopen(close); /* get open group capture */ | |||
assert(captype(open) == Cgroup); | |||
id = finddyncap(open, close); /* get first dynamic capture argument */ | |||
close->kind = Cclose; /* closes the group */ | |||
@@ -299,7 +299,7 @@ int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) { | |||
} | |||
else | |||
*rem = 0; /* no dynamic captures removed */ | |||
return close - open; /* number of captures of all kinds removed */ | |||
return close - open - 1; /* number of captures to be removed */ | |||
} | |||
@@ -441,70 +441,88 @@ static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) { | |||
} | |||
#if !defined(MAXRECLEVEL) | |||
#define MAXRECLEVEL 200 | |||
#endif | |||
/* | |||
** Push all values of the current capture into the stack; returns | |||
** number of values pushed | |||
*/ | |||
static int pushcapture (CapState *cs) { | |||
lua_State *L = cs->L; | |||
int res; | |||
luaL_checkstack(L, 4, "too many captures"); | |||
if (cs->reclevel++ > MAXRECLEVEL) | |||
return luaL_error(L, "subcapture nesting too deep"); | |||
switch (captype(cs->cap)) { | |||
case Cposition: { | |||
lua_pushinteger(L, cs->cap->s - cs->s + 1); | |||
cs->cap++; | |||
return 1; | |||
res = 1; | |||
break; | |||
} | |||
case Cconst: { | |||
pushluaval(cs); | |||
cs->cap++; | |||
return 1; | |||
res = 1; | |||
break; | |||
} | |||
case Carg: { | |||
int arg = (cs->cap++)->idx; | |||
if (arg + FIXEDARGS > cs->ptop) | |||
return luaL_error(L, "reference to absent extra argument #%d", arg); | |||
lua_pushvalue(L, arg + FIXEDARGS); | |||
return 1; | |||
res = 1; | |||
break; | |||
} | |||
case Csimple: { | |||
int k = pushnestedvalues(cs, 1); | |||
lua_insert(L, -k); /* make whole match be first result */ | |||
return k; | |||
res = k; | |||
break; | |||
} | |||
case Cruntime: { | |||
lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */ | |||
return 1; | |||
res = 1; | |||
break; | |||
} | |||
case Cstring: { | |||
luaL_Buffer b; | |||
luaL_buffinit(L, &b); | |||
stringcap(&b, cs); | |||
luaL_pushresult(&b); | |||
return 1; | |||
res = 1; | |||
break; | |||
} | |||
case Csubst: { | |||
luaL_Buffer b; | |||
luaL_buffinit(L, &b); | |||
substcap(&b, cs); | |||
luaL_pushresult(&b); | |||
return 1; | |||
res = 1; | |||
break; | |||
} | |||
case Cgroup: { | |||
if (cs->cap->idx == 0) /* anonymous group? */ | |||
return pushnestedvalues(cs, 0); /* add all nested values */ | |||
res = pushnestedvalues(cs, 0); /* add all nested values */ | |||
else { /* named group: add no values */ | |||
nextcap(cs); /* skip capture */ | |||
return 0; | |||
res = 0; | |||
} | |||
break; | |||
} | |||
case Cbackref: return backrefcap(cs); | |||
case Ctable: return tablecap(cs); | |||
case Cfunction: return functioncap(cs); | |||
case Cnum: return numcap(cs); | |||
case Cquery: return querycap(cs); | |||
case Cfold: return foldcap(cs); | |||
default: assert(0); return 0; | |||
case Cbackref: res = backrefcap(cs); break; | |||
case Ctable: res = tablecap(cs); break; | |||
case Cfunction: res = functioncap(cs); break; | |||
case Cnum: res = numcap(cs); break; | |||
case Cquery: res = querycap(cs); break; | |||
case Cfold: res = foldcap(cs); break; | |||
default: assert(0); res = 0; | |||
} | |||
cs->reclevel--; | |||
return res; | |||
} | |||
@@ -521,7 +539,7 @@ int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { | |||
int n = 0; | |||
if (!isclosecap(capture)) { /* is there any capture? */ | |||
CapState cs; | |||
cs.ocap = cs.cap = capture; cs.L = L; | |||
cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0; | |||
cs.s = s; cs.valuecached = 0; cs.ptop = ptop; | |||
do { /* collect their values */ | |||
n += pushcapture(&cs); |
@@ -1,5 +1,5 @@ | |||
/* | |||
** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $ | |||
** $Id: lpcap.h $ | |||
*/ | |||
#if !defined(lpcap_h) | |||
@@ -11,8 +11,21 @@ | |||
/* kinds of captures */ | |||
typedef enum CapKind { | |||
Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction, | |||
Cquery, Cstring, Cnum, Csubst, Cfold, Cruntime, Cgroup | |||
Cclose, /* not used in trees */ | |||
Cposition, | |||
Cconst, /* ktable[key] is Lua constant */ | |||
Cbackref, /* ktable[key] is "name" of group to get capture */ | |||
Carg, /* 'key' is arg's number */ | |||
Csimple, /* next node is pattern */ | |||
Ctable, /* next node is pattern */ | |||
Cfunction, /* ktable[key] is function; next node is pattern */ | |||
Cquery, /* ktable[key] is table; next node is pattern */ | |||
Cstring, /* ktable[key] is string; next node is pattern */ | |||
Cnum, /* numbered capture; 'key' is number of value to return */ | |||
Csubst, /* substitution capture; next node is pattern */ | |||
Cfold, /* ktable[key] is function; next node is pattern */ | |||
Cruntime, /* not used in trees (is uses another type for tree) */ | |||
Cgroup /* ktable[key] is group's "name" */ | |||
} CapKind; | |||
@@ -31,6 +44,7 @@ typedef struct CapState { | |||
int ptop; /* index of last argument to 'match' */ | |||
const char *s; /* original string */ | |||
int valuecached; /* value stored in cache slot */ | |||
int reclevel; /* recursion level */ | |||
} CapState; | |||
@@ -1,5 +1,5 @@ | |||
/* | |||
** $Id: lpcode.c,v 1.23 2015/06/12 18:36:47 roberto Exp $ | |||
** $Id: lpcode.c $ | |||
** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | |||
*/ | |||
@@ -125,6 +125,27 @@ int tocharset (TTree *tree, Charset *cs) { | |||
} | |||
/* | |||
** Visit a TCall node taking care to stop recursion. If node not yet | |||
** visited, return 'f(sib2(tree))', otherwise return 'def' (default | |||
** value) | |||
*/ | |||
static int callrecursive (TTree *tree, int f (TTree *t), int def) { | |||
int key = tree->key; | |||
assert(tree->tag == TCall); | |||
assert(sib2(tree)->tag == TRule); | |||
if (key == 0) /* node already visited? */ | |||
return def; /* return default value */ | |||
else { /* first visit */ | |||
int result; | |||
tree->key = 0; /* mark call as already visited */ | |||
result = f(sib2(tree)); /* go to called rule */ | |||
tree->key = key; /* restore tree */ | |||
return result; | |||
} | |||
} | |||
/* | |||
** Check whether a pattern tree has captures | |||
*/ | |||
@@ -134,14 +155,17 @@ int hascaptures (TTree *tree) { | |||
case TCapture: case TRunTime: | |||
return 1; | |||
case TCall: | |||
tree = sib2(tree); goto tailcall; /* return hascaptures(sib2(tree)); */ | |||
return callrecursive(tree, hascaptures, 0); | |||
case TRule: /* do not follow siblings */ | |||
tree = sib1(tree); goto tailcall; | |||
case TOpenCall: assert(0); | |||
default: { | |||
switch (numsiblings[tree->tag]) { | |||
case 1: /* return hascaptures(sib1(tree)); */ | |||
tree = sib1(tree); goto tailcall; | |||
case 2: | |||
if (hascaptures(sib1(tree))) return 1; | |||
if (hascaptures(sib1(tree))) | |||
return 1; | |||
/* else return hascaptures(sib2(tree)); */ | |||
tree = sib2(tree); goto tailcall; | |||
default: assert(numsiblings[tree->tag] == 0); return 0; | |||
@@ -208,9 +232,9 @@ int checkaux (TTree *tree, int pred) { | |||
/* | |||
** number of characters to match a pattern (or -1 if variable) | |||
** ('count' avoids infinite loops for grammars) | |||
*/ | |||
int fixedlenx (TTree *tree, int count, int len) { | |||
int fixedlen (TTree *tree) { | |||
int len = 0; /* to accumulate in tail calls */ | |||
tailcall: | |||
switch (tree->tag) { | |||
case TChar: case TSet: case TAny: | |||
@@ -220,26 +244,29 @@ int fixedlenx (TTree *tree, int count, int len) { | |||
case TRep: case TRunTime: case TOpenCall: | |||
return -1; | |||
case TCapture: case TRule: case TGrammar: | |||
/* return fixedlenx(sib1(tree), count); */ | |||
/* return fixedlen(sib1(tree)); */ | |||
tree = sib1(tree); goto tailcall; | |||
case TCall: | |||
if (count++ >= MAXRULES) | |||
return -1; /* may be a loop */ | |||
/* else return fixedlenx(sib2(tree), count); */ | |||
tree = sib2(tree); goto tailcall; | |||
case TCall: { | |||
int n1 = callrecursive(tree, fixedlen, -1); | |||
if (n1 < 0) | |||
return -1; | |||
else | |||
return len + n1; | |||
} | |||
case TSeq: { | |||
len = fixedlenx(sib1(tree), count, len); | |||
if (len < 0) return -1; | |||
/* else return fixedlenx(sib2(tree), count, len); */ | |||
tree = sib2(tree); goto tailcall; | |||
int n1 = fixedlen(sib1(tree)); | |||
if (n1 < 0) | |||
return -1; | |||
/* else return fixedlen(sib2(tree)) + len; */ | |||
len += n1; tree = sib2(tree); goto tailcall; | |||
} | |||
case TChoice: { | |||
int n1, n2; | |||
n1 = fixedlenx(sib1(tree), count, len); | |||
if (n1 < 0) return -1; | |||
n2 = fixedlenx(sib2(tree), count, len); | |||
if (n1 == n2) return n1; | |||
else return -1; | |||
int n1 = fixedlen(sib1(tree)); | |||
int n2 = fixedlen(sib2(tree)); | |||
if (n1 != n2 || n1 < 0) | |||
return -1; | |||
else | |||
return len + n1; | |||
} | |||
default: assert(0); return 0; | |||
}; | |||
@@ -248,7 +275,7 @@ int fixedlenx (TTree *tree, int count, int len) { | |||
/* | |||
** Computes the 'first set' of a pattern. | |||
** The result is a conservative approximation: | |||
** The result is a conservative aproximation: | |||
** match p ax -> x (for some x) ==> a belongs to first(p) | |||
** or | |||
** a not in first(p) ==> match p ax -> fail (for all x) | |||
@@ -710,9 +737,10 @@ static void codeand (CompileState *compst, TTree *tree, int tt) { | |||
/* | |||
** Captures: if pattern has fixed (and not too big) length, use | |||
** a single IFullCapture instruction after the match; otherwise, | |||
** enclose the pattern with OpenCapture - CloseCapture. | |||
** Captures: if pattern has fixed (and not too big) length, and it | |||
** has no nested captures, use a single IFullCapture instruction | |||
** after the match; otherwise, enclose the pattern with OpenCapture - | |||
** CloseCapture. | |||
*/ | |||
static void codecapture (CompileState *compst, TTree *tree, int tt, | |||
const Charset *fl) { | |||
@@ -737,13 +765,13 @@ static void coderuntime (CompileState *compst, TTree *tree, int tt) { | |||
/* | |||
** Repetition; optimizations: | |||
** Repetion; optimizations: | |||
** When pattern is a charset, can use special instruction ISpan. | |||
** When pattern is head fail, or if it starts with characters that | |||
** are disjoint from what follows the repetions, a simple test | |||
** is enough (a fail inside the repetition would backtrack to fail | |||
** again in the following pattern, so there is no need for a choice). | |||
** When 'opt' is true, the repetition can reuse the Choice already | |||
** When 'opt' is true, the repetion can reuse the Choice already | |||
** active in the stack. | |||
*/ | |||
static void coderep (CompileState *compst, TTree *tree, int opt, | |||
@@ -884,7 +912,7 @@ static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2, | |||
/* | |||
** Main code-generation function: dispatch to auxiliary functions | |||
** Main code-generation function: dispatch to auxiliar functions | |||
** according to kind of tree. ('needfollow' should return true | |||
** only for consructions that use 'fl'.) | |||
*/ |
@@ -1,5 +1,5 @@ | |||
/* | |||
** $Id: lpcode.h,v 1.7 2015/06/12 18:24:45 roberto Exp $ | |||
** $Id: lpcode.h $ | |||
*/ | |||
#if !defined(lpcode_h) | |||
@@ -13,7 +13,7 @@ | |||
int tocharset (TTree *tree, Charset *cs); | |||
int checkaux (TTree *tree, int pred); | |||
int fixedlenx (TTree *tree, int count, int len); | |||
int fixedlen (TTree *tree); | |||
int hascaptures (TTree *tree); | |||
int lp_gc (lua_State *L); | |||
Instruction *compile (lua_State *L, Pattern *p); | |||
@@ -35,8 +35,6 @@ int sizei (const Instruction *i); | |||
*/ | |||
#define nullable(t) checkaux(t, PEnullable) | |||
#define fixedlen(t) fixedlenx(t, 0, 0) | |||
#endif |
@@ -151,16 +151,29 @@ typedef struct Stack { | |||
/* | |||
** Double the size of the array of captures | |||
** Ensures the size of array 'capture' (with size '*capsize' and | |||
** 'captop' elements being used) is enough to accomodate 'n' extra | |||
** elements plus one. (Because several opcodes add stuff to the capture | |||
** array, it is simpler to ensure the array always has at least one free | |||
** slot upfront and check its size later.) | |||
*/ | |||
static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) { | |||
Capture *newc; | |||
if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) | |||
luaL_error(L, "too many captures"); | |||
newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); | |||
memcpy(newc, cap, captop * sizeof(Capture)); | |||
lua_replace(L, caplistidx(ptop)); | |||
return newc; | |||
static Capture *growcap (lua_State *L, Capture *capture, int *capsize, | |||
int captop, int n, int ptop) { | |||
if (*capsize - captop > n) | |||
return capture; /* no need to grow array */ | |||
else { /* must grow */ | |||
Capture *newc; | |||
int newsize = captop + n + 1; /* minimum size needed */ | |||
if (newsize < INT_MAX/((int)sizeof(Capture) * 2)) | |||
newsize *= 2; /* twice that size, if not too big */ | |||
else if (newsize >= INT_MAX/((int)sizeof(Capture))) | |||
luaL_error(L, "too many captures"); | |||
newc = (Capture *)lua_newuserdata(L, newsize * sizeof(Capture)); | |||
memcpy(newc, capture, captop * sizeof(Capture)); | |||
*capsize = newsize; | |||
lua_replace(L, caplistidx(ptop)); | |||
return newc; | |||
} | |||
} | |||
@@ -213,24 +226,24 @@ static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { | |||
/* | |||
** Add capture values returned by a dynamic capture to the capture list | |||
** 'base', nested inside a group capture. 'fd' indexes the first capture | |||
** value, 'n' is the number of values (at least 1). | |||
** Add capture values returned by a dynamic capture to the list | |||
** 'capture', nested inside a group. 'fd' indexes the first capture | |||
** value, 'n' is the number of values (at least 1). The open group | |||
** capture is already in 'capture', before the place for the new entries. | |||
*/ | |||
static void adddyncaptures (const char *s, Capture *base, int n, int fd) { | |||
static void adddyncaptures (const char *s, Capture *capture, int n, int fd) { | |||
int i; | |||
/* Cgroup capture is already there */ | |||
assert(base[0].kind == Cgroup && base[0].siz == 0); | |||
base[0].idx = 0; /* make it an anonymous group */ | |||
for (i = 1; i <= n; i++) { /* add runtime captures */ | |||
base[i].kind = Cruntime; | |||
base[i].siz = 1; /* mark it as closed */ | |||
base[i].idx = fd + i - 1; /* stack index of capture value */ | |||
base[i].s = s; | |||
assert(capture[-1].kind == Cgroup && capture[-1].siz == 0); | |||
capture[-1].idx = 0; /* make group capture an anonymous group */ | |||
for (i = 0; i < n; i++) { /* add runtime captures */ | |||
capture[i].kind = Cruntime; | |||
capture[i].siz = 1; /* mark it as closed */ | |||
capture[i].idx = fd + i; /* stack index of capture value */ | |||
capture[i].s = s; | |||
} | |||
base[i].kind = Cclose; /* close group */ | |||
base[i].siz = 1; | |||
base[i].s = s; | |||
capture[n].kind = Cclose; /* close group */ | |||
capture[n].siz = 1; | |||
capture[n].s = s; | |||
} | |||
@@ -268,9 +281,8 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
for (;;) { | |||
#if defined(DEBUG) | |||
printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", | |||
s, stack - getstackbase(L, ptop), ndyncap, captop); | |||
s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop); | |||
printinst(op, p); | |||
printcaplist(capture, capture + captop); | |||
#endif | |||
assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); | |||
switch ((Opcode)p->i.code) { | |||
@@ -418,23 +430,27 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
CapState cs; | |||
int rem, res, n; | |||
int fr = lua_gettop(L) + 1; /* stack index of first result */ | |||
cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; | |||
cs.reclevel = 0; cs.L = L; | |||
cs.s = o; cs.ocap = capture; cs.ptop = ptop; | |||
n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ | |||
captop -= n; /* remove nested captures */ | |||
ndyncap -= rem; /* update number of dynamic captures */ | |||
fr -= rem; /* 'rem' items were popped from Lua stack */ | |||
res = resdyncaptures(L, fr, s - o, e - o); /* get result */ | |||
if (res == -1) /* fail? */ | |||
goto fail; | |||
s = o + res; /* else update current position */ | |||
n = lua_gettop(L) - fr + 1; /* number of new captures */ | |||
ndyncap += n - rem; /* update number of dynamic captures */ | |||
if (n > 0) { /* any new capture? */ | |||
if ((captop += n + 2) >= capsize) { | |||
capture = doublecap(L, capture, captop, ptop); | |||
capsize = 2 * captop; | |||
} | |||
/* add new captures to 'capture' list */ | |||
adddyncaptures(s, capture + captop - n - 2, n, fr); | |||
ndyncap += n; /* update number of dynamic captures */ | |||
if (n == 0) /* no new captures? */ | |||
captop--; /* remove open group */ | |||
else { /* new captures; keep original open group */ | |||
if (fr + n >= SHRT_MAX) | |||
luaL_error(L, "too many results in match-time capture"); | |||
/* add new captures + close group to 'capture' list */ | |||
capture = growcap(L, capture, &capsize, captop, n + 1, ptop); | |||
adddyncaptures(s, capture + captop, n, fr); | |||
captop += n + 1; /* new captures + close group */ | |||
} | |||
p++; | |||
continue; | |||
@@ -466,10 +482,8 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
pushcapture: { | |||
capture[captop].idx = p->i.key; | |||
capture[captop].kind = getkind(p); | |||
if (++captop >= capsize) { | |||
capture = doublecap(L, capture, captop, ptop); | |||
capsize = 2 * captop; | |||
} | |||
captop++; | |||
capture = growcap(L, capture, &capsize, captop, 0, ptop); | |||
p++; | |||
continue; | |||
} |