From 175d7ac25a9d638e859d677d662ced1cc746c6ce Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 9 Jul 2020 13:08:39 +0100 Subject: [PATCH] [Minor] Update lpeg to the upstream --- contrib/lua-lpeg/lpcap.c | 64 +++++++++++++++++---------- contrib/lua-lpeg/lpcap.h | 20 +++++++-- contrib/lua-lpeg/lpcode.c | 84 +++++++++++++++++++++++------------ contrib/lua-lpeg/lpcode.h | 6 +-- contrib/lua-lpeg/lpvm.c | 92 ++++++++++++++++++++++----------------- 5 files changed, 169 insertions(+), 97 deletions(-) diff --git a/contrib/lua-lpeg/lpcap.c b/contrib/lua-lpeg/lpcap.c index c9085de06..b332fde49 100644 --- a/contrib/lua-lpeg/lpcap.c +++ b/contrib/lua-lpeg/lpcap.c @@ -1,5 +1,5 @@ /* -** $Id: lpcap.c,v 1.6 2015/06/15 16:09:57 roberto Exp $ +** $Id: lpcap.c $ ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ @@ -271,15 +271,15 @@ int finddyncap (Capture *cap, Capture *last) { /* -** Calls a runtime capture. Returns number of captures removed by -** the call, including the initial Cgroup. (Captures to be added are -** on the Lua stack.) +** Calls a runtime capture. Returns number of captures "removed" by the +** call, that is, those inside the group capture. Captures to be added +** are on the Lua stack. */ int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) { int n, id; lua_State *L = cs->L; int otop = lua_gettop(L); - Capture *open = findopen(close); + Capture *open = findopen(close); /* get open group capture */ assert(captype(open) == Cgroup); id = finddyncap(open, close); /* get first dynamic capture argument */ close->kind = Cclose; /* closes the group */ @@ -299,7 +299,7 @@ int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) { } else *rem = 0; /* no dynamic captures removed */ - return close - open; /* number of captures of all kinds removed */ + return close - open - 1; /* number of captures to be removed */ } @@ -441,70 +441,88 @@ static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) { } +#if !defined(MAXRECLEVEL) +#define MAXRECLEVEL 200 +#endif + + /* ** Push all values of the current capture into the stack; returns ** number of values pushed */ static int pushcapture (CapState *cs) { lua_State *L = cs->L; + int res; luaL_checkstack(L, 4, "too many captures"); + if (cs->reclevel++ > MAXRECLEVEL) + return luaL_error(L, "subcapture nesting too deep"); switch (captype(cs->cap)) { case Cposition: { lua_pushinteger(L, cs->cap->s - cs->s + 1); cs->cap++; - return 1; + res = 1; + break; } case Cconst: { pushluaval(cs); cs->cap++; - return 1; + res = 1; + break; } case Carg: { int arg = (cs->cap++)->idx; if (arg + FIXEDARGS > cs->ptop) return luaL_error(L, "reference to absent extra argument #%d", arg); lua_pushvalue(L, arg + FIXEDARGS); - return 1; + res = 1; + break; } case Csimple: { int k = pushnestedvalues(cs, 1); lua_insert(L, -k); /* make whole match be first result */ - return k; + res = k; + break; } case Cruntime: { lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */ - return 1; + res = 1; + break; } case Cstring: { luaL_Buffer b; luaL_buffinit(L, &b); stringcap(&b, cs); luaL_pushresult(&b); - return 1; + res = 1; + break; } case Csubst: { luaL_Buffer b; luaL_buffinit(L, &b); substcap(&b, cs); luaL_pushresult(&b); - return 1; + res = 1; + break; } case Cgroup: { if (cs->cap->idx == 0) /* anonymous group? */ - return pushnestedvalues(cs, 0); /* add all nested values */ + res = pushnestedvalues(cs, 0); /* add all nested values */ else { /* named group: add no values */ nextcap(cs); /* skip capture */ - return 0; + res = 0; } + break; } - case Cbackref: return backrefcap(cs); - case Ctable: return tablecap(cs); - case Cfunction: return functioncap(cs); - case Cnum: return numcap(cs); - case Cquery: return querycap(cs); - case Cfold: return foldcap(cs); - default: assert(0); return 0; + case Cbackref: res = backrefcap(cs); break; + case Ctable: res = tablecap(cs); break; + case Cfunction: res = functioncap(cs); break; + case Cnum: res = numcap(cs); break; + case Cquery: res = querycap(cs); break; + case Cfold: res = foldcap(cs); break; + default: assert(0); res = 0; } + cs->reclevel--; + return res; } @@ -521,7 +539,7 @@ int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { int n = 0; if (!isclosecap(capture)) { /* is there any capture? */ CapState cs; - cs.ocap = cs.cap = capture; cs.L = L; + cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0; cs.s = s; cs.valuecached = 0; cs.ptop = ptop; do { /* collect their values */ n += pushcapture(&cs); diff --git a/contrib/lua-lpeg/lpcap.h b/contrib/lua-lpeg/lpcap.h index d762fdcfa..dc10d6969 100644 --- a/contrib/lua-lpeg/lpcap.h +++ b/contrib/lua-lpeg/lpcap.h @@ -1,5 +1,5 @@ /* -** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $ +** $Id: lpcap.h $ */ #if !defined(lpcap_h) @@ -11,8 +11,21 @@ /* kinds of captures */ typedef enum CapKind { - Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction, - Cquery, Cstring, Cnum, Csubst, Cfold, Cruntime, Cgroup + Cclose, /* not used in trees */ + Cposition, + Cconst, /* ktable[key] is Lua constant */ + Cbackref, /* ktable[key] is "name" of group to get capture */ + Carg, /* 'key' is arg's number */ + Csimple, /* next node is pattern */ + Ctable, /* next node is pattern */ + Cfunction, /* ktable[key] is function; next node is pattern */ + Cquery, /* ktable[key] is table; next node is pattern */ + Cstring, /* ktable[key] is string; next node is pattern */ + Cnum, /* numbered capture; 'key' is number of value to return */ + Csubst, /* substitution capture; next node is pattern */ + Cfold, /* ktable[key] is function; next node is pattern */ + Cruntime, /* not used in trees (is uses another type for tree) */ + Cgroup /* ktable[key] is group's "name" */ } CapKind; @@ -31,6 +44,7 @@ typedef struct CapState { int ptop; /* index of last argument to 'match' */ const char *s; /* original string */ int valuecached; /* value stored in cache slot */ + int reclevel; /* recursion level */ } CapState; diff --git a/contrib/lua-lpeg/lpcode.c b/contrib/lua-lpeg/lpcode.c index 6feefeb43..392345972 100644 --- a/contrib/lua-lpeg/lpcode.c +++ b/contrib/lua-lpeg/lpcode.c @@ -1,5 +1,5 @@ /* -** $Id: lpcode.c,v 1.23 2015/06/12 18:36:47 roberto Exp $ +** $Id: lpcode.c $ ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ @@ -125,6 +125,27 @@ int tocharset (TTree *tree, Charset *cs) { } +/* +** Visit a TCall node taking care to stop recursion. If node not yet +** visited, return 'f(sib2(tree))', otherwise return 'def' (default +** value) +*/ +static int callrecursive (TTree *tree, int f (TTree *t), int def) { + int key = tree->key; + assert(tree->tag == TCall); + assert(sib2(tree)->tag == TRule); + if (key == 0) /* node already visited? */ + return def; /* return default value */ + else { /* first visit */ + int result; + tree->key = 0; /* mark call as already visited */ + result = f(sib2(tree)); /* go to called rule */ + tree->key = key; /* restore tree */ + return result; + } +} + + /* ** Check whether a pattern tree has captures */ @@ -134,14 +155,17 @@ int hascaptures (TTree *tree) { case TCapture: case TRunTime: return 1; case TCall: - tree = sib2(tree); goto tailcall; /* return hascaptures(sib2(tree)); */ + return callrecursive(tree, hascaptures, 0); + case TRule: /* do not follow siblings */ + tree = sib1(tree); goto tailcall; case TOpenCall: assert(0); default: { switch (numsiblings[tree->tag]) { case 1: /* return hascaptures(sib1(tree)); */ tree = sib1(tree); goto tailcall; case 2: - if (hascaptures(sib1(tree))) return 1; + if (hascaptures(sib1(tree))) + return 1; /* else return hascaptures(sib2(tree)); */ tree = sib2(tree); goto tailcall; default: assert(numsiblings[tree->tag] == 0); return 0; @@ -208,9 +232,9 @@ int checkaux (TTree *tree, int pred) { /* ** number of characters to match a pattern (or -1 if variable) -** ('count' avoids infinite loops for grammars) */ -int fixedlenx (TTree *tree, int count, int len) { +int fixedlen (TTree *tree) { + int len = 0; /* to accumulate in tail calls */ tailcall: switch (tree->tag) { case TChar: case TSet: case TAny: @@ -220,26 +244,29 @@ int fixedlenx (TTree *tree, int count, int len) { case TRep: case TRunTime: case TOpenCall: return -1; case TCapture: case TRule: case TGrammar: - /* return fixedlenx(sib1(tree), count); */ + /* return fixedlen(sib1(tree)); */ tree = sib1(tree); goto tailcall; - case TCall: - if (count++ >= MAXRULES) - return -1; /* may be a loop */ - /* else return fixedlenx(sib2(tree), count); */ - tree = sib2(tree); goto tailcall; + case TCall: { + int n1 = callrecursive(tree, fixedlen, -1); + if (n1 < 0) + return -1; + else + return len + n1; + } case TSeq: { - len = fixedlenx(sib1(tree), count, len); - if (len < 0) return -1; - /* else return fixedlenx(sib2(tree), count, len); */ - tree = sib2(tree); goto tailcall; + int n1 = fixedlen(sib1(tree)); + if (n1 < 0) + return -1; + /* else return fixedlen(sib2(tree)) + len; */ + len += n1; tree = sib2(tree); goto tailcall; } case TChoice: { - int n1, n2; - n1 = fixedlenx(sib1(tree), count, len); - if (n1 < 0) return -1; - n2 = fixedlenx(sib2(tree), count, len); - if (n1 == n2) return n1; - else return -1; + int n1 = fixedlen(sib1(tree)); + int n2 = fixedlen(sib2(tree)); + if (n1 != n2 || n1 < 0) + return -1; + else + return len + n1; } default: assert(0); return 0; }; @@ -248,7 +275,7 @@ int fixedlenx (TTree *tree, int count, int len) { /* ** Computes the 'first set' of a pattern. -** The result is a conservative approximation: +** The result is a conservative aproximation: ** match p ax -> x (for some x) ==> a belongs to first(p) ** or ** a not in first(p) ==> match p ax -> fail (for all x) @@ -710,9 +737,10 @@ static void codeand (CompileState *compst, TTree *tree, int tt) { /* -** Captures: if pattern has fixed (and not too big) length, use -** a single IFullCapture instruction after the match; otherwise, -** enclose the pattern with OpenCapture - CloseCapture. +** Captures: if pattern has fixed (and not too big) length, and it +** has no nested captures, use a single IFullCapture instruction +** after the match; otherwise, enclose the pattern with OpenCapture - +** CloseCapture. */ static void codecapture (CompileState *compst, TTree *tree, int tt, const Charset *fl) { @@ -737,13 +765,13 @@ static void coderuntime (CompileState *compst, TTree *tree, int tt) { /* -** Repetition; optimizations: +** Repetion; optimizations: ** When pattern is a charset, can use special instruction ISpan. ** When pattern is head fail, or if it starts with characters that ** are disjoint from what follows the repetions, a simple test ** is enough (a fail inside the repetition would backtrack to fail ** again in the following pattern, so there is no need for a choice). -** When 'opt' is true, the repetition can reuse the Choice already +** When 'opt' is true, the repetion can reuse the Choice already ** active in the stack. */ static void coderep (CompileState *compst, TTree *tree, int opt, @@ -884,7 +912,7 @@ static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2, /* -** Main code-generation function: dispatch to auxiliary functions +** Main code-generation function: dispatch to auxiliar functions ** according to kind of tree. ('needfollow' should return true ** only for consructions that use 'fl'.) */ diff --git a/contrib/lua-lpeg/lpcode.h b/contrib/lua-lpeg/lpcode.h index 896d3c79a..34ee27637 100644 --- a/contrib/lua-lpeg/lpcode.h +++ b/contrib/lua-lpeg/lpcode.h @@ -1,5 +1,5 @@ /* -** $Id: lpcode.h,v 1.7 2015/06/12 18:24:45 roberto Exp $ +** $Id: lpcode.h $ */ #if !defined(lpcode_h) @@ -13,7 +13,7 @@ int tocharset (TTree *tree, Charset *cs); int checkaux (TTree *tree, int pred); -int fixedlenx (TTree *tree, int count, int len); +int fixedlen (TTree *tree); int hascaptures (TTree *tree); int lp_gc (lua_State *L); Instruction *compile (lua_State *L, Pattern *p); @@ -35,8 +35,6 @@ int sizei (const Instruction *i); */ #define nullable(t) checkaux(t, PEnullable) -#define fixedlen(t) fixedlenx(t, 0, 0) - #endif diff --git a/contrib/lua-lpeg/lpvm.c b/contrib/lua-lpeg/lpvm.c index e107292e2..3e67fcaf0 100644 --- a/contrib/lua-lpeg/lpvm.c +++ b/contrib/lua-lpeg/lpvm.c @@ -151,16 +151,29 @@ typedef struct Stack { /* -** Double the size of the array of captures +** Ensures the size of array 'capture' (with size '*capsize' and +** 'captop' elements being used) is enough to accomodate 'n' extra +** elements plus one. (Because several opcodes add stuff to the capture +** array, it is simpler to ensure the array always has at least one free +** slot upfront and check its size later.) */ -static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) { - Capture *newc; - if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) - luaL_error(L, "too many captures"); - newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); - memcpy(newc, cap, captop * sizeof(Capture)); - lua_replace(L, caplistidx(ptop)); - return newc; +static Capture *growcap (lua_State *L, Capture *capture, int *capsize, + int captop, int n, int ptop) { + if (*capsize - captop > n) + return capture; /* no need to grow array */ + else { /* must grow */ + Capture *newc; + int newsize = captop + n + 1; /* minimum size needed */ + if (newsize < INT_MAX/((int)sizeof(Capture) * 2)) + newsize *= 2; /* twice that size, if not too big */ + else if (newsize >= INT_MAX/((int)sizeof(Capture))) + luaL_error(L, "too many captures"); + newc = (Capture *)lua_newuserdata(L, newsize * sizeof(Capture)); + memcpy(newc, capture, captop * sizeof(Capture)); + *capsize = newsize; + lua_replace(L, caplistidx(ptop)); + return newc; + } } @@ -213,24 +226,24 @@ static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { /* -** Add capture values returned by a dynamic capture to the capture list -** 'base', nested inside a group capture. 'fd' indexes the first capture -** value, 'n' is the number of values (at least 1). +** Add capture values returned by a dynamic capture to the list +** 'capture', nested inside a group. 'fd' indexes the first capture +** value, 'n' is the number of values (at least 1). The open group +** capture is already in 'capture', before the place for the new entries. */ -static void adddyncaptures (const char *s, Capture *base, int n, int fd) { +static void adddyncaptures (const char *s, Capture *capture, int n, int fd) { int i; - /* Cgroup capture is already there */ - assert(base[0].kind == Cgroup && base[0].siz == 0); - base[0].idx = 0; /* make it an anonymous group */ - for (i = 1; i <= n; i++) { /* add runtime captures */ - base[i].kind = Cruntime; - base[i].siz = 1; /* mark it as closed */ - base[i].idx = fd + i - 1; /* stack index of capture value */ - base[i].s = s; + assert(capture[-1].kind == Cgroup && capture[-1].siz == 0); + capture[-1].idx = 0; /* make group capture an anonymous group */ + for (i = 0; i < n; i++) { /* add runtime captures */ + capture[i].kind = Cruntime; + capture[i].siz = 1; /* mark it as closed */ + capture[i].idx = fd + i; /* stack index of capture value */ + capture[i].s = s; } - base[i].kind = Cclose; /* close group */ - base[i].siz = 1; - base[i].s = s; + capture[n].kind = Cclose; /* close group */ + capture[n].siz = 1; + capture[n].s = s; } @@ -268,9 +281,8 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, for (;;) { #if defined(DEBUG) printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", - s, stack - getstackbase(L, ptop), ndyncap, captop); + s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop); printinst(op, p); - printcaplist(capture, capture + captop); #endif assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); switch ((Opcode)p->i.code) { @@ -418,23 +430,27 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, CapState cs; int rem, res, n; int fr = lua_gettop(L) + 1; /* stack index of first result */ - cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; + cs.reclevel = 0; cs.L = L; + cs.s = o; cs.ocap = capture; cs.ptop = ptop; n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ captop -= n; /* remove nested captures */ + ndyncap -= rem; /* update number of dynamic captures */ fr -= rem; /* 'rem' items were popped from Lua stack */ res = resdyncaptures(L, fr, s - o, e - o); /* get result */ if (res == -1) /* fail? */ goto fail; s = o + res; /* else update current position */ n = lua_gettop(L) - fr + 1; /* number of new captures */ - ndyncap += n - rem; /* update number of dynamic captures */ - if (n > 0) { /* any new capture? */ - if ((captop += n + 2) >= capsize) { - capture = doublecap(L, capture, captop, ptop); - capsize = 2 * captop; - } - /* add new captures to 'capture' list */ - adddyncaptures(s, capture + captop - n - 2, n, fr); + ndyncap += n; /* update number of dynamic captures */ + if (n == 0) /* no new captures? */ + captop--; /* remove open group */ + else { /* new captures; keep original open group */ + if (fr + n >= SHRT_MAX) + luaL_error(L, "too many results in match-time capture"); + /* add new captures + close group to 'capture' list */ + capture = growcap(L, capture, &capsize, captop, n + 1, ptop); + adddyncaptures(s, capture + captop, n, fr); + captop += n + 1; /* new captures + close group */ } p++; continue; @@ -466,10 +482,8 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, pushcapture: { capture[captop].idx = p->i.key; capture[captop].kind = getkind(p); - if (++captop >= capsize) { - capture = doublecap(L, capture, captop, ptop); - capsize = 2 * captop; - } + captop++; + capture = growcap(L, capture, &capsize, captop, 0, ptop); p++; continue; } -- 2.39.5