From 8164a21476e2f343a761c30f05c065199ea092ba Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 13 Mar 2017 11:37:14 +0000 Subject: [PATCH] [Fix] Update issues in ac-trie --- contrib/aho-corasick/acism.c | 42 ++++++++--------- contrib/aho-corasick/acism_create.c | 70 ++++++++--------------------- 2 files changed, 39 insertions(+), 73 deletions(-) diff --git a/contrib/aho-corasick/acism.c b/contrib/aho-corasick/acism.c index a0b478ad8..e2b48a590 100644 --- a/contrib/aho-corasick/acism.c +++ b/contrib/aho-corasick/acism.c @@ -24,20 +24,20 @@ #define BACK ((SYMBOL)0) #define ROOT ((STATE) 0) +extern const guchar lc_map[256]; int acism_lookup(ac_trie_t const *psp, const char *text, size_t len, ACISM_ACTION *cb, void *context, int *statep, bool caseless) { - ac_trie_t const ps = *psp; char const *cp = text, *endp = cp + len; uint8_t s; STATE state = *statep; int ret = 0; while (cp < endp) { - s = caseless ? g_ascii_tolower (*cp++) : *cp++; - _SYMBOL sym = ps.symv[s]; + s = caseless ? lc_map[(guint8)*cp++] : *cp++; + _SYMBOL sym = psp->symv[s]; if (!sym) { // Input byte is not in any pattern string. state = ROOT; @@ -48,17 +48,17 @@ acism_lookup(ac_trie_t const *psp, const char *text, size_t len, // following the backref chain. TRAN next; - while (!t_valid(&ps, next = p_tran(&ps, state, sym)) && state != ROOT) { - TRAN back = p_tran(&ps, state, BACK); - state = t_valid(&ps, back) ? t_next(&ps, back) : ROOT; + while (!t_valid(psp, next = p_tran(psp, state, sym)) && state != ROOT) { + TRAN back = p_tran(psp, state, BACK); + state = t_valid(psp, back) ? t_next(psp, back) : ROOT; } - if (!t_valid(&ps, next)) + if (!t_valid(psp, next)) continue; if (!(next & (IS_MATCH | IS_SUFFIX))) { // No complete match yet; keep going. - state = t_next(&ps, next); + state = t_next(psp, next); continue; } @@ -73,27 +73,27 @@ acism_lookup(ac_trie_t const *psp, const char *text, size_t len, // Initially state is ROOT. The chain search saves the // first state from which the next char has a transition. - state = t_isleaf(&ps, next) ? 0 : t_next(&ps, next); + state = t_isleaf(psp, next) ? 0 : t_next(psp, next); while (1) { - if (t_valid(&ps, next)) { + if (t_valid(psp, next)) { if (next & IS_MATCH) { unsigned strno, ss = s + sym, i; - if (t_isleaf(&ps, ps.tranv[ss])) { - strno = t_strno(&ps, ps.tranv[ss]); + if (t_isleaf(psp, psp->tranv[ss])) { + strno = t_strno(psp, psp->tranv[ss]); } else { - for (i = p_hash(&ps, ss); ps.hashv[i].state != ss; ++i); - strno = ps.hashv[i].strno; + for (i = p_hash(psp, ss); psp->hashv[i].state != ss; ++i); + strno = psp->hashv[i].strno; } if ((ret = cb(strno, cp - text, context))) goto EXIT; } - if (!state && !t_isleaf(&ps, next)) - state = t_next(&ps, next); + if (!state && !t_isleaf(psp, next)) + state = t_next(psp, next); if ( state && !(next & IS_SUFFIX)) break; } @@ -101,9 +101,9 @@ acism_lookup(ac_trie_t const *psp, const char *text, size_t len, if (s == ROOT) break; - TRAN b = p_tran(&ps, s, BACK); - s = t_valid(&ps, b) ? t_next(&ps, b) : ROOT; - next = p_tran(&ps, s, sym); + TRAN b = p_tran(psp, s, BACK); + s = t_valid(psp, b) ? t_next(psp, b) : ROOT; + next = p_tran(psp, s, sym); } } EXIT: @@ -118,7 +118,7 @@ acism_destroy(ac_trie_t *psp) if (psp->flags & IS_MMAP) munmap((char*)psp->tranv - sizeof(ac_trie_t), sizeof(ac_trie_t) + p_size(psp)); - else free(psp->tranv); - free(psp); + else g_free(psp->tranv); + g_free(psp); } //EOF diff --git a/contrib/aho-corasick/acism_create.c b/contrib/aho-corasick/acism_create.c index a6d007fd7..b12589ae6 100644 --- a/contrib/aho-corasick/acism_create.c +++ b/contrib/aho-corasick/acism_create.c @@ -40,7 +40,6 @@ static inline int bitwid(unsigned u) static void fill_symv(ac_trie_t*, ac_trie_pat_t const*, int ns); static int create_tree(TNODE*, SYMBOL const*symv, ac_trie_pat_t const*strv, int nstrs); static void add_backlinks(TNODE*, TNODE**, TNODE**); -static void prune_backlinks(TNODE*); static int interleave(TNODE*, int nnodes, int nsyms, TNODE**, TNODE**); static void fill_tranv(ac_trie_t*, TNODE const*); static void fill_hashv(ac_trie_t*, TNODE const*, int nn); @@ -72,22 +71,23 @@ ac_trie_t* acism_create(ac_trie_pat_t const* strv, int nstrs) { TNODE *tp, **v1 = NULL, **v2 = NULL; - ac_trie_t *psp = calloc(1, sizeof*psp); + ac_trie_t *psp = g_malloc0(sizeof(*psp)); fill_symv(psp, strv, nstrs); - TNODE *troot = calloc(psp->nchars + 1, sizeof*troot); + TNODE *troot = g_new0(TNODE, psp->nchars + 1); int nnodes = create_tree(troot, psp->symv, strv, nstrs); NOTE(nnodes); // v1, v2: breadth-first work vectors for add_backlink and interleave. - int nhash, i = (nstrs + 1) * sizeof*tp; - add_backlinks(troot, v1 = malloc(i), v2 = malloc(i)); + int nhash = 0, i = (nstrs + 1) * sizeof*tp; - for (tp = troot + nnodes, nhash = 0; --tp > troot;) { - prune_backlinks(tp); + for (tp = troot + nnodes; --tp > troot;) { nhash += tp->match && tp->child; } + v1 = g_malloc(i); + v2 = g_malloc(i); + add_backlinks(troot, v1, v2); // Calculate each node's offset in tranv[]: psp->tran_size = interleave(troot, nnodes, psp->nsyms, v1, v2); @@ -102,7 +102,7 @@ acism_create(ac_trie_pat_t const* strv, int nstrs) psp->hash_size = psp->hash_mod + nhash; } - set_tranv(psp, calloc(p_size(psp), 1)); + set_tranv(psp, g_malloc0(p_size(psp))); if (!psp->tranv) goto FAIL; fill_tranv(psp, troot); // The root state (0) must not look like a valid backref. @@ -116,7 +116,7 @@ acism_create(ac_trie_pat_t const* strv, int nstrs) psp->hash_size = psp->hash_mod; while ( psp->hashv[psp->hash_size].state) ++psp->hash_size; while (!psp->hashv[psp->hash_size - 1].state) --psp->hash_size; - set_tranv(psp, realloc(psp->tranv, p_size(psp))); + set_tranv(psp, g_realloc(psp->tranv, p_size(psp))); } // Diagnostics/statistics only: @@ -126,7 +126,7 @@ acism_create(ac_trie_pat_t const* strv, int nstrs) goto DONE; FAIL: acism_destroy(psp), psp = NULL; -DONE: free(troot), free(v1), free(v2); +DONE: g_free(troot), g_free(v1), g_free(v2); return psp; } @@ -151,7 +151,7 @@ fill_symv(ac_trie_t *psp, ac_trie_pat_t const *strv, int nstrs) ++psp->nsyms; #if ACISM_SIZE < 8 psp->sym_bits = bitwid(psp->nsyms); - psp->sym_mask = ~(-1 << psp->sym_bits); + psp->sym_mask = ~(((TRAN)-1) << psp->sym_bits); #endif } @@ -214,7 +214,7 @@ add_backlinks(TNODE *troot, TNODE **v1, TNODE **v2) TNODE *bp = NULL; *dpp++ = dstp; for (tp = srcp->back; tp; tp = tp->back) - if ((bp = find_child(tp, dstp->sym))) break; + if ((bp = find_child(tp, dstp->sym)) && bp->child) break; if (!bp) bp = troot; @@ -228,45 +228,11 @@ add_backlinks(TNODE *troot, TNODE **v1, TNODE **v2) } } -static void -prune_backlinks(TNODE *tp) -{ - if (tp->x.nrefs || !tp->child) - return; - - TNODE *bp; - // (bp != bp->back IFF bp != troot) - while ((bp = tp->back) && !bp->match && bp != bp->back) { - HIT("backlinks"); - TNODE *cp = tp->child, *pp = bp->child; - - // Search for a child of bp that's not a child of tp - for (; cp && pp && pp->sym >= cp->sym; cp = cp->next) { - if (pp->sym == cp->sym) { - if (pp->match && cp->is_suffix) break; - pp = pp->next; - } - } - - if (pp) break; - - // So, target of back link is not a suffix match - // of this node, and its children are a subset - // of this node's children: prune it. - HIT("pruned"); - if ((tp->back = bp->back)) { - tp->back->x.nrefs++; - if (!--bp->x.nrefs) - prune_backlinks(bp); - } - } -} - static int interleave(TNODE *troot, int nnodes, int nsyms, TNODE **v1, TNODE **v2) { unsigned usev_size = nnodes + nsyms; - char *usev = calloc(usev_size, sizeof*usev); + char *usev = g_new0(char, usev_size); STATE last_trans = 0, startv[nsyms][2]; TNODE *cp, **tmp; @@ -323,13 +289,13 @@ interleave(TNODE *troot, int nnodes, int nsyms, TNODE **v1, TNODE **v2) if (last_trans < last) { last_trans = last; if (last + nsyms >= usev_size) { - char *tmp = realloc(usev, usev_size << 1); + char *tmp = g_realloc(usev, usev_size << 1); if (tmp != NULL) { usev = tmp; memset(usev + usev_size, 0, usev_size); usev_size <<= 1; } else { - free(usev); + g_free(usev); /* And handle error */ } } @@ -339,14 +305,14 @@ interleave(TNODE *troot, int nnodes, int nsyms, TNODE **v1, TNODE **v2) *dstp = NULL; } - free(usev); + g_free(usev); return last_trans + 1; } static void fill_hashv(ac_trie_t *psp, TNODE const treev[], int nnodes) { - STRASH *sv = malloc(psp->hash_mod * sizeof*sv), *sp = sv; + STRASH *sv = g_malloc(psp->hash_mod * sizeof*sv), *sp = sv; int i; // First pass: insert without resolving collisions. @@ -369,7 +335,7 @@ fill_hashv(ac_trie_t *psp, TNODE const treev[], int nnodes) psp->hashv[i] = *sp; } - free(sv); + g_free(sv); } static void -- 2.39.5