aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/aho-corasick
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2017-03-13 11:37:14 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2017-03-13 11:37:14 +0000
commit8164a21476e2f343a761c30f05c065199ea092ba (patch)
treec7dec671548f4578043909744ce06286e7896c92 /contrib/aho-corasick
parentdfc2fe8153d1dc6a4271373059f215029ca4a2e1 (diff)
downloadrspamd-8164a21476e2f343a761c30f05c065199ea092ba.tar.gz
rspamd-8164a21476e2f343a761c30f05c065199ea092ba.zip
[Fix] Update issues in ac-trie
Diffstat (limited to 'contrib/aho-corasick')
-rw-r--r--contrib/aho-corasick/acism.c42
-rw-r--r--contrib/aho-corasick/acism_create.c70
2 files changed, 39 insertions, 73 deletions
diff --git a/contrib/aho-corasick/acism.c b/contrib/aho-corasick/acism.c
index a0b478ad8..e2b48a590 100644
--- a/contrib/aho-corasick/acism.c
+++ b/contrib/aho-corasick/acism.c
@@ -24,20 +24,20 @@
#define BACK ((SYMBOL)0)
#define ROOT ((STATE) 0)
+extern const guchar lc_map[256];
int
acism_lookup(ac_trie_t const *psp, const char *text, size_t len,
ACISM_ACTION *cb, void *context, int *statep, bool caseless)
{
- ac_trie_t const ps = *psp;
char const *cp = text, *endp = cp + len;
uint8_t s;
STATE state = *statep;
int ret = 0;
while (cp < endp) {
- s = caseless ? g_ascii_tolower (*cp++) : *cp++;
- _SYMBOL sym = ps.symv[s];
+ s = caseless ? lc_map[(guint8)*cp++] : *cp++;
+ _SYMBOL sym = psp->symv[s];
if (!sym) {
// Input byte is not in any pattern string.
state = ROOT;
@@ -48,17 +48,17 @@ acism_lookup(ac_trie_t const *psp, const char *text, size_t len,
// following the backref chain.
TRAN next;
- while (!t_valid(&ps, next = p_tran(&ps, state, sym)) && state != ROOT) {
- TRAN back = p_tran(&ps, state, BACK);
- state = t_valid(&ps, back) ? t_next(&ps, back) : ROOT;
+ while (!t_valid(psp, next = p_tran(psp, state, sym)) && state != ROOT) {
+ TRAN back = p_tran(psp, state, BACK);
+ state = t_valid(psp, back) ? t_next(psp, back) : ROOT;
}
- if (!t_valid(&ps, next))
+ if (!t_valid(psp, next))
continue;
if (!(next & (IS_MATCH | IS_SUFFIX))) {
// No complete match yet; keep going.
- state = t_next(&ps, next);
+ state = t_next(psp, next);
continue;
}
@@ -73,27 +73,27 @@ acism_lookup(ac_trie_t const *psp, const char *text, size_t len,
// Initially state is ROOT. The chain search saves the
// first state from which the next char has a transition.
- state = t_isleaf(&ps, next) ? 0 : t_next(&ps, next);
+ state = t_isleaf(psp, next) ? 0 : t_next(psp, next);
while (1) {
- if (t_valid(&ps, next)) {
+ if (t_valid(psp, next)) {
if (next & IS_MATCH) {
unsigned strno, ss = s + sym, i;
- if (t_isleaf(&ps, ps.tranv[ss])) {
- strno = t_strno(&ps, ps.tranv[ss]);
+ if (t_isleaf(psp, psp->tranv[ss])) {
+ strno = t_strno(psp, psp->tranv[ss]);
} else {
- for (i = p_hash(&ps, ss); ps.hashv[i].state != ss; ++i);
- strno = ps.hashv[i].strno;
+ for (i = p_hash(psp, ss); psp->hashv[i].state != ss; ++i);
+ strno = psp->hashv[i].strno;
}
if ((ret = cb(strno, cp - text, context)))
goto EXIT;
}
- if (!state && !t_isleaf(&ps, next))
- state = t_next(&ps, next);
+ if (!state && !t_isleaf(psp, next))
+ state = t_next(psp, next);
if ( state && !(next & IS_SUFFIX))
break;
}
@@ -101,9 +101,9 @@ acism_lookup(ac_trie_t const *psp, const char *text, size_t len,
if (s == ROOT)
break;
- TRAN b = p_tran(&ps, s, BACK);
- s = t_valid(&ps, b) ? t_next(&ps, b) : ROOT;
- next = p_tran(&ps, s, sym);
+ TRAN b = p_tran(psp, s, BACK);
+ s = t_valid(psp, b) ? t_next(psp, b) : ROOT;
+ next = p_tran(psp, s, sym);
}
}
EXIT:
@@ -118,7 +118,7 @@ acism_destroy(ac_trie_t *psp)
if (psp->flags & IS_MMAP)
munmap((char*)psp->tranv - sizeof(ac_trie_t),
sizeof(ac_trie_t) + p_size(psp));
- else free(psp->tranv);
- free(psp);
+ else g_free(psp->tranv);
+ g_free(psp);
}
//EOF
diff --git a/contrib/aho-corasick/acism_create.c b/contrib/aho-corasick/acism_create.c
index a6d007fd7..b12589ae6 100644
--- a/contrib/aho-corasick/acism_create.c
+++ b/contrib/aho-corasick/acism_create.c
@@ -40,7 +40,6 @@ static inline int bitwid(unsigned u)
static void fill_symv(ac_trie_t*, ac_trie_pat_t const*, int ns);
static int create_tree(TNODE*, SYMBOL const*symv, ac_trie_pat_t const*strv, int nstrs);
static void add_backlinks(TNODE*, TNODE**, TNODE**);
-static void prune_backlinks(TNODE*);
static int interleave(TNODE*, int nnodes, int nsyms, TNODE**, TNODE**);
static void fill_tranv(ac_trie_t*, TNODE const*);
static void fill_hashv(ac_trie_t*, TNODE const*, int nn);
@@ -72,22 +71,23 @@ ac_trie_t*
acism_create(ac_trie_pat_t const* strv, int nstrs)
{
TNODE *tp, **v1 = NULL, **v2 = NULL;
- ac_trie_t *psp = calloc(1, sizeof*psp);
+ ac_trie_t *psp = g_malloc0(sizeof(*psp));
fill_symv(psp, strv, nstrs);
- TNODE *troot = calloc(psp->nchars + 1, sizeof*troot);
+ TNODE *troot = g_new0(TNODE, psp->nchars + 1);
int nnodes = create_tree(troot, psp->symv, strv, nstrs);
NOTE(nnodes);
// v1, v2: breadth-first work vectors for add_backlink and interleave.
- int nhash, i = (nstrs + 1) * sizeof*tp;
- add_backlinks(troot, v1 = malloc(i), v2 = malloc(i));
+ int nhash = 0, i = (nstrs + 1) * sizeof*tp;
- for (tp = troot + nnodes, nhash = 0; --tp > troot;) {
- prune_backlinks(tp);
+ for (tp = troot + nnodes; --tp > troot;) {
nhash += tp->match && tp->child;
}
+ v1 = g_malloc(i);
+ v2 = g_malloc(i);
+ add_backlinks(troot, v1, v2);
// Calculate each node's offset in tranv[]:
psp->tran_size = interleave(troot, nnodes, psp->nsyms, v1, v2);
@@ -102,7 +102,7 @@ acism_create(ac_trie_pat_t const* strv, int nstrs)
psp->hash_size = psp->hash_mod + nhash;
}
- set_tranv(psp, calloc(p_size(psp), 1));
+ set_tranv(psp, g_malloc0(p_size(psp)));
if (!psp->tranv) goto FAIL;
fill_tranv(psp, troot);
// The root state (0) must not look like a valid backref.
@@ -116,7 +116,7 @@ acism_create(ac_trie_pat_t const* strv, int nstrs)
psp->hash_size = psp->hash_mod;
while ( psp->hashv[psp->hash_size].state) ++psp->hash_size;
while (!psp->hashv[psp->hash_size - 1].state) --psp->hash_size;
- set_tranv(psp, realloc(psp->tranv, p_size(psp)));
+ set_tranv(psp, g_realloc(psp->tranv, p_size(psp)));
}
// Diagnostics/statistics only:
@@ -126,7 +126,7 @@ acism_create(ac_trie_pat_t const* strv, int nstrs)
goto DONE;
FAIL: acism_destroy(psp), psp = NULL;
-DONE: free(troot), free(v1), free(v2);
+DONE: g_free(troot), g_free(v1), g_free(v2);
return psp;
}
@@ -151,7 +151,7 @@ fill_symv(ac_trie_t *psp, ac_trie_pat_t const *strv, int nstrs)
++psp->nsyms;
#if ACISM_SIZE < 8
psp->sym_bits = bitwid(psp->nsyms);
- psp->sym_mask = ~(-1 << psp->sym_bits);
+ psp->sym_mask = ~(((TRAN)-1) << psp->sym_bits);
#endif
}
@@ -214,7 +214,7 @@ add_backlinks(TNODE *troot, TNODE **v1, TNODE **v2)
TNODE *bp = NULL;
*dpp++ = dstp;
for (tp = srcp->back; tp; tp = tp->back)
- if ((bp = find_child(tp, dstp->sym))) break;
+ if ((bp = find_child(tp, dstp->sym)) && bp->child) break;
if (!bp)
bp = troot;
@@ -228,45 +228,11 @@ add_backlinks(TNODE *troot, TNODE **v1, TNODE **v2)
}
}
-static void
-prune_backlinks(TNODE *tp)
-{
- if (tp->x.nrefs || !tp->child)
- return;
-
- TNODE *bp;
- // (bp != bp->back IFF bp != troot)
- while ((bp = tp->back) && !bp->match && bp != bp->back) {
- HIT("backlinks");
- TNODE *cp = tp->child, *pp = bp->child;
-
- // Search for a child of bp that's not a child of tp
- for (; cp && pp && pp->sym >= cp->sym; cp = cp->next) {
- if (pp->sym == cp->sym) {
- if (pp->match && cp->is_suffix) break;
- pp = pp->next;
- }
- }
-
- if (pp) break;
-
- // So, target of back link is not a suffix match
- // of this node, and its children are a subset
- // of this node's children: prune it.
- HIT("pruned");
- if ((tp->back = bp->back)) {
- tp->back->x.nrefs++;
- if (!--bp->x.nrefs)
- prune_backlinks(bp);
- }
- }
-}
-
static int
interleave(TNODE *troot, int nnodes, int nsyms, TNODE **v1, TNODE **v2)
{
unsigned usev_size = nnodes + nsyms;
- char *usev = calloc(usev_size, sizeof*usev);
+ char *usev = g_new0(char, usev_size);
STATE last_trans = 0, startv[nsyms][2];
TNODE *cp, **tmp;
@@ -323,13 +289,13 @@ interleave(TNODE *troot, int nnodes, int nsyms, TNODE **v1, TNODE **v2)
if (last_trans < last) {
last_trans = last;
if (last + nsyms >= usev_size) {
- char *tmp = realloc(usev, usev_size << 1);
+ char *tmp = g_realloc(usev, usev_size << 1);
if (tmp != NULL) {
usev = tmp;
memset(usev + usev_size, 0, usev_size);
usev_size <<= 1;
} else {
- free(usev);
+ g_free(usev);
/* And handle error */
}
}
@@ -339,14 +305,14 @@ interleave(TNODE *troot, int nnodes, int nsyms, TNODE **v1, TNODE **v2)
*dstp = NULL;
}
- free(usev);
+ g_free(usev);
return last_trans + 1;
}
static void
fill_hashv(ac_trie_t *psp, TNODE const treev[], int nnodes)
{
- STRASH *sv = malloc(psp->hash_mod * sizeof*sv), *sp = sv;
+ STRASH *sv = g_malloc(psp->hash_mod * sizeof*sv), *sp = sv;
int i;
// First pass: insert without resolving collisions.
@@ -369,7 +335,7 @@ fill_hashv(ac_trie_t *psp, TNODE const treev[], int nnodes)
psp->hashv[i] = *sp;
}
- free(sv);
+ g_free(sv);
}
static void