summaryrefslogtreecommitdiffstats
path: root/contrib/aho-corasick/acism_create.c
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2017-09-26 21:18:08 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2017-09-26 21:18:08 +0100
commit2d053b6fac9a7028181a31684cd7bada0016bf37 (patch)
tree84ce8cc4619c3c5eb0bff0c12b28365037b39b56 /contrib/aho-corasick/acism_create.c
parent7d9581a54ecfdb3abc28d853869d25ccb4fb8bf1 (diff)
downloadrspamd-2d053b6fac9a7028181a31684cd7bada0016bf37.tar.gz
rspamd-2d053b6fac9a7028181a31684cd7bada0016bf37.zip
[Minor] Update aho-corasic implementation
Diffstat (limited to 'contrib/aho-corasick/acism_create.c')
-rw-r--r--contrib/aho-corasick/acism_create.c31
1 files changed, 22 insertions, 9 deletions
diff --git a/contrib/aho-corasick/acism_create.c b/contrib/aho-corasick/acism_create.c
index b12589ae6..bd8586c82 100644
--- a/contrib/aho-corasick/acism_create.c
+++ b/contrib/aho-corasick/acism_create.c
@@ -70,7 +70,7 @@ extern PSSTAT psstat[];
ac_trie_t*
acism_create(ac_trie_pat_t const* strv, int nstrs)
{
- TNODE *tp, **v1 = NULL, **v2 = NULL;
+ TNODE **v1 = NULL, **v2 = NULL;
ac_trie_t *psp = g_malloc0(sizeof(*psp));
fill_symv(psp, strv, nstrs);
@@ -80,14 +80,14 @@ acism_create(ac_trie_pat_t const* strv, int nstrs)
NOTE(nnodes);
// v1, v2: breadth-first work vectors for add_backlink and interleave.
- int nhash = 0, i = (nstrs + 1) * sizeof*tp;
+ int i = (nstrs + 1) * sizeof(TNODE);
+ add_backlinks(troot, v1 = g_malloc(i), v2 = g_malloc(i));
+ int nhash = 0;
+ TNODE* tp = troot + nnodes;
- for (tp = troot + nnodes; --tp > troot;) {
+ while (--tp > troot) {
nhash += tp->match && tp->child;
}
- v1 = g_malloc(i);
- v2 = g_malloc(i);
- add_backlinks(troot, v1, v2);
// Calculate each node's offset in tranv[]:
psp->tran_size = interleave(troot, nnodes, psp->nsyms, v1, v2);
@@ -151,7 +151,7 @@ fill_symv(ac_trie_t *psp, ac_trie_pat_t const *strv, int nstrs)
++psp->nsyms;
#if ACISM_SIZE < 8
psp->sym_bits = bitwid(psp->nsyms);
- psp->sym_mask = ~(((TRAN)-1) << psp->sym_bits);
+ psp->sym_mask = ~(-1 << psp->sym_bits);
#endif
}
@@ -212,9 +212,22 @@ add_backlinks(TNODE *troot, TNODE **v1, TNODE **v2)
while ((srcp = *spp++)) {
for (dstp = srcp->child; dstp; dstp = dstp->next) {
TNODE *bp = NULL;
- *dpp++ = dstp;
+
+ if (dstp->child)
+ *dpp++ = dstp;
+
+ // Go through the parent (srcp) node's backlink chain,
+ // looking for a useful backlink for the child (dstp).
+ // If the parent (srcp) has a backlink to (tp), and (tp) has a child (with children)
+ // matching the transition sym for (srcp -> dstp),
+ // then it is a useful backlink for the child (dstp).
+ // Note that backlinks do not point at the suffix match;
+ // they point at the PARENT of that match.
+
for (tp = srcp->back; tp; tp = tp->back)
- if ((bp = find_child(tp, dstp->sym)) && bp->child) break;
+ if ((bp = find_child(tp, dstp->sym)) && bp->child)
+ break;
+
if (!bp)
bp = troot;