aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/aho-corasick
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2018-11-12 12:57:24 +0000
committerVsevolod Stakhov <vsevolod@highsecure.ru>2018-11-12 12:57:24 +0000
commit093dd1190db7062e5c7f53c8bfb2af5a8e9faa5c (patch)
tree8a26ac62aea8b9636722485c51d05924f3f9a75e /contrib/aho-corasick
parent29e1d556d9f0f7061aabfe93122cafb5450c75c3 (diff)
downloadrspamd-093dd1190db7062e5c7f53c8bfb2af5a8e9faa5c.tar.gz
rspamd-093dd1190db7062e5c7f53c8bfb2af5a8e9faa5c.zip
[Fix] Fix actrie implementation (sync from upstream), fixed OOB read
Diffstat (limited to 'contrib/aho-corasick')
-rw-r--r--contrib/aho-corasick/_acism.h73
-rw-r--r--contrib/aho-corasick/acism.h4
-rw-r--r--contrib/aho-corasick/acism_create.c113
3 files changed, 102 insertions, 88 deletions
diff --git a/contrib/aho-corasick/_acism.h b/contrib/aho-corasick/_acism.h
index 057bc31c3..3993594a0 100644
--- a/contrib/aho-corasick/_acism.h
+++ b/contrib/aho-corasick/_acism.h
@@ -2,7 +2,7 @@
** Copyright (C) 2009-2014 Mischa Sandberg <mischasan@gmail.com>
**
** This program is free software; you can redistribute it and/or modify
-** it under the terms of the GNU Lesser General Public License Version as
+** it under the terms of the GNU Lesser General Public License Version 3 as
** published by the Free Software Foundation. You may not use, modify or
** distribute this program under any other version of the GNU Lesser General
** Public License.
@@ -25,30 +25,50 @@
typedef int (*qsort_cmp)(const void *, const void *);
+// "Width" specifier for different plats
+#if __LONG_MAX__ == 9223372036854775807LL
+# ifdef __APPLE__
+# define F64 "ll"
+# else
+# define F64 "l"
+# endif
+#elif __LONG_MAX__ == 2147483647L || defined(_LONG_LONG) || defined(__sun) // AIX 6.1 ...
+# define F64 "ll"
+#else
+//XXX Assuming F64 is "ll" for VS
+# define F64 "ll"
+#endif
+
#ifndef ACISM_SIZE
# define ACISM_SIZE 4
#endif
#if ACISM_SIZE == 8
- typedef uint64_t TRAN, STATE, STRNO;
+typedef uint64_t TRAN, STATE, STRNO;
# define SYM_BITS 9U
# define SYM_MASK 511
+# define FNO F64
#else
- typedef uint32_t TRAN, STATE, STRNO;
+typedef uint32_t TRAN, STATE, STRNO;
# define SYM_BITS psp->sym_bits
# define SYM_MASK psp->sym_mask
+# define FNO
#endif
typedef uint16_t SYMBOL;
typedef unsigned _SYMBOL; // An efficient stacklocal SYMBOL
-#define IS_MATCH ((TRAN)1 << (8*sizeof(TRAN) - 1))
-#define IS_SUFFIX ((TRAN)1 << (8*sizeof(TRAN) - 2))
-#define T_FLAGS (IS_MATCH | IS_SUFFIX)
+#define BACK ((SYMBOL)0)
+#define ROOT ((STATE) 0)
-typedef struct { STATE state; STRNO strno; } STRASH;
+// MATCH and SUFFIX are the top 2 bits of a TRAN:
+enum {
+ IS_MATCH = (TRAN)1 << (8*sizeof(TRAN) - 1),
+ IS_SUFFIX = (TRAN)1 << (8*sizeof(TRAN) - 2),
+ T_FLAGS = IS_MATCH | IS_SUFFIX
+};
-typedef enum { BASE=2, USED=1 } USES;
+typedef struct { STATE state; STRNO strno; } STRASH;
struct acism {
TRAN* tranv;
@@ -70,34 +90,25 @@ struct acism {
#include "acism.h"
// p_size: size of tranv + hashv
-static inline unsigned p_size(ac_trie_t const *psp)
+static inline size_t p_size(ACISM const *psp)
{ return psp->hash_size * sizeof*psp->hashv
- + psp->tran_size * sizeof*psp->tranv; }
+ + psp->tran_size * sizeof*psp->tranv; }
-static inline unsigned p_hash(ac_trie_t const *psp, STATE s)
- { return s * 107 % psp->hash_mod; }
+static inline unsigned p_hash(ACISM const *psp, STATE s)
+{ return s * 107 % psp->hash_mod; }
-static inline void set_tranv(ac_trie_t *psp, void *mem)
- { psp->hashv = (STRASH*)&(psp->tranv = mem)[psp->tran_size]; }
+static inline void set_tranv(ACISM *psp, void *mem)
+{ psp->hashv = (STRASH*)&(psp->tranv = (TRAN*)mem)[psp->tran_size]; }
// TRAN accessors. For ACISM_SIZE=8, SYM_{BITS,MASK} do not use psp.
-static inline TRAN p_tran(ac_trie_t const *psp, STATE s, _SYMBOL sym)
- { return psp->tranv[s + sym] ^ sym; }
-
-static inline _SYMBOL t_sym(ac_trie_t const *psp, TRAN t)
- { (void)psp; return t & SYM_MASK; }
-
-static inline STATE t_next(ac_trie_t const *psp, TRAN t)
- { (void)psp; return (t & ~T_FLAGS) >> SYM_BITS; }
-
-static inline int t_isleaf(ac_trie_t const *psp, TRAN t)
- { return t_next(psp, t) >= psp->tran_size; }
-
-static inline int t_strno(ac_trie_t const *psp, TRAN t)
- { return t_next(psp, t) - psp->tran_size; }
+static inline TRAN p_tran(ACISM const *psp, STATE s, _SYMBOL sym)
+{ return psp->tranv[s + sym] ^ sym; }
-static inline _SYMBOL t_valid(ac_trie_t const *psp, TRAN t)
- { return !t_sym(psp, t); }
+static inline _SYMBOL t_sym(ACISM const *psp, TRAN t) { (void)psp; return t & SYM_MASK; }
+static inline STATE t_next(ACISM const *psp, TRAN t) { (void)psp; return (t & ~T_FLAGS) >> SYM_BITS; }
+static inline int t_isleaf(ACISM const *psp, TRAN t) { return t_next(psp, t) >= psp->tran_size; }
+static inline int t_strno(ACISM const *psp, TRAN t) { return t_next(psp, t) - psp->tran_size; }
+static inline _SYMBOL t_valid(ACISM const *psp, TRAN t) { return !t_sym(psp, t); }
-#endif//_ACISM_H
+#endif//_ACISM_H \ No newline at end of file
diff --git a/contrib/aho-corasick/acism.h b/contrib/aho-corasick/acism.h
index d942fd413..b5a3b1a5c 100644
--- a/contrib/aho-corasick/acism.h
+++ b/contrib/aho-corasick/acism.h
@@ -25,9 +25,11 @@
// "acism" uses MEMREF {ptr,len} bytevec structs for "string" args,
// rather than NUL-terminated "C" strings.
-typedef struct { char const *ptr; size_t len; } ac_trie_pat_t;
+typedef struct ac_trie_pat_s { char const *ptr; size_t len; } ac_trie_pat_t;
typedef struct acism ac_trie_t;
+typedef struct acism ACISM;
+typedef struct ac_trie_pat_s MEMREF;
ac_trie_t* acism_create(ac_trie_pat_t const *strv, int nstrs);
void acism_destroy(ac_trie_t*);
diff --git a/contrib/aho-corasick/acism_create.c b/contrib/aho-corasick/acism_create.c
index 15f9e801e..6b842cf3b 100644
--- a/contrib/aho-corasick/acism_create.c
+++ b/contrib/aho-corasick/acism_create.c
@@ -2,7 +2,7 @@
** Copyright (C) 2009-2014 Mischa Sandberg <mischasan@gmail.com>
**
** This program is free software; you can redistribute it and/or modify
-** it under the terms of the GNU Lesser General Public License Version as
+** it under the terms of the GNU Lesser General Public License Version 3 as
** published by the Free Software Foundation. You may not use, modify or
** distribute this program under any other version of the GNU Lesser General
** Public License.
@@ -18,11 +18,17 @@
*/
#include "_acism.h"
+typedef enum { BASE=2, USED=1 } USES;
+
typedef struct tnode {
struct tnode *child, *next, *back;
- union { unsigned nrefs; STATE state; } x;
- STRNO match;
- SYMBOL sym, is_suffix;
+ // nrefs was used in "prune_backlinks".
+ // It will be used again in "curtail".
+ unsigned nrefs;
+ STATE state;
+ STRNO match;
+ SYMBOL sym;
+ char is_suffix; // "bool"
} TNODE;
//--------------|---------------------------------------------
@@ -37,22 +43,23 @@ static inline int bitwid(unsigned u)
if (u & 0x00000002) ret++;
return ret;
}
-static void fill_symv(ac_trie_t*, ac_trie_pat_t const*, int ns);
-static int create_tree(TNODE*, SYMBOL const*symv, ac_trie_pat_t const*strv, int nstrs);
+
+static void fill_symv(ACISM*, MEMREF const*, int ns);
+static int create_tree(TNODE*, SYMBOL const*symv, MEMREF const*strv, int nstrs);
static void add_backlinks(TNODE*, TNODE**, TNODE**);
static int interleave(TNODE*, int nnodes, int nsyms, TNODE**, TNODE**);
-static void fill_tranv(ac_trie_t*, TNODE const*);
-static void fill_hashv(ac_trie_t*, TNODE const*, int nn);
+static void fill_tranv(ACISM*, TNODE const*);
+static void fill_hashv(ACISM*, TNODE const*, int nn);
static TNODE* find_child(TNODE*, SYMBOL);
// (ns) is either a STATE, or a (STRNO + tran_size)
static inline void
-set_tran(ac_trie_t *psp, STATE s, SYMBOL sym, int match, int suffix, TRAN ns)
+set_tran(ACISM *psp, STATE s, SYMBOL sym, int match, int suffix, TRAN ns)
{
- psp->tranv[s + sym] = sym | (match ? IS_MATCH : 0)
- | (suffix ? IS_SUFFIX : 0)
- | (ns << SYM_BITS);
+ psp->tranv[s + sym] = sym | (match ? IS_MATCH : 0)
+ | (suffix ? IS_SUFFIX : 0)
+ | (ns << SYM_BITS);
}
// Track statistics for construction
@@ -67,27 +74,26 @@ extern PSSTAT psstat[];
#endif //ACISM_STATS
//--------------|---------------------------------------------
-ac_trie_t*
-acism_create(ac_trie_pat_t const* strv, int nstrs)
+ACISM*
+acism_create(MEMREF const* strv, int nstrs)
{
TNODE **v1 = NULL, **v2 = NULL;
- ac_trie_t *psp = g_malloc0(sizeof(*psp));
+ ACISM *psp = g_malloc0(sizeof*psp);
fill_symv(psp, strv, nstrs);
- TNODE *troot = g_new0(TNODE, psp->nchars + 1);
+ TNODE *troot = g_malloc0((psp->nchars + 1) * sizeof(*troot));
int nnodes = create_tree(troot, psp->symv, strv, nstrs);
NOTE(nnodes);
// v1, v2: breadth-first work vectors for add_backlink and interleave.
int i = (nstrs + 1) * sizeof(TNODE);
- add_backlinks(troot, v1 = g_malloc(i), v2 = g_malloc(i));
+ add_backlinks(troot, v1 = g_malloc0(i), v2 = g_malloc0(i));
+
int nhash = 0;
TNODE* tp = troot + nnodes;
-
- while (--tp > troot) {
+ while (--tp > troot)
nhash += tp->match && tp->child;
- }
// Calculate each node's offset in tranv[]:
psp->tran_size = interleave(troot, nnodes, psp->nsyms, v1, v2);
@@ -102,7 +108,7 @@ acism_create(ac_trie_pat_t const* strv, int nstrs)
psp->hash_size = psp->hash_mod + nhash;
}
- set_tranv(psp, g_malloc0(p_size(psp)));
+ set_tranv(psp, g_malloc0(p_size(psp) + sizeof(TRAN)));
if (!psp->tranv) goto FAIL;
fill_tranv(psp, troot);
// The root state (0) must not look like a valid backref.
@@ -119,14 +125,14 @@ acism_create(ac_trie_pat_t const* strv, int nstrs)
set_tranv(psp, g_realloc(psp->tranv, p_size(psp)));
}
- // Diagnostics/statistics only:
+ // Diagnostics/statistics only:
psp->nstrs = nstrs;
for (i = psp->maxlen = 0; i < nstrs; ++i)
if (psp->maxlen < strv[i].len) psp->maxlen = strv[i].len;
goto DONE;
-FAIL: acism_destroy(psp), psp = NULL;
-DONE: g_free(troot), g_free(v1), g_free(v2);
+ FAIL: acism_destroy(psp), psp = NULL;
+ DONE: free(troot), free(v1), free(v2);
return psp;
}
@@ -134,7 +140,7 @@ typedef struct { int freq, rank; } FRANK;
static int frcmp(FRANK*a, FRANK*b) { return a->freq - b->freq; }
static void
-fill_symv(ac_trie_t *psp, ac_trie_pat_t const *strv, int nstrs)
+fill_symv(ACISM *psp, MEMREF const *strv, int nstrs)
{
int i, j;
FRANK frv[256];
@@ -149,14 +155,15 @@ fill_symv(ac_trie_t *psp, ac_trie_pat_t const *strv, int nstrs)
for (i = 256; --i >= 0 && frv[i].freq;)
psp->symv[frv[i].rank] = ++psp->nsyms;
++psp->nsyms;
+
#if ACISM_SIZE < 8
psp->sym_bits = bitwid(psp->nsyms);
- psp->sym_mask = ~((~(uint32_t)0u) << psp->sym_bits);
+ psp->sym_mask = ~(-1 << psp->sym_bits);
#endif
}
static int
-create_tree(TNODE *Tree, SYMBOL const *symv, ac_trie_pat_t const *strv, int nstrs)
+create_tree(TNODE *Tree, SYMBOL const *symv, MEMREF const *strv, int nstrs)
{
int i, j;
TNODE *nextp = Tree + 1;
@@ -209,30 +216,30 @@ add_backlinks(TNODE *troot, TNODE **v1, TNODE **v2)
while (*v1) {
TNODE **spp = v1, **dpp = v2, *srcp, *dstp;
+
while ((srcp = *spp++)) {
for (dstp = srcp->child; dstp; dstp = dstp->next) {
TNODE *bp = NULL;
-
if (dstp->child)
*dpp++ = dstp;
// Go through the parent (srcp) node's backlink chain,
// looking for a useful backlink for the child (dstp).
- // If the parent (srcp) has a backlink to (tp), and (tp) has a child (with children)
- // matching the transition sym for (srcp -> dstp),
- // then it is a useful backlink for the child (dstp).
+ // If the parent (srcp) has a backlink to (tp),
+ // and (tp) has a child matching the transition sym
+ // for (srcp -> dstp), then it is a useful backlink
+ // for the child (dstp).
// Note that backlinks do not point at the suffix match;
// they point at the PARENT of that match.
for (tp = srcp->back; tp; tp = tp->back)
- if ((bp = find_child(tp, dstp->sym)) && bp->child)
+ if ((bp = find_child(tp, dstp->sym)))
break;
-
if (!bp)
bp = troot;
dstp->back = dstp->child ? bp : tp ? tp : troot;
- dstp->back->x.nrefs++;
+ dstp->back->nrefs++;
dstp->is_suffix = bp->match || bp->is_suffix;
}
}
@@ -245,8 +252,8 @@ static int
interleave(TNODE *troot, int nnodes, int nsyms, TNODE **v1, TNODE **v2)
{
unsigned usev_size = nnodes + nsyms;
- char *usev = g_new0(char, usev_size);
- STATE last_trans = 0, startv[nsyms][2];
+ char *usev = g_malloc0(usev_size * sizeof(*usev));
+ STATE last_trans = 0, startv[257][2];
TNODE *cp, **tmp;
memset(startv, 0, nsyms * sizeof*startv);
@@ -287,7 +294,7 @@ interleave(TNODE *troot, int nnodes, int nsyms, TNODE **v1, TNODE **v2)
// No child needs an in-use slot? We're done.
if (!cp) break;
}
- tp->x.state = pos;
+ tp->state = pos;
// Mark node's base and children as used:
usev[pos] |= need;
@@ -302,15 +309,9 @@ interleave(TNODE *troot, int nnodes, int nsyms, TNODE **v1, TNODE **v2)
if (last_trans < last) {
last_trans = last;
if (last + nsyms >= usev_size) {
- char *tmp = g_realloc(usev, usev_size << 1);
- if (tmp != NULL) {
- usev = tmp;
- memset(usev + usev_size, 0, usev_size);
- usev_size <<= 1;
- } else {
- g_free(usev);
- /* And handle error */
- }
+ usev = g_realloc(usev, usev_size << 1);
+ memset(usev + usev_size, 0, usev_size);
+ usev_size <<= 1;
}
}
}
@@ -318,19 +319,19 @@ interleave(TNODE *troot, int nnodes, int nsyms, TNODE **v1, TNODE **v2)
*dstp = NULL;
}
- g_free(usev);
+ free(usev);
return last_trans + 1;
}
static void
-fill_hashv(ac_trie_t *psp, TNODE const treev[], int nnodes)
+fill_hashv(ACISM *psp, TNODE const treev[], int nnodes)
{
- STRASH *sv = g_malloc(psp->hash_mod * sizeof*sv), *sp = sv;
+ STRASH *sv = g_malloc0(psp->hash_mod * sizeof*sv), *sp = sv;
int i;
// First pass: insert without resolving collisions.
for (i = 0; i < nnodes; ++i) {
- STATE base = treev[i].x.state;
+ STATE base = treev[i].state;
TNODE const *tp;
for (tp = treev[i].child; tp; tp = tp->next) {
if (tp->match && tp->child) {
@@ -348,21 +349,21 @@ fill_hashv(ac_trie_t *psp, TNODE const treev[], int nnodes)
psp->hashv[i] = *sp;
}
- g_free(sv);
+ free(sv);
}
static void
-fill_tranv(ac_trie_t *psp, TNODE const*tp)
+fill_tranv(ACISM *psp, TNODE const*tp)
{
TNODE const *cp = tp->child;
if (cp && tp->back)
- set_tran(psp, tp->x.state, 0, 0, 0, tp->back->x.state);
+ set_tran(psp, tp->state, 0, 0, 0, tp->back->state);
for (; cp; cp = cp->next) {
//NOTE: cp->match is (strno+1) so that !cp->match means "no match".
- set_tran(psp, tp->x.state, cp->sym, cp->match, cp->is_suffix,
- cp->child ? cp->x.state : cp->match - 1 + psp->tran_size);
+ set_tran(psp, tp->state, cp->sym, cp->match, cp->is_suffix,
+ cp->child ? cp->state : cp->match - 1 + psp->tran_size);
if (cp->child)
fill_tranv(psp, cp);
}
@@ -378,4 +379,4 @@ find_child(TNODE *tp, SYMBOL sym)
#ifdef ACISM_STATS
PSSTAT psstat[__LINE__] = {{__LINE__,0}};
#endif//ACISM_STATS
-//EOF
+//EOF \ No newline at end of file