From d0974f01f91da985d7646f6ef64fed1e053c64b2 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 10 Sep 2019 12:34:52 +0100 Subject: [PATCH] [Fix] Fix trie code when there are regexps and Hyperscan is absent --- src/libutil/multipattern.c | 74 ++++++++++++++++++++++++++++++++------ 1 file changed, 63 insertions(+), 11 deletions(-) diff --git a/src/libutil/multipattern.c b/src/libutil/multipattern.c index b2cdc6645..0fc028969 100644 --- a/src/libutil/multipattern.c +++ b/src/libutil/multipattern.c @@ -25,6 +25,7 @@ #include "hs.h" #endif #include "acism.h" +#include "libutil/regexp.h" #include #define MAX_SCRATCH 4 @@ -51,6 +52,7 @@ struct RSPAMD_ALIGNED(64) rspamd_multipattern { #endif ac_trie_t *t; GArray *pats; + GArray *res; gboolean compiled; guint cnt; @@ -192,14 +194,14 @@ rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len, gsize *dst_len) { gchar *ret = NULL; -#ifdef WITH_HYPERSCAN - if (rspamd_hs_check ()) { - gint gl_flags = RSPAMD_REGEXP_ESCAPE_ASCII; + gint gl_flags = RSPAMD_REGEXP_ESCAPE_ASCII; - if (flags & RSPAMD_MULTIPATTERN_UTF8) { - gl_flags |= RSPAMD_REGEXP_ESCAPE_UTF; - } + if (flags & RSPAMD_MULTIPATTERN_UTF8) { + gl_flags |= RSPAMD_REGEXP_ESCAPE_UTF; + } +#ifdef WITH_HYPERSCAN + if (rspamd_hs_check ()) { if (flags & RSPAMD_MULTIPATTERN_TLD) { gchar *tmp; gsize tlen; @@ -228,6 +230,14 @@ rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len, if (flags & RSPAMD_MULTIPATTERN_TLD) { ret = rspamd_multipattern_escape_tld_acism (pattern, len, dst_len); } + else if (flags & RSPAMD_MULTIPATTERN_RE) { + ret = rspamd_str_regexp_escape (pattern, len, dst_len, gl_flags | + RSPAMD_REGEXP_ESCAPE_RE); + } + else if (flags & RSPAMD_MULTIPATTERN_GLOB) { + ret = rspamd_str_regexp_escape (pattern, len, dst_len, + gl_flags | RSPAMD_REGEXP_ESCAPE_GLOB); + } else { ret = malloc (len + 1); *dst_len = rspamd_strlcpy (ret, pattern, len + 1); @@ -496,7 +506,30 @@ rspamd_multipattern_compile (struct rspamd_multipattern *mp, GError **err) #endif if (mp->cnt > 0) { - mp->t = acism_create ((const ac_trie_pat_t *)mp->pats->data, mp->cnt); + + if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB|RSPAMD_MULTIPATTERN_RE)) { + /* Fallback to pcre... */ + rspamd_regexp_t *re; + mp->res = g_array_sized_new (FALSE, TRUE, + sizeof (rspamd_regexp_t *), mp->cnt); + + for (guint i = 0; i < mp->cnt; i ++) { + const ac_trie_pat_t *pat; + + pat = &g_array_index (mp->pats, ac_trie_pat_t, i); + + re = rspamd_regexp_new (pat->ptr, NULL, err); + + if (re == NULL) { + return FALSE; + } + + g_array_append_val (mp->res, re); + } + } + else { + mp->t = acism_create ((const ac_trie_pat_t *) mp->pats->data, mp->cnt); + } } mp->compiled = TRUE; @@ -617,11 +650,30 @@ rspamd_multipattern_lookup (struct rspamd_multipattern *mp, gint state = 0; - ret = acism_lookup (mp->t, in, len, rspamd_multipattern_acism_cb, &cbd, - &state, mp->flags & RSPAMD_MULTIPATTERN_ICASE); + if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB|RSPAMD_MULTIPATTERN_RE)) { + /* Terribly inefficient, but who cares - just use hyperscan */ + for (guint i = 0; i < mp->cnt; i ++) { + rspamd_regexp_t *re = g_array_index (mp->res, rspamd_regexp_t *, i); + const gchar *start = NULL, *end = NULL; + + while (rspamd_regexp_search (re, + in, + len, + &start, + &end, + TRUE, + NULL)) { + ret = rspamd_multipattern_acism_cb (i, end - in, &cbd); + } + } + } + else { + ret = acism_lookup (mp->t, in, len, rspamd_multipattern_acism_cb, &cbd, + &state, mp->flags & RSPAMD_MULTIPATTERN_ICASE); - if (pnfound) { - *pnfound = cbd.nfound; + if (pnfound) { + *pnfound = cbd.nfound; + } } return ret; -- 2.39.5