]> source.dussan.org Git - rspamd.git/commitdiff
[Fix] Fix trie code when there are regexps and Hyperscan is absent
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 10 Sep 2019 11:34:52 +0000 (12:34 +0100)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 10 Sep 2019 11:34:52 +0000 (12:34 +0100)
src/libutil/multipattern.c

index b2cdc66454a172f1ddabfd7731249cdf9095af09..0fc0289699a25fc905055f14eb44acbf2a242707 100644 (file)
@@ -25,6 +25,7 @@
 #include "hs.h"
 #endif
 #include "acism.h"
+#include "libutil/regexp.h"
 #include <stdalign.h>
 
 #define MAX_SCRATCH 4
@@ -51,6 +52,7 @@ struct RSPAMD_ALIGNED(64) rspamd_multipattern {
 #endif
        ac_trie_t *t;
        GArray *pats;
+       GArray *res;
 
        gboolean compiled;
        guint cnt;
@@ -192,14 +194,14 @@ rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len,
                gsize *dst_len)
 {
        gchar *ret = NULL;
-#ifdef WITH_HYPERSCAN
-       if (rspamd_hs_check ()) {
-               gint gl_flags = RSPAMD_REGEXP_ESCAPE_ASCII;
+       gint gl_flags = RSPAMD_REGEXP_ESCAPE_ASCII;
 
-               if (flags & RSPAMD_MULTIPATTERN_UTF8) {
-                       gl_flags |= RSPAMD_REGEXP_ESCAPE_UTF;
-               }
+       if (flags & RSPAMD_MULTIPATTERN_UTF8) {
+               gl_flags |= RSPAMD_REGEXP_ESCAPE_UTF;
+       }
 
+#ifdef WITH_HYPERSCAN
+       if (rspamd_hs_check ()) {
                if (flags & RSPAMD_MULTIPATTERN_TLD) {
                        gchar *tmp;
                        gsize tlen;
@@ -228,6 +230,14 @@ rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len,
        if (flags & RSPAMD_MULTIPATTERN_TLD) {
                ret = rspamd_multipattern_escape_tld_acism (pattern, len, dst_len);
        }
+       else if (flags & RSPAMD_MULTIPATTERN_RE) {
+               ret = rspamd_str_regexp_escape (pattern, len, dst_len, gl_flags |
+                                                                                                                          RSPAMD_REGEXP_ESCAPE_RE);
+       }
+       else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
+               ret = rspamd_str_regexp_escape (pattern, len, dst_len,
+                               gl_flags | RSPAMD_REGEXP_ESCAPE_GLOB);
+       }
        else {
                ret = malloc (len + 1);
                *dst_len = rspamd_strlcpy (ret, pattern, len + 1);
@@ -496,7 +506,30 @@ rspamd_multipattern_compile (struct rspamd_multipattern *mp, GError **err)
 #endif
 
        if (mp->cnt > 0) {
-               mp->t = acism_create ((const ac_trie_pat_t *)mp->pats->data, mp->cnt);
+
+               if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB|RSPAMD_MULTIPATTERN_RE)) {
+                       /* Fallback to pcre... */
+                       rspamd_regexp_t *re;
+                       mp->res = g_array_sized_new (FALSE, TRUE,
+                                       sizeof (rspamd_regexp_t *), mp->cnt);
+
+                       for (guint i = 0; i < mp->cnt; i ++) {
+                               const ac_trie_pat_t *pat;
+
+                               pat = &g_array_index (mp->pats, ac_trie_pat_t, i);
+
+                               re = rspamd_regexp_new (pat->ptr, NULL, err);
+
+                               if (re == NULL) {
+                                       return FALSE;
+                               }
+
+                               g_array_append_val (mp->res, re);
+                       }
+               }
+               else {
+                       mp->t = acism_create ((const ac_trie_pat_t *) mp->pats->data, mp->cnt);
+               }
        }
 
        mp->compiled = TRUE;
@@ -617,11 +650,30 @@ rspamd_multipattern_lookup (struct rspamd_multipattern *mp,
 
        gint state = 0;
 
-       ret = acism_lookup (mp->t, in, len, rspamd_multipattern_acism_cb, &cbd,
-                       &state, mp->flags & RSPAMD_MULTIPATTERN_ICASE);
+       if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB|RSPAMD_MULTIPATTERN_RE)) {
+               /* Terribly inefficient, but who cares - just use hyperscan */
+               for (guint i = 0; i < mp->cnt; i ++) {
+                       rspamd_regexp_t *re = g_array_index (mp->res, rspamd_regexp_t *, i);
+                       const gchar *start = NULL, *end = NULL;
+
+                       while (rspamd_regexp_search (re,
+                                       in,
+                                       len,
+                                       &start,
+                                       &end,
+                                       TRUE,
+                                       NULL)) {
+                               ret = rspamd_multipattern_acism_cb (i, end - in, &cbd);
+                       }
+               }
+       }
+       else {
+               ret = acism_lookup (mp->t, in, len, rspamd_multipattern_acism_cb, &cbd,
+                               &state, mp->flags & RSPAMD_MULTIPATTERN_ICASE);
 
-       if (pnfound) {
-               *pnfound = cbd.nfound;
+               if (pnfound) {
+                       *pnfound = cbd.nfound;
+               }
        }
 
        return ret;