source.dussan.org Git - rspamd.git/commitdiff
[Project] Add initial support for selectors in regexps
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 15 Sep 2018 11:38:45 +0000 (12:38 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 15 Sep 2018 11:38:45 +0000 (12:38 +0100)
src/libmime/mime_expressions.c
src/libserver/re_cache.c
src/libserver/re_cache.h

index 518b9a390943666fbd50d97a4787d57e5935cdd3..bd1c8cb0014b5086c884644dec9fa547deb9287d 100644 (file)
@@ -94,7 +94,10 @@ struct rspamd_regexp_atom {
        enum rspamd_re_type type;                       /**< regexp type                                                                                */
        gchar *regexp_text;                             /**< regexp text representation                                                 */
        rspamd_regexp_t *regexp;                        /**< regexp structure                                                                   */
-       gchar *header;                                  /**< header name for header regexps                                             */
+       union {
+               const gchar *header;                        /**< header name for header regexps                                         */
+               const gchar *selector;                      /**< selector name for lua selector regexp                          */
+       } extra;
        gboolean is_test;                               /**< true if this expression must be tested                             */
        gboolean is_strong;                             /**< true if headers search must be case sensitive              */
        gboolean is_multiple;                           /**< true if we need to match all inclusions of atom    */
@@ -236,6 +239,10 @@ rspamd_parse_long_option (const gchar *start, gsize len,
                ret = TRUE;
                a->type = RSPAMD_RE_SARAWBODY;
        }
+       else if (TYPE_CHECK (start, "selector", len)) {
+               ret = TRUE;
+               a->type = RSPAMD_RE_SELECTOR;
+       }
 
        return ret;
 }
@@ -248,7 +255,7 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line,
                struct rspamd_config *cfg)
 {
        const gchar *begin, *end, *p, *src, *start, *brace;
-       gchar *dbegin, *dend;
+       gchar *dbegin, *dend, *extra = NULL;
        struct rspamd_regexp_atom *result;
        GError *err = NULL;
        GString *re_flags;
@@ -268,6 +275,9 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line,
                msg_warn_pool ("got empty regexp");
                return NULL;
        }
+
+       result->type = RSPAMD_RE_MAX;
+
        start = line;
        /* First try to find header name */
        begin = strchr (line, '/');
@@ -281,15 +291,15 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line,
                        }
                        p--;
                }
+
                if (end) {
-                       result->header = rspamd_mempool_alloc (pool, end - line + 1);
-                       rspamd_strlcpy (result->header, line, end - line + 1);
-                       result->type = RSPAMD_RE_HEADER;
+                       extra = rspamd_mempool_alloc (pool, end - line + 1);
+                       rspamd_strlcpy (extra, line, end - line + 1);
                        line = end;
                }
        }
        else {
-               result->header = rspamd_mempool_strdup (pool, line);
+               extra = rspamd_mempool_strdup (pool, line);
                result->type = RSPAMD_RE_MAX;
                line = start;
        }
@@ -300,9 +310,9 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line,
        if (*line != '\0') {
                begin = line + 1;
        }
-       else if (result->header == NULL) {
+       else if (extra == NULL) {
                /* Assume that line without // is just a header name */
-               result->header = rspamd_mempool_strdup (pool, line);
+               extra = rspamd_mempool_strdup (pool, line);
                result->type = RSPAMD_RE_HEADER;
                return result;
        }
@@ -382,6 +392,10 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line,
                        result->type = RSPAMD_RE_RAWHEADER;
                        p++;
                        break;
+               case '$':
+                       result->type = RSPAMD_RE_SELECTOR;
+                       p++;
+                       break;
                case '{':
                        /* Long definition */
                        if ((brace = strchr (p + 1, '}')) != NULL) {
@@ -425,10 +439,24 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line,
 
        if ((result->type == RSPAMD_RE_HEADER ||
                        result->type == RSPAMD_RE_RAWHEADER ||
-                       result->type == RSPAMD_RE_MIMEHEADER) &&
-                       result->header == NULL) {
-               msg_err_pool ("header regexp: '%s' has no header part", src);
-               return NULL;
+                       result->type == RSPAMD_RE_MIMEHEADER)) {
+               if (extra == NULL) {
+                       msg_err_pool ("header regexp: '%s' has no header part", src);
+                       return NULL;
+               }
+               else {
+                       result->extra.header = extra;
+               }
+       }
+
+       if (result->type == RSPAMD_RE_SELECTOR) {
+               if (extra == NULL) {
+                       msg_err_pool ("selector regexp: '%s' has no selector part", src);
+                       return NULL;
+               }
+               else {
+                       result->extra.selector = extra;
+               }
        }
 
 
@@ -734,13 +762,35 @@ set:
                                        mime_atom->d.re->type == RSPAMD_RE_RAWHEADER ||
                                        mime_atom->d.re->type == RSPAMD_RE_MIMEHEADER) {
 
-                               if (mime_atom->d.re->header != NULL) {
+                               if (mime_atom->d.re->extra.header != NULL) {
+                                       own_re = mime_atom->d.re->regexp;
+                                       mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
+                                                       mime_atom->d.re->regexp,
+                                                       mime_atom->d.re->type,
+                                                       mime_atom->d.re->extra.header,
+                                                       strlen (mime_atom->d.re->extra.header) + 1);
+                                       /* Pass ownership to the cache */
+                                       rspamd_regexp_unref (own_re);
+                               }
+                               else {
+                                       /* We have header regexp, but no header name is detected */
+                                       g_set_error (err,
+                                                       rspamd_mime_expr_quark (),
+                                                       200,
+                                                       "no header name in header regexp: '%s'",
+                                                       mime_atom->str);
+                                       goto err;
+                               }
+
+                       }
+                       else if (mime_atom->d.re->type == RSPAMD_RE_SELECTOR) {
+                               if (mime_atom->d.re->extra.selector != NULL) {
                                        own_re = mime_atom->d.re->regexp;
                                        mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
                                                        mime_atom->d.re->regexp,
                                                        mime_atom->d.re->type,
-                                                       mime_atom->d.re->header,
-                                                       strlen (mime_atom->d.re->header) + 1);
+                                                       mime_atom->d.re->extra.selector,
+                                                       strlen (mime_atom->d.re->extra.selector) + 1);
                                        /* Pass ownership to the cache */
                                        rspamd_regexp_unref (own_re);
                                }
@@ -749,7 +799,7 @@ set:
                                        g_set_error (err,
                                                        rspamd_mime_expr_quark (),
                                                        200,
-                                                       "no header name in /H regexp: '%s'",
+                                                       "no selector name in selector regexp: '%s'",
                                                        mime_atom->str);
                                        goto err;
                                }
@@ -817,8 +867,16 @@ rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
                ret = rspamd_re_cache_process (task,
                                re->regexp,
                                re->type,
-                               re->header,
-                               strlen (re->header),
+                               re->extra.header,
+                               strlen (re->extra.header),
+                               re->is_strong);
+       }
+       else if (re->type == RSPAMD_RE_SELECTOR) {
+               ret = rspamd_re_cache_process (task,
+                               re->regexp,
+                               re->type,
+                               re->extra.selector,
+                               strlen (re->extra.selector),
                                re->is_strong);
        }
        else {
index 8c4cceaff86c47977bead1afcaaa62f1ac850cd3..d8f7f3d0dd80ce1c68db7c20d07989df1953ea16 100644 (file)
@@ -128,7 +128,7 @@ rspamd_re_cache_quark (void)
 
 static guint64
 rspamd_re_cache_class_id (enum rspamd_re_type type,
-               gpointer type_data,
+               gconstpointer type_data,
                gsize datalen)
 {
        rspamd_cryptobox_fast_hash_state_t st;
@@ -221,7 +221,7 @@ rspamd_re_cache_is_hs_loaded (struct rspamd_re_cache *cache)
 
 rspamd_regexp_t *
 rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
-               enum rspamd_re_type type, gpointer type_data, gsize datalen)
+               enum rspamd_re_type type, gconstpointer type_data, gsize datalen)
 {
        guint64 class_id;
        struct rspamd_re_class *re_class;
@@ -1078,7 +1078,7 @@ gint
 rspamd_re_cache_process (struct rspamd_task *task,
                rspamd_regexp_t *re,
                enum rspamd_re_type type,
-               gpointer type_data,
+               gconstpointer type_data,
                gsize datalen,
                gboolean is_strong)
 {
index a138bcb4bd67a4faef148c12d96df505c2e6ce9d..90acd150146cca8ebfd480731bce06ac139336e7 100644 (file)
@@ -35,6 +35,7 @@ enum rspamd_re_type {
        RSPAMD_RE_BODY, /* full in SA */
        RSPAMD_RE_SABODY, /* body in SA */
        RSPAMD_RE_SARAWBODY, /* rawbody in SA */
+       RSPAMD_RE_SELECTOR, /* use lua selector to process regexp */
        RSPAMD_RE_MAX
 };
 
@@ -61,8 +62,9 @@ struct rspamd_re_cache *rspamd_re_cache_new (void);
  * @param datalen associated data length
  */
 rspamd_regexp_t *
-               rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
-               enum rspamd_re_type type, gpointer type_data, gsize datalen);
+rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
+                                        enum rspamd_re_type type,
+                                        gconstpointer type_data, gsize datalen);
 
 /**
  * Replace regexp in the cache with another regexp
@@ -111,7 +113,7 @@ const struct rspamd_re_cache_stat *
 gint rspamd_re_cache_process (struct rspamd_task *task,
                rspamd_regexp_t *re,
                enum rspamd_re_type type,
-               gpointer type_data,
+               gconstpointer type_data,
                gsize datalen,
                gboolean is_strong);