source.dussan.org Git - rspamd.git/commitdiff
Start moving to the rspamd regexps.
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 7 Mar 2015 22:00:14 +0000 (22:00 +0000)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 7 Mar 2015 22:00:14 +0000 (22:00 +0000)
src/libmime/expressions.c
src/libserver/cfg_file.h
src/libutil/regexp.c
src/plugins/regexp.c

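diff --git a/src/libmime/expressions.c b/src/libmime/expressions.c
index 769b7dc14d38226bb2da7a25083516720b358a13..aab78df5c4f501c12cc0f9863b19384226b3701e 100644
--- a/src/libmime/expressions.c
+++ b/src/libmime/expressions.c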
@@ -652,9 +652,9 @@ parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode)
 {
        const gchar *begin, *end, *p, *src, *start;
        gchar *dbegin, *dend;
-       struct rspamd_regexp_element *result, *check;
-       gint regexp_flags = G_REGEX_OPTIMIZE | G_REGEX_NO_AUTO_CAPTURE;
+       struct rspamd_regexp_element *result;
        GError *err = NULL;
+       GString *re_flags;
 
        if (line == NULL) {
                msg_err ("cannot parse NULL line");
@@ -727,35 +727,20 @@ parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode)
        }
        /* Parse flags */
        p = end + 1;
+       re_flags = g_string_sized_new (32);
        while (p != NULL) {
                switch (*p) {
                case 'i':
-                       regexp_flags |= G_REGEX_CASELESS;
-                       p++;
-                       break;
                case 'm':
-                       regexp_flags |= G_REGEX_MULTILINE;
-                       p++;
-                       break;
                case 's':
-                       regexp_flags |= G_REGEX_DOTALL;
-                       p++;
-                       break;
                case 'x':
-                       regexp_flags |= G_REGEX_EXTENDED;
-                       p++;
-                       break;
                case 'u':
-                       regexp_flags |= G_REGEX_UNGREEDY;
+               case 'O':
+               case 'r':
+                       g_string_append_c (re_flags, *p);
                        p++;
                        break;
                case 'o':
-                       regexp_flags |= G_REGEX_OPTIMIZE;
-                       p++;
-                       break;
-               case 'r':
-                       regexp_flags |= G_REGEX_RAW;
-                       result->is_raw = TRUE;
                        p++;
                        break;
                /* Type flags */
@@ -810,61 +795,27 @@ parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode)
        *dend = '\0';
 
        if (raw_mode) {
-               regexp_flags |= G_REGEX_RAW;
-       }
-
-       /* Avoid multiply regexp structures for similar regexps */
-       if ((check =
-               (struct rspamd_regexp_element *)re_cache_check (result->regexp_text,
-               pool)) != NULL) {
-               /* Additional check for headers */
-               if (result->type == REGEXP_HEADER || result->type ==
-                       REGEXP_RAW_HEADER) {
-                       if (result->header && check->header) {
-                               if (strcmp (result->header, check->header) == 0) {
-                                       return check;
-                               }
-                       }
-               }
-               else {
-                       return check;
-               }
-       }
-       result->regexp = g_regex_new (dbegin, regexp_flags, 0, &err);
-       if ((regexp_flags & G_REGEX_RAW) != 0) {
-               result->raw_regexp = result->regexp;
-       }
-       else {
-               result->raw_regexp = g_regex_new (dbegin,
-                               regexp_flags | G_REGEX_RAW,
-                               0,
-                               &err);
-               rspamd_mempool_add_destructor (pool,
-                       (rspamd_mempool_destruct_t) g_regex_unref,
-                       (void *)result->raw_regexp);
+               g_string_append_c (re_flags, 'r');
        }
-       rspamd_mempool_add_destructor (pool,
-               (rspamd_mempool_destruct_t) g_regex_unref,
-               (void *)result->regexp);
 
-       *dend = '/';
+       result->regexp = rspamd_regexp_cache_create (NULL, dbegin, re_flags->str,
+                       &err);
+
+       g_string_free (re_flags, TRUE);
 
        if (result->regexp == NULL || err != NULL) {
                msg_warn ("could not read regexp: %s while reading regexp %s",
                                err ? err->message : "unknown error",
-                       src);
+                                               src);
                return NULL;
        }
 
-       if (result->raw_regexp == NULL || err != NULL) {
-               msg_warn ("could not read raw regexp: %s while reading regexp %s",
-                       err ? err->message : "unknown error",
-                       src);
-               return NULL;
-       }
+       rspamd_mempool_add_destructor (pool,
+               (rspamd_mempool_destruct_t) rspamd_regexp_unref,
+               (void *)result->regexp);
+
+       *dend = '/';
 
-       /* Add to cache for further usage */
-       re_cache_add (result->regexp_text, result, pool);
        return result;
 }
 
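diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index 8c58a4941fe07f7d279448e0cf0e5a25f9af8b04..44728afe846caff4be24c2192fe6a74445e1bf7f 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h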
@@ -12,6 +12,7 @@
 #include "symbols_cache.h"
 #include "cfg_rcl.h"
 #include "ucl.h"
+#include "regexp.h"
 
 #define DEFAULT_BIND_PORT 11333
 #define DEFAULT_CONTROL_PORT 11334
@@ -68,11 +69,9 @@ enum rspamd_log_type {
 struct rspamd_regexp_element {
        enum rspamd_regexp_type type;                   /**< regexp type                                                                                */
        gchar *regexp_text;                             /**< regexp text representation                                                 */
-       GRegex *regexp;                                 /**< glib regexp structure                                                              */
-       GRegex *raw_regexp;                             /**< glib regexp structure for raw matching                             */
+       rspamd_regexp_t *regexp;                        /**< regexp structure                                                                   */
        gchar *header;                                  /**< header name for header regexps                                             */
        gboolean is_test;                               /**< true if this expression must be tested                             */
-       gboolean is_raw;                                /**< true if this regexp is done by raw matching                */
        gboolean is_strong;                             /**< true if headers search must be case sensitive              */
 };
 
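diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c
index 6da0a663e14569ba00bf149026f7f46d96152b6c..3b76d15e0ba501cd37ddab9d01ebcc4c8a6298a4 100644
--- a/src/libutil/regexp.c
+++ b/src/libutil/regexp.c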
@@ -296,6 +296,10 @@ rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len,
        g_assert (re != NULL);
        g_assert (text != NULL);
 
+       if (len == 0) {
+               len = strlen (text);
+       }
+
        if (end != NULL && *end != NULL) {
                /* Incremental search */
                mt = (*end);
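diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index 848fdfdb2eecdd452b5f50ebfd6b2899f4a65d5b..224d40b21aefbb679f6203adef26bf81c6af7b93 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c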
@@ -369,7 +369,7 @@ regexp_module_reconfig (struct rspamd_config *cfg)
 
 struct url_regexp_param {
        struct rspamd_task *task;
-       GRegex *regexp;
+       rspamd_regexp_t *regexp;
        struct rspamd_regexp_element *re;
        gboolean found;
 };
@@ -379,10 +379,9 @@ tree_url_callback (gpointer key, gpointer value, void *data)
 {
        struct url_regexp_param *param = data;
        struct rspamd_url *url = value;
-       GError *err = NULL;
 
-       if (g_regex_match_full (param->regexp, struri (url), -1, 0, 0, NULL,
-               &err) == TRUE) {
+       if (rspamd_regexp_search (param->regexp, struri (url), 0, NULL, NULL, FALSE)
+                       == TRUE) {
                if (G_UNLIKELY (param->re->is_test)) {
                        msg_info ("process test regexp %s for url %s returned TRUE",
                                struri (url));
@@ -395,11 +394,6 @@ tree_url_callback (gpointer key, gpointer value, void *data)
                msg_info ("process test regexp %s for url %s returned FALSE",
                        struri (url));
        }
-       if (err != NULL) {
-               msg_info ("error occured while processing regexp \"%s\": %s",
-                       param->re->regexp_text,
-                       err->message);
-       }
 
        return FALSE;
 }
@@ -413,14 +407,12 @@ process_regexp (struct rspamd_regexp_element *re,
 {
        guint8 *ct;
        gsize clen;
-       gint r, passed = 0, start, end, old;
-       gboolean matched = FALSE;
-       const gchar *in;
+       gint r, passed = 0;
+       gboolean matched = FALSE, raw = FALSE;
+       const gchar *in, *start, *end;
 
        GList *cur, *headerlist;
-       GRegex *regexp;
-       GMatchInfo *info;
-       GError *err = NULL;
+       rspamd_regexp_t *regexp;
        struct url_regexp_param callback_param = {
                .task = task,
                .re = re,
@@ -449,8 +441,8 @@ process_regexp (struct rspamd_regexp_element *re,
                                re->regexp_text,
                                additional);
                }
-               if (g_regex_match_full (re->regexp, additional, strlen (additional), 0,
-                       0, NULL, NULL) == TRUE) {
+               if (rspamd_regexp_search (re->regexp, additional, 0, NULL, NULL,
+                       FALSE) == TRUE) {
                        if (G_UNLIKELY (re->is_test)) {
                                msg_info ("result of regexp %s is true", re->regexp_text);
                        }
@@ -513,7 +505,7 @@ process_regexp (struct rspamd_regexp_element *re,
                                        re->header, rh->decoded);
                                if (re->type == REGEXP_RAW_HEADER) {
                                        in = rh->value;
-                                       regexp = re->raw_regexp;
+                                       raw = TRUE;
                                }
                                else {
                                        in = rh->decoded;
@@ -527,8 +519,7 @@ process_regexp (struct rspamd_regexp_element *re,
 
                                /* Match re */
                                if (in &&
-                                       g_regex_match_full (regexp, in, -1, 0, 0, NULL,
-                                                       &err) == TRUE) {
+                                       rspamd_regexp_search (regexp, in, 0, NULL, NULL, raw)) {
                                        if (G_UNLIKELY (re->is_test)) {
                                                msg_info (
                                                        "process test regexp %s for header %s with value '%s' returned TRUE",
@@ -555,12 +546,6 @@ process_regexp (struct rspamd_regexp_element *re,
                                                re->header,
                                                in);
                                }
-                               if (err != NULL) {
-                                       msg_info (
-                                               "error occured while processing regexp \"%s\": %s",
-                                               re->regexp_text,
-                                               err->message);
-                               }
                                cur = g_list_next (cur);
                        }
                        task_cache_add (task, re, 0);
@@ -589,14 +574,14 @@ process_regexp (struct rspamd_regexp_element *re,
                        }
                        /* Check raw flags */
                        if (part->is_raw) {
-                               regexp = re->raw_regexp;
+                               raw = TRUE;
                        }
                        else {
                                /* This time there is no need to validate anything as conversion succeed only for valid characters */
                                regexp = re->regexp;
                        }
                        /* Select data for regexp */
-                       if (re->is_raw) {
+                       if (raw) {
                                ct = part->orig->data;
                                clen = part->orig->len;
                        }
@@ -607,9 +592,10 @@ process_regexp (struct rspamd_regexp_element *re,
                        /* If we have limit, apply regexp so much times as we can */
                        if (f != NULL && limit > 1) {
                                end = 0;
+                               start = NULL;
+                               end = NULL;
                                while ((matched =
-                                       g_regex_match_full (regexp, ct + end + 1, clen - end - 1, 0,
-                                       0, &info, &err)) == TRUE) {
+                                       rspamd_regexp_search (regexp, ct, clen, &start, &end, raw))) {
                                        if (G_UNLIKELY (re->is_test)) {
                                                msg_info (
                                                        "process test regexp %s for mime part of length %d returned TRUE",
@@ -621,22 +607,10 @@ process_regexp (struct rspamd_regexp_element *re,
                                                task_cache_add (task, re, 1);
                                                return 1;
                                        }
-                                       else {
-                                               /* Match not found, skip further cycles */
-                                               old = end;
-                                               if (!g_match_info_fetch_pos (info, 0, &start,
-                                                       &end) || end <= 0) {
-                                                       break;
-                                               }
-                                               end += old;
-                                       }
-                                       g_match_info_free (info);
                                }
-                               g_match_info_free (info);
                        }
                        else {
-                               if (g_regex_match_full (regexp, ct, clen, 0, 0, NULL,
-                                       &err) == TRUE) {
+                               if (rspamd_regexp_search (regexp, ct, clen, NULL, NULL, raw)) {
                                        if (G_UNLIKELY (re->is_test)) {
                                                msg_info (
                                                        "process test regexp %s for mime part of length %d returned TRUE",
@@ -654,18 +628,13 @@ process_regexp (struct rspamd_regexp_element *re,
                                        re->regexp_text,
                                        (gint)clen);
                        }
-                       if (err != NULL) {
-                               msg_info ("error occured while processing regexp \"%s\": %s",
-                                       re->regexp_text,
-                                       err->message);
-                       }
                        cur = g_list_next (cur);
                }
                task_cache_add (task, re, 0);
                break;
        case REGEXP_MESSAGE:
                debug_task ("checking message regexp: %s", re->regexp_text);
-               regexp = re->raw_regexp;
+               raw = TRUE;
                ct = (guint8 *)task->msg.start;
                clen = task->msg.len;
 
@@ -676,10 +645,9 @@ process_regexp (struct rspamd_regexp_element *re,
                }
                /* If we have limit, apply regexp so much times as we can */
                if (f != NULL && limit > 1) {
-                       end = 0;
+                       start = end = NULL;
                        while ((matched =
-                               g_regex_match_full (regexp, ct + end + 1, clen - end - 1, 0, 0,
-                               &info, &err)) == TRUE) {
+                               rspamd_regexp_search (regexp, ct, clen, &start, &end, raw))) {
                                if (G_UNLIKELY (re->is_test)) {
                                        msg_info (
                                                "process test regexp %s for mime part of length %d returned TRUE",
@@ -690,22 +658,10 @@ process_regexp (struct rspamd_regexp_element *re,
                                        task_cache_add (task, re, 1);
                                        return 1;
                                }
-                               else {
-                                       /* Match not found, skip further cycles */
-                                       old = end;
-                                       if (!g_match_info_fetch_pos (info, 0, &start,
-                                               &end) || end <= 0) {
-                                               break;
-                                       }
-                                       old += end;
-                               }
-                               g_match_info_free (info);
                        }
-                       g_match_info_free (info);
                }
                else {
-                       if (g_regex_match_full (regexp, ct, clen, 0, 0, NULL,
-                               &err) == TRUE) {
+                       if (rspamd_regexp_search (regexp, ct, clen, NULL, NULL, raw)) {
                                if (G_UNLIKELY (re->is_test)) {
                                        msg_info (
                                                "process test regexp %s for message part of length %d returned TRUE",
@@ -723,11 +679,6 @@ process_regexp (struct rspamd_regexp_element *re,
                                re->regexp_text,
                                (gint)clen);
                }
-               if (err != NULL) {
-                       msg_info ("error occured while processing regexp \"%s\": %s",
-                               re->regexp_text,
-                               err->message);
-               }
                task_cache_add (task, re, 0);
                break;
        case REGEXP_URL: