Start moving to the rspamd regexps.

author Vsevolod Stakhov <vsevolod@highsecure.ru>

Sat, 7 Mar 2015 22:00:14 +0000 (22:00 +0000)

committer Vsevolod Stakhov <vsevolod@highsecure.ru>

Sat, 7 Mar 2015 22:00:14 +0000 (22:00 +0000)
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 7 Mar 2015 22:00:14 +0000 (22:00 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 7 Mar 2015 22:00:14 +0000 (22:00 +0000)
diff --git a/src/libmime/expressions.c b/src/libmime/expressions.c

index 769b7dc14d38226bb2da7a25083516720b358a13..aab78df5c4f501c12cc0f9863b19384226b3701e 100644 (file)
--- a/src/libmime/expressions.c
+++ b/src/libmime/expressions.c
@@ -652,9 +652,9 @@ parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode)
  {
         const gchar *begin, *end, *p, *src, *start;
         gchar *dbegin, *dend;
-       struct rspamd_regexp_element *result, *check;
-       gint regexp_flags = G_REGEX_OPTIMIZE | G_REGEX_NO_AUTO_CAPTURE;
+       struct rspamd_regexp_element *result;
         GError *err = NULL;
+       GString *re_flags;
  
         if (line == NULL) {
                 msg_err ("cannot parse NULL line");
@@ -727,35 +727,20 @@ parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode)
         }
         /* Parse flags */
         p = end + 1;
+       re_flags = g_string_sized_new (32);
         while (p != NULL) {
                 switch (*p) {
                 case 'i':
-                       regexp_flags |= G_REGEX_CASELESS;
-                       p++;
-                       break;
                 case 'm':
-                       regexp_flags |= G_REGEX_MULTILINE;
-                       p++;
-                       break;
                 case 's':
-                       regexp_flags |= G_REGEX_DOTALL;
-                       p++;
-                       break;
                 case 'x':
-                       regexp_flags |= G_REGEX_EXTENDED;
-                       p++;
-                       break;
                 case 'u':
-                       regexp_flags |= G_REGEX_UNGREEDY;
+               case 'O':
+               case 'r':
+                       g_string_append_c (re_flags, *p);
                         p++;
                         break;
                 case 'o':
-                       regexp_flags |= G_REGEX_OPTIMIZE;
-                       p++;
-                       break;
-               case 'r':
-                       regexp_flags |= G_REGEX_RAW;
-                       result->is_raw = TRUE;
                         p++;
                         break;
                 /* Type flags */
@@ -810,61 +795,27 @@ parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode)
         *dend = '\0';
  
         if (raw_mode) {
-               regexp_flags |= G_REGEX_RAW;
-       }
-
-       /* Avoid multiply regexp structures for similar regexps */
-       if ((check =
-               (struct rspamd_regexp_element *)re_cache_check (result->regexp_text,
-               pool)) != NULL) {
-               /* Additional check for headers */
-               if (result->type == REGEXP_HEADER || result->type ==
-                       REGEXP_RAW_HEADER) {
-                       if (result->header && check->header) {
-                               if (strcmp (result->header, check->header) == 0) {
-                                       return check;
-                               }
-                       }
-               }
-               else {
-                       return check;
-               }
-       }
-       result->regexp = g_regex_new (dbegin, regexp_flags, 0, &err);
-       if ((regexp_flags & G_REGEX_RAW) != 0) {
-               result->raw_regexp = result->regexp;
-       }
-       else {
-               result->raw_regexp = g_regex_new (dbegin,
-                               regexp_flags | G_REGEX_RAW,
-                               0,
-                               &err);
-               rspamd_mempool_add_destructor (pool,
-                       (rspamd_mempool_destruct_t) g_regex_unref,
-                       (void *)result->raw_regexp);
+               g_string_append_c (re_flags, 'r');
         }
-       rspamd_mempool_add_destructor (pool,
-               (rspamd_mempool_destruct_t) g_regex_unref,
-               (void *)result->regexp);
  
-       *dend = '/';
+       result->regexp = rspamd_regexp_cache_create (NULL, dbegin, re_flags->str,
+                       &err);
+
+       g_string_free (re_flags, TRUE);
  
         if (result->regexp == NULL || err != NULL) {
                 msg_warn ("could not read regexp: %s while reading regexp %s",
                                 err ? err->message : "unknown error",
-                       src);
+                                               src);
                 return NULL;
         }
  
-       if (result->raw_regexp == NULL || err != NULL) {
-               msg_warn ("could not read raw regexp: %s while reading regexp %s",
-                       err ? err->message : "unknown error",
-                       src);
-               return NULL;
-       }
+       rspamd_mempool_add_destructor (pool,
+               (rspamd_mempool_destruct_t) rspamd_regexp_unref,
+               (void *)result->regexp);
+
+       *dend = '/';
  
-       /* Add to cache for further usage */
-       re_cache_add (result->regexp_text, result, pool);
         return result;
  }
  
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h

index 8c58a4941fe07f7d279448e0cf0e5a25f9af8b04..44728afe846caff4be24c2192fe6a74445e1bf7f 100644 (file)
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -12,6 +12,7 @@
  #include "symbols_cache.h"
  #include "cfg_rcl.h"
  #include "ucl.h"
+#include "regexp.h"
  
  #define DEFAULT_BIND_PORT 11333
  #define DEFAULT_CONTROL_PORT 11334
@@ -68,11 +69,9 @@ enum rspamd_log_type {
  struct rspamd_regexp_element {
         enum rspamd_regexp_type type;                   /**< regexp type                                                                                */
         gchar *regexp_text;                             /**< regexp text representation                                                 */
-       GRegex *regexp;                                 /**< glib regexp structure                                                              */
-       GRegex *raw_regexp;                             /**< glib regexp structure for raw matching                             */
+       rspamd_regexp_t *regexp;                        /**< regexp structure                                                                   */
         gchar *header;                                  /**< header name for header regexps                                             */
         gboolean is_test;                               /**< true if this expression must be tested                             */
-       gboolean is_raw;                                /**< true if this regexp is done by raw matching                */
         gboolean is_strong;                             /**< true if headers search must be case sensitive              */
  };
  
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c

index 6da0a663e14569ba00bf149026f7f46d96152b6c..3b76d15e0ba501cd37ddab9d01ebcc4c8a6298a4 100644 (file)
--- a/src/libutil/regexp.c
+++ b/src/libutil/regexp.c
@@ -296,6 +296,10 @@ rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len,
         g_assert (re != NULL);
         g_assert (text != NULL);
  
+       if (len == 0) {
+               len = strlen (text);
+       }
+
         if (end != NULL && *end != NULL) {
                 /* Incremental search */
                 mt = (*end);
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c

index 848fdfdb2eecdd452b5f50ebfd6b2899f4a65d5b..224d40b21aefbb679f6203adef26bf81c6af7b93 100644 (file)
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -369,7 +369,7 @@ regexp_module_reconfig (struct rspamd_config *cfg)
  
  struct url_regexp_param {
         struct rspamd_task *task;
-       GRegex *regexp;
+       rspamd_regexp_t *regexp;
         struct rspamd_regexp_element *re;
         gboolean found;
  };
@@ -379,10 +379,9 @@ tree_url_callback (gpointer key, gpointer value, void *data)
  {
         struct url_regexp_param *param = data;
         struct rspamd_url *url = value;
-       GError *err = NULL;
  
-       if (g_regex_match_full (param->regexp, struri (url), -1, 0, 0, NULL,
-               &err) == TRUE) {
+       if (rspamd_regexp_search (param->regexp, struri (url), 0, NULL, NULL, FALSE)
+                       == TRUE) {
                 if (G_UNLIKELY (param->re->is_test)) {
                         msg_info ("process test regexp %s for url %s returned TRUE",
                                 struri (url));
@@ -395,11 +394,6 @@ tree_url_callback (gpointer key, gpointer value, void *data)
                 msg_info ("process test regexp %s for url %s returned FALSE",
                         struri (url));
         }
-       if (err != NULL) {
-               msg_info ("error occured while processing regexp \"%s\": %s",
-                       param->re->regexp_text,
-                       err->message);
-       }
  
         return FALSE;
  }
@@ -413,14 +407,12 @@ process_regexp (struct rspamd_regexp_element *re,
  {
         guint8 *ct;
         gsize clen;
-       gint r, passed = 0, start, end, old;
-       gboolean matched = FALSE;
-       const gchar *in;
+       gint r, passed = 0;
+       gboolean matched = FALSE, raw = FALSE;
+       const gchar *in, *start, *end;
  
         GList *cur, *headerlist;
-       GRegex *regexp;
-       GMatchInfo *info;
-       GError *err = NULL;
+       rspamd_regexp_t *regexp;
         struct url_regexp_param callback_param = {
                 .task = task,
                 .re = re,
@@ -449,8 +441,8 @@ process_regexp (struct rspamd_regexp_element *re,
                                 re->regexp_text,
                                 additional);
                 }
-               if (g_regex_match_full (re->regexp, additional, strlen (additional), 0,
-                       0, NULL, NULL) == TRUE) {
+               if (rspamd_regexp_search (re->regexp, additional, 0, NULL, NULL,
+                       FALSE) == TRUE) {
                         if (G_UNLIKELY (re->is_test)) {
                                 msg_info ("result of regexp %s is true", re->regexp_text);
                         }
@@ -513,7 +505,7 @@ process_regexp (struct rspamd_regexp_element *re,
                                         re->header, rh->decoded);
                                 if (re->type == REGEXP_RAW_HEADER) {
                                         in = rh->value;
-                                       regexp = re->raw_regexp;
+                                       raw = TRUE;
                                 }
                                 else {
                                         in = rh->decoded;
@@ -527,8 +519,7 @@ process_regexp (struct rspamd_regexp_element *re,
  
                                 /* Match re */
                                 if (in &&
-                                       g_regex_match_full (regexp, in, -1, 0, 0, NULL,
-                                                       &err) == TRUE) {
+                                       rspamd_regexp_search (regexp, in, 0, NULL, NULL, raw)) {
                                         if (G_UNLIKELY (re->is_test)) {
                                                 msg_info (
                                                         "process test regexp %s for header %s with value '%s' returned TRUE",
@@ -555,12 +546,6 @@ process_regexp (struct rspamd_regexp_element *re,
                                                 re->header,
                                                 in);
                                 }
-                               if (err != NULL) {
-                                       msg_info (
-                                               "error occured while processing regexp \"%s\": %s",
-                                               re->regexp_text,
-                                               err->message);
-                               }
                                 cur = g_list_next (cur);
                         }
                         task_cache_add (task, re, 0);
@@ -589,14 +574,14 @@ process_regexp (struct rspamd_regexp_element *re,
                         }
                         /* Check raw flags */
                         if (part->is_raw) {
-                               regexp = re->raw_regexp;
+                               raw = TRUE;
                         }
                         else {
                                 /* This time there is no need to validate anything as conversion succeed only for valid characters */
                                 regexp = re->regexp;
                         }
                         /* Select data for regexp */
-                       if (re->is_raw) {
+                       if (raw) {
                                 ct = part->orig->data;
                                 clen = part->orig->len;
                         }
@@ -607,9 +592,10 @@ process_regexp (struct rspamd_regexp_element *re,
                         /* If we have limit, apply regexp so much times as we can */
                         if (f != NULL && limit > 1) {
                                 end = 0;
+                               start = NULL;
+                               end = NULL;
                                 while ((matched =
-                                       g_regex_match_full (regexp, ct + end + 1, clen - end - 1, 0,
-                                       0, &info, &err)) == TRUE) {
+                                       rspamd_regexp_search (regexp, ct, clen, &start, &end, raw))) {
                                         if (G_UNLIKELY (re->is_test)) {
                                                 msg_info (
                                                         "process test regexp %s for mime part of length %d returned TRUE",
@@ -621,22 +607,10 @@ process_regexp (struct rspamd_regexp_element *re,
                                                 task_cache_add (task, re, 1);
                                                 return 1;
                                         }
-                                       else {
-                                               /* Match not found, skip further cycles */
-                                               old = end;
-                                               if (!g_match_info_fetch_pos (info, 0, &start,
-                                                       &end) || end <= 0) {
-                                                       break;
-                                               }
-                                               end += old;
-                                       }
-                                       g_match_info_free (info);
                                 }
-                               g_match_info_free (info);
                         }
                         else {
-                               if (g_regex_match_full (regexp, ct, clen, 0, 0, NULL,
-                                       &err) == TRUE) {
+                               if (rspamd_regexp_search (regexp, ct, clen, NULL, NULL, raw)) {
                                         if (G_UNLIKELY (re->is_test)) {
                                                 msg_info (
                                                         "process test regexp %s for mime part of length %d returned TRUE",
@@ -654,18 +628,13 @@ process_regexp (struct rspamd_regexp_element *re,
                                         re->regexp_text,
                                         (gint)clen);
                         }
-                       if (err != NULL) {
-                               msg_info ("error occured while processing regexp \"%s\": %s",
-                                       re->regexp_text,
-                                       err->message);
-                       }
                         cur = g_list_next (cur);
                 }
                 task_cache_add (task, re, 0);
                 break;
         case REGEXP_MESSAGE:
                 debug_task ("checking message regexp: %s", re->regexp_text);
-               regexp = re->raw_regexp;
+               raw = TRUE;
                 ct = (guint8 *)task->msg.start;
                 clen = task->msg.len;
  
@@ -676,10 +645,9 @@ process_regexp (struct rspamd_regexp_element *re,
                 }
                 /* If we have limit, apply regexp so much times as we can */
                 if (f != NULL && limit > 1) {
-                       end = 0;
+                       start = end = NULL;
                         while ((matched =
-                               g_regex_match_full (regexp, ct + end + 1, clen - end - 1, 0, 0,
-                               &info, &err)) == TRUE) {
+                               rspamd_regexp_search (regexp, ct, clen, &start, &end, raw))) {
                                 if (G_UNLIKELY (re->is_test)) {
                                         msg_info (
                                                 "process test regexp %s for mime part of length %d returned TRUE",
@@ -690,22 +658,10 @@ process_regexp (struct rspamd_regexp_element *re,
                                         task_cache_add (task, re, 1);
                                         return 1;
                                 }
-                               else {
-                                       /* Match not found, skip further cycles */
-                                       old = end;
-                                       if (!g_match_info_fetch_pos (info, 0, &start,
-                                               &end) || end <= 0) {
-                                               break;
-                                       }
-                                       old += end;
-                               }
-                               g_match_info_free (info);
                         }
-                       g_match_info_free (info);
                 }
                 else {
-                       if (g_regex_match_full (regexp, ct, clen, 0, 0, NULL,
-                               &err) == TRUE) {
+                       if (rspamd_regexp_search (regexp, ct, clen, NULL, NULL, raw)) {
                                 if (G_UNLIKELY (re->is_test)) {
                                         msg_info (
                                                 "process test regexp %s for message part of length %d returned TRUE",
@@ -723,11 +679,6 @@ process_regexp (struct rspamd_regexp_element *re,
                                 re->regexp_text,
                                 (gint)clen);
                 }
-               if (err != NULL) {
-                       msg_info ("error occured while processing regexp \"%s\": %s",
-                               re->regexp_text,
-                               err->message);
-               }
                 task_cache_add (task, re, 0);
                 break;
         case REGEXP_URL:
author	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Sat, 7 Mar 2015 22:00:14 +0000 (22:00 +0000)
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Sat, 7 Mar 2015 22:00:14 +0000 (22:00 +0000)
src/libmime/expressions.c		patch | blob | history
src/libserver/cfg_file.h		patch | blob | history
src/libutil/regexp.c		patch | blob | history
src/plugins/regexp.c		patch | blob | history