source.dussan.org Git - rspamd.git/commitdiff
* Add option max_size for regexp module to skip expensive regexp on long messages
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Tue, 26 Jul 2011 13:37:19 +0000 (17:37 +0400)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Tue, 26 Jul 2011 13:37:19 +0000 (17:37 +0400)
src/plugins/regexp.c
src/printf.c
src/printf.h

index 89f1ed5c390e718339c8bd2b544816e57e850b1d..04dd4956631e3f78f0eb873f9e5ef731b7abc8a3 100644 (file)
@@ -57,12 +57,13 @@ struct autolearn_data {
 };
 
 struct regexp_ctx {
-       gint                            (*filter) (struct worker_task * task);
+       gint                          (*filter) (struct worker_task * task);
        GHashTable                     *autolearn_symbols;
-       gchar                           *statfile_prefix;
+       gchar                          *statfile_prefix;
 
        memory_pool_t                  *regexp_pool;
        memory_pool_t                  *dynamic_pool;
+       gsize                           max_size;
 };
 
 struct regexp_json_buf {
@@ -444,6 +445,7 @@ regexp_module_init (struct config_file *cfg, struct module_ctx **ctx)
 
        (void)luaopen_regexp (cfg->lua_state);
        register_module_opt ("regexp", "dynamic_rules", MODULE_OPT_TYPE_STRING);
+       register_module_opt ("regexp", "max_size", MODULE_OPT_TYPE_SIZE);
        register_module_opt ("regexp", "/^\\S+$/", MODULE_OPT_TYPE_STRING);
 
        return 0;
@@ -497,6 +499,12 @@ regexp_module_config (struct config_file *cfg)
        else {
                regexp_module_ctx->statfile_prefix = DEFAULT_STATFILE_PREFIX;
        }
+       if ((value = get_module_opt (cfg, "regexp", "max_size")) != NULL) {
+               regexp_module_ctx->max_size = parse_limit (value);
+       }
+       else {
+               regexp_module_ctx->max_size = 0;
+       }
        if ((value = get_module_opt (cfg, "regexp", "dynamic_rules")) != NULL) {
                jb = g_malloc (sizeof (struct regexp_json_buf));
                pjb = g_malloc (sizeof (struct regexp_json_buf *));
@@ -725,6 +733,12 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
                                cur = g_list_next (cur);
                                continue;
                        }
+                       /* Skip too large parts */
+                       if (regexp_module_ctx->max_size != 0 && part->content->len > regexp_module_ctx->max_size) {
+                               msg_info ("<%s> skip part of size %Hud", task->message_id, part->content->len);
+                               cur = g_list_next (cur);
+                               continue;
+                       }
                        /* Check raw flags */
                        if (part->is_raw) {
                                regexp = re->raw_regexp;
@@ -795,6 +809,10 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
                ct = task->msg->begin;
                clen = task->msg->len;
 
+               if (regexp_module_ctx->max_size != 0 && clen > regexp_module_ctx->max_size) {
+                       msg_info ("<%s> skip message of size %Hz", task->message_id, clen);
+                       return 0;
+               }
                /* If we have limit, apply regexp so much times as we can */
                if (f != NULL && limit > 1) {
                        end = 0;
index 4155f947864de5b716da8a1db56223954f2a4067..80af94e863eaf2123e7061869a34648db9819b6f 100644 (file)
 #include "fstring.h"
 #include "main.h"
 
+/**
+ * From FreeBSD libutil code
+ */
+static const int maxscale = 7;
+
+static gchar *
+humanize_number (gchar *buf, gchar *last, gint64 num, gboolean bytes)
+{
+       const gchar *prefixes;
+       int i, r, remainder, sign;
+       gint64 divisor;
+       gsize baselen, len = last - buf;
+
+       remainder = 0;
+
+       baselen = 1;
+       if (!bytes) {
+               divisor = 1000;
+               prefixes = "\0\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E";
+       }
+       else {
+               divisor = 1024;
+               prefixes = "B\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E";
+       }
+
+
+#define SCALE2PREFIX(scale)     (&prefixes[(scale) * 3])
+
+       if (num < 0) {
+               sign = -1;
+               num = -num;
+               baselen += 2; /* sign, digit */
+       }
+       else {
+               sign = 1;
+               baselen += 1; /* digit */
+       }
+
+       /* Check if enough room for `x y' + suffix + `\0' */
+       if (len < baselen + 1) {
+               return buf;
+       }
+
+       /*
+        * Divide the number until it fits the given column.
+        * If there will be an overflow by the rounding below,
+        * divide once more.
+        */
+       for (i = 0; i < maxscale && num > divisor; i++) {
+               remainder = num % divisor;
+               num /= divisor;
+       }
+
+       r = rspamd_snprintf (buf, len, "%L%s",
+                       sign * (num + (remainder + 50) / 1000),
+                       SCALE2PREFIX (i));
+
+#undef SCALE2PREFIX
+
+       return buf + r;
+}
+
+
 static gchar *
 rspamd_sprintf_num (gchar *buf, gchar *last, guint64 ui64, gchar zero,
        guint                           hexadecimal, guint width)
@@ -214,7 +277,7 @@ rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args)
        size_t              len, slen;
        gint64              i64;
        guint64             ui64;
-       guint               width, sign, hex, max_width, frac_width, i;
+       guint               width, sign, hex, humanize, bytes, max_width, frac_width, i;
        f_str_t                    *v;
        GString            *gs;
 
@@ -240,6 +303,8 @@ rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args)
                        width = 0;
                        sign = 1;
                        hex = 0;
+                       bytes = 0;
+                       humanize = 0;
                        max_width = 0;
                        frac_width = 0;
                        slen = (size_t) -1;
@@ -273,6 +338,17 @@ rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args)
                                        sign = 0;
                                        fmt++;
                                        continue;
+                               case 'H':
+                                       humanize = 1;
+                                       bytes = 1;
+                                       sign = 0;
+                                       fmt ++;
+                                       continue;
+                               case 'h':
+                                       humanize = 1;
+                                       sign = 0;
+                                       fmt ++;
+                                       continue;
                                case '.':
                                        fmt++;
 
@@ -566,7 +642,12 @@ rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args)
                                }
                        }
 
-                       buf = rspamd_sprintf_num (buf, last, ui64, zero, hex, width);
+                       if (!humanize) {
+                               buf = rspamd_sprintf_num (buf, last, ui64, zero, hex, width);
+                       }
+                       else {
+                               buf = humanize_number (buf, last, ui64, bytes);
+                       }
 
                        fmt++;
 
index c1c6866d6cb77f75d59c2c92d44d1f957f95853e..436ec305966a1ab716fa4d9bf4cb53a40466a24f 100644 (file)
  * supported formats:
  *     %[0][width][x][X]O                  off_t
  *     %[0][width]T                        time_t
- *     %[0][width][u][x|X]z        ssize_t/size_t
- *     %[0][width][u][x|X]d        gint/guint
- *     %[0][width][u][x|X]l        long
- *     %[0][width][u][x|X]D        gint32/guint32
- *     %[0][width][u][x|X]L        gint64/guint64
+ *     %[0][width][u][x|X|h|H]z            ssize_t/size_t
+ *     %[0][width][u][x|X|h|H]d            gint/guint
+ *     %[0][width][u][x|X|h|H]l            long
+ *     %[0][width][u][x|X|h|H]D            gint32/guint32
+ *     %[0][width][u][x|X|h|H]L            gint64/guint64
  *     %[0][width][.width]f        double
  *     %[0][width][.width]F        long double
  *     %[0][width][.width]g        double