From d6625c5b603460aa485acc5d3ddd96a8b3c10858 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 26 Jul 2011 17:37:19 +0400 Subject: [PATCH] * Add option max_size for regexp module to skip expensive regexp on long messages --- src/plugins/regexp.c | 22 ++++++++++-- src/printf.c | 85 ++++++++++++++++++++++++++++++++++++++++++-- src/printf.h | 10 +++--- 3 files changed, 108 insertions(+), 9 deletions(-) diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c index 89f1ed5c3..04dd49566 100644 --- a/src/plugins/regexp.c +++ b/src/plugins/regexp.c @@ -57,12 +57,13 @@ struct autolearn_data { }; struct regexp_ctx { - gint (*filter) (struct worker_task * task); + gint (*filter) (struct worker_task * task); GHashTable *autolearn_symbols; - gchar *statfile_prefix; + gchar *statfile_prefix; memory_pool_t *regexp_pool; memory_pool_t *dynamic_pool; + gsize max_size; }; struct regexp_json_buf { @@ -444,6 +445,7 @@ regexp_module_init (struct config_file *cfg, struct module_ctx **ctx) (void)luaopen_regexp (cfg->lua_state); register_module_opt ("regexp", "dynamic_rules", MODULE_OPT_TYPE_STRING); + register_module_opt ("regexp", "max_size", MODULE_OPT_TYPE_SIZE); register_module_opt ("regexp", "/^\\S+$/", MODULE_OPT_TYPE_STRING); return 0; @@ -497,6 +499,12 @@ regexp_module_config (struct config_file *cfg) else { regexp_module_ctx->statfile_prefix = DEFAULT_STATFILE_PREFIX; } + if ((value = get_module_opt (cfg, "regexp", "max_size")) != NULL) { + regexp_module_ctx->max_size = parse_limit (value); + } + else { + regexp_module_ctx->max_size = 0; + } if ((value = get_module_opt (cfg, "regexp", "dynamic_rules")) != NULL) { jb = g_malloc (sizeof (struct regexp_json_buf)); pjb = g_malloc (sizeof (struct regexp_json_buf *)); @@ -725,6 +733,12 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar cur = g_list_next (cur); continue; } + /* Skip too large parts */ + if (regexp_module_ctx->max_size != 0 && part->content->len > regexp_module_ctx->max_size) { 
+ msg_info ("<%s> skip part of size %Hud", task->message_id, part->content->len); + cur = g_list_next (cur); + continue; + } /* Check raw flags */ if (part->is_raw) { regexp = re->raw_regexp; @@ -795,6 +809,10 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar ct = task->msg->begin; clen = task->msg->len; + if (regexp_module_ctx->max_size != 0 && clen > regexp_module_ctx->max_size) { + msg_info ("<%s> skip message of size %Hz", task->message_id, clen); + return 0; + } /* If we have limit, apply regexp so much times as we can */ if (f != NULL && limit > 1) { end = 0; diff --git a/src/printf.c b/src/printf.c index 4155f9478..80af94e86 100644 --- a/src/printf.c +++ b/src/printf.c @@ -25,6 +25,69 @@ #include "fstring.h" #include "main.h" +/** + * From FreeBSD libutil code + */ +static const int maxscale = 7; /* upper bound on scaling steps; each prefixes table below holds 7 entries */ +/* Write num into [buf, last) scaled down by 1000 (bytes == FALSE) or 1024 (bytes == TRUE) and followed by the matching entry of the prefixes table; returns buf unchanged when the room check fails, otherwise buf advanced by the value rspamd_snprintf returned */ +static gchar * +humanize_number (gchar *buf, gchar *last, gint64 num, gboolean bytes) +{ + const gchar *prefixes; + int i, r, remainder, sign; + gint64 divisor; + gsize baselen, len = last - buf; + + remainder = 0; + + baselen = 1; + if (!bytes) { + divisor = 1000; + prefixes = "\0\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E"; + } + else { + divisor = 1024; + prefixes = "B\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E"; + } + + +#define SCALE2PREFIX(scale) (&prefixes[(scale) * 3]) + + if (num < 0) { + sign = -1; + num = -num; + baselen += 2; /* sign, digit */ + } + else { + sign = 1; + baselen += 1; /* digit */ + } + + /* Check if enough room for `x y' + suffix + `\0' */ + if (len < baselen + 1) { + return buf; + } + + /* + * Scale num down while at least one whole divisor step remains, + * keeping the remainder of the last division so that the value + * printed below is rounded to the nearest unit of that scale. 
+ */ + for (i = 0; i < maxscale && num >= divisor; i++) { + remainder = num % divisor; + num /= divisor; + } + + r = rspamd_snprintf (buf, len, "%L%s", + sign * (num + (remainder + divisor / 2) / divisor), + SCALE2PREFIX (i)); + +#undef SCALE2PREFIX + + return buf + r; +} + + static gchar * rspamd_sprintf_num (gchar *buf, gchar *last, guint64 ui64, gchar zero, guint hexadecimal, guint width) @@ -214,7 +277,7 @@ rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args) size_t len, slen; gint64 i64; guint64 ui64; - guint width, sign, hex, max_width, frac_width, i; + guint width, sign, hex, humanize, bytes, max_width, frac_width, i; f_str_t *v; GString *gs; @@ -240,6 +303,8 @@ rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args) width = 0; sign = 1; hex = 0; + bytes = 0; + humanize = 0; max_width = 0; frac_width = 0; slen = (size_t) -1; @@ -273,6 +338,17 @@ rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args) sign = 0; fmt++; continue; + case 'H': + humanize = 1; + bytes = 1; + sign = 0; + fmt ++; + continue; + case 'h': + humanize = 1; + sign = 0; + fmt ++; + continue; case '.': fmt++; @@ -566,7 +642,12 @@ rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args) } } - buf = rspamd_sprintf_num (buf, last, ui64, zero, hex, width); + if (!humanize) { + buf = rspamd_sprintf_num (buf, last, ui64, zero, hex, width); + } + else { + buf = humanize_number (buf, last, ui64, bytes); + } fmt++; diff --git a/src/printf.h b/src/printf.h index c1c6866d6..436ec3059 100644 --- a/src/printf.h +++ b/src/printf.h @@ -31,11 +31,11 @@ * supported formats: * %[0][width][x][X]O off_t * %[0][width]T time_t - * %[0][width][u][x|X]z ssize_t/size_t - * %[0][width][u][x|X]d gint/guint - * %[0][width][u][x|X]l long - * %[0][width][u][x|X]D gint32/guint32 - * %[0][width][u][x|X]L gint64/guint64 + * %[0][width][u][x|X|h|H]z ssize_t/size_t + * %[0][width][u][x|X|h|H]d gint/guint + * %[0][width][u][x|X|h|H]l long + * %[0][width][u][x|X|h|H]D 
gint32/guint32 + * %[0][width][u][x|X|h|H]L gint64/guint64 * %[0][width][.width]f double * %[0][width][.width]F long double * %[0][width][.width]g double -- 2.39.5