aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@rambler-co.ru>, 2011-07-26 17:37:19 +0400
committer: Vsevolod Stakhov <vsevolod@rambler-co.ru>, 2011-07-26 17:37:19 +0400
commit: d6625c5b603460aa485acc5d3ddd96a8b3c10858 (patch)
tree: 00ff73488e516fc0117f2a037e2ae443f6306541
parent: 6eded20b2c15e524ed9a83c436a3b5f0bfbd253c (diff)
download: rspamd-d6625c5b603460aa485acc5d3ddd96a8b3c10858.tar.gz
download: rspamd-d6625c5b603460aa485acc5d3ddd96a8b3c10858.zip
* Add option max_size for regexp module to skip expensive regexp on long messages
-rw-r--r--  src/plugins/regexp.c  22
-rw-r--r--  src/printf.c  85
-rw-r--r--  src/printf.h  10
3 files changed, 108 insertions, 9 deletions
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index 89f1ed5c3..04dd49566 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -57,12 +57,13 @@ struct autolearn_data {
};
struct regexp_ctx {
- gint (*filter) (struct worker_task * task);
+ gint (*filter) (struct worker_task * task);
GHashTable *autolearn_symbols;
- gchar *statfile_prefix;
+ gchar *statfile_prefix;
memory_pool_t *regexp_pool;
memory_pool_t *dynamic_pool;
+ gsize max_size;
};
struct regexp_json_buf {
@@ -444,6 +445,7 @@ regexp_module_init (struct config_file *cfg, struct module_ctx **ctx)
(void)luaopen_regexp (cfg->lua_state);
register_module_opt ("regexp", "dynamic_rules", MODULE_OPT_TYPE_STRING);
+ register_module_opt ("regexp", "max_size", MODULE_OPT_TYPE_SIZE);
register_module_opt ("regexp", "/^\\S+$/", MODULE_OPT_TYPE_STRING);
return 0;
@@ -497,6 +499,12 @@ regexp_module_config (struct config_file *cfg)
else {
regexp_module_ctx->statfile_prefix = DEFAULT_STATFILE_PREFIX;
}
+ if ((value = get_module_opt (cfg, "regexp", "max_size")) != NULL) {
+ regexp_module_ctx->max_size = parse_limit (value);
+ }
+ else {
+ regexp_module_ctx->max_size = 0;
+ }
if ((value = get_module_opt (cfg, "regexp", "dynamic_rules")) != NULL) {
jb = g_malloc (sizeof (struct regexp_json_buf));
pjb = g_malloc (sizeof (struct regexp_json_buf *));
@@ -725,6 +733,12 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
cur = g_list_next (cur);
continue;
}
+ /* Skip too large parts */
+ if (regexp_module_ctx->max_size != 0 && part->content->len > regexp_module_ctx->max_size) {
+ msg_info ("<%s> skip part of size %Hud", task->message_id, part->content->len);
+ cur = g_list_next (cur);
+ continue;
+ }
/* Check raw flags */
if (part->is_raw) {
regexp = re->raw_regexp;
@@ -795,6 +809,10 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
ct = task->msg->begin;
clen = task->msg->len;
+ if (regexp_module_ctx->max_size != 0 && clen > regexp_module_ctx->max_size) {
+ msg_info ("<%s> skip message of size %Hz", task->message_id, clen);
+ return 0;
+ }
/* If we have limit, apply regexp so much times as we can */
if (f != NULL && limit > 1) {
end = 0;
diff --git a/src/printf.c b/src/printf.c
index 4155f9478..80af94e86 100644
--- a/src/printf.c
+++ b/src/printf.c
@@ -25,6 +25,69 @@
#include "fstring.h"
#include "main.h"
+/**
+ * Human-readable number formatting, adapted from FreeBSD libutil's
+ * humanize_number().
+ *
+ * maxscale is the number of entries in each prefix table used below
+ * (scale indices 0..6: none or "B", k, M, G, T, P, E).
+ */
+static const int maxscale = 7;
+
+/**
+ * Print num into buf as a short scaled value followed by a unit prefix.
+ *
+ * @param buf   start of the output area
+ * @param last  end of the output area; the available length is last - buf
+ * @param num   value to print; negative values are printed with a leading '-'
+ * @param bytes if true, scale by 1024 and print "B" for unscaled values;
+ *              otherwise scale by 1000 and print no suffix for unscaled values
+ * @return pointer just past the produced text, or buf unchanged when the
+ *         output area is too small (assumes rspamd_snprintf returns the
+ *         number of characters it wrote -- TODO confirm)
+ */
+static gchar *
+humanize_number (gchar *buf, gchar *last, gint64 num, gboolean bytes)
+{
+	const gchar *prefixes;
+	int i, r, remainder;
+	gint64 divisor, value;
+	guint64 mag;
+	gboolean negative;
+	gsize baselen, len = last - buf;
+
+	remainder = 0;
+
+	baselen = 1;
+	/* Each table entry is 3 bytes wide: one prefix character plus padding NULs */
+	if (!bytes) {
+		divisor = 1000;
+		prefixes = "\0\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E";
+	}
+	else {
+		divisor = 1024;
+		prefixes = "B\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E";
+	}
+
+
+#define SCALE2PREFIX(scale) (&prefixes[(scale) * 3])
+
+	if (num < 0) {
+		negative = TRUE;
+		/* Negate in unsigned arithmetic: -num overflows for the minimum gint64 */
+		mag = -(guint64) num;
+		baselen += 2; /* sign, digit */
+	}
+	else {
+		negative = FALSE;
+		mag = (guint64) num;
+		baselen += 1; /* digit */
+	}
+
+	/*
+	 * Minimal sanity check: room for the optional sign, one digit and one
+	 * more byte. Longer output is bounded by the len passed to
+	 * rspamd_snprintf below.
+	 */
+	if (len < baselen + 1) {
+		return buf;
+	}
+
+	/*
+	 * Divide until the value drops below one unit of the next scale,
+	 * remembering the last remainder for rounding. The bound keeps the
+	 * scale index inside the prefix table.
+	 */
+	for (i = 0; i < maxscale - 1 && mag >= (guint64) divisor; i++) {
+		remainder = (int) (mag % (guint64) divisor);
+		mag /= (guint64) divisor;
+	}
+
+	/* Round to nearest using the real divisor (1000 or 1024) */
+	value = (gint64) mag + (remainder + divisor / 2) / divisor;
+
+	/* If rounding carried into a full unit, move to the next scale */
+	if (value == divisor && i < maxscale - 1) {
+		value = 1;
+		i++;
+	}
+
+	if (negative) {
+		value = -value;
+	}
+
+	r = rspamd_snprintf (buf, len, "%L%s",
+			value,
+			SCALE2PREFIX (i));
+
+#undef SCALE2PREFIX
+
+	return buf + r;
+}
+
+
static gchar *
rspamd_sprintf_num (gchar *buf, gchar *last, guint64 ui64, gchar zero,
guint hexadecimal, guint width)
@@ -214,7 +277,7 @@ rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args)
size_t len, slen;
gint64 i64;
guint64 ui64;
- guint width, sign, hex, max_width, frac_width, i;
+ guint width, sign, hex, humanize, bytes, max_width, frac_width, i;
f_str_t *v;
GString *gs;
@@ -240,6 +303,8 @@ rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args)
width = 0;
sign = 1;
hex = 0;
+ bytes = 0;
+ humanize = 0;
max_width = 0;
frac_width = 0;
slen = (size_t) -1;
@@ -273,6 +338,17 @@ rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args)
sign = 0;
fmt++;
continue;
+ case 'H':
+ humanize = 1;
+ bytes = 1;
+ sign = 0;
+ fmt ++;
+ continue;
+ case 'h':
+ humanize = 1;
+ sign = 0;
+ fmt ++;
+ continue;
case '.':
fmt++;
@@ -566,7 +642,12 @@ rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args)
}
}
- buf = rspamd_sprintf_num (buf, last, ui64, zero, hex, width);
+ if (!humanize) {
+ buf = rspamd_sprintf_num (buf, last, ui64, zero, hex, width);
+ }
+ else {
+ buf = humanize_number (buf, last, ui64, bytes);
+ }
fmt++;
diff --git a/src/printf.h b/src/printf.h
index c1c6866d6..436ec3059 100644
--- a/src/printf.h
+++ b/src/printf.h
@@ -31,11 +31,11 @@
* supported formats:
* %[0][width][x][X]O off_t
* %[0][width]T time_t
- * %[0][width][u][x|X]z ssize_t/size_t
- * %[0][width][u][x|X]d gint/guint
- * %[0][width][u][x|X]l long
- * %[0][width][u][x|X]D gint32/guint32
- * %[0][width][u][x|X]L gint64/guint64
+ * %[0][width][u][x|X|h|H]z ssize_t/size_t
+ * %[0][width][u][x|X|h|H]d gint/guint
+ * %[0][width][u][x|X|h|H]l long
+ * %[0][width][u][x|X|h|H]D gint32/guint32
+ * %[0][width][u][x|X|h|H]L gint64/guint64
* %[0][width][.width]f double
* %[0][width][.width]F long double
* %[0][width][.width]g double