From a49a31b3f72d9a744c3bb53f60f7f39614eb7f35 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Fri, 11 Oct 2019 13:00:24 +0100
Subject: [PATCH] [Feature] Add limit for number of URLs in Lua

---
/*
 * Review notes. They sit below the three-dash separator, so they are not
 * part of the commit message.
 *
 * - Adds the max_lua_urls option: a guint field in struct rspamd_config,
 *   an RCL integer handler for the "max_lua_urls" key, and a default of
 *   1024 assigned in rspamd_config_new.
 * - lua_task_get_urls: when the url count (plus emails, on the path that
 *   includes them) is above max_lua_urls, lua_task_urls_adjust_skip_prob
 *   sets skip_prob = 1 - max_urls / sz and clamps sz to max_urls.
 *   lua_tree_url_callback then drops every entry whose random draw is
 *   below skip_prob. The draw is made per entry, so the resulting table
 *   holds roughly, not exactly, max_urls elements. max_urls == 0 never
 *   enables skipping (the helper requires max_urls > 0).
 * - The generator state is copied from task->task_timestamp and from the
 *   leading sizeof (guint64) bytes of MESSAGE_FIELD (task, digest); the
 *   in-code comment states the intent: repeated task:get_urls calls on
 *   one task should yield the same list.
 * - rspamd_random_double_fast now delegates to the new
 *   rspamd_random_double_fast_seed, which advances the caller-supplied
 *   two-word state instead of the global xorshifto_seed.
 * - lua_task_has_urls now pushes two values: the boolean, then the summed
 *   size of the url table (plus the email table when emails are requested
 *   and present).
 *
 * NOTE(review): skip_prob and xoroshiro_state are written only when the
 * limit is exceeded, while lua_tree_url_callback reads cb->skip_prob on
 * every call. These hunks do not show cb being zero-initialised in
 * lua_task_get_urls, nor whether its other fields are assigned before or
 * after the helper runs. Confirm against the full function; if cb is not
 * cleared first, the callback reads an indeterminate value.
 *
 * NOTE(review): runs of spaces and tabs inside lines are normalised in
 * this copy, so context lines may not match the tree byte for byte.
 * Applying it presumably needs git apply --ignore-whitespace; verify.
 */
 src/libserver/cfg_file.h  |  1 +
 src/libserver/cfg_rcl.c   |  6 ++++++
 src/libserver/cfg_utils.c |  1 +
 src/libutil/util.c        | 15 ++++++++-----
 src/libutil/util.h        |  2 +-
 src/lua/lua_task.c        | 45 +++++++++++++++++++++++++++++++++++++--
 6 files changed, 62 insertions(+), 8 deletions(-)

diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index f22b0cefc..4eea4db16 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -462,6 +462,7 @@ struct rspamd_config {
 	guint lua_gc_step; /**< lua gc step */
 	guint lua_gc_pause; /**< lua gc pause */
 	guint full_gc_iters; /**< iterations between full gc cycle */
+	guint max_lua_urls; /**< maximum number of urls to be passed to Lua */
 
 	GList *classify_headers; /**< list of headers using for statistics */
 	struct module_s **compiled_modules; /**< list of compiled C modules */
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index cdf3d9b5b..d409e40e8 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -2195,6 +2195,12 @@ rspamd_rcl_config_init (struct rspamd_config *cfg, GHashTable *skip_sections)
 			RSPAMD_CL_FLAG_INT_32,
 			"Maximum count of heartbeats to be lost before trying to "
 			"terminate a worker (default: 0 - disabled)");
+	rspamd_rcl_add_default_handler (sub,
+			"max_lua_urls",
+			rspamd_rcl_parse_struct_integer,
+			G_STRUCT_OFFSET (struct rspamd_config, max_lua_urls),
+			RSPAMD_CL_FLAG_INT_32,
+			"Maximum count of URLs to pass to Lua to avoid DoS");
 
 	/* Neighbours configuration */
 	rspamd_rcl_add_section_doc (&sub->subsections, "neighbours", "name",
diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c
index be709a051..5851e250f 100644
--- a/src/libserver/cfg_utils.c
+++ b/src/libserver/cfg_utils.c
@@ -195,6 +195,7 @@ rspamd_config_new (enum rspamd_config_init_flags flags)
 	cfg->log_error_elts = 10;
 	cfg->log_error_elt_maxlen = 1000;
 	cfg->cache_reload_time = 30.0;
+	cfg->max_lua_urls = 1024;
 
 	/* Default log line */
 	cfg->log_format_str = "id: <$mid>,$if_qid{ qid: <$>,}$if_ip{ ip: $,}"
diff --git a/src/libutil/util.c b/src/libutil/util.c
index 55696c212..ecdd7b2b9 100644
--- a/src/libutil/util.c
+++ b/src/libutil/util.c
@@ -2624,17 +2624,22 @@ xoroshiro_rotl (const guint64 x, int k) {
 	return (x << k) | (x >> (64 - k));
 }
 
-
 gdouble
 rspamd_random_double_fast (void)
 {
-	const guint64 s0 = xorshifto_seed[0];
-	guint64 s1 = xorshifto_seed[1];
+	return rspamd_random_double_fast_seed (xorshifto_seed);
+}
+
+gdouble
+rspamd_random_double_fast_seed (guint64 seed[2])
+{
+	const guint64 s0 = seed[0];
+	guint64 s1 = seed[1];
 	const guint64 result = s0 + s1;
 
 	s1 ^= s0;
-	xorshifto_seed[0] = xoroshiro_rotl(s0, 55) ^ s1 ^ (s1 << 14);
-	xorshifto_seed[1] = xoroshiro_rotl (s1, 36);
+	seed[0] = xoroshiro_rotl(s0, 55) ^ s1 ^ (s1 << 14);
+	seed[1] = xoroshiro_rotl (s1, 36);
 
 	return rspamd_double_from_int64 (result);
 }
diff --git a/src/libutil/util.h b/src/libutil/util.h
index 7c9eb5f91..c482a2d9f 100644
--- a/src/libutil/util.h
+++ b/src/libutil/util.h
@@ -424,7 +424,7 @@ gdouble rspamd_random_double (void);
  * @return
  */
 gdouble rspamd_random_double_fast (void);
-
+gdouble rspamd_random_double_fast_seed (guint64 seed[2]);
 guint64 rspamd_random_uint64_fast (void);
 
 /**
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index 97523a1fa..4293ef178 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -2103,6 +2103,8 @@ struct lua_tree_cb_data {
 	int i;
 	gint mask;
 	gint need_images;
+	gdouble skip_prob;
+	guint64 xoroshiro_state[2];
 };
 
 static void
@@ -2117,6 +2119,14 @@ lua_tree_url_callback (gpointer key, gpointer value, gpointer ud)
 			return;
 		}
 
+		if (cb->skip_prob > 0) {
+			gdouble coin = rspamd_random_double_fast_seed (cb->xoroshiro_state);
+
+			if (coin < cb->skip_prob) {
+				return;
+			}
+		}
+
 		lua_url = lua_newuserdata (cb->L, sizeof (struct rspamd_lua_url));
 		rspamd_lua_setclass (cb->L, "rspamd{url}", -1);
 		lua_url->url = url;
@@ -2124,6 +2134,26 @@ lua_tree_url_callback (gpointer key, gpointer value, gpointer ud)
 	}
 }
 
+static inline gsize
+lua_task_urls_adjust_skip_prob (struct rspamd_task *task,
+		struct lua_tree_cb_data *cb, gsize sz, gsize max_urls)
+{
+	if (max_urls > 0 && sz > max_urls) {
+		cb->skip_prob = 1.0 - ((gdouble)max_urls) / (gdouble)sz;
+		/*
+		 * Use task dependent probabilistic seed to ensure that
+		 * consequent task:get_urls return the same list of urls
+		 */
+		memcpy (&cb->xoroshiro_state[0], &task->task_timestamp,
+				MIN (sizeof (cb->xoroshiro_state[0]), sizeof (task->task_timestamp)));
+		memcpy (&cb->xoroshiro_state[1], MESSAGE_FIELD (task, digest),
+				sizeof (cb->xoroshiro_state[1]));
+		sz = max_urls;
+	}
+
+	return sz;
+}
+
 static gint
 lua_task_get_urls (lua_State * L)
 {
@@ -2135,9 +2165,13 @@ lua_task_get_urls (lua_State * L)
 			PROTOCOL_FILE|PROTOCOL_FTP;
 	const gchar *cache_name = "emails+urls";
 	gboolean need_images = FALSE;
-	gsize sz;
+	gsize sz, max_urls = 0;
 
 	if (task) {
+		if (task->cfg) {
+			max_urls = task->cfg->max_lua_urls;
+		}
+
 		if (task->message == NULL) {
 			lua_newtable (L);
 
@@ -2220,6 +2254,8 @@ lua_task_get_urls (lua_State * L)
 			sz = g_hash_table_size (MESSAGE_FIELD (task, urls)) +
 					g_hash_table_size (MESSAGE_FIELD (task, emails));
 
+			sz = lua_task_urls_adjust_skip_prob (task, &cb, sz, max_urls);
+
 			if (protocols_mask == (default_mask|PROTOCOL_MAILTO)) {
 				/* Can use cached version */
 				if (!lua_task_get_cached (L, task, cache_name)) {
@@ -2250,6 +2286,7 @@ lua_task_get_urls (lua_State * L)
 			}
 
 			sz = g_hash_table_size (MESSAGE_FIELD (task, urls));
+			sz = lua_task_urls_adjust_skip_prob (task, &cb, sz, max_urls);
 
 			if (protocols_mask == (default_mask)) {
 				if (!lua_task_get_cached (L, task, cache_name)) {
@@ -2279,6 +2316,7 @@ lua_task_has_urls (lua_State * L)
 	LUA_TRACE_POINT;
 	struct rspamd_task *task = lua_check_task (L, 1);
 	gboolean need_emails = FALSE, ret = FALSE;
+	gsize sz = 0;
 
 	if (task) {
 		if (task->message) {
@@ -2287,10 +2325,12 @@ lua_task_has_urls (lua_State * L)
 			}
 
 			if (g_hash_table_size (MESSAGE_FIELD (task, urls)) > 0) {
+				sz += g_hash_table_size (MESSAGE_FIELD (task, urls));
 				ret = TRUE;
 			}
 
 			if (need_emails && g_hash_table_size (MESSAGE_FIELD (task, emails)) > 0) {
+				sz += g_hash_table_size (MESSAGE_FIELD (task, emails));
 				ret = TRUE;
 			}
 		}
@@ -2300,8 +2340,9 @@ lua_task_has_urls (lua_State * L)
 	}
 
 	lua_pushboolean (L, ret);
+	lua_pushinteger (L, sz);
 
-	return 1;
+	return 2;
 }
 
 static gint
-- 
2.39.5