source.dussan.org Git - rspamd.git/commitdiff
[Feature] Add limit for number of URLs in Lua
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 11 Oct 2019 12:00:24 +0000 (13:00 +0100)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 11 Oct 2019 12:00:24 +0000 (13:00 +0100)
src/libserver/cfg_file.h
src/libserver/cfg_rcl.c
src/libserver/cfg_utils.c
src/libutil/util.c
src/libutil/util.h
src/lua/lua_task.c

index f22b0cefc4efaf5549697b594e2da016f827c651..4eea4db161c4871ee2709d7a6d76833c4aaffa39 100644 (file)
@@ -462,6 +462,7 @@ struct rspamd_config {
        guint lua_gc_step;                                /**< lua gc step                                                                              */
        guint lua_gc_pause;                                /**< lua gc pause                                                                            */
        guint full_gc_iters;                            /**< iterations between full gc cycle                                   */
+       guint max_lua_urls;                             /**< maximum number of urls to be passed to Lua                 */
 
        GList *classify_headers;                        /**< list of headers using for statistics                               */
        struct module_s **compiled_modules;                /**< list of compiled C modules                                                      */
index cdf3d9b5b74e5a5cf089bbd37507a2b3424369a4..d409e40e89359cf234c3402d82a48f2daea9de35 100644 (file)
@@ -2195,6 +2195,12 @@ rspamd_rcl_config_init (struct rspamd_config *cfg, GHashTable *skip_sections)
                                RSPAMD_CL_FLAG_INT_32,
                                "Maximum count of heartbeats to be lost before trying to "
                                "terminate a worker (default: 0 - disabled)");
+               rspamd_rcl_add_default_handler (sub,
+                               "max_lua_urls",
+                               rspamd_rcl_parse_struct_integer,
+                               G_STRUCT_OFFSET (struct rspamd_config, max_lua_urls),
+                               RSPAMD_CL_FLAG_INT_32,
+                               "Maximum count of URLs to pass to Lua to avoid DoS");
 
                /* Neighbours configuration */
                rspamd_rcl_add_section_doc (&sub->subsections, "neighbours", "name",
index be709a05189d94daa5ee322507fb0899067149b0..5851e250f6f0a295f0f3cdb3013f4774176a3f7a 100644 (file)
@@ -195,6 +195,7 @@ rspamd_config_new (enum rspamd_config_init_flags flags)
        cfg->log_error_elts = 10;
        cfg->log_error_elt_maxlen = 1000;
        cfg->cache_reload_time = 30.0;
+       cfg->max_lua_urls = 1024;
 
        /* Default log line */
        cfg->log_format_str = "id: <$mid>,$if_qid{ qid: <$>,}$if_ip{ ip: $,}"
index 55696c212c39958b608c47570bc651c60866b5b8..ecdd7b2b9e6b26447788c0e31e718068f894621e 100644 (file)
@@ -2624,17 +2624,22 @@ xoroshiro_rotl (const guint64 x, int k) {
        return (x << k) | (x >> (64 - k));
 }
 
-
 gdouble
 rspamd_random_double_fast (void)
 {
-       const guint64 s0 = xorshifto_seed[0];
-       guint64 s1 = xorshifto_seed[1];
+       return rspamd_random_double_fast_seed (xorshifto_seed);
+}
+
+gdouble
+rspamd_random_double_fast_seed (guint64 seed[2])
+{
+       const guint64 s0 = seed[0];
+       guint64 s1 = seed[1];
        const guint64 result = s0 + s1;
 
        s1 ^= s0;
-       xorshifto_seed[0] = xoroshiro_rotl(s0, 55) ^ s1 ^ (s1 << 14);
-       xorshifto_seed[1] = xoroshiro_rotl (s1, 36);
+       seed[0] = xoroshiro_rotl(s0, 55) ^ s1 ^ (s1 << 14);
+       seed[1] = xoroshiro_rotl (s1, 36);
 
        return rspamd_double_from_int64 (result);
 }
index 7c9eb5f914bc263d8b85b603ad811e7746e1524c..c482a2d9fb3223da9ac6833ec072371fa69d0cce 100644 (file)
@@ -424,7 +424,7 @@ gdouble rspamd_random_double (void);
  * @return
  */
 gdouble rspamd_random_double_fast (void);
-
+gdouble rspamd_random_double_fast_seed (guint64 seed[2]);
 guint64 rspamd_random_uint64_fast (void);
 
 /**
index 97523a1fac2c0d4e3defbbf40ebc0bbe01d33895..4293ef1781dc5770ee616624b75d62f26a19f940 100644 (file)
@@ -2103,6 +2103,8 @@ struct lua_tree_cb_data {
        int i;
        gint mask;
        gint need_images;
+       gdouble skip_prob;
+       guint64 xoroshiro_state[2];
 };
 
 static void
@@ -2117,6 +2119,14 @@ lua_tree_url_callback (gpointer key, gpointer value, gpointer ud)
                        return;
                }
 
+               if (cb->skip_prob > 0) {
+                       gdouble coin = rspamd_random_double_fast_seed (cb->xoroshiro_state);
+
+                       if (coin < cb->skip_prob) {
+                               return;
+                       }
+               }
+
                lua_url = lua_newuserdata (cb->L, sizeof (struct rspamd_lua_url));
                rspamd_lua_setclass (cb->L, "rspamd{url}", -1);
                lua_url->url = url;
@@ -2124,6 +2134,26 @@ lua_tree_url_callback (gpointer key, gpointer value, gpointer ud)
        }
 }
 
+static inline gsize
+lua_task_urls_adjust_skip_prob (struct rspamd_task *task,
+               struct lua_tree_cb_data *cb, gsize sz, gsize max_urls)
+{
+       if (max_urls > 0 && sz > max_urls) {
+               cb->skip_prob = 1.0 - ((gdouble)max_urls) / (gdouble)sz;
+               /*
+                * Seed the PRNG from the task timestamp and message digest so that
+                * consecutive task:get_urls calls return the same list of urls
+                */
+               memcpy (&cb->xoroshiro_state[0], &task->task_timestamp,
+                               MIN (sizeof (cb->xoroshiro_state[0]), sizeof (task->task_timestamp)));
+               memcpy (&cb->xoroshiro_state[1], MESSAGE_FIELD (task, digest),
+                               sizeof (cb->xoroshiro_state[1]));
+               sz = max_urls;
+       }
+
+       return sz;
+}
+
 static gint
 lua_task_get_urls (lua_State * L)
 {
@@ -2135,9 +2165,13 @@ lua_task_get_urls (lua_State * L)
                        PROTOCOL_FILE|PROTOCOL_FTP;
        const gchar *cache_name = "emails+urls";
        gboolean need_images = FALSE;
-       gsize sz;
+       gsize sz, max_urls = 0;
 
        if (task) {
+               if (task->cfg) {
+                       max_urls = task->cfg->max_lua_urls;
+               }
+
                if (task->message == NULL) {
                        lua_newtable (L);
 
@@ -2220,6 +2254,8 @@ lua_task_get_urls (lua_State * L)
                        sz = g_hash_table_size (MESSAGE_FIELD (task, urls)) +
                                        g_hash_table_size (MESSAGE_FIELD (task, emails));
 
+                       sz = lua_task_urls_adjust_skip_prob (task, &cb, sz, max_urls);
+
                        if (protocols_mask == (default_mask|PROTOCOL_MAILTO)) {
                                /* Can use cached version */
                                if (!lua_task_get_cached (L, task, cache_name)) {
@@ -2250,6 +2286,7 @@ lua_task_get_urls (lua_State * L)
                        }
 
                        sz = g_hash_table_size (MESSAGE_FIELD (task, urls));
+                       sz = lua_task_urls_adjust_skip_prob (task, &cb, sz, max_urls);
 
                        if (protocols_mask == (default_mask)) {
                                if (!lua_task_get_cached (L, task, cache_name)) {
@@ -2279,6 +2316,7 @@ lua_task_has_urls (lua_State * L)
        LUA_TRACE_POINT;
        struct rspamd_task *task = lua_check_task (L, 1);
        gboolean need_emails = FALSE, ret = FALSE;
+       gsize sz = 0;
 
        if (task) {
                if (task->message) {
@@ -2287,10 +2325,12 @@ lua_task_has_urls (lua_State * L)
                        }
 
                        if (g_hash_table_size (MESSAGE_FIELD (task, urls)) > 0) {
+                               sz += g_hash_table_size (MESSAGE_FIELD (task, urls));
                                ret = TRUE;
                        }
 
                        if (need_emails && g_hash_table_size (MESSAGE_FIELD (task, emails)) > 0) {
+                               sz += g_hash_table_size (MESSAGE_FIELD (task, emails));
                                ret = TRUE;
                        }
                }
@@ -2300,8 +2340,9 @@ lua_task_has_urls (lua_State * L)
        }
 
        lua_pushboolean (L, ret);
+       lua_pushinteger (L, sz);
 
-       return 1;
+       return 2;
 }
 
 static gint