From daa0c73eee23fea675fc733482248722657f6a56 Mon Sep 17 00:00:00 2001
From: Mikhail Galanin
Date: Tue, 31 Jul 2018 15:53:51 +0100
Subject: [PATCH] [Minor] added test parsing html/phished urls

---
 lualib/lua_util.lua                           |   2 +-
 src/lua/lua_task.c                            |  60 ++++++++-
 src/lua/lua_util.c                            |  82 +++++++++++-
 .../unit/lua_util.extract_specific_urls.lua   | 115 +++++++++++++++--
 4 files changed, 243 insertions(+), 16 deletions(-)

diff --git a/lualib/lua_util.lua b/lualib/lua_util.lua
index 86cbc9ff4..ba5843ff6 100644
--- a/lualib/lua_util.lua
+++ b/lualib/lua_util.lua
@@ -595,7 +595,7 @@ exports.extract_specific_urls = function(params_or_task, lim, need_emails, filte
       else
         if u:get_user() then
           table.insert(res, u)
-        elseif u:is_subject() then
+        elseif u:is_subject() or u:is_phished() then
           table.insert(res, u)
         end
       end
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index ca10a94e7..e4fcea3a4 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -53,6 +53,12 @@ end
  * @return {boolean},{rspamd_task|error} status + new task or error message
  */
 LUA_FUNCTION_DEF (task, load_from_file);
+/***
+ * @function rspamd_task.load_from_string(message[, cfg])
+ * Loads a message from a string
+ * @return {boolean},{rspamd_task|error} status + new task or error message
+ */
+LUA_FUNCTION_DEF (task, load_from_string);
 
 LUA_FUNCTION_DEF (task, get_message);
 /***
@@ -908,6 +914,7 @@ LUA_FUNCTION_DEF (task, get_stat_tokens);
 
 static const struct luaL_reg tasklib_f[] = {
 	LUA_INTERFACE_DEF (task, load_from_file),
+	LUA_INTERFACE_DEF (task, load_from_string),
 	{NULL, NULL}
 };
 
@@ -1237,7 +1244,7 @@ lua_task_unmap_dtor (gpointer p)
 	}
 }
 
-static int
+static gint
 lua_task_load_from_file (lua_State * L)
 {
 	struct rspamd_task *task = NULL, **ptask;
@@ -1295,6 +1302,57 @@ lua_task_load_from_file (lua_State * L)
 	return 2;
 }
 
+/*
+ * Lua binding for rspamd_task.load_from_string: builds a task around a
+ * private copy of the string passed as argument 1, optionally attached to
+ * the rspamd{config} userdata passed as argument 2. Pushes true and the
+ * new rspamd{task}.
+ */
+static gint
+lua_task_load_from_string (lua_State * L)
+{
+	struct rspamd_task *task, **ptask;
+	struct rspamd_config *cfg = NULL;
+	const gchar *str_message;
+	gchar *msg_copy;
+	gsize message_len = 0;
+
+	/* Raises a Lua error on its own if argument 1 is not a string */
+	str_message = luaL_checklstring (L, 1, &message_len);
+
+	if (lua_type (L, 2) == LUA_TUSERDATA) {
+		gpointer p;
+
+		p = rspamd_lua_check_udata_maybe (L, 2, "rspamd{config}");
+
+		if (p) {
+			cfg = *(struct rspamd_config **)p;
+		}
+	}
+
+	/*
+	 * The task has to own its message: Lua may collect the source string
+	 * while the task is still alive, and this memory does not come from
+	 * mmap, so it must not be handed to lua_task_unmap_dtor.
+	 */
+	msg_copy = g_malloc (message_len + 1);
+	memcpy (msg_copy, str_message, message_len);
+	msg_copy[message_len] = '\0';
+
+	task = rspamd_task_new (NULL, cfg, NULL, NULL);
+	task->msg.begin = msg_copy;
+	task->msg.len = message_len;
+	rspamd_mempool_add_destructor (task->task_pool,
+			(rspamd_mempool_destruct_t)g_free, msg_copy);
+
+	lua_pushboolean (L, TRUE);
+	ptask = lua_newuserdata (L, sizeof (*ptask));
+	*ptask = task;
+	rspamd_lua_setclass (L, "rspamd{task}", -1);
+
+	return 2;
+}
+
 static int
 lua_task_get_mempool (lua_State * L)
 {
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index 063a7aab7..f0feb7f2e 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -45,7 +45,8 @@ LUA_FUNCTION_DEF (util, create_event_base);
  */
 LUA_FUNCTION_DEF (util, load_rspamd_config);
 /***
- * @function util.config_from_ucl(any)
- * Load rspamd config from ucl reperesented by any lua table
- * @return {confg} new configuration object suitable for access
+ * @function util.config_from_ucl(any[, options])
+ * Load rspamd config from ucl represented by any lua table
+ * @param {string} options optional list of INIT_* flag names separated by `,` or `;`
+ * @return {config} new configuration object suitable for access
  */
@@ -665,18 +666,87 @@ lua_util_load_rspamd_config (lua_State *L)
 	return 1;
 }
 
+/*
+ * Converts a list of RSPAMD_CONFIG_INIT_* flag names (without the
+ * RSPAMD_CONFIG_ prefix) separated by ',' or ';' into the matching bitmask.
+ * Names are matched case-insensitively, surrounding whitespace and empty
+ * elements are ignored, unknown names are reported and skipped.
+ */
+static gint
+parse_config_options(const char *str_options)
+{
+	static const struct {
+		const gchar *name;
+		gint flag;
+	} known_options[] = {
+		{"INIT_URL", RSPAMD_CONFIG_INIT_URL},
+		{"INIT_LIBS", RSPAMD_CONFIG_INIT_LIBS},
+		{"INIT_SYMCACHE", RSPAMD_CONFIG_INIT_SYMCACHE},
+		{"INIT_VALIDATE", RSPAMD_CONFIG_INIT_VALIDATE},
+		{"INIT_NO_TLD", RSPAMD_CONFIG_INIT_NO_TLD},
+		{"INIT_PRELOAD_MAPS", RSPAMD_CONFIG_INIT_PRELOAD_MAPS},
+	};
+	gint ret = 0;
+	gchar **vec, *str;
+	guint i, j;
+
+	if (str_options == NULL) {
+		return 0;
+	}
+
+	vec = g_strsplit_set (str_options, ",;", -1);
+
+	for (i = 0; vec[i] != NULL; i ++) {
+		str = g_strstrip (vec[i]);
+
+		if (*str == '\0') {
+			continue;
+		}
+
+		for (j = 0; j < G_N_ELEMENTS (known_options); j ++) {
+			if (g_ascii_strcasecmp (str, known_options[j].name) == 0) {
+				ret |= known_options[j].flag;
+				break;
+			}
+		}
+
+		if (j == G_N_ELEMENTS (known_options)) {
+			msg_warn ("bad type: %s", str);
+		}
+	}
+
+	g_strfreev (vec);
+
+	return ret;
+}
+
 static gint
 lua_util_config_from_ucl (lua_State *L)
 {
-	struct rspamd_config *cfg, **pcfg;
+	struct rspamd_config *cfg = NULL, **pcfg;
 	struct rspamd_rcl_section *top;
 	GError *err = NULL;
 	ucl_object_t *obj;
+	gint int_options = 0;
 
 	obj = ucl_object_lua_import (L, 1);
+
+	if (lua_gettop (L) == 2) {
+		if (lua_type (L, 2) == LUA_TSTRING) {
+			int_options = parse_config_options (lua_tostring (L, 2));
+		}
+		else {
+			/* cfg does not exist yet, so msg_err_config cannot be used */
+			msg_err ("config_from_ucl: second parameter is expected to be string");
+			ucl_object_unref (obj);
+			lua_pushnil (L);
+
+			/* Stop here: obj has just been released */
+			return 1;
+		}
+	}
 
 	if (obj) {
-		cfg = g_malloc0 (sizeof (struct rspamd_config));
 		cfg = rspamd_config_new (RSPAMD_CONFIG_INIT_SKIP_LUA);
 		cfg->lua_state = L;
 
@@ -690,7 +760,7 @@ lua_util_config_from_ucl (lua_State *L)
 			lua_pushnil (L);
 		}
 		else {
-			rspamd_config_post_load (cfg, 0);
+			rspamd_config_post_load (cfg, int_options);
 			pcfg = lua_newuserdata (L, sizeof (struct rspamd_config *));
 			rspamd_lua_setclass (L, "rspamd{config}", -1);
 			*pcfg = cfg;
diff --git a/test/lua/unit/lua_util.extract_specific_urls.lua b/test/lua/unit/lua_util.extract_specific_urls.lua
index 424cca5f5..9c8e4e187 100644
--- a/test/lua/unit/lua_util.extract_specific_urls.lua
+++ b/test/lua/unit/lua_util.extract_specific_urls.lua
@@ -5,6 +5,8 @@ context("Lua util - extract_specific_urls", function()
   local url = require "rspamd_url"
   local logger = require "rspamd_logger"
   local ffi = require "ffi"
+  local rspamd_util = require "rspamd_util"
+  local rspamd_task = require "rspamd_task"
 
   ffi.cdef[[
   void rspamd_url_init (const char *tld_file);
@@ -64,19 +66,35 @@ context("Lua util - extract_specific_urls", function()
       esld_limit = 2,
       need_emails = true,
       prefix = 'p'
+    },
+    {
+      input = {"abc@a.google.com", "b.google.com", "c.google.com", "a.net", "bb.net", "a.bb.net", "b.bb.net"},
+      expect = {"abc@a.google.com", "a.bb.net", "b.google.com", "a.net", "bb.net", "abc@a.google.com"},
+      filter = nil,
+      limit = 9999,
+      esld_limit = 2,
+      need_emails = true,
+      prefix = 'p'
     }
   }
 
+  local function prepare_actual_result(actual)
+    return fun.totable(fun.map(
+      function(u) return u:get_raw():gsub('^%w+://', '') end,
+      actual
+    ))
+  end
+
   local pool = mpool.create()
 
   for i,c in ipairs(cases) do
 
     local function prepare_url_list(c)
       return fun.totable(fun.map(
           function (u) return url.create(pool, u) end,
           c.input or url_list
       ))
     end
 
     test("extract_specific_urls, backward compatibility case #" .. i, function()
       task_object.urls = prepare_url_list(c)
@@ -86,10 +104,7 @@ context("Lua util - extract_specific_urls", function()
       end
 
       local actual = util.extract_specific_urls(task_object, c.limit, c.need_emails, c.filter, c.prefix)
-      local actual_result = fun.totable(fun.map(
-        function(u) return u:get_host() end,
-        actual
-      ))
+      local actual_result = prepare_actual_result(actual)
 
       --[[
       local s = logger.slog("%1 =?= %2", c.expect, actual_result)
@@ -111,10 +126,7 @@ context("Lua util - extract_specific_urls", function()
         prefix = c.prefix,
       })
 
-      local actual_result = fun.totable(fun.map(
-        function(u) return u:get_host() end,
-        actual
-      ))
+      local actual_result = prepare_actual_result(actual)
 
       --[[
       local s = logger.slog("case[%1] %2 =?= %3", i, c.expect, actual_result)
@@ -124,4 +136,91 @@ context("Lua util - extract_specific_urls", function()
 
     end)
   end
+
+--[[ ******************* kinda functional *************************************** ]]
+  local test_dir = string.gsub(debug.getinfo(1).source, "^@(.+/)[^/]+$", "%1")
+  local tld_file = string.format('%s/%s', test_dir, "test_tld.dat")
+
+  local config = {
+    options = {
+      filters = {'spf', 'dkim', 'regexp'},
+      url_tld = tld_file,
+      dns = {
+        nameserver = {'8.8.8.8'}
+      },
+    },
+    logging = {
+      type = 'console',
+      level = 'debug'
+    },
+    metric = {
+      name = 'default',
+      actions = {
+        reject = 100500,
+      },
+      unknown_weight = 1
+    }
+  }
+
+  test("extract_specific_urls - from email", function()
+    local cfg = rspamd_util.config_from_ucl(config, "INIT_URL,INIT_LIBS,INIT_SYMCACHE,INIT_VALIDATE,INIT_PRELOAD_MAPS")
+    assert_not_nil(cfg)
+
+    local msg = [[
+From: <>
+To:
+Subject: test
+Content-Type: multipart/alternative;
+    boundary="_000_6be055295eab48a5af7ad4022f33e2d0_"
+
+--_000_6be055295eab48a5af7ad4022f33e2d0_
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+Hello world
+
+
+--_000_6be055295eab48a5af7ad4022f33e2d0_
+Content-Type: text/html; charset="utf-8"
+
+
+http://example.net
+http://example1.net
+http://example2.net
+http://example3.net
+http://example4.net
+http://domain1.com
+http://domain2.com
+http://domain3.com
+http://domain4.com
+http://domain5.com
+http://example.net/
+
+]]
+    local expect = {"example.net", "domain.com"}
+    local res,task = rspamd_task.load_from_string(msg, cfg)
+
+    if not res then
+      assert_true(false, "failed to load message")
+    end
+
+    if not task:process_message() then
+      assert_true(false, "failed to process message")
+    end
+
+    local actual = util.extract_specific_urls({
+      task = task,
+      limit = 2,
+      esld_limit = 2,
+    })
+
+    local actual_result = prepare_actual_result(actual)
+
+    --[[
+    local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result)
+    print(s) --]]
+
+    assert_equal("domain.com", actual_result[1], "checking that first url is the one with highest suspiciousness level")
+
+  end)
 end)
\ No newline at end of file
-- 
2.39.5