]> source.dussan.org Git - rspamd.git/commitdiff
[Minor] added test parsing html/phished urls
authorMikhail Galanin <mgalanin@mimecast.com>
Tue, 31 Jul 2018 14:53:51 +0000 (15:53 +0100)
committerMikhail Galanin <mgalanin@mimecast.com>
Tue, 31 Jul 2018 14:53:51 +0000 (15:53 +0100)
lualib/lua_util.lua
src/lua/lua_task.c
src/lua/lua_util.c
test/lua/unit/lua_util.extract_specific_urls.lua

index 86cbc9ff49756c4bd12ce82524713ffef4e45c9d..ba5843ff6b730cb8209c09375cd40a3db25a235f 100644 (file)
@@ -595,7 +595,7 @@ exports.extract_specific_urls = function(params_or_task, lim, need_emails, filte
         else
           if u:get_user() then
             table.insert(res, u)
-          elseif u:is_subject() then
+          elseif u:is_subject() or u:is_phished() then
             table.insert(res, u)
           end
         end
index ca10a94e7d9c002ea9f74bc9f59a363b2ed6cf9d..e4fcea3a4cad3d14a45f1f8406c74d39ce8a1a74 100644 (file)
@@ -53,6 +53,12 @@ end
  * @return {boolean},{rspamd_task|error} status + new task or error message
  */
 LUA_FUNCTION_DEF (task, load_from_file);
+/***
+ * @function rspamd_task.load_from_string(message[, cfg])
+ * Loads a message from a string
+ * @return {boolean},{rspamd_task|error} status + new task or error message
+ */
+LUA_FUNCTION_DEF (task, load_from_string);
 
 LUA_FUNCTION_DEF (task, get_message);
 /***
@@ -908,6 +914,7 @@ LUA_FUNCTION_DEF (task, get_stat_tokens);
 
 static const struct luaL_reg tasklib_f[] = {
        LUA_INTERFACE_DEF (task, load_from_file),
+       LUA_INTERFACE_DEF (task, load_from_string),
        {NULL, NULL}
 };
 
@@ -1237,7 +1244,7 @@ lua_task_unmap_dtor (gpointer p)
        }
 }
 
-static int
+static gint
 lua_task_load_from_file (lua_State * L)
 {
        struct rspamd_task *task = NULL, **ptask;
@@ -1295,6 +1302,56 @@ lua_task_load_from_file (lua_State * L)
        return 2;
 }
 
+/*
+ * Lua: rspamd_task.load_from_string(message[, cfg])
+ *
+ * Creates a new task whose message body is a private copy of the Lua string
+ * passed as argument 1. If argument 2 is an rspamd{config} userdata, the task
+ * is created with that config; otherwise it is created with a NULL config.
+ *
+ * Pushes two values: boolean true and the new rspamd{task} userdata.
+ * A non-string first argument raises a Lua error (via luaL_checklstring).
+ */
+static gint
+lua_task_load_from_string (lua_State * L)
+{
+       struct rspamd_task *task = NULL, **ptask;
+       const gchar *str_message;
+       gchar *msg_copy;
+       gsize message_len = 0;
+       struct rspamd_config *cfg = NULL;
+
+       /* Validates the type and returns the exact byte length in one call */
+       str_message = luaL_checklstring (L, 1, &message_len);
+
+       if (lua_type (L, 2) == LUA_TUSERDATA) {
+               gpointer p;
+               p = rspamd_lua_check_udata_maybe (L, 2, "rspamd{config}");
+
+               if (p) {
+                       cfg = *(struct rspamd_config **)p;
+               }
+       }
+
+       task = rspamd_task_new (NULL, cfg, NULL, NULL);
+
+       /*
+        * The pointer returned by Lua is only guaranteed to stay valid while
+        * the string is on the Lua stack, but the task outlives this call.
+        * Keep a private copy (Lua strings carry a trailing '\0', so copy it
+        * as well) and release it together with the task pool.
+        *
+        * NOTE(review): lua_task_unmap_dtor is, judging by its name, the
+        * destructor for mmap'ed file messages; it must not be registered
+        * for a heap buffer, hence g_free here.
+        */
+       msg_copy = g_memdup (str_message, message_len + 1);
+       task->msg.begin = msg_copy;
+       task->msg.len   = message_len;
+       rspamd_mempool_add_destructor (task->task_pool,
+                                                                  (rspamd_mempool_destruct_t)g_free, msg_copy);
+
+       lua_pushboolean (L, TRUE);
+       ptask = lua_newuserdata (L, sizeof (*ptask));
+       *ptask = task;
+       rspamd_lua_setclass (L, "rspamd{task}", -1);
+
+       return 2;
+}
+
+
 static int
 lua_task_get_mempool (lua_State * L)
 {
index 063a7aab7340c769e3588a41f8a6557e30c3b5d2..f0feb7f2e26aa7988ff150bbc8025a4fa0e267f1 100644 (file)
@@ -45,7 +45,7 @@ LUA_FUNCTION_DEF (util, create_event_base);
  */
 LUA_FUNCTION_DEF (util, load_rspamd_config);
 /***
- * @function util.config_from_ucl(any)
+ * @function util.config_from_ucl(any, string)
  * Load rspamd config from ucl represented by any lua table
  * @return {config} new configuration object suitable for access
  */
@@ -665,18 +665,68 @@ lua_util_load_rspamd_config (lua_State *L)
        return 1;
 }
 
+/*
+ * Converts a list of option names separated by ',' or ';' into a bitmask of
+ * RSPAMD_CONFIG_INIT_* flags. Names are compared case-insensitively; a name
+ * that matches none of the known options is reported through msg_warn and
+ * contributes nothing to the result.
+ */
+static gint
+parse_config_options(const char *str_options)
+{
+       const struct {
+               const gchar *name;
+               gint flag;
+       } known_options[] = {
+               {"INIT_URL", RSPAMD_CONFIG_INIT_URL},
+               {"INIT_LIBS", RSPAMD_CONFIG_INIT_LIBS},
+               {"INIT_SYMCACHE", RSPAMD_CONFIG_INIT_SYMCACHE},
+               {"INIT_VALIDATE", RSPAMD_CONFIG_INIT_VALIDATE},
+               {"INIT_NO_TLD", RSPAMD_CONFIG_INIT_NO_TLD},
+               {"INIT_PRELOAD_MAPS", RSPAMD_CONFIG_INIT_PRELOAD_MAPS},
+       };
+       const guint n_known = sizeof (known_options) / sizeof (known_options[0]);
+       gint flags = 0;
+       gchar **tokens, **cur;
+       guint j;
+
+       tokens = g_strsplit_set (str_options, ",;", -1);
+       if (tokens == NULL) {
+               return flags;
+       }
+
+       /* The vector is NULL-terminated, so walk it until the sentinel */
+       for (cur = tokens; *cur != NULL; cur ++) {
+               for (j = 0; j < n_known; j ++) {
+                       if (g_ascii_strcasecmp (*cur, known_options[j].name) == 0) {
+                               flags |= known_options[j].flag;
+                               break;
+                       }
+               }
+
+               /* Inner loop ran to completion: nothing matched this token */
+               if (j == n_known) {
+                       msg_warn ("bad type: %s", *cur);
+               }
+       }
+
+       g_strfreev (tokens);
+
+       return flags;
+}
+
+
 static gint
 lua_util_config_from_ucl (lua_State *L)
 {
-       struct rspamd_config *cfg, **pcfg;
+       struct rspamd_config *cfg = NULL, **pcfg;
        struct rspamd_rcl_section *top;
        GError *err = NULL;
        ucl_object_t *obj;
+       const char *str_options = NULL;
+       gint int_options = 0;
+
 
        obj = ucl_object_lua_import (L, 1);
+       if (lua_gettop (L) == 2) {
+               if (lua_type (L, 2) == LUA_TSTRING) {
+                       str_options = lua_tostring (L, 2);
+                       int_options = parse_config_options(str_options);
+               }
+               else {
+                       msg_err_config ("config_from_ucl: second parameter is expected to be string");
+                       ucl_object_unref (obj);
+                       lua_pushnil (L);
+               }
+       }
 
        if (obj) {
-               cfg = g_malloc0 (sizeof (struct rspamd_config));
                cfg = rspamd_config_new (RSPAMD_CONFIG_INIT_SKIP_LUA);
                cfg->lua_state = L;
 
@@ -690,7 +740,7 @@ lua_util_config_from_ucl (lua_State *L)
                        lua_pushnil (L);
                }
                else {
-                       rspamd_config_post_load (cfg, 0);
+                       rspamd_config_post_load (cfg, int_options);
                        pcfg = lua_newuserdata (L, sizeof (struct rspamd_config *));
                        rspamd_lua_setclass (L, "rspamd{config}", -1);
                        *pcfg = cfg;
index 424cca5f5077eb44be1819e42942943cdae02639..9c8e4e187a9450707ff42bb5a076705e39840d40 100644 (file)
@@ -5,6 +5,8 @@ context("Lua util - extract_specific_urls", function()
   local url   = require "rspamd_url"
   local logger = require "rspamd_logger"
   local ffi = require "ffi"
+  local rspamd_util = require "rspamd_util"
+  local rspamd_task = require "rspamd_task"
 
   ffi.cdef[[
   void rspamd_url_init (const char *tld_file);
@@ -64,19 +66,35 @@ context("Lua util - extract_specific_urls", function()
       esld_limit = 2,
       need_emails = true,
       prefix = 'p'
+    },
+    {
+      input  = {"abc@a.google.com", "b.google.com", "c.google.com", "a.net", "bb.net", "a.bb.net", "b.bb.net"},
+      expect = {"abc@a.google.com", "a.bb.net", "b.google.com", "a.net", "bb.net", "abc@a.google.com"},
+      filter = nil,
+      limit = 9999,
+      esld_limit = 2,
+      need_emails = true,
+      prefix = 'p'
     }
   }
 
+  -- Turns a list of url objects into a list of strings: the raw form of each
+  -- url (u:get_raw()) with a leading "<scheme>://" removed when present.
+  local function prepare_actual_result(actual)
+    local function strip_scheme(u)
+      -- gsub returns (string, count); the parentheses keep only the string so
+      -- the replacement count is not passed on through the map pipeline
+      return (u:get_raw():gsub('^%w+://', ''))
+    end
+
+    return fun.totable(fun.map(strip_scheme, actual))
+  end
+
   local pool = mpool.create()
 
   for i,c in ipairs(cases) do
 
     local function prepare_url_list(c)
       return fun.totable(fun.map(
-        function (u) return url.create(pool, u) end,
-        c.input or url_list
-      ))
-    end
+    function (u) return url.create(pool, u) end,
+    c.input or url_list
+    ))
+  end
 
     test("extract_specific_urls, backward compatibility case #" .. i, function()
       task_object.urls = prepare_url_list(c)
@@ -86,10 +104,7 @@ context("Lua util - extract_specific_urls", function()
       end
       local actual = util.extract_specific_urls(task_object, c.limit, c.need_emails, c.filter, c.prefix)
 
-      local actual_result = fun.totable(fun.map(
-        function(u) return u:get_host() end,
-        actual
-      ))
+      local actual_result = prepare_actual_result(actual)
 
       --[[
         local s = logger.slog("%1 =?= %2", c.expect, actual_result)
@@ -111,10 +126,7 @@ context("Lua util - extract_specific_urls", function()
         prefix = c.prefix,
       })
 
-      local actual_result = fun.totable(fun.map(
-        function(u) return u:get_host() end,
-        actual
-      ))
+      local actual_result = prepare_actual_result(actual)
 
       --[[
         local s = logger.slog("case[%1] %2 =?= %3", i, c.expect, actual_result)
@@ -124,4 +136,91 @@ context("Lua util - extract_specific_urls", function()
 
     end)
   end
+
+--[[ ******************* kinda functional *************************************** ]]
+  -- Directory of this test file: strip the leading '@' and the file name from
+  -- the chunk source reported by debug.getinfo (the captured part keeps its
+  -- trailing '/').
+  local test_dir = string.gsub(debug.getinfo(1).source, "^@(.+/)[^/]+$", "%1")
+  -- Path of the TLD list referenced below as options.url_tld
+  local tld_file = string.format('%s/%s', test_dir, "test_tld.dat")
+
+  -- Configuration table handed to rspamd_util.config_from_ucl by the
+  -- "from email" test
+  local config = {
+    options = {
+      filters = {'spf', 'dkim', 'regexp'},
+      url_tld = tld_file,
+      dns = {
+        nameserver = {'8.8.8.8'}
+      },
+    },
+    logging = {
+      type = 'console',
+      level = 'debug'
+    },
+    metric = {
+      name = 'default',
+      actions = {
+        reject = 100500,
+      },
+      unknown_weight = 1
+    }
+  }
+
+  -- Builds a config from the table above, loads a MIME message from a string,
+  -- processes it and checks which url extract_specific_urls returns first.
+  test("extract_specific_urls - from email", function()
+    local cfg = rspamd_util.config_from_ucl(config, "INIT_URL,INIT_LIBS,INIT_SYMCACHE,INIT_VALIDATE,INIT_PRELOAD_MAPS")
+    assert_not_nil(cfg)
+
+    -- The HTML part holds ten links whose text equals their href, plus a last
+    -- anchor whose href (domain.com) differs from its visible text
+    -- (example.net) -- presumably the link meant to be detected as phished.
+    local msg = [[
+From: <>
+To: <nobody@example.com>
+Subject: test
+Content-Type: multipart/alternative;
+    boundary="_000_6be055295eab48a5af7ad4022f33e2d0_"
+
+--_000_6be055295eab48a5af7ad4022f33e2d0_
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+Hello world
+
+
+--_000_6be055295eab48a5af7ad4022f33e2d0_
+Content-Type: text/html; charset="utf-8"
+
+<html><body>
+<a href="http://example.net">http://example.net</a>
+<a href="http://example1.net">http://example1.net</a>
+<a href="http://example2.net">http://example2.net</a>
+<a href="http://example3.net">http://example3.net</a>
+<a href="http://example4.net">http://example4.net</a>
+<a href="http://domain1.com">http://domain1.com</a>
+<a href="http://domain2.com">http://domain2.com</a>
+<a href="http://domain3.com">http://domain3.com</a>
+<a href="http://domain4.com">http://domain4.com</a>
+<a href="http://domain5.com">http://domain5.com</a>
+<a href="http://domain.com">http://example.net/</a>
+</html>
+]]
+    -- NOTE(review): expect is only referenced by the commented-out logging
+    -- below; no assertion uses it.
+    local expect = {"example.net", "domain.com"}
+    -- NOTE(review): the task is created with the global rspamd_config, not
+    -- with the cfg built above -- confirm this is intended.
+    local res,task = rspamd_task.load_from_string(msg, rspamd_config)
+
+    if not res then
+      assert_true(false, "failed to load message")
+    end
+
+    if not task:process_message() then
+      assert_true(false, "failed to process message")
+    end
+
+    -- Table-style call: ask for at most two urls from the processed task
+    local actual = util.extract_specific_urls({
+      task = task,
+      limit = 2,
+      esld_limit = 2,
+    })
+
+    local actual_result = prepare_actual_result(actual)
+
+    --[[
+      local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result)
+      print(s) --]]
+
+    -- Only the first returned url is checked
+    assert_equal("domain.com", actual_result[1], "checking that first url is the one with highest suspiciousness level")
+
+  end)
 end)
\ No newline at end of file