diff options
author | Mikhail Galanin <mgalanin@mimecast.com> | 2018-07-31 15:53:51 +0100 |
---|---|---|
committer | Mikhail Galanin <mgalanin@mimecast.com> | 2018-07-31 15:53:51 +0100 |
commit | daa0c73eee23fea675fc733482248722657f6a56 (patch) | |
tree | cd1b224cede1b8766f865789261801a03ba875ee /test/lua | |
parent | 8da3d333136e031cd4888dbd850eb60406cee5e7 (diff) | |
download | rspamd-daa0c73eee23fea675fc733482248722657f6a56.tar.gz rspamd-daa0c73eee23fea675fc733482248722657f6a56.zip |
[Minor] added test parsing html/phished urls
Diffstat (limited to 'test/lua')
-rw-r--r-- | test/lua/unit/lua_util.extract_specific_urls.lua | 123 |
1 files changed, 111 insertions, 12 deletions
diff --git a/test/lua/unit/lua_util.extract_specific_urls.lua b/test/lua/unit/lua_util.extract_specific_urls.lua index 424cca5f5..9c8e4e187 100644 --- a/test/lua/unit/lua_util.extract_specific_urls.lua +++ b/test/lua/unit/lua_util.extract_specific_urls.lua @@ -5,6 +5,8 @@ context("Lua util - extract_specific_urls", function() local url = require "rspamd_url" local logger = require "rspamd_logger" local ffi = require "ffi" + local rspamd_util = require "rspamd_util" + local rspamd_task = require "rspamd_task" ffi.cdef[[ void rspamd_url_init (const char *tld_file); @@ -64,19 +66,35 @@ context("Lua util - extract_specific_urls", function() esld_limit = 2, need_emails = true, prefix = 'p' + }, + { + input = {"abc@a.google.com", "b.google.com", "c.google.com", "a.net", "bb.net", "a.bb.net", "b.bb.net"}, + expect = {"abc@a.google.com", "a.bb.net", "b.google.com", "a.net", "bb.net", "abc@a.google.com"}, + filter = nil, + limit = 9999, + esld_limit = 2, + need_emails = true, + prefix = 'p' } } + local function prepare_actual_result(actual) + return fun.totable(fun.map( + function(u) return u:get_raw():gsub('^%w+://', '') end, + actual + )) + end + local pool = mpool.create() for i,c in ipairs(cases) do local function prepare_url_list(c) return fun.totable(fun.map( - function (u) return url.create(pool, u) end, - c.input or url_list - )) - end + function (u) return url.create(pool, u) end, + c.input or url_list + )) + end test("extract_specific_urls, backward compatibility case #" .. i, function() task_object.urls = prepare_url_list(c) @@ -86,10 +104,7 @@ context("Lua util - extract_specific_urls", function() end local actual = util.extract_specific_urls(task_object, c.limit, c.need_emails, c.filter, c.prefix) - local actual_result = fun.totable(fun.map( - function(u) return u:get_host() end, - actual - )) + local actual_result = prepare_actual_result(actual) --[[ local s = logger.slog("%1 =?= %2", c.expect, actual_result) @@ -111,10 +126,7 @@ context("Lua util - extract_specific_urls", function() prefix = c.prefix, }) - local actual_result = fun.totable(fun.map( - function(u) return u:get_host() end, - actual - )) + local actual_result = prepare_actual_result(actual) --[[ local s = logger.slog("case[%1] %2 =?= %3", i, c.expect, actual_result) @@ -124,4 +136,91 @@ context("Lua util - extract_specific_urls", function() end) end + +--[[ ******************* kinda functional *************************************** ]] + local test_dir = string.gsub(debug.getinfo(1).source, "^@(.+/)[^/]+$", "%1") + local tld_file = string.format('%s/%s', test_dir, "test_tld.dat") + + local config = { + options = { + filters = {'spf', 'dkim', 'regexp'}, + url_tld = tld_file, + dns = { + nameserver = {'8.8.8.8'} + }, + }, + logging = { + type = 'console', + level = 'debug' + }, + metric = { + name = 'default', + actions = { + reject = 100500, + }, + unknown_weight = 1 + } + } + + test("extract_specific_urls - from email", function() + local cfg = rspamd_util.config_from_ucl(config, "INIT_URL,INIT_LIBS,INIT_SYMCACHE,INIT_VALIDATE,INIT_PRELOAD_MAPS") + assert_not_nil(cfg) + + local msg = [[ +From: <> +To: <nobody@example.com> +Subject: test +Content-Type: multipart/alternative; + boundary="_000_6be055295eab48a5af7ad4022f33e2d0_" + +--_000_6be055295eab48a5af7ad4022f33e2d0_ +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: base64 + +Hello world + + +--_000_6be055295eab48a5af7ad4022f33e2d0_ +Content-Type: text/html; charset="utf-8" + +<html><body> +<a href="http://example.net">http://example.net</a> +<a href="http://example1.net">http://example1.net</a> +<a href="http://example2.net">http://example2.net</a> +<a href="http://example3.net">http://example3.net</a> +<a href="http://example4.net">http://example4.net</a> +<a href="http://domain1.com">http://domain1.com</a> +<a href="http://domain2.com">http://domain2.com</a> +<a href="http://domain3.com">http://domain3.com</a> +<a href="http://domain4.com">http://domain4.com</a> +<a href="http://domain5.com">http://domain5.com</a> +<a href="http://domain.com">http://example.net/</a> +</html> +]] + local expect = {"example.net", "domain.com"} + local res,task = rspamd_task.load_from_string(msg, rspamd_config) + + if not res then + assert_true(false, "failed to load message") + end + + if not task:process_message() then + assert_true(false, "failed to process message") + end + + local actual = util.extract_specific_urls({ + task = task, + limit = 2, + esld_limit = 2, + }) + + local actual_result = prepare_actual_result(actual) + + --[[ + local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result) + print(s) --]] + + assert_equal("domain.com", actual_result[1], "checking that first url is the one with highest suspiciousness level") + + end) end)
\ No newline at end of file |