diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-08-29 13:47:44 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-08-29 13:47:44 +0100 |
commit | fb43ff309cad5f01a99941d413ba1e91e60aa0ed (patch) | |
tree | 165b34c504aba3cea781bfca391290fc535ff65d /test | |
parent | 7ed1a20f98ffdc226ec4a3e1b57d4f10f81cb423 (diff) | |
download | rspamd-fb43ff309cad5f01a99941d413ba1e91e60aa0ed.tar.gz rspamd-fb43ff309cad5f01a99941d413ba1e91e60aa0ed.zip |
[Test] Improve urls extraction tests
Diffstat (limited to 'test')
-rw-r--r-- | test/lua/unit/lua_util.extract_specific_urls.lua | 245 |
1 files changed, 189 insertions, 56 deletions
diff --git a/test/lua/unit/lua_util.extract_specific_urls.lua b/test/lua/unit/lua_util.extract_specific_urls.lua index c84a7ca8d..73491ecb3 100644 --- a/test/lua/unit/lua_util.extract_specific_urls.lua +++ b/test/lua/unit/lua_util.extract_specific_urls.lua @@ -1,14 +1,78 @@ -local msg - -context("Lua util - extract_specific_urls", function() - local util = require 'lua_util' - local mpool = require "rspamd_mempool" - local fun = require "fun" - local url = require "rspamd_url" - local logger = require "rspamd_logger" - local rspamd_util = require "rspamd_util" - local rspamd_task = require "rspamd_task" +local msg, msg_img +local logger = require "rspamd_logger" +local rspamd_util = require "rspamd_util" +local rspamd_task = require "rspamd_task" +local util = require 'lua_util' +local mpool = require "rspamd_mempool" +local fun = require "fun" +local url = require "rspamd_url" + +--[=========[ ******************* message ******************* ]=========] +msg = [[ +From: <> +To: <nobody@example.com> +Subject: test +Content-Type: multipart/alternative; + boundary="_000_6be055295eab48a5af7ad4022f33e2d0_" + +--_000_6be055295eab48a5af7ad4022f33e2d0_ +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: base64 + +Hello world + + +--_000_6be055295eab48a5af7ad4022f33e2d0_ +Content-Type: text/html; charset="utf-8" + +<html><body> +<a href="http://example.net">http://example.net</a> +<a href="http://example1.net">http://example1.net</a> +<a href="http://example2.net">http://example2.net</a> +<a href="http://example3.net">http://example3.net</a> +<a href="http://example4.net">http://example4.net</a> +<a href="http://domain1.com">http://domain1.com</a> +<a href="http://domain2.com">http://domain2.com</a> +<a href="http://domain3.com">http://domain3.com</a> +<a href="http://domain4.com">http://domain4.com</a> +<a href="http://domain5.com">http://domain5.com</a> +<a href="http://domain.com">http://example.net/</a> +<img src="http://example5.org">hahaha</img> +</html> +]] +msg_img = [[ +From: <> +To: <nobody@example.com> +Subject: test +Content-Type: multipart/alternative; + boundary="_000_6be055295eab48a5af7ad4022f33e2d0_" + +--_000_6be055295eab48a5af7ad4022f33e2d0_ +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: base64 + +Hello world + + +--_000_6be055295eab48a5af7ad4022f33e2d0_ +Content-Type: text/html; charset="utf-8" + +<html><body> +<a href="http://example.net">http://example.net</a> +<a href="http://domain.com">http://example.net</a> +<img src="http://example5.org">hahaha</img> +</html> +]] + +local function prepare_actual_result(actual) + return fun.totable(fun.map( + function(u) return u:get_raw():gsub('^%w+://', '') end, + actual + )) +end + +context("Lua util - extract_specific_urls plain", function() local test_helper = require "rspamd_test_helper" test_helper.init_url_parser() @@ -74,13 +138,6 @@ context("Lua util - extract_specific_urls", function() } } - local function prepare_actual_result(actual) - return fun.totable(fun.map( - function(u) return u:get_raw():gsub('^%w+://', '') end, - actual - )) - end - local pool = mpool.create() local function prepare_url_list(list) @@ -148,7 +205,9 @@ context("Lua util - extract_specific_urls", function() table.sort(expect) assert_rspamd_table_eq({actual = actual_result, expect = expect}) end) +end) +context("Lua util - extract_specific_urls message", function() --[[ ******************* kinda functional *************************************** ]] local test_dir = string.gsub(debug.getinfo(1).source, "^@(.+/)[^/]+$", "%1") @@ -175,25 +234,65 @@ context("Lua util - extract_specific_urls", function() } } - test("extract_specific_urls - from email", function() - local cfg = rspamd_util.config_from_ucl(config, "INIT_URL,INIT_LIBS,INIT_SYMCACHE,INIT_VALIDATE,INIT_PRELOAD_MAPS") - assert_not_nil(cfg) + local cfg = rspamd_util.config_from_ucl(config, "INIT_URL,INIT_LIBS,INIT_SYMCACHE,INIT_VALIDATE,INIT_PRELOAD_MAPS") + local res,task = rspamd_task.load_from_string(msg, cfg) + + if not res then + assert(false, "failed to load message") + end + + if not task:process_message() then + assert(false, "failed to process message") + end + + test("extract_specific_urls - from email 1 limit", function() + local actual = util.extract_specific_urls({ + task = task, + limit = 1, + esld_limit = 1, + }) + + local actual_result = prepare_actual_result(actual) + + --[[ + local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result) + print(s) --]] + + assert_rspamd_table_eq({actual = actual_result, expect = {"domain.com"}}) + + end) + test("extract_specific_urls - from email 2 limit", function() + local actual = util.extract_specific_urls({ + task = task, + limit = 2, + esld_limit = 1, + }) + + local actual_result = prepare_actual_result(actual) - local expect = {"example.net", "domain.com"} - local res,task = rspamd_task.load_from_string(msg, rspamd_config) + --[[ + local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result) + print(s) --]] + + assert_rspamd_table_eq({actual = actual_result, expect = {"domain.com", "example.net"}}) + + end) - if not res then - assert_true(false, "failed to load message") - end + res,task = rspamd_task.load_from_string(msg_img, rspamd_config) - if not task:process_message() then - assert_true(false, "failed to process message") - end + if not res then + assert_true(false, "failed to load message") + end + if not task:process_message() then + assert_true(false, "failed to process message") + end + test("extract_specific_urls - from email image 1 limit", function() local actual = util.extract_specific_urls({ task = task, limit = 1, esld_limit = 1, + need_images = false, }) local actual_result = prepare_actual_result(actual) @@ -205,37 +304,71 @@ context("Lua util - extract_specific_urls", function() assert_rspamd_table_eq({actual = actual_result, expect = {"domain.com"}}) end) -end) + test("extract_specific_urls - from email image 2 limit", function() + local actual = util.extract_specific_urls({ + task = task, + limit = 2, + esld_limit = 1, + need_images = false, + }) ---[=========[ ******************* message ******************* ]=========] -msg = [[ -From: <> -To: <nobody@example.com> -Subject: test -Content-Type: multipart/alternative; - boundary="_000_6be055295eab48a5af7ad4022f33e2d0_" + local actual_result = prepare_actual_result(actual) ---_000_6be055295eab48a5af7ad4022f33e2d0_ -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: base64 + --[[ + local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result) + print(s) --]] -Hello world + assert_rspamd_table_eq({actual = actual_result, expect = {"domain.com", "example.net"}}) + end) + test("extract_specific_urls - from email image 3 limit, no images", function() + local actual = util.extract_specific_urls({ + task = task, + limit = 3, + esld_limit = 1, + need_images = false, + }) ---_000_6be055295eab48a5af7ad4022f33e2d0_ -Content-Type: text/html; charset="utf-8" + local actual_result = prepare_actual_result(actual) -<html><body> -<a href="http://example.net">http://example.net</a> -<a href="http://example1.net">http://example1.net</a> -<a href="http://example2.net">http://example2.net</a> -<a href="http://example3.net">http://example3.net</a> -<a href="http://example4.net">http://example4.net</a> -<a href="http://domain1.com">http://domain1.com</a> -<a href="http://domain2.com">http://domain2.com</a> -<a href="http://domain3.com">http://domain3.com</a> -<a href="http://domain4.com">http://domain4.com</a> -<a href="http://domain5.com">http://domain5.com</a> -<a href="http://domain.com">http://example.net/</a> -</html> -]] + --[[ + local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result) + print(s) --]] + + assert_rspamd_table_eq({actual = actual_result, expect = {"domain.com", "example.net"}}) + end) + test("extract_specific_urls - from email image 3 limit, has images", function() + local actual = util.extract_specific_urls({ + task = task, + limit = 3, + esld_limit = 1, + need_images = true, + }) + + local actual_result = prepare_actual_result(actual) + + --[[ + local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result) + print(s) --]] + + assert_rspamd_table_eq({actual = actual_result, + expect = {"domain.com", "example.net", "example5.org"}}) + end) + test("extract_specific_urls - from email image 2 limit, has images", function() + local actual = util.extract_specific_urls({ + task = task, + limit = 2, + esld_limit = 1, + need_images = true, + }) + + local actual_result = prepare_actual_result(actual) + + --[[ + local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result) + print(s) --]] + + assert_rspamd_table_eq({actual = actual_result, + expect = {"domain.com", "example.net"}}) + end) +end) |