diff options
Diffstat (limited to 'test/lua/unit')
-rw-r--r-- | test/lua/unit/html.lua | 414 | ||||
-rw-r--r-- | test/lua/unit/logger.lua | 95 | ||||
-rw-r--r-- | test/lua/unit/rspamd_resolver.lua | 62 |
3 files changed, 530 insertions, 41 deletions
diff --git a/test/lua/unit/html.lua b/test/lua/unit/html.lua index 81c52ec1b..1802dc984 100644 --- a/test/lua/unit/html.lua +++ b/test/lua/unit/html.lua @@ -1,11 +1,10 @@ context("HTML processing", function() local rspamd_util = require("rspamd_util") - local logger = require("rspamd_logger") local cases = { - -- Entities - {[[<html><body>.firebaseapp.com</body></html>]], - [[.firebaseapp.com]]}, - {[[ + -- Entities + { [[<html><body>.firebaseapp.com</body></html>]], + [[.firebaseapp.com]] }, + { [[ <?xml version="1.0" encoding="iso-8859-1"?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" @@ -22,8 +21,8 @@ context("HTML processing", function() </p> </body> - </html>]], 'Hello, world!\n'}, - {[[ + </html>]], 'Hello, world!\n' }, + { [[ <!DOCTYPE html> <html lang="en"> <head> @@ -39,8 +38,8 @@ context("HTML processing", function() Hello, world! </body> </html> - ]], 'Hello, world!'}, - {[[ + ]], 'Hello, world!' }, + { [[ <html lang="en"> <head> <meta charset="utf-8"> @@ -56,8 +55,8 @@ context("HTML processing", function() </div> </body> </html> - ]], 'Hello, world!\ntest\ncontentmore content\ncontent inside div\n'}, - {[[ + ]], 'Hello, world!\ntest\ncontentmore content\ncontent inside div\n' }, + { [[ <html lang="en"> <head> <meta charset="utf-8"> @@ -83,8 +82,8 @@ context("HTML processing", function() </body> </html> - ]], 'content\nheada headb\ndata1 data2\n'}, - {[[ + ]], 'content\nheada headb\ndata1 data2\n' }, + { [[ <html lang="en"> <head> <meta charset="utf-8"> @@ -97,17 +96,398 @@ context("HTML processing", function() a b a > b a < b a & b 'a "a" </body> </html> - ]], 'a b a > b a < b a & b \'a "a"'}, + ]], 'a b a > b a < b a & b \'a "a"' }, } - for i,c in ipairs(cases) do + for i, c in ipairs(cases) do test("Extract text from HTML " .. tostring(i), function() local t = rspamd_util.parse_html(c[1]) assert_not_nil(t) assert_equal(c[2], tostring(t), string.format("'%s' doesn't match with '%s'", - c[2], t)) - + c[2], t)) end) end + + -- Test cases for new HTML tag API methods + local function parse_html_and_extract_tags(html_content, pool) + local rspamd_parsers = require("rspamd_parsers") + + local parsed = rspamd_parsers.parse_html_content(html_content, pool) + local tags = {} + + if parsed then + parsed:foreach_tag("any", function(tag, content_length, is_leaf) + table.insert(tags, tag) + return false + end) + end + + return parsed, tags + end + + test("HTML tag get_all_attributes basic test", function() + local rspamd_mempool = require("rspamd_mempool") + local pool = rspamd_mempool.create() + + local html = [[<div class="test-class" id="test-id" style="color: red;" width="100">content</div>]] + local parsed, tags = parse_html_and_extract_tags(html, pool) + + assert_not_nil(parsed) + assert_true(#tags > 0) + + -- Find the div tag + local div_tag = nil + for _, tag in ipairs(tags) do + if tag:get_type() == "div" then + div_tag = tag + break + end + end + + assert_not_nil(div_tag) + + local attrs = div_tag:get_all_attributes() + assert_not_nil(attrs) + + -- Check that we have the expected attributes + assert_equal("test-class", attrs["class"]) + assert_equal("test-id", attrs["id"]) + assert_equal("color: red;", attrs["style"]) + assert_equal("100", attrs["width"]) + + pool:destroy() + end) + + test("HTML tag has_attribute test", function() + local rspamd_mempool = require("rspamd_mempool") + local pool = rspamd_mempool.create() + + local html = [[<img src="test.jpg" width="100" height="50" alt="Test image" hidden />]] + local parsed, tags = parse_html_and_extract_tags(html, pool) + + assert_not_nil(parsed) + + local img_tag = nil + for _, tag in ipairs(tags) do + if tag:get_type() == "img" then + img_tag = tag + break + end + end + + assert_not_nil(img_tag) + + -- Test existing attributes + assert_true(img_tag:has_attribute("src")) + assert_true(img_tag:has_attribute("width")) + assert_true(img_tag:has_attribute("height")) + assert_true(img_tag:has_attribute("alt")) + assert_true(img_tag:has_attribute("hidden")) + + -- Test non-existing attributes + assert_false(img_tag:has_attribute("nonexistent")) + assert_false(img_tag:has_attribute("class")) + assert_false(img_tag:has_attribute("")) + + pool:destroy() + end) + + test("HTML tag get_numeric_attribute test", function() + local rspamd_mempool = require("rspamd_mempool") + local pool = rspamd_mempool.create() + + local html = [[<div width="200" height="150" font-size="14" opacity="0.8" tabindex="5">content</div>]] + local parsed, tags = parse_html_and_extract_tags(html, pool) + + assert_not_nil(parsed) + + local div_tag = nil + for _, tag in ipairs(tags) do + if tag:get_type() == "div" then + div_tag = tag + break + end + end + + assert_not_nil(div_tag) + + -- Test numeric attributes + assert_equal(200, div_tag:get_numeric_attribute("width")) + assert_equal(150, div_tag:get_numeric_attribute("height")) + assert_equal(14, div_tag:get_numeric_attribute("font-size")) + + -- Test opacity with floating-point tolerance + local opacity = div_tag:get_numeric_attribute("opacity") + assert_not_nil(opacity) + assert_true(math.abs(opacity - 0.8) < 0.01, string.format("Expected opacity ~0.8, got %f", opacity)) + + assert_equal(5, div_tag:get_numeric_attribute("tabindex")) + + -- Test non-numeric attributes + assert_nil(div_tag:get_numeric_attribute("nonexistent")) + + pool:destroy() + end) + + test("HTML tag get_unknown_attributes test", function() + local rspamd_mempool = require("rspamd_mempool") + local pool = rspamd_mempool.create() + + local html = [[<div class="known" data-track="analytics" unknown-attr="test-value" custom-id="12345">content</div>]] + local parsed, tags = parse_html_and_extract_tags(html, pool) + + assert_not_nil(parsed) + + local div_tag = nil + for _, tag in ipairs(tags) do + if tag:get_type() == "div" then + div_tag = tag + break + end + end + + assert_not_nil(div_tag) + + local unknown_attrs = div_tag:get_unknown_attributes() + assert_not_nil(unknown_attrs) + + -- Should include unknown attributes but not known ones like "class" + assert_not_nil(unknown_attrs["unknown-attr"]) + assert_equal("test-value", unknown_attrs["unknown-attr"]) + assert_not_nil(unknown_attrs["custom-id"]) + assert_equal("12345", unknown_attrs["custom-id"]) + + -- data-track should be recognized as a known attribute now + -- but if not, it would appear in unknown attributes + + pool:destroy() + end) + + test("HTML tag get_children test", function() + local rspamd_mempool = require("rspamd_mempool") + local pool = rspamd_mempool.create() + + local html = [[ + <div id="parent"> + <p>First child</p> + <span>Second child</span> + <img src="test.jpg" /> + </div> + ]] + local parsed, tags = parse_html_and_extract_tags(html, pool) + + assert_not_nil(parsed) + + local parent_div = nil + for _, tag in ipairs(tags) do + if tag:get_type() == "div" and tag:has_attribute("id") and tag:get_attribute("id") == "parent" then + parent_div = tag + break + end + end + + assert_not_nil(parent_div) + + local children = parent_div:get_children() + assert_not_nil(children) + assert_equal(3, #children) + + -- Check child types + local child_types = {} + for _, child in ipairs(children) do + table.insert(child_types, child:get_type()) + end + + -- Should contain p, span, and img + local child_types_str = table.concat(child_types, ",") + assert_true(child_types_str:find("p") ~= nil) + assert_true(child_types_str:find("span") ~= nil) + assert_true(child_types_str:find("img") ~= nil) + + pool:destroy() + end) + + test("HTML tag get_attribute vs get_all_attributes consistency", function() + local rspamd_mempool = require("rspamd_mempool") + local pool = rspamd_mempool.create() + + local html = [[<a href="https://example.com" class="link" target="_blank" title="Example Link">Link</a>]] + local parsed, tags = parse_html_and_extract_tags(html, pool) + + assert_not_nil(parsed) + + local a_tag = nil + for _, tag in ipairs(tags) do + if tag:get_type() == "a" then + a_tag = tag + break + end + end + + assert_not_nil(a_tag) + + local all_attrs = a_tag:get_all_attributes() + + -- Test that individual get_attribute calls match get_all_attributes + for attr_name, attr_value in pairs(all_attrs) do + assert_equal(attr_value, a_tag:get_attribute(attr_name), + string.format("Attribute '%s' mismatch: get_attribute='%s', get_all_attributes='%s'", + attr_name, a_tag:get_attribute(attr_name) or "nil", attr_value)) + end + + -- Test specific expected attributes + assert_equal("https://example.com", a_tag:get_attribute("href")) + assert_equal("link", a_tag:get_attribute("class")) + assert_equal("_blank", a_tag:get_attribute("target")) + assert_equal("Example Link", a_tag:get_attribute("title")) + + pool:destroy() + end) + + + + test("HTML tag attribute edge cases", function() + local rspamd_mempool = require("rspamd_mempool") + local pool = rspamd_mempool.create() + + local html = [[<div class="" hidden style=" " width="0" height="abc">content</div>]] + local parsed, tags = parse_html_and_extract_tags(html, pool) + + assert_not_nil(parsed) + + local div_tag = nil + for _, tag in ipairs(tags) do + if tag:get_type() == "div" then + div_tag = tag + break + end + end + + assert_not_nil(div_tag) + + -- Test empty attribute value + assert_true(div_tag:has_attribute("class")) + assert_equal("", div_tag:get_attribute("class")) + + -- Test boolean attribute (hidden) + assert_true(div_tag:has_attribute("hidden")) + + -- Test whitespace-only attribute + assert_true(div_tag:has_attribute("style")) + assert_equal(" ", div_tag:get_attribute("style")) + + -- Test numeric attributes with edge cases + assert_equal(0, div_tag:get_numeric_attribute("width")) + assert_nil(div_tag:get_numeric_attribute("height")) -- "abc" is not numeric + + -- Test non-existent attribute + assert_false(div_tag:has_attribute("nonexistent")) + assert_nil(div_tag:get_attribute("nonexistent")) + assert_nil(div_tag:get_numeric_attribute("nonexistent")) + + pool:destroy() + end) + + test("HTML tag complex nested structure", function() + local rspamd_mempool = require("rspamd_mempool") + local pool = rspamd_mempool.create() + + local html = [[ + <table cellpadding="5" cellspacing="2" border="1"> + <tr> + <td align="center" valign="top" width="100"> + <img src="image1.jpg" width="80" height="60" alt="Image 1" /> + </td> + <td align="left" valign="middle"> + <p font-size="12">Text content</p> + </td> + </tr> + </table> + ]] + local parsed, tags = parse_html_and_extract_tags(html, pool) + + assert_not_nil(parsed) + + -- Find table tag + local table_tag = nil + for _, tag in ipairs(tags) do + if tag:get_type() == "table" then + table_tag = tag + break + end + end + + assert_not_nil(table_tag) + + -- Test table attributes + assert_equal(5, table_tag:get_numeric_attribute("cellpadding")) + assert_equal(2, table_tag:get_numeric_attribute("cellspacing")) + assert_equal("1", table_tag:get_attribute("border")) + + -- Test that table has children + local children = table_tag:get_children() + assert_not_nil(children) + assert_true(#children > 0) + + -- Find img tag + local img_tag = nil + for _, tag in ipairs(tags) do + if tag:get_type() == "img" then + img_tag = tag + break + end + end + + assert_not_nil(img_tag) + + -- Test img attributes + assert_equal("image1.jpg", img_tag:get_attribute("src")) + assert_equal(80, img_tag:get_numeric_attribute("width")) + assert_equal(60, img_tag:get_numeric_attribute("height")) + assert_equal("Image 1", img_tag:get_attribute("alt")) + + pool:destroy() + end) + + test("HTML tag with mixed known and unknown attributes", function() + local rspamd_mempool = require("rspamd_mempool") + local pool = rspamd_mempool.create() + + local html = + [[<div class="container" data-analytics="track" custom-attr="value" style="color: blue;" unknown123="test">content</div>]] + local parsed, tags = parse_html_and_extract_tags(html, pool) + + assert_not_nil(parsed) + + local div_tag = nil + for _, tag in ipairs(tags) do + if tag:get_type() == "div" then + div_tag = tag + break + end + end + + assert_not_nil(div_tag) + + local all_attrs = div_tag:get_all_attributes() + local unknown_attrs = div_tag:get_unknown_attributes() + + -- All attributes should include both known and unknown + assert_not_nil(all_attrs["class"]) -- known + assert_not_nil(all_attrs["style"]) -- known + assert_not_nil(all_attrs["custom-attr"]) -- unknown + assert_not_nil(all_attrs["unknown123"]) -- unknown + + -- Unknown attributes should only include unrecognized ones + assert_nil(unknown_attrs["class"]) -- known, shouldn't be here + assert_nil(unknown_attrs["style"]) -- known, shouldn't be here + assert_not_nil(unknown_attrs["custom-attr"]) -- unknown, should be here + assert_not_nil(unknown_attrs["unknown123"]) -- unknown, should be here + + assert_equal("value", unknown_attrs["custom-attr"]) + assert_equal("test", unknown_attrs["unknown123"]) + + pool:destroy() + end) end) diff --git a/test/lua/unit/logger.lua b/test/lua/unit/logger.lua index dc0120709..c28d8bb09 100644 --- a/test/lua/unit/logger.lua +++ b/test/lua/unit/logger.lua @@ -3,17 +3,17 @@ context("Logger unit tests", function() local log = require "rspamd_logger" local cases = { - {'string', 'string'}, - {'%1', 'string', 'string'}, - {'%1', '1.1', 1.1}, - {'%1', '1', 1}, - {'%1', 'true', true}, - {'%1', '{[1] = 1, [2] = test}', {1, 'test'}}, - {'%1', '{[1] = 1, [2] = 2.1, [k2] = test}', {1, 2.1, k2='test'}}, - {'%s', 'true', true}, + { 'string', 'string' }, + { '%1', 'string', 'string' }, + { '%1', '1.1', 1.1 }, + { '%1', '1', 1 }, + { '%1', 'true', true }, + { '%1', '{[1] = 1, [2] = test}', { 1, 'test' } }, + { '%1', '{[1] = 1, [2] = 2.1, [k2] = test}', { 1, 2.1, k2 = 'test' } }, + { '%s', 'true', true }, } - for _,c in ipairs(cases) do + for _, c in ipairs(cases) do local s if c[3] then s = log.slog(c[1], c[3]) @@ -21,7 +21,82 @@ context("Logger unit tests", function() s = log.slog(c[1]) end assert_equal(s, c[2], string.format("'%s' doesn't match with '%s'", - c[2], s)) + c[2], s)) + end + end) + + test("Logger graceful error handling", function() + local log = require "rspamd_logger" + + -- Test missing arguments + local missing_arg_cases = { + { '%1', '<MISSING ARGUMENT>' }, + { '%0', '<MISSING ARGUMENT>' }, -- %0 is invalid since Lua args are 1-indexed + { '%2', '<MISSING ARGUMENT>', 'arg1' }, + { '%1 %2', 'arg1 <MISSING ARGUMENT>', 'arg1' }, + { 'prefix %1 %3 suffix', 'prefix arg1 <MISSING ARGUMENT> suffix', 'arg1' }, + } + + for _, c in ipairs(missing_arg_cases) do + local s + if c[3] then + s = log.slog(c[1], c[3]) + else + s = log.slog(c[1]) + end + assert_equal(s, c[2], string.format("Missing arg test: '%s' doesn't match with '%s'", + c[2], s)) + end + + -- Test extra arguments + local extra_arg_cases = { + { '%1', 'arg1 <EXTRA 1 ARGUMENTS>', 'arg1', 'extra1' }, + { '%1', 'arg1 <EXTRA 2 ARGUMENTS>', 'arg1', 'extra1', 'extra2' }, + { '%s', 'arg1 <EXTRA 1 ARGUMENTS>', 'arg1', 'extra1' }, + { 'prefix %1 suffix', 'prefix arg1 suffix <EXTRA 1 ARGUMENTS>', 'arg1', 'extra1' }, + } + + for _, c in ipairs(extra_arg_cases) do + local s + if c[4] and c[5] then + s = log.slog(c[1], c[3], c[4], c[5]) + elseif c[4] then + s = log.slog(c[1], c[3], c[4]) + else + s = log.slog(c[1], c[3]) + end + assert_equal(s, c[2], string.format("Extra arg test: '%s' doesn't match with '%s'", + c[2], s)) + end + + -- Test literal percent sequences (should pass through as-is) + local literal_cases = { + { '%-1', '%-1' }, + { '%abc', '%abc' }, -- Should pass through as literal since it's not a valid number + { '%', '%' }, -- Single percent should pass through + } + + for _, c in ipairs(literal_cases) do + local s = log.slog(c[1]) + assert_equal(s, c[2], string.format("Literal test: '%s' doesn't match with '%s'", + c[2], s)) + end + + -- Test mixed scenarios + local mixed_cases = { + { '%1 %3', 'arg1 <MISSING ARGUMENT> <EXTRA 1 ARGUMENTS>', 'arg1', 'extra1' }, + { '%2 %4', 'extra1 <MISSING ARGUMENT> <EXTRA 1 ARGUMENTS>', 'arg1', 'extra1' }, + } + + for _, c in ipairs(mixed_cases) do + local s + if c[4] then + s = log.slog(c[1], c[3], c[4]) + else + s = log.slog(c[1], c[3]) + end + assert_equal(s, c[2], string.format("Mixed test: '%s' doesn't match with '%s'", + c[2], s)) end end) end)
\ No newline at end of file diff --git a/test/lua/unit/rspamd_resolver.lua b/test/lua/unit/rspamd_resolver.lua index e987ff00b..2fdec2c4b 100644 --- a/test/lua/unit/rspamd_resolver.lua +++ b/test/lua/unit/rspamd_resolver.lua @@ -6,24 +6,58 @@ context("Check punycoding UTF-8 URL", function() local resolver = rspamd_resolver.init(rspamd_util.create_event_base(), rspamd_config) - local cases = { - -- https://unicode.org/reports/tr46/#Deviations - ['faß.de'] = 'fass.de', -- IDNA2008 result: xn--fa-hia.de - ['βόλος.com'] = 'xn--nxasmq6b.com', -- IDNA2008 result: xn--nxasmm1c.com - ['نامهای.com'] = 'xn--mgba3gch31f.com', -- IDNA2008 result: xn--mgba3gch31f060k.com - ['ශ්රී.com'] = 'xn--10cl1a0b.com', -- IDNA2008 result: xn--10cl1a0b660p.com - - -- https://unicode.org/reports/tr46/#Table_Example_Processing - ['日本語。JP'] = 'xn--wgv71a119e.jp', -- Fullwidth characters are remapped, including 。 - --['u¨.com'] = 'xn--tda.com', -- Normalize changes u + umlaut to ü - ['☕.us'] = 'xn--53h.us', -- Post-Unicode 3.2 characters are allowed - - -- Other + -- Helper function to detect IDNA behavior by testing a known conversion + local function detect_idna_behavior() + -- Use faß.de as a test case - different results in IDNA2003 vs IDNA2008 + local test_result = resolver:idna_convert_utf8('faß.de') + if test_result == 'fass.de' then + return 'transitional' -- IDNA2003/transitional behavior + elseif test_result == 'xn--fa-hia.de' then + return 'nontransitional' -- IDNA2008/nontransitional behavior + else + return 'unknown' + end + end + + local idna_behavior = detect_idna_behavior() + + -- Define test cases with both expected results + local cases_transitional = { + -- IDNA2003/transitional results (ICU < 76 default) + ['faß.de'] = 'fass.de', + ['βόλος.com'] = 'xn--nxasmq6b.com', + ['نامهای.com'] = 'xn--mgba3gch31f.com', + ['ශ්රී.com'] = 'xn--10cl1a0b.com', + ['日本語。JP'] = 'xn--wgv71a119e.jp', + ['☕.us'] = 'xn--53h.us', + ['example.рф'] = 'example.xn--p1ai', + } + + local cases_nontransitional = { + -- IDNA2008/nontransitional results (ICU >= 76 default) + ['faß.de'] = 'xn--fa-hia.de', + ['βόλος.com'] = 'xn--nxasmm1c.com', + ['نامهای.com'] = 'xn--mgba3gch31f060k.com', + ['ශ්රී.com'] = 'xn--10cl1a0b660p.com', + ['日本語。JP'] = 'xn--wgv71a119e.jp', + ['☕.us'] = 'xn--53h.us', ['example.рф'] = 'example.xn--p1ai', } + -- Choose appropriate test cases based on detected behavior + local cases + if idna_behavior == 'transitional' then + cases = cases_transitional + print("Detected IDNA transitional behavior (ICU < 76 or configured for IDNA2003)") + elseif idna_behavior == 'nontransitional' then + cases = cases_nontransitional + print("Detected IDNA nontransitional behavior (ICU >= 76 default)") + else + error("Could not detect IDNA behavior - unexpected result for test case") + end + for k, v in pairs(cases) do - test(string.format("punycode %s -> %s", k, v), function() + test(string.format("punycode %s -> %s (%s)", k, v, idna_behavior), function() local res = resolver:idna_convert_utf8(k) assert_equal(res, v) end) |