diff options
author | Vsevolod Stakhov <vsevolod@rspamd.com> | 2025-06-11 15:24:22 +0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-06-11 15:24:22 +0600 |
commit | 664f0e993719d126ded977438146a06e68a6320b (patch) | |
tree | 2164fa509529a51899039f070fc6ea6e1e592635 | |
parent | 8622b24da98a3c1565776a93c436a7323185f613 (diff) | |
parent | ae847461789ecba841732c71196a67cd55b5308a (diff) | |
download | rspamd-664f0e993719d126ded977438146a06e68a6320b.tar.gz rspamd-664f0e993719d126ded977438146a06e68a6320b.zip |
Merge pull request #5506 from rspamd/vstakhov-idna-tests
[Test] Detect libicu idna behaviour and select the appropriate tests
-rw-r--r-- | test/lua/unit/rspamd_resolver.lua | 62 |
1 files changed, 48 insertions, 14 deletions
diff --git a/test/lua/unit/rspamd_resolver.lua b/test/lua/unit/rspamd_resolver.lua index e987ff00b..2fdec2c4b 100644 --- a/test/lua/unit/rspamd_resolver.lua +++ b/test/lua/unit/rspamd_resolver.lua @@ -6,24 +6,58 @@ context("Check punycoding UTF-8 URL", function() local resolver = rspamd_resolver.init(rspamd_util.create_event_base(), rspamd_config) - local cases = { - -- https://unicode.org/reports/tr46/#Deviations - ['faß.de'] = 'fass.de', -- IDNA2008 result: xn--fa-hia.de - ['βόλος.com'] = 'xn--nxasmq6b.com', -- IDNA2008 result: xn--nxasmm1c.com - ['نامهای.com'] = 'xn--mgba3gch31f.com', -- IDNA2008 result: xn--mgba3gch31f060k.com - ['ශ්රී.com'] = 'xn--10cl1a0b.com', -- IDNA2008 result: xn--10cl1a0b660p.com - - -- https://unicode.org/reports/tr46/#Table_Example_Processing - ['日本語。JP'] = 'xn--wgv71a119e.jp', -- Fullwidth characters are remapped, including 。 - --['u¨.com'] = 'xn--tda.com', -- Normalize changes u + umlaut to ü - ['☕.us'] = 'xn--53h.us', -- Post-Unicode 3.2 characters are allowed - - -- Other + -- Helper function to detect IDNA behavior by testing a known conversion + local function detect_idna_behavior() + -- Use faß.de as a test case - different results in IDNA2003 vs IDNA2008 + local test_result = resolver:idna_convert_utf8('faß.de') + if test_result == 'fass.de' then + return 'transitional' -- IDNA2003/transitional behavior + elseif test_result == 'xn--fa-hia.de' then + return 'nontransitional' -- IDNA2008/nontransitional behavior + else + return 'unknown' + end + end + + local idna_behavior = detect_idna_behavior() + + -- Define test cases with both expected results + local cases_transitional = { + -- IDNA2003/transitional results (ICU < 76 default) + ['faß.de'] = 'fass.de', + ['βόλος.com'] = 'xn--nxasmq6b.com', + ['نامهای.com'] = 'xn--mgba3gch31f.com', + ['ශ්රී.com'] = 'xn--10cl1a0b.com', + ['日本語。JP'] = 'xn--wgv71a119e.jp', + ['☕.us'] = 'xn--53h.us', + ['example.рф'] = 'example.xn--p1ai', + } + + local cases_nontransitional = { + -- IDNA2008/nontransitional results (ICU >= 76 default) + ['faß.de'] = 'xn--fa-hia.de', + ['βόλος.com'] = 'xn--nxasmm1c.com', + ['نامهای.com'] = 'xn--mgba3gch31f060k.com', + ['ශ්රී.com'] = 'xn--10cl1a0b660p.com', + ['日本語。JP'] = 'xn--wgv71a119e.jp', + ['☕.us'] = 'xn--53h.us', ['example.рф'] = 'example.xn--p1ai', } + -- Choose appropriate test cases based on detected behavior + local cases + if idna_behavior == 'transitional' then + cases = cases_transitional + print("Detected IDNA transitional behavior (ICU < 76 or configured for IDNA2003)") + elseif idna_behavior == 'nontransitional' then + cases = cases_nontransitional + print("Detected IDNA nontransitional behavior (ICU >= 76 default)") + else + error("Could not detect IDNA behavior - unexpected result for test case") + end + for k, v in pairs(cases) do - test(string.format("punycode %s -> %s", k, v), function() + test(string.format("punycode %s -> %s (%s)", k, v, idna_behavior), function() local res = resolver:idna_convert_utf8(k) assert_equal(res, v) end) |