From c9477ccf51f803d83cd2fb6f90171e2f17aaf2a8 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 10 Aug 2023 12:21:49 +0100 Subject: [PATCH] [Test] Add one more test for urls parsing --- test/lua/unit/url.lua | 241 +++++++++++++++++++++--------------------- 1 file changed, 123 insertions(+), 118 deletions(-) diff --git a/test/lua/unit/url.lua b/test/lua/unit/url.lua index cd23afd7c..e2daa7771 100644 --- a/test/lua/unit/url.lua +++ b/test/lua/unit/url.lua @@ -9,7 +9,7 @@ context("URL check functions", function() local test_helper = require("rspamd_test_helper") local ffi = require("ffi") - ffi.cdef[[ + ffi.cdef [[ void rspamd_normalize_path_inplace(char *path, size_t len, size_t *nlen); ]] @@ -18,25 +18,25 @@ context("URL check functions", function() local pool = mpool.create() local cases = { - {"test.com", {"test.com", nil}}, - {" test.com", {"test.com", nil}}, - {" text", {"test.com", nil}}, - {"test.com. text", {"test.com", nil}}, - {"mailto:A.User@example.com text", {"example.com", "A.User"}}, - {"http://Тест.Рф:18 text", {"тест.рф", nil}}, - {"http://user:password@тест2.РФ:18 text", {"тест2.рф", "user"}}, - {"somebody@example.com", {"example.com", "somebody"}}, - {"https://127.0.0.1/abc text", {"127.0.0.1", nil}}, - {"https:\\\\127.0.0.1/abc text", {"127.0.0.1", nil}}, - {"https:\\\\127.0.0.1", {"127.0.0.1", nil}}, - {"https://127.0.0.1 text", {"127.0.0.1", nil}}, - {"https://[::1]:1", {"::1", nil}}, - {"https://user:password@[::1]:1", {"::1", nil}}, - {"https://user:password@[::1]", {"::1", nil}}, - {"https://user:password@[::1]/1", {"::1", nil}}, + { "test.com", { "test.com", nil } }, + { " test.com", { "test.com", nil } }, + { " text", { "test.com", nil } }, + { "test.com. 
text", { "test.com", nil } }, + { "mailto:A.User@example.com text", { "example.com", "A.User" } }, + { "http://Тест.Рф:18 text", { "тест.рф", nil } }, + { "http://user:password@тест2.РФ:18 text", { "тест2.рф", "user" } }, + { "somebody@example.com", { "example.com", "somebody" } }, + { "https://127.0.0.1/abc text", { "127.0.0.1", nil } }, + { "https:\\\\127.0.0.1/abc text", { "127.0.0.1", nil } }, + { "https:\\\\127.0.0.1", { "127.0.0.1", nil } }, + { "https://127.0.0.1 text", { "127.0.0.1", nil } }, + { "https://[::1]:1", { "::1", nil } }, + { "https://user:password@[::1]:1", { "::1", nil } }, + { "https://user:password@[::1]", { "::1", nil } }, + { "https://user:password@[::1]/1", { "::1", nil } }, } - for i,c in ipairs(cases) do + for i, c in ipairs(cases) do local res = url.create(pool, c[1]) test("Extract urls from text" .. i, function() @@ -46,107 +46,112 @@ context("URL check functions", function() --print(s) assert_not_nil(t, "cannot convert to table " .. c[1]) assert_equal(c[2][1], t['host'], - logger.slog('expected host "%s", but got "%s" in url %s => %s', + logger.slog('expected host "%s", but got "%s" in url %s => %s', c[2][1], t['host'], c[1], t)) if c[2][2] then - assert_equal(c[2][1], t['host'], - logger.slog('expected user "%s", but got "%s" in url %s => %s', - c[2][1], t['host'], c[1], t)) + assert_equal(c[2][2], t['user'], + logger.slog('expected user "%s", but got "%s" in url %s => %s', + c[2][2], t['user'], c[1], t)) end end) end cases = { - {[[http://example.net/path/]], true, { host = 'example.net', path = 'path/' - }}, - {'http://example.net/hello%20world.php?arg=x#fragment', true, { host = 'example.net', fragment = 'fragment', query = 'arg=x', path = 'hello world.php', - }}, - {'http://example.net/?arg=%23#fragment', true, { host = 'example.net', fragment = 'fragment', query = 'arg=#', - }}, -
{"http:/\\[::eeee:192.168.0.1]/#test", true, { + } }, + { "http:/\\[::eeee:192.168.0.1]/#test", true, { host = '::eeee:c0a8:1', fragment = 'test' - }}, - {"http:/\\[::eeee:192.168.0.1]#test", true, { + } }, + { "http:/\\[::eeee:192.168.0.1]#test", true, { host = '::eeee:c0a8:1', fragment = 'test' - }}, - {"http:/\\[::eeee:192.168.0.1]?test", true, { + } }, + { "http:/\\[::eeee:192.168.0.1]?test", true, { host = '::eeee:c0a8:1', query = 'test' - }}, - {"http:\\\\%30%78%63%30%2e%30%32%35%30.01", true, { --0xc0.0250.01 + } }, + { "http:\\\\%30%78%63%30%2e%30%32%35%30.01", true, { --0xc0.0250.01 host = '192.168.0.1', - }}, - {"http:/\\www.google.com/foo?bar=baz#", true, { + } }, + { "http:/\\www.google.com/foo?bar=baz#", true, { host = 'www.google.com', path = 'foo', query = 'bar=baz', tld = 'google.com' - }}, - {"http://[www.google.com]/", true, { + } }, + { "http://[www.google.com]/", true, { host = 'www.google.com', - }}, - {"", false}, - {",test.com text", false}, - {"ht\ttp:@www.google.com:80/;p?#", false}, - {"http://user:pass@/", false}, - {"http://foo:-80/", false}, - {"http:////////user:@google.com:99?foo", true, { + } }, + { "test.com>", false }, + { ",test.com text", false }, + { "ht\ttp:@www.google.com:80/;p?#", false }, + { "http://user:pass@/", false }, + { "http://foo:-80/", false }, + { "http:////////user:@google.com:99?foo", true, { host = 'google.com', user = 'user', port = 99, query = 'foo' - }}, - {"http://%25DOMAIN:foobar@foodomain.com/", true, { + } }, + { "http://%25DOMAIN:foobar@foodomain.com/", true, { host = 'foodomain.com', user = '%25DOMAIN' - }}, - {"http://0.0xFFFFFF", true, { + } }, + { "http://0.0xFFFFFF", true, { host = '0.255.255.255' - }}, + } }, --{"http:/\\030052000001", true, { -- host = '192.168.0.1' --}}, - {"http:\\/0xc0.052000001", true, { + { "http:\\/0xc0.052000001", true, { host = '192.168.0.1' - }}, - {"http://192.168.0.1.?foo", true, { + } }, + { "http://192.168.0.1.?foo", true, { host = '192.168.0.1', query = 'foo', - }}, 
- {"http://twitter.com#test", true, { + } }, + { "http://twitter.com#test", true, { host = 'twitter.com', fragment = 'test' - }}, - {"http:www.twitter.com#test", true, { + } }, + { "http:www.twitter.com#test", true, { host = 'www.twitter.com', fragment = 'test' - }}, - {"http://example。com#test", true, { + } }, + { "http://example。com#test", true, { host = 'example.com', fragment = 'test' - }}, - {"http://hoho.example。com#test", true, { + } }, + { "http://hoho.example。com#test", true, { host = 'hoho.example.com', fragment = 'test' - }}, - {"http://hoho。example。com#test", true, { + } }, + { "http://hoho。example。com#test", true, { host = 'hoho.example.com', fragment = 'test' - }}, - {"http://hoho.example。com#test", true, { + } }, + { "http://hoho.example。com#test", true, { host = 'hoho.example.com', fragment = 'test' - }}, - {"http://hehe。example。com#test", true, { + } }, + { "http://hehe。example。com#test", true, { host = 'hehe.example.com', fragment = 'test' - }}, - {"http:////$%^&****((@example.org//#f@f", true, { + } }, + { "http:////$%^&****((@example.org//#f@f", true, { user = '$%^&****((', host = 'example.org', fragment = 'f@f' - }}, - {"http://@@example.com", true, { + } }, + { "http://@@example.com", true, { user = "@", host = "example.com" - }}, - {"https://example.com\\_Resources\\ClientImages\\UserData?ol\\o#ololo\\", true, { + } }, + { "https://example.com\\_Resources\\ClientImages\\UserData?ol\\o#ololo\\", true, { host = "example.com", path = "_Resources\\ClientImages\\UserData", query = "ol\\o", fragment = "ololo\\", - }}, + } }, + { + "http://0x3f8f29a4/pro/au.html", true, { + host = "63.143.41.164", + path = "pro/au.html", + } }, } -- Some cases from https://code.google.com/p/google-url/source/browse/trunk/src/url_canon_unittest.cc - for i,c in ipairs(cases) do + for i, c in ipairs(cases) do local res = url.create(pool, c[1]) test("Parse url: " .. 
c[1], function() @@ -155,12 +160,12 @@ context("URL check functions", function() local uf = res:to_table() - for k,v in pairs(c[3]) do + for k, v in pairs(c[3]) do assert_not_nil(uf[k], k .. ' is missing in url, must be ' .. v) - assert_equal(uf[k], v, logger.slog('expected "%s", for %s, but got "%s" in url %s => %s', - v, k, uf[k], c[1], uf)) + assert_equal(uf[k], v, logger.slog('expected "%s", for %s, but got "%s" in url %s => %s', + v, k, uf[k], c[1], uf)) end - for k,v in pairs(uf) do + for k, v in pairs(uf) do if k ~= 'url' and k ~= 'protocol' and k ~= 'tld' then assert_not_nil(c[3][k], k .. ' should be absent but it is ' .. v .. ' in: ' .. c[1]) end @@ -172,32 +177,32 @@ context("URL check functions", function() end cases = { - {"/././foo", "/foo"}, - {"/a/b/c/./../../g", "/a/g"}, - {"/./.foo", "/.foo"}, - {"/foo/.", "/foo/"}, - {"/foo/./", "/foo/"}, - {"/foo/bar/..", "/foo"}, - {"/foo/bar/../", "/foo/"}, - {"/foo/..bar", "/foo/..bar"}, - {"/foo/bar/../ton", "/foo/ton"}, - {"/foo/bar/../ton/../../a", "/a"}, - {"/foo/../../..", "/"}, - {"/foo/../../../ton", "/ton"}, - {"////../..", "/"}, - {"./", ""}, - {"/./", "/"}, - {"/./././././././", "/"}, - {"/", "/"}, - {"/a/b", "/a/b"}, - {"/a/b/", "/a/b/"}, - {"..", "/"}, - {"/../", "/"}, - {"../", "/"}, - {"///foo", "/foo"}, + { "/././foo", "/foo" }, + { "/a/b/c/./../../g", "/a/g" }, + { "/./.foo", "/.foo" }, + { "/foo/.", "/foo/" }, + { "/foo/./", "/foo/" }, + { "/foo/bar/..", "/foo" }, + { "/foo/bar/../", "/foo/" }, + { "/foo/..bar", "/foo/..bar" }, + { "/foo/bar/../ton", "/foo/ton" }, + { "/foo/bar/../ton/../../a", "/a" }, + { "/foo/../../..", "/" }, + { "/foo/../../../ton", "/ton" }, + { "////../..", "/" }, + { "./", "" }, + { "/./", "/" }, + { "/./././././././", "/" }, + { "/", "/" }, + { "/a/b", "/a/b" }, + { "/a/b/", "/a/b/" }, + { "..", "/" }, + { "/../", "/" }, + { "../", "/" }, + { "///foo", "/foo" }, } - for i,v in ipairs(cases) do + for i, v in ipairs(cases) do test(string.format("Normalize paths
'%s'", v[1]), function() local buf = ffi.new("uint8_t[?]", #v[1]) local sizbuf = ffi.new("size_t[1]") @@ -209,28 +214,28 @@ context("URL check functions", function() end cases = { - {'example.com', 'example.com'}, - {'baz.example.com', 'baz.example.com'}, - {'3.baz.example.com', 'baz.example.com'}, - {'bar.example.com', 'example.com'}, - {'foo.example.com', 'foo.example.com'}, - {'3.foo.example.com', '3.foo.example.com'}, - {'foo.com', 'foo.com'}, - {'bar.foo.com', 'foo.com'}, + { 'example.com', 'example.com' }, + { 'baz.example.com', 'baz.example.com' }, + { '3.baz.example.com', 'baz.example.com' }, + { 'bar.example.com', 'example.com' }, + { 'foo.example.com', 'foo.example.com' }, + { '3.foo.example.com', '3.foo.example.com' }, + { 'foo.com', 'foo.com' }, + { 'bar.foo.com', 'foo.com' }, } local excl_rules1 = { - 'example.com', - '*.foo.example.com', - '!bar.example.com' + 'example.com', + '*.foo.example.com', + '!bar.example.com' } local comp_rules = lua_urls_compose.inject_composition_rules(rspamd_config, excl_rules1) - for _,v in ipairs(cases) do + for _, v in ipairs(cases) do test("URL composition " .. v[1], function() local u = url.create(pool, v[1]) - assert_not_nil(u, "we are able to parse url: " .. v[1]) + assert_not_nil(u, "we are able to parse url: " .. v[1]) local res = comp_rules:process_url(nil, u:get_tld(), u:get_host()) assert_equal(v[2], res, 'expected ' .. v[2] .. ' but got ' .. res .. ' in url ' .. v[1]) end) -- 2.39.5