rspamd/test/lua/unit/url.lua
2018-09-21 09:56:11 +01:00

163 lines
5.1 KiB
Lua
Raw Blame 히스토리

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- URL parser tests
context("URL check functions", function()
local mpool = require("rspamd_mempool")
local url = require("rspamd_url")
local logger = require("rspamd_logger")
local test_helper = require("rspamd_test_helper")
local ffi = require("ffi")
ffi.cdef[[
void rspamd_http_normalize_path_inplace(char *path, size_t len, size_t *nlen);
]]
test_helper.init_url_parser()
local pool = mpool.create()
local cases = {
{"test.com", {"test.com", nil}},
{" test.com", {"test.com", nil}},
{"<test.com> text", {"test.com", nil}},
{"test.com. text", {"test.com", nil}},
{"mailto:A.User@example.com text", {"example.com", "A.User"}},
{"http://Тест.Рф:18 text", {"тест.рф", nil}},
{"http://user:password@тест2.РФ:18 text", {"тест2.рф", "user"}},
{"somebody@example.com", {"example.com", "somebody"}},
{"https://127.0.0.1/abc text", {"127.0.0.1", nil}},
{"https://127.0.0.1 text", {"127.0.0.1", nil}},
{"https://[::1]:1", {"::1", nil}},
{"https://user:password@[::1]:1", {"::1", nil}},
{"https://user:password@[::1]", {"::1", nil}},
{"https://user:password@[::1]/1", {"::1", nil}},
}
for i,c in ipairs(cases) do
local res = url.create(pool, c[1])
test("Extract urls from text" .. i, function()
assert_not_nil(res, "cannot parse " .. c[1])
local t = res:to_table()
--local s = logger.slog("%1 -> %2", c[1], t)
--print(s)
assert_not_nil(t, "cannot convert to table " .. c[1])
assert_equal(c[2][1], t['host'],
logger.slog('expected host "%s", but got "%s" in url %s => %s',
c[2][1], t['host'], c[1], t))
if c[2][2] then
assert_equal(c[2][1], t['host'],
logger.slog('expected user "%s", but got "%s" in url %s => %s',
c[2][1], t['host'], c[1], t))
end
end)
end
cases = {
{"http://%30%78%63%30%2e%30%32%35%30.01", true, { --0xc0.0250.01
host = '192.168.0.1',
}},
{"http://www.google.com/foo?bar=baz#", true, {
host = 'www.google.com', path = 'foo', query = 'bar=baz', tld = 'google.com'
}},
{"http://[www.google.com]/", false},
{"<test.com", true, {
host = 'test.com', tld = 'test.com',
}},
{"test.com>", false},
{",test.com text", false},
{"ht\ttp:@www.google.com:80/;p?#", false},
{"http://user:pass@/", false},
{"http://foo:-80/", false},
{"http:////////user:@google.com:99?foo", true, {
host = 'google.com', user = 'user', port = 99, query = 'foo'
}},
{"http://%25DOMAIN:foobar@foodomain.com/", true, {
host = 'foodomain.com', user = '%25DOMAIN'
}},
{"http://0.0xFFFFFF", true, {
host = '0.255.255.255'
}},
{"http://030052000001", true, {
host = '192.168.0.1'
}},
{"http://0xc0.052000001", true, {
host = '192.168.0.1'
}},
{"http://192.168.0.1.", true, {
host = '192.168.0.1'
}},
{"http://[::eeee:192.168.0.1]", true, {
host = '::eeee:c0a8:1'
}},
{"http://twitter.com#test", true, {
host = 'twitter.com', fragment = 'test'
}},
{"http:www.twitter.com#test", true, {
host = 'www.twitter.com', fragment = 'test'
}},
}
-- Some cases from https://code.google.com/p/google-url/source/browse/trunk/src/url_canon_unittest.cc
for i,c in ipairs(cases) do
local res = url.create(pool, c[1])
test("Parse urls " .. i, function()
if c[2] then
assert_not_nil(res, "cannot parse " .. c[1])
local uf = res:to_table()
for k,v in pairs(c[3]) do
assert_not_nil(uf[k], k .. ' is missing in url, must be ' .. v)
assert_equal(uf[k], v, logger.slog('expected "%s", for %s, but got "%s" in url %s => %s',
v, k, uf[k], c[1], uf))
end
for k,v in pairs(uf) do
if k ~= 'url' and k ~= 'protocol' and k ~= 'tld' then
assert_not_nil(c[3][k], k .. ' should be absent but it is ' .. v .. ' in: ' .. c[1])
end
end
else
assert_nil(res, "should not parse " .. c[1] .. ' parsed to: ' .. tostring(res))
end
end)
end
cases = {
{"/././foo", "/foo"},
{"/a/b/c/./../../g", "/a/g"},
{"/./.foo", "/.foo"},
{"/foo/.", "/foo"},
{"/foo/./", "/foo"},
{"/foo/bar/..", "/foo"},
{"/foo/bar/../", "/foo/"},
{"/foo/..bar", "/foo/..bar"},
{"/foo/bar/../ton", "/foo/ton"},
{"/foo/bar/../ton/../../a", "/a"},
{"/foo/../../..", "/"},
{"/foo/../../../ton", "/ton"},
{"////../..", "/"},
{"./", ""},
{"/./", "/"},
{"/./././././././", "/"},
{"/", "/"},
{"/a/b", "/a/b"},
{"/a/b/", "/a/b/"},
{"..", "/"},
{"/../", "/"},
{"../", "/"},
}
for i,v in ipairs(cases) do
test("Normalize paths " .. i, function()
local buf = ffi.new("uint8_t[?]", #v[1])
local sizbuf = ffi.new("size_t[1]")
ffi.copy(buf, v[1], #v[1])
ffi.C.rspamd_http_normalize_path_inplace(buf, #v[1], sizbuf)
local res = ffi.string(buf, tonumber(sizbuf[0]))
assert_equal(v[2], res, 'expected ' .. v[2] .. ' but got ' .. res .. ' in path ' .. v[1])
end)
end
end)