You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

url.lua 5.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  -- URL parser tests
  --
  -- Three table-driven groups are registered below:
  --   1. "Extract urls from text": url.create() on free text, checking the host
  --      (and the user, when one is expected) of the resulting URL table.
  --   2. "Parse urls": url.create() on single URLs, checking either that the
  --      URL is rejected or that every expected component is present and no
  --      unexpected component shows up.
  --   3. "Normalize paths": direct FFI calls to
  --      rspamd_http_normalize_path_inplace(), checking the normalised path.
  context("URL check functions", function()
    local mpool = require("rspamd_mempool")
    local url = require("rspamd_url")
    local logger = require("rspamd_logger")
    local test_helper = require("rspamd_test_helper")
    local ffi = require("ffi")

    -- C prototype of the path normaliser exercised by the third group.
    ffi.cdef[[
    void rspamd_http_normalize_path_inplace(char *path, size_t len, size_t *nlen);
    ]]

    test_helper.init_url_parser()

    -- NOTE(review): the pool is never destroyed in this file. URLs created from
    -- it are read inside the test closures below, so confirm when the framework
    -- runs those closures before adding any cleanup here.
    local pool = mpool.create()

    ---------------------------------------------------------------------------
    -- Group 1: host/user extraction from text
    ---------------------------------------------------------------------------

    -- Each entry: { input text, { expected host, expected user or nil } }.
    -- A nil user means "do not check the user for this input".
    local extract_cases = {
      {"test.com", {"test.com", nil}},
      {" test.com", {"test.com", nil}},
      {"<test.com> text", {"test.com", nil}},
      {"test.com. text", {"test.com", nil}},
      {"mailto:A.User@example.com text", {"example.com", "A.User"}},
      {"http://Тест.Рф:18 text", {"тест.рф", nil}},
      {"http://user:password@тест2.РФ:18 text", {"тест2.рф", "user"}},
      {"somebody@example.com", {"example.com", "somebody"}},
      {"https://127.0.0.1/abc text", {"127.0.0.1", nil}},
      {"https://127.0.0.1 text", {"127.0.0.1", nil}},
      {"https://[::1]:1", {"::1", nil}},
      {"https://user:password@[::1]:1", {"::1", nil}},
      {"https://user:password@[::1]", {"::1", nil}},
      {"https://user:password@[::1]/1", {"::1", nil}},
    }

    for i, c in ipairs(extract_cases) do
      local input = c[1]
      local expected_host, expected_user = c[2][1], c[2][2]
      -- The URL is created while the tests are being registered; the closure
      -- below only inspects the result.
      local res = url.create(pool, input)

      test("Extract urls from text" .. i, function()
        assert_not_nil(res, "cannot parse " .. input)
        local t = res:to_table()
        assert_not_nil(t, "cannot convert to table " .. input)

        assert_equal(expected_host, t['host'],
          logger.slog('expected host "%s", but got "%s" in url %s => %s',
            expected_host, t['host'], input, t))

        if expected_user then
          -- Compare the user part itself; this branch used to re-check the
          -- host, so the expected user was never verified.
          assert_equal(expected_user, t['user'],
            logger.slog('expected user "%s", but got "%s" in url %s => %s',
              expected_user, t['user'], input, t))
        end
      end)
    end

    ---------------------------------------------------------------------------
    -- Group 2: full URL parsing
    ---------------------------------------------------------------------------

    -- Some cases from https://code.google.com/p/google-url/source/browse/trunk/src/url_canon_unittest.cc
    -- Each entry: { url, should_parse, { expected components } }.
    -- The third element is only present when should_parse is true.
    local parse_cases = {
      {"http://%30%78%63%30%2e%30%32%35%30.01", true, { --0xc0.0250.01
        host = '192.168.0.1',
      }},
      {"http://www.google.com/foo?bar=baz#", true, {
        host = 'www.google.com', path = 'foo', query = 'bar=baz', tld = 'google.com'
      }},
      {"http://[www.google.com]/", false},
      {"<test.com", true, {
        host = 'test.com', tld = 'test.com',
      }},
      {"test.com>", false},
      {",test.com text", false},
      {"ht\ttp:@www.google.com:80/;p?#", false},
      {"http://user:pass@/", false},
      {"http://foo:-80/", false},
      {"http:////////user:@google.com:99?foo", true, {
        host = 'google.com', user = 'user', port = 99, query = 'foo'
      }},
      {"http://%25DOMAIN:foobar@foodomain.com/", true, {
        host = 'foodomain.com', user = '%25DOMAIN'
      }},
      {"http://0.0xFFFFFF", true, {
        host = '0.255.255.255'
      }},
      {"http://030052000001", true, {
        host = '192.168.0.1'
      }},
      {"http://0xc0.052000001", true, {
        host = '192.168.0.1'
      }},
      {"http://192.168.0.1.", true, {
        host = '192.168.0.1'
      }},
      {"http://[::eeee:192.168.0.1]", true, {
        host = '::eeee:c0a8:1'
      }},
      {"http://twitter.com#test", true, {
        host = 'twitter.com', fragment = 'test'
      }},
      {"http:www.twitter.com#test", true, {
        host = 'www.twitter.com', fragment = 'test'
      }},
    }

    -- Components that may appear in the parsed table without being listed in
    -- a case's expectations.
    local unchecked_fields = { url = true, protocol = true, tld = true }

    for i, c in ipairs(parse_cases) do
      local input, should_parse, expected = c[1], c[2], c[3]
      local res = url.create(pool, input)

      test("Parse urls " .. i, function()
        if should_parse then
          assert_not_nil(res, "cannot parse " .. input)
          local uf = res:to_table()

          -- Every expected component must be present with the exact value.
          for k, v in pairs(expected) do
            assert_not_nil(uf[k], k .. ' is missing in url, must be ' .. tostring(v))
            assert_equal(v, uf[k],
              logger.slog('expected "%s", for %s, but got "%s" in url %s => %s',
                v, k, uf[k], input, uf))
          end

          -- No component outside the expectations (and the unchecked set) may
          -- be present. The message is built even when the assertion passes,
          -- so tostring() keeps it safe for any value type.
          for k, v in pairs(uf) do
            if not unchecked_fields[k] then
              assert_not_nil(expected[k],
                k .. ' should be absent but it is ' .. tostring(v) .. ' in: ' .. input)
            end
          end
        else
          assert_nil(res, "should not parse " .. input .. ' parsed to: ' .. tostring(res))
        end
      end)
    end

    ---------------------------------------------------------------------------
    -- Group 3: in-place path normalisation
    ---------------------------------------------------------------------------

    -- Each entry: { raw path, expected normalised path }.
    local normalize_cases = {
      {"/././foo", "/foo"},
      {"/a/b/c/./../../g", "/a/g"},
      {"/./.foo", "/.foo"},
      {"/foo/.", "/foo"},
      {"/foo/./", "/foo"},
      {"/foo/bar/..", "/foo"},
      {"/foo/bar/../", "/foo/"},
      {"/foo/..bar", "/foo/..bar"},
      {"/foo/bar/../ton", "/foo/ton"},
      {"/foo/bar/../ton/../../a", "/a"},
      {"/foo/../../..", "/"},
      {"/foo/../../../ton", "/ton"},
      {"////../..", "/"},
      {"./", ""},
      {"/./", "/"},
      {"/./././././././", "/"},
      {"/", "/"},
      {"/a/b", "/a/b"},
      {"/a/b/", "/a/b/"},
      {"..", "/"},
      {"/../", "/"},
      {"../", "/"},
    }

    for i, v in ipairs(normalize_cases) do
      local input, expected = v[1], v[2]

      test("Normalize paths " .. i, function()
        local len = #input
        -- Copy the Lua string into a writable buffer: the C function rewrites
        -- the path in place and reports the new length through sizbuf[0].
        local buf = ffi.new("uint8_t[?]", len)
        local sizbuf = ffi.new("size_t[1]")
        ffi.copy(buf, input, len)
        ffi.C.rspamd_http_normalize_path_inplace(buf, len, sizbuf)

        local res = ffi.string(buf, tonumber(sizbuf[0]))
        assert_equal(expected, res,
          'expected ' .. expected .. ' but got ' .. res .. ' in path ' .. input)
      end)
    end
  end)