Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. -- URL parser tests
  2. context("URL check functions", function()
  3. local mpool = require("rspamd_mempool")
  4. local lua_urls_compose = require "lua_urls_compose"
  5. local url = require("rspamd_url")
  6. local lua_util = require("lua_util")
  7. local logger = require("rspamd_logger")
  8. local test_helper = require("rspamd_test_helper")
  9. local ffi = require("ffi")
  10. ffi.cdef [[
  11. void rspamd_normalize_path_inplace(char *path, size_t len, size_t *nlen);
  12. ]]
  13. test_helper.init_url_parser()
  14. local pool = mpool.create()
  15. local cases = {
  16. { "test.com", { "test.com", nil } },
  17. { " test.com", { "test.com", nil } },
  18. { "<test.com> text", { "test.com", nil } },
  19. { "test.com. text", { "test.com", nil } },
  20. { "mailto:A.User@example.com text", { "example.com", "A.User" } },
  21. { "http://Тест.Рф:18 text", { "тест.рф", nil } },
  22. { "http://user:password@тест2.РФ:18 text", { "тест2.рф", "user" } },
  23. { "somebody@example.com", { "example.com", "somebody" } },
  24. { "https://127.0.0.1/abc text", { "127.0.0.1", nil } },
  25. { "https:\\\\127.0.0.1/abc text", { "127.0.0.1", nil } },
  26. { "https:\\\\127.0.0.1", { "127.0.0.1", nil } },
  27. { "https://127.0.0.1 text", { "127.0.0.1", nil } },
  28. { "https://[::1]:1", { "::1", nil } },
  29. { "https://user:password@[::1]:1", { "::1", nil } },
  30. { "https://user:password@[::1]", { "::1", nil } },
  31. { "https://user:password@[::1]/1", { "::1", nil } },
  32. }
  -- Host/user extraction checks.
  -- Each entry of `cases` is { input_text, { expected_host, expected_user } },
  -- where expected_user is nil when the user part is not checked.
  for i, c in ipairs(cases) do
    local input, expected = c[1], c[2]
    -- The URL object is created eagerly, outside of the test body.
    local res = url.create(pool, input)

    test("Extract urls from text" .. i, function()
      assert_not_nil(res, "cannot parse " .. input)
      local t = res:to_table()
      --local s = logger.slog("%1 -> %2", input, t)
      --print(s)
      assert_not_nil(t, "cannot convert to table " .. input)
      assert_equal(expected[1], t['host'],
          logger.slog('expected host "%s", but got "%s" in url %s => %s',
              expected[1], t['host'], input, t))

      if expected[2] then
        -- Verify the user component against the `user` field of the table
        -- (comparing the host a second time would leave the user untested).
        assert_equal(expected[2], t['user'],
            logger.slog('expected user "%s", but got "%s" in url %s => %s',
                expected[2], t['user'], input, t))
      end
    end)
  end
  51. cases = {
  52. { [[http://example.net/path/]], true, {
  53. host = 'example.net', path = 'path/'
  54. } },
  55. { 'http://example.net/hello%20world.php?arg=x#fragment', true, {
  56. host = 'example.net', fragment = 'fragment', query = 'arg=x',
  57. path = 'hello world.php',
  58. } },
  59. { 'http://example.net/?arg=%23#fragment', true, {
  60. host = 'example.net', fragment = 'fragment', query = 'arg=#',
  61. } },
  62. { "http:/\\[::eeee:192.168.0.1]/#test", true, {
  63. host = '::eeee:c0a8:1', fragment = 'test'
  64. } },
  65. { "http:/\\[::eeee:192.168.0.1]#test", true, {
  66. host = '::eeee:c0a8:1', fragment = 'test'
  67. } },
  68. { "http:/\\[::eeee:192.168.0.1]?test", true, {
  69. host = '::eeee:c0a8:1', query = 'test'
  70. } },
  71. { "http:\\\\%30%78%63%30%2e%30%32%35%30.01", true, { --0xc0.0250.01
  72. host = '192.168.0.1',
  73. } },
  74. { "http:/\\www.google.com/foo?bar=baz#", true, {
  75. host = 'www.google.com', path = 'foo', query = 'bar=baz', tld = 'google.com'
  76. } },
  77. { "http://[www.google.com]/", true, {
  78. host = 'www.google.com',
  79. } },
  80. { "<test.com", true, {
  81. host = 'test.com', tld = 'test.com',
  82. } },
  83. { "test.com>", false },
  84. { ",test.com text", false },
  85. { "ht\ttp:@www.google.com:80/;p?#", false },
  86. { "http://user:pass@/", false },
  87. { "http://foo:-80/", false },
  88. { "http:////////user:@google.com:99?foo", true, {
  89. host = 'google.com', user = 'user', port = 99, query = 'foo'
  90. } },
  91. { "http://%25DOMAIN:foobar@foodomain.com/", true, {
  92. host = 'foodomain.com', user = '%25DOMAIN'
  93. } },
  94. { "http://0.0xFFFFFF", true, {
  95. host = '0.255.255.255'
  96. } },
  97. --{"http:/\\030052000001", true, {
  98. -- host = '192.168.0.1'
  99. --}},
  100. { "http:\\/0xc0.052000001", true, {
  101. host = '192.168.0.1'
  102. } },
  103. { "http://192.168.0.1.?foo", true, {
  104. host = '192.168.0.1', query = 'foo',
  105. } },
  106. { "http://twitter.com#test", true, {
  107. host = 'twitter.com', fragment = 'test'
  108. } },
  109. { "http:www.twitter.com#test", true, {
  110. host = 'www.twitter.com', fragment = 'test'
  111. } },
  112. { "http://example。com#test", true, {
  113. host = 'example.com', fragment = 'test'
  114. } },
  115. { "http://hoho.example。com#test", true, {
  116. host = 'hoho.example.com', fragment = 'test'
  117. } },
  118. { "http://hoho。example。com#test", true, {
  119. host = 'hoho.example.com', fragment = 'test'
  120. } },
  121. { "http://hoho.example。com#test", true, {
  122. host = 'hoho.example.com', fragment = 'test'
  123. } },
  124. { "http://hehe。example。com#test", true, {
  125. host = 'hehe.example.com', fragment = 'test'
  126. } },
  127. { "http:////$%^&****((@example.org//#f@f", true, {
  128. user = '$%^&****((', host = 'example.org', fragment = 'f@f'
  129. } },
  130. { "http://@@example.com", true, {
  131. user = "@", host = "example.com"
  132. } },
  133. { "https://example.com\\_Resources\\ClientImages\\UserData?ol\\o#ololo\\", true, {
  134. host = "example.com", path = "_Resources\\ClientImages\\UserData",
  135. query = "ol\\o", fragment = "ololo\\",
  136. } },
  137. {
  138. "http://0x3f8f29a4/pro/au.html", true, {
  139. host = "63.143.41.164",
  140. path = "pro/au.html",
  141. } },
  142. {
  143. "http://localhost", true, {
  144. host = "localhost",
  145. tld = "localhost",
  146. } },
  147. {
  148. "http://localhost.", true, {
  149. host = "localhost.",
  150. tld = "localhost",
  151. } },
  152. }
  153. -- Some cases from https://code.google.com/p/google-url/source/browse/trunk/src/url_canon_unittest.cc
  -- Full-field parsing checks.
  -- Each entry of `cases` is { url_text, should_parse, expected_fields };
  -- expected_fields is consulted only when should_parse is truthy.
  for _, case in ipairs(cases) do
    local input, should_parse, expected = case[1], case[2], case[3]
    local parsed = url.create(pool, input)

    test("Parse url: " .. input, function()
      -- Negative cases: the parser must reject the input outright.
      if not should_parse then
        assert_nil(parsed, "should not parse " .. input .. ' parsed to: ' .. tostring(parsed))
        return
      end

      assert_not_nil(parsed, "we are able to parse url: " .. input)
      local fields = parsed:to_table()

      -- Every expected field must be present and carry the expected value.
      for name, want in pairs(expected) do
        assert_not_nil(fields[name], name .. ' is missing in url, must be ' .. want)
        assert_equal(fields[name], want, logger.slog('expected " %s ", for %s, but got " %s " in url %s => %s',
            want, name, fields[name], input, fields))
      end

      -- No unexpected fields may appear; url, protocol and tld are exempt
      -- from this check.
      for name, got in pairs(fields) do
        local exempt = name == 'url' or name == 'protocol' or name == 'tld'
        if not exempt then
          assert_not_nil(expected[name], name .. ' should be absent but it is ' .. got .. ' in: ' .. input)
        end
      end
    end)
  end
  175. cases = {
  176. { "/././foo", "/foo" },
  177. { "/a/b/c/./../../g", "/a/g" },
  178. { "/./.foo", "/.foo" },
  179. { "/foo/.", "/foo/" },
  180. { "/foo/./", "/foo/" },
  181. { "/foo/bar/..", "/foo" },
  182. { "/foo/bar/../", "/foo/" },
  183. { "/foo/..bar", "/foo/..bar" },
  184. { "/foo/bar/../ton", "/foo/ton" },
  185. { "/foo/bar/../ton/../../a", "/a" },
  186. { "/foo/../../..", "/" },
  187. { "/foo/../../../ton", "/ton" },
  188. { "////../..", "/" },
  189. { "./", "" },
  190. { "/./", "/" },
  191. { "/./././././././", "/" },
  192. { "/", "/" },
  193. { "/a/b", "/a/b" },
  194. { "/a/b/", "/a/b/" },
  195. { "..", "/" },
  196. { "/../", "/" },
  197. { "../", "/" },
  198. { "///foo", "/foo" },
  199. }
  -- Path normalisation checks: each entry of `cases` is { raw_path, expected_path }.
  for _, case in ipairs(cases) do
    local raw, expected = case[1], case[2]

    test(string.format("Normalize paths '%s'", raw), function()
      local len = #raw
      -- Copy the Lua string into a mutable FFI buffer: the C function
      -- rewrites the path in place and reports the new length via out_len.
      local buf = ffi.new("uint8_t[?]", len)
      local out_len = ffi.new("size_t[1]")
      ffi.copy(buf, raw, len)
      ffi.C.rspamd_normalize_path_inplace(buf, len, out_len)

      local normalized = ffi.string(buf, tonumber(out_len[0]))
      assert_equal(expected, normalized, 'expected ' .. expected .. ' but got ' .. normalized .. ' in path ' .. raw)
    end)
  end
  210. cases = {
  211. { 'example.com', 'example.com' },
  212. { 'baz.example.com', 'baz.example.com' },
  213. { '3.baz.example.com', 'baz.example.com' },
  214. { 'bar.example.com', 'example.com' },
  215. { 'foo.example.com', 'foo.example.com' },
  216. { '3.foo.example.com', '3.foo.example.com' },
  217. { 'foo.com', 'foo.com' },
  218. { 'bar.foo.com', 'foo.com' },
  219. }
  -- Rule list handed to lua_urls_compose.inject_composition_rules; the
  -- composed result expected for each host is listed in `cases` as
  -- { host, expected_composition }.
  local excl_rules1 = {
    'example.com',
    '*.foo.example.com',
    '!bar.example.com'
  }
  local comp_rules = lua_urls_compose.inject_composition_rules(rspamd_config, excl_rules1)

  for _, v in ipairs(cases) do
    test("URL composition " .. v[1], function()
      local u = url.create(pool, v[1])
      assert_not_nil(u, "we are able to parse url:" .. v[1])
      local res = comp_rules:process_url(nil, u:get_tld(), u:get_host())
      -- tostring() keeps the failure message buildable even when process_url
      -- yields nil, so the test reports a proper assertion failure instead of
      -- a string concatenation error.
      assert_equal(v[2], res, 'expected ' .. v[2] .. ' but got ' .. tostring(res) .. ' in url ' .. v[1])
    end)
  end
  -- Regression test: run a cached regexp over "/" .. path .. "?" .. query of a
  -- URL with a long percent-encoded query and check that the last element of
  -- the search result is an empty string.
  test("URL regexp issue", function()
    local rspamd_regexp = require "rspamd_regexp"

    local long_url = 'https://cls21.bullhornstaffing.com/MailerUnsubscribe.cfm?privateLabelID=3D26028&email=xpto&updKey=3D%3B%28U%2B%2F%200T%3EI%3B%2FQEI%5E%29%25XR%3FZ%40%5B%2EGJY%3CF%23%3F%25%22%29%5D%2D%0A'
    local parsed = url.create(pool, long_url)
    assert_not_nil(parsed, "we are able to parse url")

    local pattern = "^$|^[?].*|^[#].*|[^#?]+"
    local re = rspamd_regexp.create_cached(pattern)
    assert_not_nil(re, "regexp is valid")

    local subject = '/' .. parsed:get_path() .. '?' .. parsed:get_query()
    local matches = re:search(subject)
    assert_equal(matches[#matches], '')
  end)
  244. end)