aboutsummaryrefslogtreecommitdiffstats
path: root/test/lua/unit/url.lua
blob: de274425d6e08fa96600b5d04a1c7d54e9b4f454 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
-- URL parser tests

context("URL check functions", function()
  local mpool = require("rspamd_mempool")
  local url = require("rspamd_url")
  local logger = require("rspamd_logger")
  local ffi = require("ffi")
  -- C prototypes made callable through ffi.C; only rspamd_url_init is
  -- called in this file.
  ffi.cdef[[
  void rspamd_url_init (const char *tld_file);
  unsigned ottery_rand_range(unsigned top);
  ]]

  -- Directory of this test file, including the trailing slash, taken from the
  -- chunk name ("@/path/to/file.lua"). When the chunk name carries no
  -- directory component the pattern does not match, so fall back to the
  -- current directory instead of passing the raw chunk name on as a path.
  local test_dir = string.match(debug.getinfo(1).source, "^@(.+/)[^/]+$") or "./"

  -- test_dir already ends with '/', so plain concatenation avoids a doubled
  -- separator in the TLD file path.
  ffi.C.rspamd_url_init(test_dir .. "test_tld.dat")

  test("Extract urls from text", function()
    -- Every sample is {input text, {expected host, expected user}}.
    -- A nil expected user means the user field is not checked for that input.
    local samples = {
      {"test.com", {"test.com", nil}},
      {" test.com", {"test.com", nil}},
      {"<test.com> text", {"test.com", nil}},
      {"test.com. text", {"test.com", nil}},
      {"mailto:A.User@example.com text", {"example.com", "A.User"}},
      {"http://Тест.Рф:18 text", {"тест.рф", nil}},
      {"http://user:password@тест2.РФ:18 text", {"тест2.рф", "user"}},
      {"somebody@example.com", {"example.com", "somebody"}},
      {"https://127.0.0.1/abc text", {"127.0.0.1", nil}},
      {"https://127.0.0.1 text", {"127.0.0.1", nil}},
      {"https://[::1]:1", {"::1", nil}},
      {"https://user:password@[::1]:1", {"::1", nil}},
      {"https://user:password@[::1]", {"::1", nil}},
      {"https://user:password@[::1]/1", {"::1", nil}},
    }
    local pool = mpool.create()

    for idx = 1, #samples do
      local text = samples[idx][1]
      local expected = samples[idx][2]

      -- The input must be recognised as a URL and be convertible to a table.
      local parsed = url.create(pool, text)
      assert_not_nil(parsed, "cannot parse " .. text)

      local fields = parsed:to_table()
      -- Debug aid: print(logger.slog("%1 -> %2", text, fields))
      assert_not_nil(fields, "cannot convert to table " .. text)

      -- The host is always compared; the user only when one is expected.
      assert_equal(expected[1], fields['host'])

      local expected_user = expected[2]
      if expected_user then
        assert_equal(expected_user, fields['user'])
      end
    end

    pool:destroy()
  end)

  -- Some cases from https://code.google.com/p/google-url/source/browse/trunk/src/url_canon_unittest.cc
  test("Parse urls", function()
    local pool = mpool.create()
    -- Each case is {input, parseable, expected}. `expected` is only present
    -- for parseable inputs and maps field names of the parsed url table
    -- (host, port, user, path, query, fragment, tld, ...) to required values.
    local cases = {
      {"http://%30%78%63%30%2e%30%32%35%30.01", true, { --0xc0.0250.01
        host = '192.168.0.1',
      }},
      {"http://www.google.com/foo?bar=baz#", true, {
        host = 'www.google.com', path = 'foo', query = 'bar=baz', tld = 'google.com'
      }},
      {"http://[www.google.com]/", false},
      {"<test.com", true, {
        host = 'test.com', tld = 'test.com',
      }},
      {"test.com>", false},
      {",test.com text", false},
      {"ht\ttp:@www.google.com:80/;p?#", false},
      {"http://user:pass@/", false},
      {"http://foo:-80/", false},
      {"http:////////user:@google.com:99?foo", true, {
        host = 'google.com', user = 'user', port = 99, query = 'foo'
      }},
      {"http://%25DOMAIN:foobar@foodomain.com/", true, {
        host = 'foodomain.com', user = '%25DOMAIN'
      }},
      {"http://0.0xFFFFFF", true, {
        host = '0.255.255.255'
      }},
      {"http://030052000001", true, {
        host = '192.168.0.1'
      }},
      {"http://0xc0.052000001", true, {
        host = '192.168.0.1'
      }},
      {"http://192.168.0.1.", true, {
        host = '192.168.0.1'
      }},
      {"http://[::eeee:192.168.0.1]", true, {
        host = '::eeee:c0a8:1'
      }},
      {"http://twitter.com#test", true, {
        host = 'twitter.com', fragment = 'test'
      }},
    }

    for _,c in ipairs(cases) do
      local res = url.create(pool, c[1])

      if c[2] then
        assert_not_nil(res, "cannot parse " .. c[1])

        local uf = res:to_table()

        -- Every expected field must be present with the expected value.
        -- Messages are built eagerly, so values go through tostring() to keep
        -- a non-string value from raising a concatenation error here.
        for k,v in pairs(c[3]) do
          assert_not_nil(uf[k], k .. ' is missing in url, must be ' .. tostring(v))
          assert_equal(uf[k], v, 'expected ' .. tostring(v) .. ' for ' .. k ..
            ' but got ' .. tostring(uf[k]) .. ' in url ' .. c[1])
        end
        -- No unexpected field may appear; url, protocol and tld are exempt
        -- from this check.
        for k,v in pairs(uf) do
          if k ~= 'url' and k ~= 'protocol' and k ~= 'tld' then
            assert_not_nil(c[3][k], k .. ' should be absent but it is ' ..
              tostring(v) .. ' in: ' .. c[1])
          end
        end
      else
        assert_nil(res, "should not parse " .. c[1] .. ' parsed to: ' .. tostring(res))
      end
    end

    -- Release the pool once all cases have run, as the
    -- "Extract urls from text" test does.
    pool:destroy()
  end)
end)