]> source.dussan.org Git - rspamd.git/commitdiff
[Minor] Fix some more tld issues and tld tests
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 25 Feb 2020 18:52:36 +0000 (18:52 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 25 Feb 2020 18:52:36 +0000 (18:52 +0000)
src/libserver/url.c
test/functional/lua/tlds.lua

index 0407490d85fa41f31656989405b96c4ef4e98843..3c3dc45462584670efe04f471dafebe98ee101dd 100644 (file)
@@ -129,7 +129,6 @@ struct url_matcher {
                        url_match_t *match);
 
        gint flags;
-       gsize patlen;
 };
 
 static gboolean url_file_start (struct url_callback_data *cb,
@@ -175,44 +174,44 @@ static gboolean url_tel_end (struct url_callback_data *cb,
 struct url_matcher static_matchers[] = {
                /* Common prefixes */
                {"file://",   "",          url_file_start,  url_file_end,
-                               0, 0},
+                               0},
                {"file:\\\\",   "",        url_file_start,  url_file_end,
-                               0, 0},
+                               0},
                {"ftp://",    "",          url_web_start,   url_web_end,
-                               0, 0},
+                               0},
                {"ftp:\\\\",    "",        url_web_start,   url_web_end,
-                               0, 0},
+                               0},
                {"sftp://",   "",          url_web_start,   url_web_end,
-                               0, 0},
+                               0},
                {"http:",   "",            url_web_start,   url_web_end,
-                               0, 0},
+                               0},
                {"https:",   "",           url_web_start,   url_web_end,
-                               0, 0},
+                               0},
                {"news://",   "",          url_web_start,   url_web_end,
-                               0, 0},
+                               0},
                {"nntp://",   "",          url_web_start,   url_web_end,
-                               0, 0},
+                               0},
                {"telnet://", "",          url_web_start,   url_web_end,
-                               0, 0},
+                               0},
                {"tel:", "",               url_tel_start,   url_tel_end,
-                               0, 0},
+                               0},
                {"webcal://", "",          url_web_start,   url_web_end,
-                               0, 0},
+                               0},
                {"mailto:",   "",          url_email_start, url_email_end,
-                               0, 0},
+                               0},
                {"callto:", "",            url_tel_start,   url_tel_end,
-                               0, 0},
+                               0},
                {"h323:",     "",          url_web_start,   url_web_end,
-                               0, 0},
+                               0},
                {"sip:",      "",          url_web_start,   url_web_end,
-                               0, 0},
+                               0},
                {"www.",      "http://",   url_web_start,   url_web_end,
-                               URL_FLAG_NOHTML, 0},
+                               URL_FLAG_NOHTML},
                {"ftp.",      "ftp://",    url_web_start,   url_web_end,
-                               URL_FLAG_NOHTML, 0},
+                               URL_FLAG_NOHTML},
                /* Likely emails */
                {"@",         "mailto://", url_email_start, url_email_end,
-                               URL_FLAG_NOHTML, 0}
+                               URL_FLAG_NOHTML}
 };
 
 struct url_callback_data {
@@ -461,7 +460,7 @@ rspamd_url_parse_tld_file (const gchar *fname,
                                RSPAMD_MULTIPATTERN_TLD|RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
                m.pattern = rspamd_multipattern_get_pattern (url_scanner->search_trie,
                                rspamd_multipattern_get_npatterns (url_scanner->search_trie) - 1);
-               m.patlen = strlen (m.pattern);
+
                g_array_append_val (url_scanner->matchers, m);
        }
 
@@ -488,8 +487,6 @@ rspamd_url_add_static_matchers (struct url_match_scanner *sc)
                                        static_matchers[i].pattern,
                                        RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
                }
-
-               static_matchers[i].patlen = strlen (static_matchers[i].pattern);
        }
 
        g_array_append_vals (sc->matchers, static_matchers, n);
@@ -1511,14 +1508,15 @@ rspamd_tld_trie_callback (struct rspamd_multipattern *mp,
        struct url_matcher *matcher;
        const gchar *start, *pos, *p;
        struct rspamd_url *url = context;
-       gint ndots = 1;
+       gint ndots;
 
        matcher = &g_array_index (url_scanner->matchers, struct url_matcher,
                        strnum);
+       ndots = 1;
 
        if (matcher->flags & URL_FLAG_STAR_MATCH) {
                /* Skip one more tld component */
-               ndots = 2;
+               ndots ++;
        }
 
        pos = text + match_start;
@@ -2277,6 +2275,9 @@ rspamd_tld_trie_find_callback (struct rspamd_multipattern *mp,
                        ndots--;
                        pos = p + 1;
                }
+               else {
+                       pos = p;
+               }
 
                p--;
        }
index efaf01af563b0154a60fa2248cc6751171d7a282..0908001da666d8f83ef42a0f3c285ff8b6aa9f94 100644 (file)
@@ -13,10 +13,10 @@ rspamd_config:register_symbol({
       'example.com',
       'example.co.za',
       'example.in.net',
-      'example.kawasaki.jp',
+      'example.star.kawasaki.jp',
       'example.net',
       'example.net.in',
-      'example.nom.br',
+      'example.star.nom.br',
       'example.org',
       'example.org.ac',
       'example.ru.com',
@@ -34,22 +34,24 @@ rspamd_config:register_symbol({
         for _, p in ipairs(prefixes) do
           local test = rspamd_util.get_tld(p .. d)
           if (test ~= d) then
-            table.insert(worry, 'util.get_tld:' .. p .. d .. ':' .. test)
+            local opt = string.format('util.get_tld:p=%s;d=%s;got=%s', p, d, test)
+            table.insert(worry, opt)
             return
           end
           local u = rspamd_url.create(pool, p .. d)
           test = u:get_tld()
           if (test ~= d) then
-            table.insert(worry, 'url.get_tld:' .. p .. d .. ':' .. test)
+            local opt = string.format('url.create:p=%s;d=%s;got=%s', p, d, test)
+            table.insert(worry, opt)
             return
           end
         end
       end)()
     end
     if (#worry == 0) then
-      return true, "no worry"
+      return true, 1.0, "no worry"
     else
-      return true, table.concat(worry, ",")
+      return true, 1.0, worry
     end
   end
 })