source.dussan.org Git - rspamd.git/commitdiff
Use publicsuffix list for url search (too slow now).
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 6 Apr 2015 15:02:51 +0000 (16:02 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 6 Apr 2015 15:02:51 +0000 (16:02 +0100)
CMakeLists.txt
conf/options.inc
src/libserver/url.c

index 5dfeb9fd93047eff7ec3522e3728fb6f84ae2279..0fa67248df8252191f009702eac54585ca80836c 100644 (file)
@@ -1037,6 +1037,10 @@ ENDFOREACH(LUA_PLUGIN)
 # Install lua fun library
 INSTALL(FILES "contrib/lua-fun/fun.lua" DESTINATION ${PLUGINSDIR}/lua)
 
+# Install TLD list
+INSTALL(FILES "contrib/publicsuffix/effective_tld_names.dat" DESTINATION 
+       "${PLUGINSDIR}/effective_tld_names.dat")
+
 # Lua config
 INSTALL(CODE "FILE(MAKE_DIRECTORY \$ENV{DESTDIR}${CONFDIR}/lua)")
 FILE(GLOB_RECURSE LUA_CONFIGS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/conf/lua" 
index 85a69dca10ed18f92425dbc78438475cae641ad9..99921eba2e4d1800bfdc432fb48a89373ee31bfe 100644 (file)
@@ -14,3 +14,4 @@ dns {
     retransmits = 5;
 }
 tempdir = "/tmp";
+url_tld = "${PLUGINSDIR}/effective_tld_names.dat";
\ No newline at end of file
index b4c605d366039a77a4ee2391c298adbd1a6c401a..116255e4e89884dc7dd0800746d7b23710087a01 100644 (file)
@@ -46,7 +46,7 @@ typedef struct url_match_s {
 #define URL_FLAG_STAR_MATCH (1 << 2)
 
 struct url_matcher {
-       const gchar *pattern;
+       gchar *pattern;
        const gchar *prefix;
        gboolean (*start)(const gchar *begin, const gchar *end, const gchar *pos,
                url_match_t *match);
@@ -828,7 +828,7 @@ rspamd_url_parse_tld_file (const gchar *fname, struct url_match_scanner *scanner
        FILE *f;
        struct url_matcher m;
        gchar *linebuf = NULL, *p;
-       gsize buflen = 0;
+       gsize buflen = 0, patlen;
        gssize r;
        gint flags;
 
@@ -873,7 +873,10 @@ rspamd_url_parse_tld_file (const gchar *fname, struct url_match_scanner *scanner
                        p = linebuf;
                }
 
-               m.pattern = g_strdup (p);
+               patlen = strlen (p);
+               m.pattern = g_malloc (patlen + 2);
+               m.pattern[0] = '.';
+               rspamd_strlcpy (&m.pattern[1], p, patlen + 1);
                g_array_append_val (url_scanner->matchers, m);
        }
 
@@ -914,31 +917,7 @@ rspamd_url_init (const gchar *tld_file)
                for (i = 0; i < url_scanner->matchers->len; i++) {
                        m = &g_array_index (url_scanner->matchers, struct url_matcher, i);
 
-                       if (m->flags & URL_FLAG_STRICT_MATCH) {
-                               /* Insert more specific patterns */
-
-                               /* some.tld/ */
-                               rspamd_snprintf (patbuf,
-                                       sizeof (patbuf),
-                                       "%s/",
-                                       m->pattern);
-                               rspamd_trie_insert (url_scanner->search_trie, patbuf, i);
-                               /* some.tld  */
-                               rspamd_snprintf (patbuf,
-                                       sizeof (patbuf),
-                                       "%s ",
-                                       m->pattern);
-                               rspamd_trie_insert (url_scanner->search_trie, patbuf, i);
-                               /* some.tld: */
-                               rspamd_snprintf (patbuf,
-                                       sizeof (patbuf),
-                                       "%s:",
-                                       m->pattern);
-                               rspamd_trie_insert (url_scanner->search_trie, patbuf, i);
-                       }
-                       else {
-                               rspamd_trie_insert (url_scanner->search_trie, m->pattern, i);
-                       }
+                       rspamd_trie_insert (url_scanner->search_trie, m->pattern, i);
 
                        /* Also use it for TLD lookups */
                        if (strcmp (m->prefix, "http://") == 0) {