From: Vsevolod Stakhov Date: Mon, 6 Apr 2015 15:02:51 +0000 (+0100) Subject: Use publicsuffix list for url search (too slow now). X-Git-Tag: 0.9.0~323 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=ca78852de1b151de6f92fa35d89049072753f9e6;p=rspamd.git Use publicsuffix list for url search (too slow now). --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 5dfeb9fd9..0fa67248d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1037,6 +1037,10 @@ ENDFOREACH(LUA_PLUGIN) # Install lua fun library INSTALL(FILES "contrib/lua-fun/fun.lua" DESTINATION ${PLUGINSDIR}/lua) +# Install TLD list +INSTALL(FILES "contrib/publicsuffix/effective_tld_names.dat" DESTINATION + "${PLUGINSDIR}/effective_tld_names.dat") + # Lua config INSTALL(CODE "FILE(MAKE_DIRECTORY \$ENV{DESTDIR}${CONFDIR}/lua)") FILE(GLOB_RECURSE LUA_CONFIGS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/conf/lua" diff --git a/conf/options.inc b/conf/options.inc index 85a69dca1..99921eba2 100644 --- a/conf/options.inc +++ b/conf/options.inc @@ -14,3 +14,4 @@ dns { retransmits = 5; } tempdir = "/tmp"; +url_tld = "${PLUGINSDIR}/effective_tld_names.dat"; \ No newline at end of file diff --git a/src/libserver/url.c b/src/libserver/url.c index b4c605d36..116255e4e 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -46,7 +46,7 @@ typedef struct url_match_s { #define URL_FLAG_STAR_MATCH (1 << 2) struct url_matcher { - const gchar *pattern; + gchar *pattern; const gchar *prefix; gboolean (*start)(const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); @@ -828,7 +828,7 @@ rspamd_url_parse_tld_file (const gchar *fname, struct url_match_scanner *scanner FILE *f; struct url_matcher m; gchar *linebuf = NULL, *p; - gsize buflen = 0; + gsize buflen = 0, patlen; gssize r; gint flags; @@ -873,7 +873,10 @@ rspamd_url_parse_tld_file (const gchar *fname, struct url_match_scanner *scanner p = linebuf; } - m.pattern = g_strdup (p); + patlen = strlen (p); + m.pattern = g_malloc (patlen + 2); + m.pattern[0] = '.'; + rspamd_strlcpy (&m.pattern[1], p, patlen + 1); g_array_append_val (url_scanner->matchers, m); } @@ -914,31 +917,7 @@ rspamd_url_init (const gchar *tld_file) for (i = 0; i < url_scanner->matchers->len; i++) { m = &g_array_index (url_scanner->matchers, struct url_matcher, i); - if (m->flags & URL_FLAG_STRICT_MATCH) { - /* Insert more specific patterns */ - - /* some.tld/ */ - rspamd_snprintf (patbuf, - sizeof (patbuf), - "%s/", - m->pattern); - rspamd_trie_insert (url_scanner->search_trie, patbuf, i); - /* some.tld */ - rspamd_snprintf (patbuf, - sizeof (patbuf), - "%s ", - m->pattern); - rspamd_trie_insert (url_scanner->search_trie, patbuf, i); - /* some.tld: */ - rspamd_snprintf (patbuf, - sizeof (patbuf), - "%s:", - m->pattern); - rspamd_trie_insert (url_scanner->search_trie, patbuf, i); - } - else { - rspamd_trie_insert (url_scanner->search_trie, m->pattern, i); - } + rspamd_trie_insert (url_scanner->search_trie, m->pattern, i); /* Also use it for TLD lookups */ if (strcmp (m->prefix, "http://") == 0) {