Browse Source

Use publicsuffix list for url search (too slow now).

tags/0.9.0
Vsevolod Stakhov 9 years ago
parent
commit
ca78852de1
3 changed files with 12 additions and 28 deletions
  1. 4
    0
      CMakeLists.txt
  2. 1
    0
      conf/options.inc
  3. 7
    28
      src/libserver/url.c

+ 4
- 0
CMakeLists.txt View File

# Install lua fun library # Install lua fun library
INSTALL(FILES "contrib/lua-fun/fun.lua" DESTINATION ${PLUGINSDIR}/lua) INSTALL(FILES "contrib/lua-fun/fun.lua" DESTINATION ${PLUGINSDIR}/lua)


# Install TLD list (public suffix data).
# DESTINATION of INSTALL(FILES) names a *directory*, not the target file name,
# so the list ends up at ${PLUGINSDIR}/effective_tld_names.dat -- the path that
# the url_tld option in conf/options.inc refers to.
INSTALL(FILES "contrib/publicsuffix/effective_tld_names.dat" DESTINATION
	"${PLUGINSDIR}")

# Lua config # Lua config
INSTALL(CODE "FILE(MAKE_DIRECTORY \$ENV{DESTDIR}${CONFDIR}/lua)") INSTALL(CODE "FILE(MAKE_DIRECTORY \$ENV{DESTDIR}${CONFDIR}/lua)")
FILE(GLOB_RECURSE LUA_CONFIGS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/conf/lua" FILE(GLOB_RECURSE LUA_CONFIGS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/conf/lua"

+ 1
- 0
conf/options.inc View File

retransmits = 5; retransmits = 5;
} }
tempdir = "/tmp"; tempdir = "/tmp";
url_tld = "${PLUGINSDIR}/effective_tld_names.dat";

+ 7
- 28
src/libserver/url.c View File

#define URL_FLAG_STAR_MATCH (1 << 2) #define URL_FLAG_STAR_MATCH (1 << 2)


struct url_matcher { struct url_matcher {
const gchar *pattern;
gchar *pattern;
const gchar *prefix; const gchar *prefix;
gboolean (*start)(const gchar *begin, const gchar *end, const gchar *pos, gboolean (*start)(const gchar *begin, const gchar *end, const gchar *pos,
url_match_t *match); url_match_t *match);
FILE *f; FILE *f;
struct url_matcher m; struct url_matcher m;
gchar *linebuf = NULL, *p; gchar *linebuf = NULL, *p;
gsize buflen = 0;
gsize buflen = 0, patlen;
gssize r; gssize r;
gint flags; gint flags;


p = linebuf; p = linebuf;
} }


m.pattern = g_strdup (p);
patlen = strlen (p);
m.pattern = g_malloc (patlen + 2);
m.pattern[0] = '.';
rspamd_strlcpy (&m.pattern[1], p, patlen + 1);
g_array_append_val (url_scanner->matchers, m); g_array_append_val (url_scanner->matchers, m);
} }


for (i = 0; i < url_scanner->matchers->len; i++) { for (i = 0; i < url_scanner->matchers->len; i++) {
m = &g_array_index (url_scanner->matchers, struct url_matcher, i); m = &g_array_index (url_scanner->matchers, struct url_matcher, i);


if (m->flags & URL_FLAG_STRICT_MATCH) {
/* Insert more specific patterns */

/* some.tld/ */
rspamd_snprintf (patbuf,
sizeof (patbuf),
"%s/",
m->pattern);
rspamd_trie_insert (url_scanner->search_trie, patbuf, i);
/* some.tld */
rspamd_snprintf (patbuf,
sizeof (patbuf),
"%s ",
m->pattern);
rspamd_trie_insert (url_scanner->search_trie, patbuf, i);
/* some.tld: */
rspamd_snprintf (patbuf,
sizeof (patbuf),
"%s:",
m->pattern);
rspamd_trie_insert (url_scanner->search_trie, patbuf, i);
}
else {
rspamd_trie_insert (url_scanner->search_trie, m->pattern, i);
}
rspamd_trie_insert (url_scanner->search_trie, m->pattern, i);


/* Also use it for TLD lookups */ /* Also use it for TLD lookups */
if (strcmp (m->prefix, "http://") == 0) { if (strcmp (m->prefix, "http://") == 0) {

Loading…
Cancel
Save