diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-07-01 16:28:52 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2015-07-01 16:28:52 +0100 |
commit | 6f5388b87b253f0babeaef680f376869ad4e10c4 (patch) | |
tree | 3a45b0eea073cd3956832a2446030440ee890f6f | |
parent | 307cb52ff8d2d5a258515531ebdbf9afdad3ba73 (diff) | |
download | rspamd-6f5388b87b253f0babeaef680f376869ad4e10c4.tar.gz rspamd-6f5388b87b253f0babeaef680f376869ad4e10c4.zip |
Fix finding start of URLs.
-rw-r--r-- | src/libserver/url.c | 23 |
1 files changed, 17 insertions, 6 deletions
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 0373f23ec..5eccdebae 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -638,6 +638,9 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
 					goto out;
 				}
 			}
+			else if (!g_ascii_isxdigit (t) && t != ':' && t != '.') {
+				goto out;
+			}
 			p ++;
 			break;
 		case parse_user:
@@ -1146,13 +1149,11 @@ static const gchar url_braces[] = {
 };
 
 static gboolean
-is_open_brace (gchar c)
+is_url_start (gchar c)
 {
 	if (c == '(' ||
 		c == '{' ||
-		c == '[' ||
 		c == '<' ||
-		c == '|' ||
 		c == '\'') {
 		return TRUE;
 	}
@@ -1217,11 +1218,18 @@ url_tld_start (const gchar *begin,
 	while (p >= begin) {
 		if ((!is_domain (*p) && *p != '.' &&
 			*p != '/') || g_ascii_isspace (*p)) {
+
+			if (!is_url_start (*p) && !g_ascii_isspace (*p)) {
+				return FALSE;
+			}
+
 			p++;
+
 			if (!g_ascii_isalnum (*p)) {
 				/* Urls cannot start with strange symbols */
 				return FALSE;
 			}
+
 			match->m_begin = p;
 			return TRUE;
 		}
@@ -1299,16 +1307,19 @@ url_web_start (const gchar *begin,
 {
 	/* Check what we have found */
 	if (pos > begin &&
-		(g_ascii_strncasecmp (pos, "www",
-		3) == 0 || g_ascii_strncasecmp (pos, "ftp", 3) == 0)) {
-		if (!is_open_brace (*(pos - 1)) && !g_ascii_isspace (*(pos - 1))) {
+		(g_ascii_strncasecmp (pos, "www",3) == 0 ||
+		g_ascii_strncasecmp (pos, "ftp", 3) == 0)) {
+
+		if (!is_url_start (*(pos - 1)) && !g_ascii_isspace (*(pos - 1))) {
 			return FALSE;
 		}
 	}
+
 	if (*pos == '.') {
 		/* Urls cannot start with . */
 		return FALSE;
 	}
+
 	match->m_begin = pos;
 
 	return TRUE;