diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2013-11-26 16:10:37 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2013-11-26 16:10:37 +0000 |
commit | cbfa72954e882cfa8ad233d2b2d791526bc85f73 (patch) | |
tree | 763b711ed4d02ba9c99de8becd6129bde67e93bc /src/url.c | |
parent | 31b4474a2fd7b6d6b62270d19f06659937f5e725 (diff) | |
download | rspamd-cbfa72954e882cfa8ad233d2b2d791526bc85f73.tar.gz rspamd-cbfa72954e882cfa8ad233d2b2d791526bc85f73.zip |
Reduce the false-positive rate in URL detection.
Diffstat (limited to 'src/url.c')
-rw-r--r-- | src/url.c | 15 |
1 files changed, 11 insertions, 4 deletions
@@ -1201,7 +1201,7 @@ url_tld_start (const gchar *begin, const gchar *end, const gchar *pos, url_match /* Try to find the start of the url by finding any non-urlsafe character or whitespace/punctuation */ while (p >= begin) { - if ((!is_domain (*p) && *p != '.') || g_ascii_isspace (*p)) { + if ((!is_domain (*p) && *p != '.' && *p != '/') || g_ascii_isspace (*p)) { p ++; if (!g_ascii_isalnum (*p)) { /* Urls cannot start with strange symbols */ @@ -1224,6 +1224,10 @@ url_tld_start (const gchar *begin, const gchar *end, const gchar *pos, url_match return FALSE; } } + else if (*p == '/') { + /* Urls cannot contain '/' in their body */ + return FALSE; + } p --; } @@ -1235,9 +1239,9 @@ url_tld_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t { const gchar *p; - /* A url must be finished by tld, so it must be followed by punctuation or by space character */ + /* A url must be finished by tld, so it must be followed by space character */ p = pos + strlen (match->pattern); - if (p == end || g_ascii_isspace (*p) || g_ascii_ispunct (*p)) { + if (p == end || g_ascii_isspace (*p) || *p == ',') { match->m_len = p - match->m_begin; return TRUE; } @@ -1356,10 +1360,13 @@ domain: } if (!passwd && (port >= 65536 || *p == '@')) { - if (p < end) { + if (p < end && *p == '@') { /* this must be a password? */ goto passwd; } + else if (p < end) { + return FALSE; + } p--; } |