diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-03-14 18:19:17 +0300 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-03-14 18:19:17 +0300 |
commit | 9dcd8232a2326b951f9d7fc8d7063a89a4312468 (patch) | |
tree | 6bc92ab0536fa5dbf667ebe696d38e7a1ca30995 /src | |
parent | eb746e9a050fb8032ac213751ba55f9229213c91 (diff) | |
download | rspamd-9dcd8232a2326b951f9d7fc8d7063a89a4312468.tar.gz rspamd-9dcd8232a2326b951f9d7fc8d7063a89a4312468.zip |
Fix phishing check for special cases like http://host.com and http://www.host.com
Diffstat (limited to 'src')
-rw-r--r-- | src/html.c | 42 | ||||
-rw-r--r-- | src/url.c | 18 |
2 files changed, 46 insertions, 14 deletions
```diff
diff --git a/src/html.c b/src/html.c
index 29d535564..74bdc29ac 100644
--- a/src/html.c
+++ b/src/html.c
@@ -683,12 +683,12 @@ check_phishing (struct worker_task *task, struct uri *href_url, const gchar *url
 {
 	struct uri *new;
 	gchar *url_str;
-	const gchar *p;
+	const gchar *p, *c;
 	gsize len = 0;
 	gint off, rc;
 
 	p = url_text;
-	while (len < remain) {
+	while (len < remain - 1) {
 		if (*p == '<' || *p == '>') {
 			break;
 		}
@@ -704,8 +704,39 @@ check_phishing (struct worker_task *task, struct uri *href_url, const gchar *url
 
 	if (rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) {
 		if (g_ascii_strncasecmp (href_url->host, new->host, MAX (href_url->hostlen, new->hostlen)) != 0) {
-			href_url->is_phished = TRUE;
-			href_url->phished_url = new;
+			/* Special check for urls beginning with 'www' */
+			if (new->hostlen > 4 && href_url->hostlen > 4) {
+				p = new->host;
+				c = NULL;
+				if ((p[0] == 'w' || p[0] == 'W') &&
+					(p[1] == 'w' || p[1] == 'W') &&
+					(p[2] == 'w' || p[2] == 'W') &&
+					(p[3] == '.')) {
+					p += 4;
+					c = href_url->host;
+					len = MAX (href_url->hostlen, new->hostlen - 4);
+				}
+				else {
+					p = href_url->host;
+					if ((p[0] == 'w' || p[0] == 'W') &&
+						(p[1] == 'w' || p[1] == 'W') &&
+						(p[2] == 'w' || p[2] == 'W') &&
+						(p[3] == '.')) {
+						p += 4;
+						c = new->host;
+						len = MAX (href_url->hostlen - 4, new->hostlen);
+					}
+				}
+				/* Compare parts and check for phished hostname */
+				if (c != NULL && g_ascii_strncasecmp (p, c, len) != 0) {
+					href_url->is_phished = TRUE;
+					href_url->phished_url = new;
+				}
+			}
+			else {
+				href_url->is_phished = TRUE;
+				href_url->phished_url = new;
+			}
 		}
 	}
 	else {
@@ -805,7 +836,8 @@ parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t i
 	 * Check for phishing
 	 */
 	if ((p = strchr (c, '>')) != NULL ) {
-		check_phishing (task, url, p + 1, remain - (p - tag_text));
+		p ++;
+		check_phishing (task, url, p, remain - (p - tag_text));
 	}
 	if (part->html_urls && g_tree_lookup (part->html_urls, url_text) == NULL) {
 		g_tree_insert (part->html_urls, url_text, url);
diff --git a/src/url.c b/src/url.c
--- a/src/url.c
+++ b/src/url.c
@@ -930,7 +930,7 @@ url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_
 		}
 	}
 
-	while (p < end - 1 && *p != stop && is_urlsafe (*p)) {
+	while (p < end && *p != stop && is_urlsafe (*p)) {
 		p ++;
 	}
 
@@ -979,14 +979,14 @@ url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t
 	if (is_atom (*p)) {
 		/* might be a domain or user@domain */
 		c = p;
-		while (p < end - 1) {
+		while (p < end) {
 			if (!is_atom (*p)) {
 				break;
 			}
 
 			p++;
 
-			while (p < end - 1 && is_atom (*p)) {
+			while (p < end && is_atom (*p)) {
 				p++;
 			}
 
@@ -1006,18 +1006,18 @@ url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t
 	}
 	else if (is_domain (*p)) {
 domain:
-		while (p < end - 1) {
+		while (p < end) {
 			if (!is_domain (*p)) {
 				break;
 			}
 
 			p++;
 
-			while (p < end - 1 && is_domain (*p)) {
+			while (p < end && is_domain (*p)) {
 				p++;
 			}
 
-			if ((p + 1) < end - 1 && *p == '.' && (is_domain (*(p + 1)) || *(p + 1) == '/')) {
+			if ((p + 1) < end && *p == '.' && (is_domain (*(p + 1)) || *(p + 1) == '/')) {
 				p++;
 			}
 		}
@@ -1034,7 +1034,7 @@ domain:
 			if (is_digit (*p) || passwd) {
 				port = (*p++ - '0');
 
-				while (p < end - 1 && is_digit (*p) && port < 65536) {
+				while (p < end && is_digit (*p) && port < 65536) {
 					port = (port * 10) + (*p++ - '0');
 				}
 
@@ -1052,7 +1052,7 @@ domain:
 				passwd = TRUE;
 				c = p;
 
-				while (p < end - 1 && is_atom (*p)) {
+				while (p < end && is_atom (*p)) {
 					p++;
 				}
 
@@ -1076,7 +1076,7 @@ domain:
 	case '/': /* we've detected a path component to our url */
 		p++;
 	case '?':
-		while (p < end - 1 && is_urlsafe (*p)) {
+		while (p < end && is_urlsafe (*p)) {
 			if (*p == open_brace) {
 				brace_stack++;
 			}
```