summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-03-14 18:19:17 +0300
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-03-14 18:19:17 +0300
commit 9dcd8232a2326b951f9d7fc8d7063a89a4312468 (patch)
tree 6bc92ab0536fa5dbf667ebe696d38e7a1ca30995
parent eb746e9a050fb8032ac213751ba55f9229213c91 (diff)
download rspamd-9dcd8232a2326b951f9d7fc8d7063a89a4312468.tar.gz
rspamd-9dcd8232a2326b951f9d7fc8d7063a89a4312468.zip
Fix phishing check for special cases like http://host.com and http://www.host.com
-rw-r--r-- src/html.c 42
-rw-r--r-- src/url.c 18
2 files changed, 46 insertions, 14 deletions
diff --git a/src/html.c b/src/html.c
index 29d535564..74bdc29ac 100644
--- a/src/html.c
+++ b/src/html.c
@@ -683,12 +683,12 @@ check_phishing (struct worker_task *task, struct uri *href_url, const gchar *url
{
struct uri *new;
gchar *url_str;
- const gchar *p;
+ const gchar *p, *c;
gsize len = 0;
gint off, rc;
p = url_text;
- while (len < remain) {
+ while (len < remain - 1) {
if (*p == '<' || *p == '>') {
break;
}
@@ -704,8 +704,39 @@ check_phishing (struct worker_task *task, struct uri *href_url, const gchar *url
if (rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) {
if (g_ascii_strncasecmp (href_url->host, new->host,
MAX (href_url->hostlen, new->hostlen)) != 0) {
- href_url->is_phished = TRUE;
- href_url->phished_url = new;
+ /* Special check for urls beginning with 'www' */
+ if (new->hostlen > 4 && href_url->hostlen > 4) {
+ p = new->host;
+ c = NULL;
+ if ((p[0] == 'w' || p[0] == 'W') &&
+ (p[1] == 'w' || p[1] == 'W') &&
+ (p[2] == 'w' || p[2] == 'W') &&
+ (p[3] == '.')) {
+ p += 4;
+ c = href_url->host;
+ len = MAX (href_url->hostlen, new->hostlen - 4);
+ }
+ else {
+ p = href_url->host;
+ if ((p[0] == 'w' || p[0] == 'W') &&
+ (p[1] == 'w' || p[1] == 'W') &&
+ (p[2] == 'w' || p[2] == 'W') &&
+ (p[3] == '.')) {
+ p += 4;
+ c = new->host;
+ len = MAX (href_url->hostlen - 4, new->hostlen);
+ }
+ }
+ /* Compare parts and check for phished hostname */
+ if (c != NULL && g_ascii_strncasecmp (p, c, len) != 0) {
+ href_url->is_phished = TRUE;
+ href_url->phished_url = new;
+ }
+ }
+ else {
+ href_url->is_phished = TRUE;
+ href_url->phished_url = new;
+ }
}
}
else {
@@ -805,7 +836,8 @@ parse_tag_url (struct worker_task *task, struct mime_text_part *part, tag_id_t i
* Check for phishing
*/
if ((p = strchr (c, '>')) != NULL ) {
- check_phishing (task, url, p + 1, remain - (p - tag_text));
+ p ++;
+ check_phishing (task, url, p, remain - (p - tag_text));
}
if (part->html_urls && g_tree_lookup (part->html_urls, url_text) == NULL) {
g_tree_insert (part->html_urls, url_text, url);
diff --git a/src/url.c b/src/url.c
index 83eb30d4d..8282f9217 100644
--- a/src/url.c
+++ b/src/url.c
@@ -930,7 +930,7 @@ url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_
}
}
- while (p < end - 1 && *p != stop && is_urlsafe (*p)) {
+ while (p < end && *p != stop && is_urlsafe (*p)) {
p ++;
}
@@ -979,14 +979,14 @@ url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t
if (is_atom (*p)) {
/* might be a domain or user@domain */
c = p;
- while (p < end - 1) {
+ while (p < end) {
if (!is_atom (*p)) {
break;
}
p++;
- while (p < end - 1 && is_atom (*p)) {
+ while (p < end && is_atom (*p)) {
p++;
}
@@ -1006,18 +1006,18 @@ url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t
}
else if (is_domain (*p)) {
domain:
- while (p < end - 1) {
+ while (p < end) {
if (!is_domain (*p)) {
break;
}
p++;
- while (p < end - 1 && is_domain (*p)) {
+ while (p < end && is_domain (*p)) {
p++;
}
- if ((p + 1) < end - 1 && *p == '.' && (is_domain (*(p + 1)) || *(p + 1) == '/')) {
+ if ((p + 1) < end && *p == '.' && (is_domain (*(p + 1)) || *(p + 1) == '/')) {
p++;
}
}
@@ -1034,7 +1034,7 @@ domain:
if (is_digit (*p) || passwd) {
port = (*p++ - '0');
- while (p < end - 1 && is_digit (*p) && port < 65536) {
+ while (p < end && is_digit (*p) && port < 65536) {
port = (port * 10) + (*p++ - '0');
}
@@ -1052,7 +1052,7 @@ domain:
passwd = TRUE;
c = p;
- while (p < end - 1 && is_atom (*p)) {
+ while (p < end && is_atom (*p)) {
p++;
}
@@ -1076,7 +1076,7 @@ domain:
case '/': /* we've detected a path component to our url */
p++;
case '?':
- while (p < end - 1 && is_urlsafe (*p)) {
+ while (p < end && is_urlsafe (*p)) {
if (*p == open_brace) {
brace_stack++;
}