From: Vsevolod Stakhov Date: Mon, 11 Jul 2011 14:42:13 +0000 (+0400) Subject: Fix phishing detection with img flag. X-Git-Tag: 0.4.0~35 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=b14402cd4ed5bf9b3efc0cc9d50c812b66a31f57;p=rspamd.git Fix phishing detection with img flag. Handle unclosed HTML tags properly. Remove warnings for types on 32 bit archs. Do not touch grow factor many times when one shot mode is turned on. --- diff --git a/src/dns.c b/src/dns.c index 4adab9af9..e82b1a9b7 100644 --- a/src/dns.c +++ b/src/dns.c @@ -566,7 +566,7 @@ dns_fin_cb (gpointer arg) struct rspamd_dns_request *req = arg; event_del (&req->timer_event); - g_hash_table_remove (req->resolver->requests, GUINT_TO_POINTER (req->id)); + g_hash_table_remove (req->resolver->requests, GUINT_TO_POINTER ((guint)req->id)); } static guint8 * @@ -925,7 +925,7 @@ dns_parse_reply (guint8 *in, gint r, struct rspamd_dns_resolver *resolver, } /* Now try to find corresponding request */ - if ((req = g_hash_table_lookup (resolver->requests, GUINT_TO_POINTER (header->qid))) == NULL) { + if ((req = g_hash_table_lookup (resolver->requests, GUINT_TO_POINTER ((guint)header->qid))) == NULL) { /* No such requests found */ return FALSE; } @@ -1134,7 +1134,7 @@ dns_retransmit_handler (gint fd, short what, void *arg) evtimer_add (&req->timer_event, &req->tv); /* Add request to hash table */ - g_hash_table_insert (req->resolver->requests, GUINT_TO_POINTER (req->id), req); + g_hash_table_insert (req->resolver->requests, GUINT_TO_POINTER ((guint)req->id), req); register_async_event (req->session, (event_finalizer_t)dns_fin_cb, req, FALSE); } } @@ -1233,7 +1233,7 @@ make_dns_request (struct rspamd_dns_resolver *resolver, evtimer_add (&req->timer_event, &req->tv); /* Add request to hash table */ - g_hash_table_insert (resolver->requests, GUINT_TO_POINTER (req->id), req); + g_hash_table_insert (resolver->requests, GUINT_TO_POINTER ((guint)req->id), req); register_async_event (session, (event_finalizer_t)dns_fin_cb, req, FALSE); } else if (r == -1) { diff --git a/src/filter.c b/src/filter.c index b48bf64ba..fea91125f 100644 --- a/src/filter.c +++ b/src/filter.c @@ -80,18 +80,9 @@ insert_metric_result (struct worker_task *task, struct metric *metric, const gch else { w = (*weight) * flag; } - /* Handle grow factor */ - if (metric_res->grow_factor && w > 0) { - w *= metric_res->grow_factor; - metric_res->grow_factor *= metric->grow_factor; - } - else if (w > 0) { - metric_res->grow_factor = metric->grow_factor; - } - - /* Add metric score */ + /* Add metric score */ if ((s = g_hash_table_lookup (metric_res->symbols, symbol)) != NULL) { if (s->options && opts && opts != s->options) { /* Append new options */ @@ -106,6 +97,14 @@ insert_metric_result (struct worker_task *task, struct metric *metric, const gch memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, s->options); } if (!single) { + /* Handle grow factor */ + if (metric_res->grow_factor && w > 0) { + w *= metric_res->grow_factor; + metric_res->grow_factor *= metric->grow_factor; + } + else if (w > 0) { + metric_res->grow_factor = metric->grow_factor; + } s->score += w; metric_res->score += w; } @@ -114,6 +113,14 @@ insert_metric_result (struct worker_task *task, struct metric *metric, const gch s = memory_pool_alloc (task->task_pool, sizeof (struct symbol)); s->score = w; + /* Handle grow factor */ + if (metric_res->grow_factor && w > 0) { + w *= metric_res->grow_factor; + metric_res->grow_factor *= metric->grow_factor; + } + else if (w > 0) { + metric_res->grow_factor = metric->grow_factor; + } s->name = symbol; metric_res->score += w; diff --git a/src/html.c b/src/html.c index 3582022f8..bf2610821 100644 --- a/src/html.c +++ b/src/html.c @@ -662,7 +662,7 @@ static gchar * html_strncasestr (const gchar *s, const gchar *find, gsize len) { gchar c, sc; - size_t mlen; + gsize mlen; if ((c = *find++) != 0) { c = g_ascii_tolower (c); @@ -692,26 +692,41 @@ check_phishing (struct worker_task *task, struct uri *href_url, const gchar *url p = url_text; while (len < remain) { if (*p == '<') { - /* Get tag name */ - p ++; - len ++; - if (*p == '/') { - /* Check tag name */ + /* Check tag name */ + if (*(p + 1) == '/') { + c = p + 2; + } + else { c = p + 1; - while (len < remain) { - if (!g_ascii_isspace (*p) && *p != '>') { + } + while (len < remain) { + if (!g_ascii_isspace (*p) && *p != '>') { + p ++; + len ++; + } + else { + break; + } + } + rspamd_strlcpy (tagbuf, c, MIN (sizeof(tagbuf), p - c + 1)); + if ((tag = get_tag_by_name (tagbuf)) != NULL) { + if (tag->id == id) { + break; + } + else if (tag->id == Tag_IMG) { + /* We should ignore IMG tag here */ + while (len < remain && *p != '>' && *p != '<') { p ++; len ++; } - else { - break; - } - } - rspamd_strlcpy (tagbuf, c, MIN (sizeof(tagbuf), p - c + 1)); - if ((tag = get_tag_by_name (tagbuf)) != NULL) { - if (tag->id == id) { - break; + if (*p == '>' && len < remain) { + p ++; } + + remain -= p - url_text; + url_text = p; + len = 0; + continue; } } } @@ -724,6 +739,7 @@ check_phishing (struct worker_task *task, struct uri *href_url, const gchar *url if (new != NULL) { g_strstrip (url_str); rc = parse_uri (new, url_str, task->task_pool); + if (rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) { if (g_ascii_strncasecmp (href_url->host, new->host, MAX (href_url->hostlen, new->hostlen)) != 0) { diff --git a/src/message.c b/src/message.c index 4db4bef7d..8ff53ea93 100644 --- a/src/message.c +++ b/src/message.c @@ -70,7 +70,13 @@ strip_html_tags (struct worker_task *task, memory_pool_t * pool, struct mime_tex state = 1; } else if (state == 1) { - depth++; + /* Opening bracket without closing one */ + p --; + while (g_ascii_isspace (*p) && p > src->data) { + p --; + } + p ++; + goto unbreak_tag; } break; @@ -107,7 +113,7 @@ strip_html_tags (struct worker_task *task, memory_pool_t * pool, struct mime_tex if (in_q) { break; } - +unbreak_tag: switch (state) { case 1: /* HTML/XML */ lc = '>';