summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-07-11 18:42:13 +0400
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2011-07-11 18:42:13 +0400
commit b14402cd4ed5bf9b3efc0cc9d50c812b66a31f57 (patch)
tree 7db7c93232fe4422e4ab76f0b8457644428d373f /src
parent 9e6acadd7ce323f42ebed02237d064305df32249 (diff)
download rspamd-b14402cd4ed5bf9b3efc0cc9d50c812b66a31f57.tar.gz
rspamd-b14402cd4ed5bf9b3efc0cc9d50c812b66a31f57.zip
Fix phishing detection with IMG tag.
Handle unclosed HTML tags properly. Remove type warnings on 32-bit architectures. Do not update the grow factor multiple times when one-shot mode is turned on.
Diffstat (limited to 'src')
-rw-r--r-- src/dns.c 8
-rw-r--r-- src/filter.c 27
-rw-r--r-- src/html.c 48
-rw-r--r-- src/message.c 10
4 files changed, 61 insertions, 32 deletions
diff --git a/src/dns.c b/src/dns.c
index 4adab9af9..e82b1a9b7 100644
--- a/src/dns.c
+++ b/src/dns.c
@@ -566,7 +566,7 @@ dns_fin_cb (gpointer arg)
struct rspamd_dns_request *req = arg;
event_del (&req->timer_event);
- g_hash_table_remove (req->resolver->requests, GUINT_TO_POINTER (req->id));
+ g_hash_table_remove (req->resolver->requests, GUINT_TO_POINTER ((guint)req->id));
}
static guint8 *
@@ -925,7 +925,7 @@ dns_parse_reply (guint8 *in, gint r, struct rspamd_dns_resolver *resolver,
}
/* Now try to find corresponding request */
- if ((req = g_hash_table_lookup (resolver->requests, GUINT_TO_POINTER (header->qid))) == NULL) {
+ if ((req = g_hash_table_lookup (resolver->requests, GUINT_TO_POINTER ((guint)header->qid))) == NULL) {
/* No such requests found */
return FALSE;
}
@@ -1134,7 +1134,7 @@ dns_retransmit_handler (gint fd, short what, void *arg)
evtimer_add (&req->timer_event, &req->tv);
/* Add request to hash table */
- g_hash_table_insert (req->resolver->requests, GUINT_TO_POINTER (req->id), req);
+ g_hash_table_insert (req->resolver->requests, GUINT_TO_POINTER ((guint)req->id), req);
register_async_event (req->session, (event_finalizer_t)dns_fin_cb, req, FALSE);
}
}
@@ -1233,7 +1233,7 @@ make_dns_request (struct rspamd_dns_resolver *resolver,
evtimer_add (&req->timer_event, &req->tv);
/* Add request to hash table */
- g_hash_table_insert (resolver->requests, GUINT_TO_POINTER (req->id), req);
+ g_hash_table_insert (resolver->requests, GUINT_TO_POINTER ((guint)req->id), req);
register_async_event (session, (event_finalizer_t)dns_fin_cb, req, FALSE);
}
else if (r == -1) {
diff --git a/src/filter.c b/src/filter.c
index b48bf64ba..fea91125f 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -80,18 +80,9 @@ insert_metric_result (struct worker_task *task, struct metric *metric, const gch
else {
w = (*weight) * flag;
}
- /* Handle grow factor */
- if (metric_res->grow_factor && w > 0) {
- w *= metric_res->grow_factor;
- metric_res->grow_factor *= metric->grow_factor;
- }
- else if (w > 0) {
- metric_res->grow_factor = metric->grow_factor;
- }
-
- /* Add metric score */
+ /* Add metric score */
if ((s = g_hash_table_lookup (metric_res->symbols, symbol)) != NULL) {
if (s->options && opts && opts != s->options) {
/* Append new options */
@@ -106,6 +97,14 @@ insert_metric_result (struct worker_task *task, struct metric *metric, const gch
memory_pool_add_destructor (task->task_pool, (pool_destruct_func) g_list_free, s->options);
}
if (!single) {
+ /* Handle grow factor */
+ if (metric_res->grow_factor && w > 0) {
+ w *= metric_res->grow_factor;
+ metric_res->grow_factor *= metric->grow_factor;
+ }
+ else if (w > 0) {
+ metric_res->grow_factor = metric->grow_factor;
+ }
s->score += w;
metric_res->score += w;
}
@@ -114,6 +113,14 @@ insert_metric_result (struct worker_task *task, struct metric *metric, const gch
s = memory_pool_alloc (task->task_pool, sizeof (struct symbol));
s->score = w;
+ /* Handle grow factor */
+ if (metric_res->grow_factor && w > 0) {
+ w *= metric_res->grow_factor;
+ metric_res->grow_factor *= metric->grow_factor;
+ }
+ else if (w > 0) {
+ metric_res->grow_factor = metric->grow_factor;
+ }
s->name = symbol;
metric_res->score += w;
diff --git a/src/html.c b/src/html.c
index 3582022f8..bf2610821 100644
--- a/src/html.c
+++ b/src/html.c
@@ -662,7 +662,7 @@ static gchar *
html_strncasestr (const gchar *s, const gchar *find, gsize len)
{
gchar c, sc;
- size_t mlen;
+ gsize mlen;
if ((c = *find++) != 0) {
c = g_ascii_tolower (c);
@@ -692,26 +692,41 @@ check_phishing (struct worker_task *task, struct uri *href_url, const gchar *url
p = url_text;
while (len < remain) {
if (*p == '<') {
- /* Get tag name */
- p ++;
- len ++;
- if (*p == '/') {
- /* Check tag name */
+ /* Check tag name */
+ if (*(p + 1) == '/') {
+ c = p + 2;
+ }
+ else {
c = p + 1;
- while (len < remain) {
- if (!g_ascii_isspace (*p) && *p != '>') {
+ }
+ while (len < remain) {
+ if (!g_ascii_isspace (*p) && *p != '>') {
+ p ++;
+ len ++;
+ }
+ else {
+ break;
+ }
+ }
+ rspamd_strlcpy (tagbuf, c, MIN (sizeof(tagbuf), p - c + 1));
+ if ((tag = get_tag_by_name (tagbuf)) != NULL) {
+ if (tag->id == id) {
+ break;
+ }
+ else if (tag->id == Tag_IMG) {
+ /* We should ignore IMG tag here */
+ while (len < remain && *p != '>' && *p != '<') {
p ++;
len ++;
}
- else {
- break;
- }
- }
- rspamd_strlcpy (tagbuf, c, MIN (sizeof(tagbuf), p - c + 1));
- if ((tag = get_tag_by_name (tagbuf)) != NULL) {
- if (tag->id == id) {
- break;
+ if (*p == '>' && len < remain) {
+ p ++;
}
+
+ remain -= p - url_text;
+ url_text = p;
+ len = 0;
+ continue;
}
}
}
@@ -724,6 +739,7 @@ check_phishing (struct worker_task *task, struct uri *href_url, const gchar *url
if (new != NULL) {
g_strstrip (url_str);
rc = parse_uri (new, url_str, task->task_pool);
+
if (rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) {
if (g_ascii_strncasecmp (href_url->host, new->host,
MAX (href_url->hostlen, new->hostlen)) != 0) {
diff --git a/src/message.c b/src/message.c
index 4db4bef7d..8ff53ea93 100644
--- a/src/message.c
+++ b/src/message.c
@@ -70,7 +70,13 @@ strip_html_tags (struct worker_task *task, memory_pool_t * pool, struct mime_tex
state = 1;
}
else if (state == 1) {
- depth++;
+ /* Opening bracket without closing one */
+ p --;
+ while (g_ascii_isspace (*p) && p > src->data) {
+ p --;
+ }
+ p ++;
+ goto unbreak_tag;
}
break;
@@ -107,7 +113,7 @@ strip_html_tags (struct worker_task *task, memory_pool_t * pool, struct mime_tex
if (in_q) {
break;
}
-
+unbreak_tag:
switch (state) {
case 1: /* HTML/XML */
lc = '>';