From 640397d9209ca4367813bc418bffd5ba45c57a6d Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 3 Feb 2015 17:55:14 +0000 Subject: [PATCH] Fix urls interaction. --- src/libmime/message.c | 25 +++++++------- src/libserver/html.c | 77 ++++++++++++++++++++----------------------- src/libserver/url.h | 5 --- src/plugins/surbl.c | 20 ++++++----- 4 files changed, 58 insertions(+), 69 deletions(-) diff --git a/src/libmime/message.c b/src/libmime/message.c index 702b148cb..2f96447bf 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -44,7 +44,7 @@ strip_html_tags (struct rspamd_task *task, GByteArray * src, gint *stateptr) { - uint8_t *p, *rp, *tbegin = NULL, *end, c, lc, *estart; + uint8_t *p, *rp, *tbegin = NULL, *end, c, lc, *estart = NULL; gint br, i = 0, depth = 0, in_q = 0; gint state = 0; guint dlen; @@ -1639,25 +1639,22 @@ process_message (struct rspamd_task *task) if (url_str != NULL) { subject_url = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct rspamd_url)); - if (subject_url != NULL) { - /* Try to parse url */ - rc = rspamd_url_parse (subject_url, url_str, task->task_pool); - if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || - rc == URI_ERRNO_NO_HOST_SLASH) && - subject_url->hostlen > 0) { - if (subject_url->protocol != PROTOCOL_MAILTO) { - if (!g_tree_lookup (task->urls, subject_url)) { - g_tree_insert (task->urls, + rc = rspamd_url_parse (subject_url, url_str, + strlen (url_str), task->task_pool); + + if ((rc == URI_ERRNO_OK) && subject_url->hostlen > 0) { + if (subject_url->protocol != PROTOCOL_MAILTO) { + if (!g_tree_lookup (task->urls, subject_url)) { + g_tree_insert (task->urls, subject_url, subject_url); - } } } - else if (rc != URI_ERRNO_OK) { - msg_info ("extract of url '%s' failed: %s", + } + else if (rc != URI_ERRNO_OK) { + msg_info ("extract of url '%s' failed: %s", url_str, rspamd_url_strerror (rc)); - } } } } diff --git a/src/libserver/html.c b/src/libserver/html.c index 7df9270c3..fe90b7dc4 100644 --- a/src/libserver/html.c +++ b/src/libserver/html.c @@ -575,10 +575,7 @@ void decode_entitles (gchar *s, guint * len) { guint l, rep_len; - gchar *t = s; /* t - tortoise */ - gchar *h = s; /* h - hare */ - gchar *e = s; - gchar *end_ptr; + gchar *t = s, *h = s, *e = s, *end_ptr; gint state = 0, val, base; entity *found, key; @@ -735,45 +732,38 @@ check_phishing (struct rspamd_task *task, if (rspamd_url_find (task->task_pool, url_text, len, NULL, NULL, &url_str, TRUE) && url_str != NULL) { new = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct rspamd_url)); - if (new != NULL) { - g_strstrip (url_str); - rc = rspamd_url_parse (new, url_str, task->task_pool); + g_strstrip (url_str); + rc = rspamd_url_parse (new, url_str, strlen (url_str), task->task_pool); - if (rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == - URI_ERRNO_NO_HOST_SLASH) { - if (g_ascii_strncasecmp (href_url->host, new->host, + if (rc == URI_ERRNO_OK) { + if (g_ascii_strncasecmp (href_url->host, new->host, MAX (href_url->hostlen, new->hostlen)) != 0) { - /* Special check for urls beginning with 'www' */ - if (new->hostlen > 4 && href_url->hostlen > 4) { - p = new->host; - c = NULL; - if ((p[0] == 'w' || p[0] == 'W') && + /* Special check for urls beginning with 'www' */ + if (new->hostlen > 4 && href_url->hostlen > 4) { + p = new->host; + c = NULL; + if ((p[0] == 'w' || p[0] == 'W') && (p[1] == 'w' || p[1] == 'W') && (p[2] == 'w' || p[2] == 'W') && (p[3] == '.')) { - p += 4; - c = href_url->host; - len = MAX (href_url->hostlen, new->hostlen - 4); - } - else { - p = href_url->host; - if ((p[0] == 'w' || p[0] == 'W') && + p += 4; + c = href_url->host; + len = MAX (href_url->hostlen, new->hostlen - 4); + } + else { + p = href_url->host; + if ((p[0] == 'w' || p[0] == 'W') && (p[1] == 'w' || p[1] == 'W') && (p[2] == 'w' || p[2] == 'W') && (p[3] == '.')) { - p += 4; - c = new->host; - len = MAX (href_url->hostlen - 4, new->hostlen); - } - } - /* Compare parts and check for phished hostname */ - if (c != NULL) { - if (g_ascii_strncasecmp (p, c, len) != 0) { - href_url->is_phished = TRUE; - href_url->phished_url = new; - } + p += 4; + c = new->host; + len = MAX (href_url->hostlen - 4, new->hostlen); } - else { + } + /* Compare parts and check for phished hostname */ + if (c != NULL) { + if (g_ascii_strncasecmp (p, c, len) != 0) { href_url->is_phished = TRUE; href_url->phished_url = new; } @@ -783,12 +773,16 @@ check_phishing (struct rspamd_task *task, href_url->phished_url = new; } } + else { + href_url->is_phished = TRUE; + href_url->phished_url = new; + } } - else { - msg_info ("extract of url '%s' failed: %s", + } + else { + msg_info ("extract of url '%s' failed: %s", url_str, rspamd_url_strerror (rc)); - } } } @@ -871,8 +865,7 @@ parse_tag_url (struct rspamd_task *task, url_text = rspamd_mempool_alloc (task->task_pool, len + 1); rspamd_strlcpy (url_text, c, len + 1); - rspamd_url_unescape (url_text); - decode_entitles (url_text, NULL); + decode_entitles (url_text, &len); if (g_ascii_strncasecmp (url_text, "http", sizeof ("http") - 1) != 0 && @@ -882,14 +875,14 @@ parse_tag_url (struct rspamd_task *task, sizeof ("ftp://") - 1) != 0 && g_ascii_strncasecmp (url_text, "mailto:", sizeof ("mailto:") - 1) != 0) { + return; } url = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_url)); - rc = rspamd_url_parse (url, url_text, task->task_pool); + rc = rspamd_url_parse (url, url_text, len, task->task_pool); - if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST && url->hostlen != - 0) { + if (rc != URI_ERRNO_EMPTY && url->hostlen != 0) { /* * Check for phishing */ diff --git a/src/libserver/url.h b/src/libserver/url.h index db3a3472c..f2aed42c6 100644 --- a/src/libserver/url.h +++ b/src/libserver/url.h @@ -108,9 +108,4 @@ gboolean rspamd_url_find (rspamd_mempool_t *pool, */ const gchar * rspamd_url_strerror (enum uri_errno err); -/* - * URL unescape characters in the specified string - */ -void rspamd_url_unescape (gchar *s); - #endif diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index df9227c08..119a4f8cb 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -867,10 +867,10 @@ redirector_callback (gint fd, short what, void *arg) { struct redirector_param *param = (struct redirector_param *)arg; gchar url_buf[512]; - gint r; + gint r, urllen; struct timeval *timeout; struct rspamd_task *task; - gchar *p, *c; + gchar *p, *c, *urlstr; gboolean found = FALSE; task = param->task; @@ -947,17 +947,21 @@ redirector_callback (gint fd, short what, void *arg) break; } } + if (found) { debug_task ("<%s> got reply from redirector: '%s' -> '%s'", param->task->message_id, struri (param->url), c); - r = - rspamd_url_parse (param->url, - rspamd_mempool_strdup (param->task->task_pool, - c), param->task->task_pool); - if (r == URI_ERRNO_OK || r == URI_ERRNO_NO_SLASHES || r == - URI_ERRNO_NO_HOST_SLASH) { + + urllen = strlen (c); + urlstr = rspamd_mempool_alloc (param->task->task_pool, + urllen + 1); + rspamd_strlcpy (urlstr, c, urllen + 1); + r = rspamd_url_parse (param->url, urlstr, urllen, + param->task->task_pool); + + if (r == URI_ERRNO_OK) { make_surbl_requests (param->url, param->task, param->suffix, -- 2.39.5