about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-02-03 17:55:14 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-02-03 17:55:14 +0000
commit 640397d9209ca4367813bc418bffd5ba45c57a6d (patch)
tree 5bc070310b06f036bacf9861cb10113f9387732a
parent b4c0e9b59d3985726d9a346085172394a0495ce6 (diff)
download rspamd-640397d9209ca4367813bc418bffd5ba45c57a6d.tar.gz
rspamd-640397d9209ca4367813bc418bffd5ba45c57a6d.zip
Fix urls interaction.
-rw-r--r-- src/libmime/message.c 25
-rw-r--r-- src/libserver/html.c 77
-rw-r--r-- src/libserver/url.h 5
-rw-r--r-- src/plugins/surbl.c 20
4 files changed, 58 insertions, 69 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 702b148cb..2f96447bf 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -44,7 +44,7 @@ strip_html_tags (struct rspamd_task *task,
GByteArray * src,
gint *stateptr)
{
- uint8_t *p, *rp, *tbegin = NULL, *end, c, lc, *estart;
+ uint8_t *p, *rp, *tbegin = NULL, *end, c, lc, *estart = NULL;
gint br, i = 0, depth = 0, in_q = 0;
gint state = 0;
guint dlen;
@@ -1639,25 +1639,22 @@ process_message (struct rspamd_task *task)
if (url_str != NULL) {
subject_url = rspamd_mempool_alloc0 (task->task_pool,
sizeof (struct rspamd_url));
- if (subject_url != NULL) {
- /* Try to parse url */
- rc = rspamd_url_parse (subject_url, url_str, task->task_pool);
- if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES ||
- rc == URI_ERRNO_NO_HOST_SLASH) &&
- subject_url->hostlen > 0) {
- if (subject_url->protocol != PROTOCOL_MAILTO) {
- if (!g_tree_lookup (task->urls, subject_url)) {
- g_tree_insert (task->urls,
+ rc = rspamd_url_parse (subject_url, url_str,
+ strlen (url_str), task->task_pool);
+
+ if ((rc == URI_ERRNO_OK) && subject_url->hostlen > 0) {
+ if (subject_url->protocol != PROTOCOL_MAILTO) {
+ if (!g_tree_lookup (task->urls, subject_url)) {
+ g_tree_insert (task->urls,
subject_url,
subject_url);
- }
}
}
- else if (rc != URI_ERRNO_OK) {
- msg_info ("extract of url '%s' failed: %s",
+ }
+ else if (rc != URI_ERRNO_OK) {
+ msg_info ("extract of url '%s' failed: %s",
url_str,
rspamd_url_strerror (rc));
- }
}
}
}
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 7df9270c3..fe90b7dc4 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -575,10 +575,7 @@ void
decode_entitles (gchar *s, guint * len)
{
guint l, rep_len;
- gchar *t = s; /* t - tortoise */
- gchar *h = s; /* h - hare */
- gchar *e = s;
- gchar *end_ptr;
+ gchar *t = s, *h = s, *e = s, *end_ptr;
gint state = 0, val, base;
entity *found, key;
@@ -735,45 +732,38 @@ check_phishing (struct rspamd_task *task,
if (rspamd_url_find (task->task_pool, url_text, len, NULL, NULL, &url_str,
TRUE) && url_str != NULL) {
new = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct rspamd_url));
- if (new != NULL) {
- g_strstrip (url_str);
- rc = rspamd_url_parse (new, url_str, task->task_pool);
+ g_strstrip (url_str);
+ rc = rspamd_url_parse (new, url_str, strlen (url_str), task->task_pool);
- if (rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc ==
- URI_ERRNO_NO_HOST_SLASH) {
- if (g_ascii_strncasecmp (href_url->host, new->host,
+ if (rc == URI_ERRNO_OK) {
+ if (g_ascii_strncasecmp (href_url->host, new->host,
MAX (href_url->hostlen, new->hostlen)) != 0) {
- /* Special check for urls beginning with 'www' */
- if (new->hostlen > 4 && href_url->hostlen > 4) {
- p = new->host;
- c = NULL;
- if ((p[0] == 'w' || p[0] == 'W') &&
+ /* Special check for urls beginning with 'www' */
+ if (new->hostlen > 4 && href_url->hostlen > 4) {
+ p = new->host;
+ c = NULL;
+ if ((p[0] == 'w' || p[0] == 'W') &&
(p[1] == 'w' || p[1] == 'W') &&
(p[2] == 'w' || p[2] == 'W') &&
(p[3] == '.')) {
- p += 4;
- c = href_url->host;
- len = MAX (href_url->hostlen, new->hostlen - 4);
- }
- else {
- p = href_url->host;
- if ((p[0] == 'w' || p[0] == 'W') &&
+ p += 4;
+ c = href_url->host;
+ len = MAX (href_url->hostlen, new->hostlen - 4);
+ }
+ else {
+ p = href_url->host;
+ if ((p[0] == 'w' || p[0] == 'W') &&
(p[1] == 'w' || p[1] == 'W') &&
(p[2] == 'w' || p[2] == 'W') &&
(p[3] == '.')) {
- p += 4;
- c = new->host;
- len = MAX (href_url->hostlen - 4, new->hostlen);
- }
- }
- /* Compare parts and check for phished hostname */
- if (c != NULL) {
- if (g_ascii_strncasecmp (p, c, len) != 0) {
- href_url->is_phished = TRUE;
- href_url->phished_url = new;
- }
+ p += 4;
+ c = new->host;
+ len = MAX (href_url->hostlen - 4, new->hostlen);
}
- else {
+ }
+ /* Compare parts and check for phished hostname */
+ if (c != NULL) {
+ if (g_ascii_strncasecmp (p, c, len) != 0) {
href_url->is_phished = TRUE;
href_url->phished_url = new;
}
@@ -783,12 +773,16 @@ check_phishing (struct rspamd_task *task,
href_url->phished_url = new;
}
}
+ else {
+ href_url->is_phished = TRUE;
+ href_url->phished_url = new;
+ }
}
- else {
- msg_info ("extract of url '%s' failed: %s",
+ }
+ else {
+ msg_info ("extract of url '%s' failed: %s",
url_str,
rspamd_url_strerror (rc));
- }
}
}
@@ -871,8 +865,7 @@ parse_tag_url (struct rspamd_task *task,
url_text = rspamd_mempool_alloc (task->task_pool, len + 1);
rspamd_strlcpy (url_text, c, len + 1);
- rspamd_url_unescape (url_text);
- decode_entitles (url_text, NULL);
+ decode_entitles (url_text, &len);
if (g_ascii_strncasecmp (url_text, "http",
sizeof ("http") - 1) != 0 &&
@@ -882,14 +875,14 @@ parse_tag_url (struct rspamd_task *task,
sizeof ("ftp://") - 1) != 0 &&
g_ascii_strncasecmp (url_text, "mailto:",
sizeof ("mailto:") - 1) != 0) {
+
return;
}
url = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_url));
- rc = rspamd_url_parse (url, url_text, task->task_pool);
+ rc = rspamd_url_parse (url, url_text, len, task->task_pool);
- if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST && url->hostlen !=
- 0) {
+ if (rc != URI_ERRNO_EMPTY && url->hostlen != 0) {
/*
* Check for phishing
*/
diff --git a/src/libserver/url.h b/src/libserver/url.h
index db3a3472c..f2aed42c6 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -108,9 +108,4 @@ gboolean rspamd_url_find (rspamd_mempool_t *pool,
*/
const gchar * rspamd_url_strerror (enum uri_errno err);
-/*
- * URL unescape characters in the specified string
- */
-void rspamd_url_unescape (gchar *s);
-
#endif
diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c
index df9227c08..119a4f8cb 100644
--- a/src/plugins/surbl.c
+++ b/src/plugins/surbl.c
@@ -867,10 +867,10 @@ redirector_callback (gint fd, short what, void *arg)
{
struct redirector_param *param = (struct redirector_param *)arg;
gchar url_buf[512];
- gint r;
+ gint r, urllen;
struct timeval *timeout;
struct rspamd_task *task;
- gchar *p, *c;
+ gchar *p, *c, *urlstr;
gboolean found = FALSE;
task = param->task;
@@ -947,17 +947,21 @@ redirector_callback (gint fd, short what, void *arg)
break;
}
}
+
if (found) {
debug_task ("<%s> got reply from redirector: '%s' -> '%s'",
param->task->message_id,
struri (param->url),
c);
- r =
- rspamd_url_parse (param->url,
- rspamd_mempool_strdup (param->task->task_pool,
- c), param->task->task_pool);
- if (r == URI_ERRNO_OK || r == URI_ERRNO_NO_SLASHES || r ==
- URI_ERRNO_NO_HOST_SLASH) {
+
+ urllen = strlen (c);
+ urlstr = rspamd_mempool_alloc (param->task->task_pool,
+ urllen + 1);
+ rspamd_strlcpy (urlstr, c, urllen + 1);
+ r = rspamd_url_parse (param->url, urlstr, urllen,
+ param->task->task_pool);
+
+ if (r == URI_ERRNO_OK) {
make_surbl_requests (param->url,
param->task,
param->suffix,