Fix URL interaction: pass explicit string lengths to rspamd_url_parse().

author Vsevolod Stakhov <vsevolod@highsecure.ru>

Tue, 3 Feb 2015 17:55:14 +0000 (17:55 +0000)

committer Vsevolod Stakhov <vsevolod@highsecure.ru>

Tue, 17 Feb 2015 15:14:09 +0000 (15:14 +0000)
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 3 Feb 2015 17:55:14 +0000 (17:55 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 17 Feb 2015 15:14:09 +0000 (15:14 +0000)
diff --git a/src/libmime/message.c b/src/libmime/message.c

index d0549cbd56ef63647a5bf25759a5912380bf8f7c..92d16e807ec5b6771e8f36f96240c43873007bdb 100644 (file)
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -44,7 +44,7 @@ strip_html_tags (struct rspamd_task *task,
         GByteArray * src,
         gint *stateptr)
  {
-       uint8_t *p, *rp, *tbegin = NULL, *end, c, lc, *estart;
+       uint8_t *p, *rp, *tbegin = NULL, *end, c, lc, *estart = NULL;
         gint br, i = 0, depth = 0, in_q = 0;
         gint state = 0;
         guint dlen;
@@ -1639,25 +1639,22 @@ process_message (struct rspamd_task *task)
                                 if (url_str != NULL) {
                                         subject_url = rspamd_mempool_alloc0 (task->task_pool,
                                                         sizeof (struct rspamd_url));
-                                       if (subject_url != NULL) {
-                                               /* Try to parse url */
-                                               rc = rspamd_url_parse (subject_url, url_str, task->task_pool);
-                                               if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES ||
-                                                       rc == URI_ERRNO_NO_HOST_SLASH) &&
-                                                       subject_url->hostlen > 0) {
-                                                       if (subject_url->protocol != PROTOCOL_MAILTO) {
-                                                               if (!g_tree_lookup (task->urls, subject_url)) {
-                                                                       g_tree_insert (task->urls,
+                                       rc = rspamd_url_parse (subject_url, url_str,
+                                                       strlen (url_str), task->task_pool);
+
+                                       if ((rc == URI_ERRNO_OK) && subject_url->hostlen > 0) {
+                                               if (subject_url->protocol != PROTOCOL_MAILTO) {
+                                                       if (!g_tree_lookup (task->urls, subject_url)) {
+                                                               g_tree_insert (task->urls,
                                                                                 subject_url,
                                                                                 subject_url);
-                                                               }
                                                         }
                                                 }
-                                               else if (rc != URI_ERRNO_OK) {
-                                                       msg_info ("extract of url '%s' failed: %s",
+                                       }
+                                       else if (rc != URI_ERRNO_OK) {
+                                               msg_info ("extract of url '%s' failed: %s",
                                                                 url_str,
                                                                 rspamd_url_strerror (rc));
-                                               }
                                         }
                                 }
                         }
diff --git a/src/libserver/html.c b/src/libserver/html.c

index 7df9270c3b58a8aca03f28591f518a2145f078cb..fe90b7dc456b3e918e66731aa266305b2a0e7fca 100644 (file)
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -575,10 +575,7 @@ void
  decode_entitles (gchar *s, guint * len)
  {
         guint l, rep_len;
-       gchar *t = s;                           /* t - tortoise */
-       gchar *h = s;                           /* h - hare     */
-       gchar *e = s;
-       gchar *end_ptr;
+       gchar *t = s, *h = s, *e = s, *end_ptr;
         gint state = 0, val, base;
         entity *found, key;
  
@@ -735,45 +732,38 @@ check_phishing (struct rspamd_task *task,
         if (rspamd_url_find (task->task_pool, url_text, len, NULL, NULL, &url_str,
                 TRUE) && url_str != NULL) {
                 new = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct rspamd_url));
-               if (new != NULL) {
-                       g_strstrip (url_str);
-                       rc = rspamd_url_parse (new, url_str, task->task_pool);
+               g_strstrip (url_str);
+               rc = rspamd_url_parse (new, url_str, strlen (url_str), task->task_pool);
  
-                       if (rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc ==
-                               URI_ERRNO_NO_HOST_SLASH) {
-                               if (g_ascii_strncasecmp (href_url->host, new->host,
+               if (rc == URI_ERRNO_OK) {
+                       if (g_ascii_strncasecmp (href_url->host, new->host,
                                         MAX (href_url->hostlen, new->hostlen)) != 0) {
-                                       /* Special check for urls beginning with 'www' */
-                                       if (new->hostlen > 4 && href_url->hostlen > 4) {
-                                               p = new->host;
-                                               c = NULL;
-                                               if ((p[0] == 'w' || p[0] == 'W') &&
+                               /* Special check for urls beginning with 'www' */
+                               if (new->hostlen > 4 && href_url->hostlen > 4) {
+                                       p = new->host;
+                                       c = NULL;
+                                       if ((p[0] == 'w' || p[0] == 'W') &&
                                                         (p[1] == 'w' || p[1] == 'W') &&
                                                         (p[2] == 'w' || p[2] == 'W') &&
                                                         (p[3] == '.')) {
-                                                       p += 4;
-                                                       c = href_url->host;
-                                                       len = MAX (href_url->hostlen, new->hostlen - 4);
-                                               }
-                                               else {
-                                                       p = href_url->host;
-                                                       if ((p[0] == 'w' || p[0] == 'W') &&
+                                               p += 4;
+                                               c = href_url->host;
+                                               len = MAX (href_url->hostlen, new->hostlen - 4);
+                                       }
+                                       else {
+                                               p = href_url->host;
+                                               if ((p[0] == 'w' || p[0] == 'W') &&
                                                                 (p[1] == 'w' || p[1] == 'W') &&
                                                                 (p[2] == 'w' || p[2] == 'W') &&
                                                                 (p[3] == '.')) {
-                                                               p += 4;
-                                                               c = new->host;
-                                                               len = MAX (href_url->hostlen - 4, new->hostlen);
-                                                       }
-                                               }
-                                               /* Compare parts and check for phished hostname */
-                                               if (c != NULL) {
-                                                       if (g_ascii_strncasecmp (p, c, len) != 0) {
-                                                               href_url->is_phished = TRUE;
-                                                               href_url->phished_url = new;
-                                                       }
+                                                       p += 4;
+                                                       c = new->host;
+                                                       len = MAX (href_url->hostlen - 4, new->hostlen);
                                                 }
-                                               else {
+                                       }
+                                       /* Compare parts and check for phished hostname */
+                                       if (c != NULL) {
+                                               if (g_ascii_strncasecmp (p, c, len) != 0) {
                                                         href_url->is_phished = TRUE;
                                                         href_url->phished_url = new;
                                                 }
@@ -783,12 +773,16 @@ check_phishing (struct rspamd_task *task,
                                                 href_url->phished_url = new;
                                         }
                                 }
+                               else {
+                                       href_url->is_phished = TRUE;
+                                       href_url->phished_url = new;
+                               }
                         }
-                       else {
-                               msg_info ("extract of url '%s' failed: %s",
+               }
+               else {
+                       msg_info ("extract of url '%s' failed: %s",
                                         url_str,
                                         rspamd_url_strerror (rc));
-                       }
                 }
         }
  
@@ -871,8 +865,7 @@ parse_tag_url (struct rspamd_task *task,
  
                 url_text = rspamd_mempool_alloc (task->task_pool, len + 1);
                 rspamd_strlcpy (url_text, c, len + 1);
-               rspamd_url_unescape (url_text);
-               decode_entitles (url_text, NULL);
+               decode_entitles (url_text, &len);
  
                 if (g_ascii_strncasecmp (url_text, "http",
                         sizeof ("http") - 1) != 0 &&
@@ -882,14 +875,14 @@ parse_tag_url (struct rspamd_task *task,
                         sizeof ("ftp://") - 1) != 0 &&
                         g_ascii_strncasecmp (url_text, "mailto:",
                         sizeof ("mailto:") - 1) != 0) {
+
                         return;
                 }
  
                 url = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_url));
-               rc = rspamd_url_parse (url, url_text, task->task_pool);
+               rc = rspamd_url_parse (url, url_text, len, task->task_pool);
  
-               if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST && url->hostlen !=
-                       0) {
+               if (rc != URI_ERRNO_EMPTY && url->hostlen != 0) {
                         /*
                          * Check for phishing
                          */
diff --git a/src/libserver/url.h b/src/libserver/url.h

index db3a3472c9013865fcacdc1d26d909e16e646a2a..f2aed42c6ab1f50375e3ad7ebfcfc9685f219700 100644 (file)
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -108,9 +108,4 @@ gboolean rspamd_url_find (rspamd_mempool_t *pool,
   */
  const gchar * rspamd_url_strerror (enum uri_errno err);
  
-/*
- * URL unescape characters in the specified string
- */
-void rspamd_url_unescape (gchar *s);
-
  #endif
diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c

index 201a6df18bfdc1510ad85444021894be558a91ab..a958a277c4ec5c4f8a9cfcefd9a055f71be088d4 100644 (file)
--- a/src/plugins/surbl.c
+++ b/src/plugins/surbl.c
@@ -868,10 +868,10 @@ redirector_callback (gint fd, short what, void *arg)
  {
         struct redirector_param *param = (struct redirector_param *)arg;
         gchar url_buf[512];
-       gint r;
+       gint r, urllen;
         struct timeval *timeout;
         struct rspamd_task *task;
-       gchar *p, *c;
+       gchar *p, *c, *urlstr;
         gboolean found = FALSE;
  
         task = param->task;
@@ -948,17 +948,21 @@ redirector_callback (gint fd, short what, void *arg)
                                                 break;
                                         }
                                 }
+
                                 if (found) {
                                         debug_task ("<%s> got reply from redirector: '%s' -> '%s'",
                                                 param->task->message_id,
                                                 struri (param->url),
                                                 c);
-                                       r =
-                                               rspamd_url_parse (param->url,
-                                                       rspamd_mempool_strdup (param->task->task_pool,
-                                                       c), param->task->task_pool);
-                                       if (r == URI_ERRNO_OK || r == URI_ERRNO_NO_SLASHES || r ==
-                                               URI_ERRNO_NO_HOST_SLASH) {
+
+                                       urllen = strlen (c);
+                                       urlstr = rspamd_mempool_alloc (param->task->task_pool,
+                                                       urllen + 1);
+                                       rspamd_strlcpy (urlstr, c, urllen + 1);
+                                       r = rspamd_url_parse (param->url, urlstr, urllen,
+                                                       param->task->task_pool);
+
+                                       if (r == URI_ERRNO_OK) {
                                                 make_surbl_requests (param->url,
                                                         param->task,
                                                         param->suffix,
author	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Tue, 3 Feb 2015 17:55:14 +0000 (17:55 +0000)
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Tue, 17 Feb 2015 15:14:09 +0000 (15:14 +0000)
src/libmime/message.c		patch \| blob \| history
src/libserver/html.c		patch \| blob \| history
src/libserver/url.h		patch \| blob \| history
src/plugins/surbl.c		patch \| blob \| history