source.dussan.org Git - rspamd.git/commitdiff
Fix urls interaction.
author: Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 3 Feb 2015 17:55:14 +0000 (17:55 +0000)
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 17 Feb 2015 15:14:09 +0000 (15:14 +0000)
src/libmime/message.c
src/libserver/html.c
src/libserver/url.h
src/plugins/surbl.c

index d0549cbd56ef63647a5bf25759a5912380bf8f7c..92d16e807ec5b6771e8f36f96240c43873007bdb 100644 (file)
@@ -44,7 +44,7 @@ strip_html_tags (struct rspamd_task *task,
        GByteArray * src,
        gint *stateptr)
 {
-       uint8_t *p, *rp, *tbegin = NULL, *end, c, lc, *estart;
+       uint8_t *p, *rp, *tbegin = NULL, *end, c, lc, *estart = NULL;
        gint br, i = 0, depth = 0, in_q = 0;
        gint state = 0;
        guint dlen;
@@ -1639,25 +1639,22 @@ process_message (struct rspamd_task *task)
                                if (url_str != NULL) {
                                        subject_url = rspamd_mempool_alloc0 (task->task_pool,
                                                        sizeof (struct rspamd_url));
-                                       if (subject_url != NULL) {
-                                               /* Try to parse url */
-                                               rc = rspamd_url_parse (subject_url, url_str, task->task_pool);
-                                               if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES ||
-                                                       rc == URI_ERRNO_NO_HOST_SLASH) &&
-                                                       subject_url->hostlen > 0) {
-                                                       if (subject_url->protocol != PROTOCOL_MAILTO) {
-                                                               if (!g_tree_lookup (task->urls, subject_url)) {
-                                                                       g_tree_insert (task->urls,
+                                       rc = rspamd_url_parse (subject_url, url_str,
+                                                       strlen (url_str), task->task_pool);
+
+                                       if ((rc == URI_ERRNO_OK) && subject_url->hostlen > 0) {
+                                               if (subject_url->protocol != PROTOCOL_MAILTO) {
+                                                       if (!g_tree_lookup (task->urls, subject_url)) {
+                                                               g_tree_insert (task->urls,
                                                                                subject_url,
                                                                                subject_url);
-                                                               }
                                                        }
                                                }
-                                               else if (rc != URI_ERRNO_OK) {
-                                                       msg_info ("extract of url '%s' failed: %s",
+                                       }
+                                       else if (rc != URI_ERRNO_OK) {
+                                               msg_info ("extract of url '%s' failed: %s",
                                                                url_str,
                                                                rspamd_url_strerror (rc));
-                                               }
                                        }
                                }
                        }
index 7df9270c3b58a8aca03f28591f518a2145f078cb..fe90b7dc456b3e918e66731aa266305b2a0e7fca 100644 (file)
@@ -575,10 +575,7 @@ void
 decode_entitles (gchar *s, guint * len)
 {
        guint l, rep_len;
-       gchar *t = s;                           /* t - tortoise */
-       gchar *h = s;                           /* h - hare     */
-       gchar *e = s;
-       gchar *end_ptr;
+       gchar *t = s, *h = s, *e = s, *end_ptr;
        gint state = 0, val, base;
        entity *found, key;
 
@@ -735,45 +732,38 @@ check_phishing (struct rspamd_task *task,
        if (rspamd_url_find (task->task_pool, url_text, len, NULL, NULL, &url_str,
                TRUE) && url_str != NULL) {
                new = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct rspamd_url));
-               if (new != NULL) {
-                       g_strstrip (url_str);
-                       rc = rspamd_url_parse (new, url_str, task->task_pool);
+               g_strstrip (url_str);
+               rc = rspamd_url_parse (new, url_str, strlen (url_str), task->task_pool);
 
-                       if (rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc ==
-                               URI_ERRNO_NO_HOST_SLASH) {
-                               if (g_ascii_strncasecmp (href_url->host, new->host,
+               if (rc == URI_ERRNO_OK) {
+                       if (g_ascii_strncasecmp (href_url->host, new->host,
                                        MAX (href_url->hostlen, new->hostlen)) != 0) {
-                                       /* Special check for urls beginning with 'www' */
-                                       if (new->hostlen > 4 && href_url->hostlen > 4) {
-                                               p = new->host;
-                                               c = NULL;
-                                               if ((p[0] == 'w' || p[0] == 'W') &&
+                               /* Special check for urls beginning with 'www' */
+                               if (new->hostlen > 4 && href_url->hostlen > 4) {
+                                       p = new->host;
+                                       c = NULL;
+                                       if ((p[0] == 'w' || p[0] == 'W') &&
                                                        (p[1] == 'w' || p[1] == 'W') &&
                                                        (p[2] == 'w' || p[2] == 'W') &&
                                                        (p[3] == '.')) {
-                                                       p += 4;
-                                                       c = href_url->host;
-                                                       len = MAX (href_url->hostlen, new->hostlen - 4);
-                                               }
-                                               else {
-                                                       p = href_url->host;
-                                                       if ((p[0] == 'w' || p[0] == 'W') &&
+                                               p += 4;
+                                               c = href_url->host;
+                                               len = MAX (href_url->hostlen, new->hostlen - 4);
+                                       }
+                                       else {
+                                               p = href_url->host;
+                                               if ((p[0] == 'w' || p[0] == 'W') &&
                                                                (p[1] == 'w' || p[1] == 'W') &&
                                                                (p[2] == 'w' || p[2] == 'W') &&
                                                                (p[3] == '.')) {
-                                                               p += 4;
-                                                               c = new->host;
-                                                               len = MAX (href_url->hostlen - 4, new->hostlen);
-                                                       }
-                                               }
-                                               /* Compare parts and check for phished hostname */
-                                               if (c != NULL) {
-                                                       if (g_ascii_strncasecmp (p, c, len) != 0) {
-                                                               href_url->is_phished = TRUE;
-                                                               href_url->phished_url = new;
-                                                       }
+                                                       p += 4;
+                                                       c = new->host;
+                                                       len = MAX (href_url->hostlen - 4, new->hostlen);
                                                }
-                                               else {
+                                       }
+                                       /* Compare parts and check for phished hostname */
+                                       if (c != NULL) {
+                                               if (g_ascii_strncasecmp (p, c, len) != 0) {
                                                        href_url->is_phished = TRUE;
                                                        href_url->phished_url = new;
                                                }
@@ -783,12 +773,16 @@ check_phishing (struct rspamd_task *task,
                                                href_url->phished_url = new;
                                        }
                                }
+                               else {
+                                       href_url->is_phished = TRUE;
+                                       href_url->phished_url = new;
+                               }
                        }
-                       else {
-                               msg_info ("extract of url '%s' failed: %s",
+               }
+               else {
+                       msg_info ("extract of url '%s' failed: %s",
                                        url_str,
                                        rspamd_url_strerror (rc));
-                       }
                }
        }
 
@@ -871,8 +865,7 @@ parse_tag_url (struct rspamd_task *task,
 
                url_text = rspamd_mempool_alloc (task->task_pool, len + 1);
                rspamd_strlcpy (url_text, c, len + 1);
-               rspamd_url_unescape (url_text);
-               decode_entitles (url_text, NULL);
+               decode_entitles (url_text, &len);
 
                if (g_ascii_strncasecmp (url_text, "http",
                        sizeof ("http") - 1) != 0 &&
@@ -882,14 +875,14 @@ parse_tag_url (struct rspamd_task *task,
                        sizeof ("ftp://") - 1) != 0 &&
                        g_ascii_strncasecmp (url_text, "mailto:",
                        sizeof ("mailto:") - 1) != 0) {
+
                        return;
                }
 
                url = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_url));
-               rc = rspamd_url_parse (url, url_text, task->task_pool);
+               rc = rspamd_url_parse (url, url_text, len, task->task_pool);
 
-               if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST && url->hostlen !=
-                       0) {
+               if (rc != URI_ERRNO_EMPTY && url->hostlen != 0) {
                        /*
                         * Check for phishing
                         */
index db3a3472c9013865fcacdc1d26d909e16e646a2a..f2aed42c6ab1f50375e3ad7ebfcfc9685f219700 100644 (file)
@@ -108,9 +108,4 @@ gboolean rspamd_url_find (rspamd_mempool_t *pool,
  */
 const gchar * rspamd_url_strerror (enum uri_errno err);
 
-/*
- * URL unescape characters in the specified string
- */
-void rspamd_url_unescape (gchar *s);
-
 #endif
index 201a6df18bfdc1510ad85444021894be558a91ab..a958a277c4ec5c4f8a9cfcefd9a055f71be088d4 100644 (file)
@@ -868,10 +868,10 @@ redirector_callback (gint fd, short what, void *arg)
 {
        struct redirector_param *param = (struct redirector_param *)arg;
        gchar url_buf[512];
-       gint r;
+       gint r, urllen;
        struct timeval *timeout;
        struct rspamd_task *task;
-       gchar *p, *c;
+       gchar *p, *c, *urlstr;
        gboolean found = FALSE;
 
        task = param->task;
@@ -948,17 +948,21 @@ redirector_callback (gint fd, short what, void *arg)
                                                break;
                                        }
                                }
+
                                if (found) {
                                        debug_task ("<%s> got reply from redirector: '%s' -> '%s'",
                                                param->task->message_id,
                                                struri (param->url),
                                                c);
-                                       r =
-                                               rspamd_url_parse (param->url,
-                                                       rspamd_mempool_strdup (param->task->task_pool,
-                                                       c), param->task->task_pool);
-                                       if (r == URI_ERRNO_OK || r == URI_ERRNO_NO_SLASHES || r ==
-                                               URI_ERRNO_NO_HOST_SLASH) {
+
+                                       urllen = strlen (c);
+                                       urlstr = rspamd_mempool_alloc (param->task->task_pool,
+                                                       urllen + 1);
+                                       rspamd_strlcpy (urlstr, c, urllen + 1);
+                                       r = rspamd_url_parse (param->url, urlstr, urllen,
+                                                       param->task->task_pool);
+
+                                       if (r == URI_ERRNO_OK) {
                                                make_surbl_requests (param->url,
                                                        param->task,
                                                        param->suffix,