source.dussan.org Git - rspamd.git/commitdiff
[Fix] Try to fix false positive URL detections in text parts
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Thu, 12 May 2016 13:42:50 +0000 (14:42 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 16 May 2016 17:19:48 +0000 (19:19 +0200)
src/libmime/message.c
src/libserver/url.c
test/lua/unit/url.lua

index 7656547d1b2e708d0b552e98f74cf91c43ca74fa..7dc35f8f33c97dc1f7838b47f41deb45200424a7 100644 (file)
@@ -1261,7 +1261,6 @@ process_text_part (struct rspamd_task *task,
                                type,
                                text_part);
                text_part->orig = part_content;
-               rspamd_url_text_extract (task->task_pool, task, text_part, FALSE);
                g_ptr_array_add (task->text_parts, text_part);
        }
        else {
@@ -1303,6 +1302,10 @@ process_text_part (struct rspamd_task *task,
                        c = p + 1;
                }
        }
+
+       if (!IS_PART_HTML (text_part)) {
+               rspamd_url_text_extract (task->task_pool, task, text_part, FALSE);
+       }
 }
 
 struct mime_foreach_data {
index fb725ed18520672151e74812956bb3c793693b8b..4192411884e4d1a480f36c69a58c9165bed2db5b 100644 (file)
@@ -1795,10 +1795,9 @@ url_tld_end (struct url_callback_data *cb,
 {
        const gchar *p;
 
-       /* A url must be finished by tld, so it must be followed by space character */
        p = pos + match->m_len;
 
-       if (p == cb->end || g_ascii_isspace (*p) || *p == ',') {
+       if (p == cb->end) {
                match->m_len = p - match->m_begin;
                return TRUE;
        }
@@ -2302,7 +2301,7 @@ rspamd_url_text_extract (rspamd_mempool_t *pool,
 {
        struct rspamd_url_mimepart_cbdata mcbd;
 
-       if (part->content == NULL || part->content->len == 0) {
+       if (part->stripped_content == NULL || part->stripped_content->len == 0) {
                msg_warn_task ("got empty text part");
                return;
        }
@@ -2310,8 +2309,8 @@ rspamd_url_text_extract (rspamd_mempool_t *pool,
        mcbd.task = task;
        mcbd.part = part;
 
-       rspamd_url_find_multiple (task->task_pool, part->content->data,
-                       part->content->len, is_html,
+       rspamd_url_find_multiple (task->task_pool, part->stripped_content->data,
+                       part->stripped_content->len, is_html,
                        rspamd_url_text_part_callback, &mcbd);
 
        /* Handle offsets of this part */
index 06082afe0036832045819788de0c4d20740339cf..de274425d6e08fa96600b5d04a1c7d54e9b4f454 100644 (file)
@@ -17,8 +17,8 @@ context("URL check functions", function()
   test("Extract urls from text", function()
     local pool = mpool.create()
     local cases = {
-      {"test.com text", {"test.com", nil}},
-      {" test.com text", {"test.com", nil}},
+      {"test.com", {"test.com", nil}},
+      {" test.com", {"test.com", nil}},
       {"<test.com> text", {"test.com", nil}},
       {"test.com. text", {"test.com", nil}},
       {"mailto:A.User@example.com text", {"example.com", "A.User"}},