From 1211bc8499a332d71fc7d874c07acdf9f4d5917a Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 12 May 2016 14:42:50 +0100 Subject: [PATCH] [Fix] Try to fix false positive URL detections in text parts --- src/libmime/message.c | 5 ++++- src/libserver/url.c | 9 ++++----- test/lua/unit/url.lua | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/libmime/message.c b/src/libmime/message.c index 7656547d1..7dc35f8f3 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -1261,7 +1261,6 @@ process_text_part (struct rspamd_task *task, type, text_part); text_part->orig = part_content; - rspamd_url_text_extract (task->task_pool, task, text_part, FALSE); g_ptr_array_add (task->text_parts, text_part); } else { @@ -1303,6 +1302,10 @@ process_text_part (struct rspamd_task *task, c = p + 1; } } + + if (!IS_PART_HTML (text_part)) { + rspamd_url_text_extract (task->task_pool, task, text_part, FALSE); + } } struct mime_foreach_data { diff --git a/src/libserver/url.c b/src/libserver/url.c index fb725ed18..419241188 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -1795,10 +1795,9 @@ url_tld_end (struct url_callback_data *cb, { const gchar *p; - /* A url must be finished by tld, so it must be followed by space character */ p = pos + match->m_len; - if (p == cb->end || g_ascii_isspace (*p) || *p == ',') { + if (p == cb->end) { match->m_len = p - match->m_begin; return TRUE; } @@ -2302,7 +2301,7 @@ rspamd_url_text_extract (rspamd_mempool_t *pool, { struct rspamd_url_mimepart_cbdata mcbd; - if (part->content == NULL || part->content->len == 0) { + if (part->stripped_content == NULL || part->stripped_content->len == 0) { msg_warn_task ("got empty text part"); return; } @@ -2310,8 +2309,8 @@ rspamd_url_text_extract (rspamd_mempool_t *pool, mcbd.task = task; mcbd.part = part; - rspamd_url_find_multiple (task->task_pool, part->content->data, - part->content->len, is_html, + rspamd_url_find_multiple (task->task_pool, part->stripped_content->data, + part->stripped_content->len, is_html, rspamd_url_text_part_callback, &mcbd); /* Handle offsets of this part */ diff --git a/test/lua/unit/url.lua b/test/lua/unit/url.lua index 06082afe0..de274425d 100644 --- a/test/lua/unit/url.lua +++ b/test/lua/unit/url.lua @@ -17,8 +17,8 @@ context("URL check functions", function() test("Extract urls from text", function() local pool = mpool.create() local cases = { - {"test.com text", {"test.com", nil}}, - {" test.com text", {"test.com", nil}}, + {"test.com", {"test.com", nil}}, + {" test.com", {"test.com", nil}}, {" text", {"test.com", nil}}, {"test.com. text", {"test.com", nil}}, {"mailto:A.User@example.com text", {"example.com", "A.User"}}, -- 2.39.5