diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-05-12 14:42:50 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-05-12 14:42:50 +0100 |
commit | 9dc7bf904bf62b332cfd5a0a99a195cef19149d7 (patch) | |
tree | 48a17e364fc16d6be5e9501f5639eea99b2774e9 | |
parent | 7373423937593c5781bef430e4a17856ea3f75bc (diff) | |
download | rspamd-9dc7bf904bf62b332cfd5a0a99a195cef19149d7.tar.gz rspamd-9dc7bf904bf62b332cfd5a0a99a195cef19149d7.zip |
[Fix] Try to fix false positive URL detections in text parts
-rw-r--r-- | src/libmime/message.c | 5 | ||||
-rw-r--r-- | src/libserver/url.c | 9 | ||||
-rw-r--r-- | test/lua/unit/url.lua | 4 |
3 files changed, 10 insertions, 8 deletions
```diff
diff --git a/src/libmime/message.c b/src/libmime/message.c
index dbc9921d9..791bd6837 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -1268,7 +1268,6 @@ process_text_part (struct rspamd_task *task,
 type, text_part);
 text_part->orig = part_content;
- rspamd_url_text_extract (task->task_pool, task, text_part, FALSE);
 g_ptr_array_add (task->text_parts, text_part);
 }
 else {
@@ -1310,6 +1309,10 @@ process_text_part (struct rspamd_task *task,
 c = p + 1;
 }
 }
+
+ if (!IS_PART_HTML (text_part)) {
+ rspamd_url_text_extract (task->task_pool, task, text_part, FALSE);
+ }
 }
 struct mime_foreach_data {
diff --git a/src/libserver/url.c b/src/libserver/url.c
index fe70585be..70a5f3c9b 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -1795,10 +1795,9 @@ url_tld_end (struct url_callback_data *cb,
 {
 const gchar *p;
- /* A url must be finished by tld, so it must be followed by space character */
 p = pos + match->m_len;
- if (p == cb->end || g_ascii_isspace (*p) || *p == ',') {
+ if (p == cb->end) {
 match->m_len = p - match->m_begin;
 return TRUE;
 }
@@ -2302,7 +2301,7 @@ rspamd_url_text_extract (rspamd_mempool_t *pool,
 {
 struct rspamd_url_mimepart_cbdata mcbd;
- if (part->content == NULL || part->content->len == 0) {
+ if (part->stripped_content == NULL || part->stripped_content->len == 0) {
 msg_warn_task ("got empty text part");
 return;
 }
@@ -2310,8 +2309,8 @@ rspamd_url_text_extract (rspamd_mempool_t *pool,
 mcbd.task = task;
 mcbd.part = part;
- rspamd_url_find_multiple (task->task_pool, part->content->data,
- part->content->len, is_html,
+ rspamd_url_find_multiple (task->task_pool, part->stripped_content->data,
+ part->stripped_content->len, is_html,
 rspamd_url_text_part_callback, &mcbd);
 /* Handle offsets of this part */
diff --git a/test/lua/unit/url.lua b/test/lua/unit/url.lua
index 06082afe0..de274425d 100644
--- a/test/lua/unit/url.lua
+++ b/test/lua/unit/url.lua
@@ -17,8 +17,8 @@ context("URL check functions", function()
 test("Extract urls from text", function()
 local pool = mpool.create()
 local cases = {
- {"test.com text", {"test.com", nil}},
- {" test.com text", {"test.com", nil}},
+ {"test.com", {"test.com", nil}},
+ {" test.com", {"test.com", nil}},
 {"<test.com> text", {"test.com", nil}},
 {"test.com. text", {"test.com", nil}},
 {"mailto:A.User@example.com text", {"example.com", "A.User"}},
```