about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-05-12 14:42:50 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-05-12 14:42:50 +0100
commit: 9dc7bf904bf62b332cfd5a0a99a195cef19149d7 (patch)
tree: 48a17e364fc16d6be5e9501f5639eea99b2774e9
parent: 7373423937593c5781bef430e4a17856ea3f75bc (diff)
download: rspamd-9dc7bf904bf62b332cfd5a0a99a195cef19149d7.tar.gz
download: rspamd-9dc7bf904bf62b332cfd5a0a99a195cef19149d7.zip
[Fix] Try to fix false positive URL detections in text parts
-rw-r--r-- src/libmime/message.c 5
-rw-r--r-- src/libserver/url.c 9
-rw-r--r-- test/lua/unit/url.lua 4
3 files changed, 10 insertions, 8 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index dbc9921d9..791bd6837 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -1268,7 +1268,6 @@ process_text_part (struct rspamd_task *task,
type,
text_part);
text_part->orig = part_content;
- rspamd_url_text_extract (task->task_pool, task, text_part, FALSE);
g_ptr_array_add (task->text_parts, text_part);
}
else {
@@ -1310,6 +1309,10 @@ process_text_part (struct rspamd_task *task,
c = p + 1;
}
}
+
+ if (!IS_PART_HTML (text_part)) {
+ rspamd_url_text_extract (task->task_pool, task, text_part, FALSE);
+ }
}
struct mime_foreach_data {
diff --git a/src/libserver/url.c b/src/libserver/url.c
index fe70585be..70a5f3c9b 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -1795,10 +1795,9 @@ url_tld_end (struct url_callback_data *cb,
{
const gchar *p;
- /* A url must be finished by tld, so it must be followed by space character */
p = pos + match->m_len;
- if (p == cb->end || g_ascii_isspace (*p) || *p == ',') {
+ if (p == cb->end) {
match->m_len = p - match->m_begin;
return TRUE;
}
@@ -2302,7 +2301,7 @@ rspamd_url_text_extract (rspamd_mempool_t *pool,
{
struct rspamd_url_mimepart_cbdata mcbd;
- if (part->content == NULL || part->content->len == 0) {
+ if (part->stripped_content == NULL || part->stripped_content->len == 0) {
msg_warn_task ("got empty text part");
return;
}
@@ -2310,8 +2309,8 @@ rspamd_url_text_extract (rspamd_mempool_t *pool,
mcbd.task = task;
mcbd.part = part;
- rspamd_url_find_multiple (task->task_pool, part->content->data,
- part->content->len, is_html,
+ rspamd_url_find_multiple (task->task_pool, part->stripped_content->data,
+ part->stripped_content->len, is_html,
rspamd_url_text_part_callback, &mcbd);
/* Handle offsets of this part */
diff --git a/test/lua/unit/url.lua b/test/lua/unit/url.lua
index 06082afe0..de274425d 100644
--- a/test/lua/unit/url.lua
+++ b/test/lua/unit/url.lua
@@ -17,8 +17,8 @@ context("URL check functions", function()
test("Extract urls from text", function()
local pool = mpool.create()
local cases = {
- {"test.com text", {"test.com", nil}},
- {" test.com text", {"test.com", nil}},
+ {"test.com", {"test.com", nil}},
+ {" test.com", {"test.com", nil}},
{"<test.com> text", {"test.com", nil}},
{"test.com. text", {"test.com", nil}},
{"mailto:A.User@example.com text", {"example.com", "A.User"}},