diff options
-rw-r--r-- | src/libmime/message.c | 45 |
1 files changed, 41 insertions, 4 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index c45550e6d..f167730d4 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -859,12 +859,49 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
 	rspamd_normalize_text_part (task, text_part);
 
 	if (!IS_PART_HTML (text_part)) {
-		rspamd_url_text_extract (task->task_pool, task, text_part,
-				RSPAMD_URL_FIND_ALL);
+		if (mime_part->parent_part) {
+			struct rspamd_mime_part *parent = mime_part->parent_part;
+
+			if (IS_PART_MULTIPART (parent) && parent->specific.mp->children->len == 2) {
+				/*
+				 * Use strict extraction mode: we will extract missing urls from
+				 * an html part if needed
+				 */
+				rspamd_url_text_extract (task->task_pool, task, text_part,
+						RSPAMD_URL_FIND_STRICT);
+			}
+			else {
+				/*
+				 * Fall back to full text extraction using TLD patterns
+				 */
+				rspamd_url_text_extract (task->task_pool, task, text_part,
+						RSPAMD_URL_FIND_ALL);
+			}
+		}
+		else {
+			/*
+			 * Fall back to full text extraction using TLD patterns
+			 */
+			rspamd_url_text_extract (task->task_pool, task, text_part,
+					RSPAMD_URL_FIND_ALL);
+		}
 	}
 	else {
-		rspamd_url_text_extract (task->task_pool, task, text_part,
-				RSPAMD_URL_FIND_STRICT);
+		if (mime_part->parent_part) {
+			struct rspamd_mime_part *parent = mime_part->parent_part;
+
+			if (IS_PART_MULTIPART (parent) && parent->specific.mp->children->len == 2) {
+				/* Do not extract urls from HTML at all */
+			}
+			else {
+				rspamd_url_text_extract (task->task_pool, task, text_part,
+						RSPAMD_URL_FIND_STRICT);
+			}
+		}
+		else {
+			rspamd_url_text_extract (task->task_pool, task, text_part,
+					RSPAMD_URL_FIND_STRICT);
+		}
 	}
 
 	if (text_part->exceptions) {