From 85631ea2677e0a95679c38b3103af03fff161d9c Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Sat, 3 Feb 2018 13:47:52 +0000 Subject: [PATCH] [Feature] Try to detect HTML messages better --- src/libmime/message.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/libmime/message.c b/src/libmime/message.c index 49cbc585c..c92a1d26e 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -646,7 +646,23 @@ rspamd_message_process_text_part (struct rspamd_task *task, found_html = TRUE; } else { - found_txt = TRUE; + /* + * We also need to apply heuristic for text parts that are actually + * HTML. + */ + RSPAMD_FTOK_ASSIGN (&html_tok, "parsed_data.begin, html_tok.begin, + MIN (html_tok.len, mime_part->parsed_data.len)) == 0 || + rspamd_lc_cmp (mime_part->parsed_data.begin, xhtml_tok.begin, + MIN (xhtml_tok.len, mime_part->parsed_data.len)) == 0) { + msg_info_task ("found html part pretending to be text/plain part"); + found_html = TRUE; + } + else { + found_txt = TRUE; + } } } else { -- 2.39.5