diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-02-03 13:47:52 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-02-03 15:27:11 +0000 |
commit | 85631ea2677e0a95679c38b3103af03fff161d9c (patch) | |
tree | 7e3f768d4b19d029c16454bc7ffa91dabb0d1fb1 | |
parent | f99dab02f8dfd97ce81a3827e2f17df6c24b9d1a (diff) | |
download | rspamd-85631ea2677e0a95679c38b3103af03fff161d9c.tar.gz rspamd-85631ea2677e0a95679c38b3103af03fff161d9c.zip |
[Feature] Try to detect HTML messages better
-rw-r--r-- | src/libmime/message.c | 18 |
1 file changed, 17 insertions, 1 deletion
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 49cbc585c..c92a1d26e 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -646,7 +646,23 @@ rspamd_message_process_text_part (struct rspamd_task *task,
 			found_html = TRUE;
 		}
 		else {
-			found_txt = TRUE;
+			/*
+			 * We also need to apply heuristic for text parts that are actually
+			 * HTML.
+			 */
+			RSPAMD_FTOK_ASSIGN (&html_tok, "<!DOCTYPE html");
+			RSPAMD_FTOK_ASSIGN (&xhtml_tok, "<html");
+
+			if (rspamd_lc_cmp (mime_part->parsed_data.begin, html_tok.begin,
+					MIN (html_tok.len, mime_part->parsed_data.len)) == 0 ||
+				rspamd_lc_cmp (mime_part->parsed_data.begin, xhtml_tok.begin,
+					MIN (xhtml_tok.len, mime_part->parsed_data.len)) == 0) {
+				msg_info_task ("found html part pretending to be text/plain part");
+				found_html = TRUE;
+			}
+			else {
+				found_txt = TRUE;
+			}
 		}
 	}
 	else {