about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-02-03 13:47:52 +0000
committer: Vsevolod Stakhov <vsevolod@highsecure.ru> 2018-02-03 15:27:11 +0000
commit: 85631ea2677e0a95679c38b3103af03fff161d9c (patch)
tree: 7e3f768d4b19d029c16454bc7ffa91dabb0d1fb1
parent: f99dab02f8dfd97ce81a3827e2f17df6c24b9d1a (diff)
download: rspamd-85631ea2677e0a95679c38b3103af03fff161d9c.tar.gz
          rspamd-85631ea2677e0a95679c38b3103af03fff161d9c.zip
[Feature] Try to detect HTML messages better
-rw-r--r-- src/libmime/message.c 18
1 file changed, 17 insertions, 1 deletion
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 49cbc585c..c92a1d26e 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -646,7 +646,23 @@ rspamd_message_process_text_part (struct rspamd_task *task,
found_html = TRUE;
}
else {
- found_txt = TRUE;
+ /*
+ * We also need to apply heuristic for text parts that are actually
+ * HTML.
+ */
+ RSPAMD_FTOK_ASSIGN (&html_tok, "<!DOCTYPE html");
+ RSPAMD_FTOK_ASSIGN (&xhtml_tok, "<html");
+
+ if (rspamd_lc_cmp (mime_part->parsed_data.begin, html_tok.begin,
+ MIN (html_tok.len, mime_part->parsed_data.len)) == 0 ||
+ rspamd_lc_cmp (mime_part->parsed_data.begin, xhtml_tok.begin,
+ MIN (xhtml_tok.len, mime_part->parsed_data.len)) == 0) {
+ msg_info_task ("found html part pretending to be text/plain part");
+ found_html = TRUE;
+ }
+ else {
+ found_txt = TRUE;
+ }
}
}
else {