source.dussan.org Git - rspamd.git/commitdiff
[Feature] Try to detect HTML messages better
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 3 Feb 2018 13:47:52 +0000 (13:47 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Sat, 3 Feb 2018 15:27:11 +0000 (15:27 +0000)
src/libmime/message.c

index 49cbc585c5933bef984d14c71252351a5484b4ac..c92a1d26e148c4e1b69f565a2504708f32ee4758 100644 (file)
@@ -646,7 +646,23 @@ rspamd_message_process_text_part (struct rspamd_task *task,
                        found_html = TRUE;
                }
                else {
-                       found_txt = TRUE;
+                       /*
+                        * We also need to apply heuristic for text parts that are actually
+                        * HTML.
+                        */
+                       RSPAMD_FTOK_ASSIGN (&html_tok, "<!DOCTYPE html");
+                       RSPAMD_FTOK_ASSIGN (&xhtml_tok, "<html");
+
+                       if (rspamd_lc_cmp (mime_part->parsed_data.begin, html_tok.begin,
+                                       MIN (html_tok.len, mime_part->parsed_data.len)) == 0 ||
+                                       rspamd_lc_cmp (mime_part->parsed_data.begin, xhtml_tok.begin,
+                                                       MIN (xhtml_tok.len, mime_part->parsed_data.len)) == 0) {
+                               msg_info_task ("found html part pretending to be text/plain part");
+                               found_html = TRUE;
+                       }
+                       else {
+                               found_txt = TRUE;
+                       }
                }
        }
        else {