From c2c0d9f0847ed304bbeba3544c7744b6b4ec6bab Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 6 Mar 2017 13:44:13 +0000 Subject: [PATCH] [Fix] Distinguish missing and broken mandatory headers --- src/libmime/content_type.h | 1 + src/libmime/message.h | 3 ++- src/libmime/mime_parser.c | 22 ++++++++++++++++++++-- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/libmime/content_type.h b/src/libmime/content_type.h index 25283d365..57fe0e6a3 100644 --- a/src/libmime/content_type.h +++ b/src/libmime/content_type.h @@ -27,6 +27,7 @@ enum rspamd_content_type_flags { RSPAMD_CONTENT_TYPE_TEXT = 1 << 2, RSPAMD_CONTENT_TYPE_MESSAGE = 1 << 3, RSPAMD_CONTENT_TYPE_DSN = 1 << 4, + RSPAMD_CONTENT_TYPE_MISSING = 1 << 5, }; #define IS_CT_MULTIPART(ct) ((ct)->flags & RSPAMD_CONTENT_TYPE_MULTIPART) diff --git a/src/libmime/message.h b/src/libmime/message.h index 15fcfcccc..6004d1759 100644 --- a/src/libmime/message.h +++ b/src/libmime/message.h @@ -24,7 +24,8 @@ enum rspamd_mime_part_flags { RSPAMD_MIME_PART_ATTACHEMENT = (1 << 1), RSPAMD_MIME_PART_IMAGE = (1 << 2), RSPAMD_MIME_PART_ARCHIVE = (1 << 3), - RSPAMD_MIME_PART_BAD_CTE = (1 << 4) + RSPAMD_MIME_PART_BAD_CTE = (1 << 4), + RSPAMD_MIME_PART_MISSING_CTE = (1 << 5) }; enum rspamd_cte { diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c index 54c490109..b16983287 100644 --- a/src/libmime/mime_parser.c +++ b/src/libmime/mime_parser.c @@ -243,7 +243,9 @@ rspamd_mime_part_get_cte (struct rspamd_task *task, struct rspamd_mime_part *par if (hdrs == NULL) { part->cte = rspamd_mime_part_get_cte_heuristic (task, part); - part->flags |= RSPAMD_MIME_PART_BAD_CTE; + msg_info_task ("detected missing CTE for part as: %s", + rspamd_cte_to_string (part->cte)); + part->flags |= RSPAMD_MIME_PART_MISSING_CTE; } else { for (i = 0; i < hdrs->len; i ++) { @@ -262,6 +264,9 @@ rspamd_mime_part_get_cte (struct rspamd_task *task, struct rspamd_mime_part *par if (part->cte == RSPAMD_CTE_UNKNOWN) { part->cte = 
rspamd_mime_part_get_cte_heuristic (task, part); + + msg_info_task ("corrected bad CTE for part to: %s", + rspamd_cte_to_string (part->cte)); } else if (part->cte == RSPAMD_CTE_B64 || part->cte == RSPAMD_CTE_QP) { /* Additionally check sanity */ @@ -357,6 +362,19 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task, case RSPAMD_CTE_7BIT: case RSPAMD_CTE_8BIT: case RSPAMD_CTE_UNKNOWN: + if (part->ct->flags & RSPAMD_CONTENT_TYPE_MISSING) { + if (part->cte != RSPAMD_CTE_7BIT) { + /* We have something that has a missing content-type, + * but it has non-7bit characters. + * + * In theory, it is very unsafe to process it as a text part + * as we are unlikely to get a sane result + */ + part->ct->flags &= ~RSPAMD_CONTENT_TYPE_TEXT; + part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN; + } + } + if (IS_CT_TEXT (part->ct)) { /* Need to copy text as we have couple of in-place change functions */ parsed = rspamd_fstring_sized_new (part->raw_data.len); @@ -1059,7 +1077,7 @@ rspamd_mime_parse_message (struct rspamd_task *task, /* For messages we automatically assume plaintext */ msg_info_task ("cannot find content-type for a message, assume text/plain"); sel = rspamd_mempool_alloc0 (task->task_pool, sizeof (*sel)); - sel->flags = RSPAMD_CONTENT_TYPE_TEXT|RSPAMD_CONTENT_TYPE_BROKEN; + sel->flags = RSPAMD_CONTENT_TYPE_TEXT|RSPAMD_CONTENT_TYPE_MISSING; RSPAMD_FTOK_ASSIGN (&sel->type, "text"); RSPAMD_FTOK_ASSIGN (&sel->subtype, "plain"); } -- 2.39.5