From 569c024acdb4a7eaff33cdaaefc1ae25764c2ae2 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 31 Oct 2016 15:08:16 +0000 Subject: [PATCH] [CritFix] Introduce raw content to text parts Previously we had the following types of text content: - `orig`: MIME *decoded* content - `content`: same as `orig` but converted to utf8 and without HTML tags - `stripped`: same as `content` but without newlines Now we add `raw`: - `raw`: raw mime content as it was in an original message This also fixes R_BAD_CTE_7BIT rule --- rules/regexp/headers.lua | 2 +- src/libmime/message.c | 23 ++++++++++++++++++----- src/libmime/message.h | 1 + src/libserver/re_cache.c | 4 ++-- 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/rules/regexp/headers.lua b/rules/regexp/headers.lua index 085994037..6b43c2f05 100644 --- a/rules/regexp/headers.lua +++ b/rules/regexp/headers.lua @@ -63,7 +63,7 @@ local r_ctype_text = 'content_type_is_type(text)' -- Content transfer encoding is 7bit local r_cte_7bit = 'compare_transfer_encoding(7bit)' -- And body contains 8bit characters -local r_body_8bit = '/[^\\x01-\\x7f]/Pr' +local r_body_8bit = '/[^\\x01-\\x7f]/Qr' reconf['R_BAD_CTE_7BIT'] = { re = string.format('(%s) & (%s) & (%s)', r_ctype_text, r_cte_7bit, r_body_8bit), score = 3.0, diff --git a/src/libmime/message.c b/src/libmime/message.c index 5b14c6f9e..7fc22d1ef 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -946,6 +946,7 @@ process_text_part (struct rspamd_task *task, { struct rspamd_mime_text_part *text_part; const gchar *cd; + GMimeStream *st; /* Skip attachments */ #ifndef GMIME24 @@ -1073,6 +1074,19 @@ process_text_part (struct rspamd_task *task, } rspamd_extract_words (task, text_part); + + if (!(text_part->flags & RSPAMD_MIME_TEXT_PART_FLAG_EMPTY)) { + text_part->raw = g_byte_array_sized_new (part_content->len * 1.5 + 0.5); + st = g_mime_stream_mem_new_with_byte_array (text_part->raw); + g_mime_object_write_to_stream (GMIME_OBJECT (mime_part->mime), st); + g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (st), FALSE); + rspamd_mempool_add_destructor (task->task_pool, + (rspamd_mempool_destruct_t) free_byte_array_callback, + text_part->raw); + } + else { + text_part->raw = NULL; + } } struct mime_foreach_data { @@ -1234,13 +1248,12 @@ mime_foreach_callback (GMimeObject * part, gpointer user_data) if (wrapper != NULL) { #endif part_stream = g_mime_stream_mem_new (); - if (g_mime_data_wrapper_write_to_stream (wrapper, - part_stream) != -1) { + if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) != -1) { - g_mime_stream_mem_set_owner (GMIME_STREAM_MEM ( - part_stream), FALSE); + g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (part_stream), + FALSE); part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM ( - part_stream)); + part_stream)); g_object_unref (part_stream); mime_part = rspamd_mempool_alloc0 (task->task_pool, diff --git a/src/libmime/message.h b/src/libmime/message.h index e38420dbf..8003d073c 100644 --- a/src/libmime/message.h +++ b/src/libmime/message.h @@ -53,6 +53,7 @@ struct rspamd_mime_text_part { const gchar *lang_code; const gchar *language; const gchar *real_charset; + GByteArray *raw; /**< undecoded mime part */ GByteArray *orig; GByteArray *content; GByteArray *stripped_content; /**< no newlines or html tags */ diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c index 955aa91e0..248e7ace6 100644 --- a/src/libserver/re_cache.c +++ b/src/libserver/re_cache.c @@ -875,8 +875,8 @@ rspamd_re_cache_exec_re (struct rspamd_task *task, } /* Select data for regexp */ if (re_class->type == RSPAMD_RE_RAWMIME) { - in = part->orig->data; - len = part->orig->len; + in = part->raw->data; + len = part->raw->len; raw = TRUE; } else { -- 2.39.5