From 1912eac2d678b2993b4ef1fa41e36ca7a38e8239 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Tue, 11 Dec 2018 12:01:52 +0000
Subject: [PATCH] [Feature] Core: Add libmagic detection for all parts

Store a libmagic-detected content type in the new `detected_ct` field of
`struct rspamd_mime_part`. It is filled in rspamd_message_from_data() and,
in rspamd_message_process(), for every non-empty part for which
rspamd_message_process_text_part_maybe() returns FALSE. The value is
exposed to Lua as mime_part:get_detected_type() and
mime_part:get_detected_type_full().

Review fixes folded into this patch:
 * rspamd_message_process() calls magic_buffer() only when task->cfg and
   task->cfg->libs_ctx are set, matching rspamd_message_from_data();
 * lua_mimepart_get_type_common() stores the boundary under the
   "boundary" key instead of overwriting "charset".
---
 src/libmime/archives.c |  4 +-
 src/libmime/message.c  | 51 +++++++++++++++++++------
 src/libmime/message.h  |  1 +
 src/lua/lua_mimepart.c | 89 ++++++++++++++++++++++++++++++++++--------
 4 files changed, 114 insertions(+), 31 deletions(-)

diff --git a/src/libmime/archives.c b/src/libmime/archives.c
index 9cfce6968..1f9a5c634 100644
--- a/src/libmime/archives.c
+++ b/src/libmime/archives.c
@@ -1509,8 +1509,8 @@ rspamd_archive_cheat_detect (struct rspamd_mime_part *part, const gchar *str,
 	}
 
 	if (magic_start != NULL) {
-		if (part->parsed_data.len > magic_len && memcmp (part->parsed_data.begin,
-				magic_start, magic_len) == 0) {
+		if (part->parsed_data.len > magic_len &&
+				memcmp (part->parsed_data.begin, magic_start, magic_len) == 0) {
 			return TRUE;
 		}
 	}
diff --git a/src/libmime/message.c b/src/libmime/message.c
index a5faaf017..bbae5e426 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -703,7 +703,7 @@ rspamd_message_process_html_text_part (struct rspamd_task *task,
 	return TRUE;
 }
 
-static void
+static gboolean
 rspamd_message_process_text_part_maybe (struct rspamd_task *task,
 		struct rspamd_mime_part *mime_part)
 {
@@ -812,11 +812,11 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
 			mime_part->cd && mime_part->cd->type == RSPAMD_CT_ATTACHMENT &&
 			(task->cfg && !task->cfg->check_text_attachements)) {
 		debug_task ("skip attachments for checking as text parts");
-		return;
+		return TRUE;
 	}
 	else if (!(found_txt || found_html)) {
 		/* Not a text part */
-		return;
+		return FALSE;
 	}
 
 	text_part = rspamd_mempool_alloc0 (task->task_pool,
@@ -830,12 +830,12 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
 
 	if (found_html) {
 		if (!rspamd_message_process_html_text_part (task, text_part)) {
-			return;
+			return FALSE;
 		}
 	}
 	else {
 		if (!rspamd_message_process_plain_text_part (task, text_part)) {
-			return;
+			return FALSE;
 		}
 	}
 
@@ -866,7 +866,7 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
 
 		rspamd_task_insert_result (task, GTUBE_SYMBOL, 0, NULL);
 
-		return;
+		return TRUE;
 	}
 
 	/* Post process part */
@@ -885,6 +885,8 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
 	}
 
 	rspamd_mime_part_create_words (task, text_part);
+
+	return TRUE;
 }
 
 /* Creates message from various data using libmagic to detect type */
@@ -900,15 +902,18 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
 
 	g_assert (start != NULL);
 
+	part = rspamd_mempool_alloc0 (task->task_pool, sizeof (*part));
+
 	tok = rspamd_task_get_request_header (task, "Content-Type");
 
 	if (tok) {
 		/* We have Content-Type defined */
 		ct = rspamd_content_type_parse (tok->begin, tok->len,
 				task->task_pool);
+		part->ct = ct;
 	}
-	else if (task->cfg && task->cfg->libs_ctx) {
-		/* Try to predict it by content (slow) */
+
+	if (task->cfg && task->cfg->libs_ctx) {
 		mb = magic_buffer (task->cfg->libs_ctx->libmagic,
 				start,
 				len);
@@ -918,12 +923,16 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
 			srch.len = strlen (mb);
 			ct = rspamd_content_type_parse (srch.begin, srch.len,
 					task->task_pool);
+			msg_warn_task ("construct fake mime of type: %s", mb);
+
+			if (!part->ct) {
+				part->ct = ct;
+			}
+
+			part->detected_ct = ct;
 		}
 	}
 
-	msg_warn_task ("construct fake mime of type: %s", mb);
-	part = rspamd_mempool_alloc0 (task->task_pool, sizeof (*part));
-	part->ct = ct;
 	part->raw_data.begin = start;
 	part->raw_data.len = len;
 	part->parsed_data.begin = start;
@@ -1189,7 +1198,25 @@ rspamd_message_process (struct rspamd_task *task)
 		struct rspamd_mime_part *part;
 
 		part = g_ptr_array_index (task->parts, i);
-		rspamd_message_process_text_part_maybe (task, part);
+
+		/* Text processing declined this part: detect its type with libmagic */
+		if (!rspamd_message_process_text_part_maybe (task, part) &&
+				part->parsed_data.len > 0 &&
+				task->cfg && task->cfg->libs_ctx) {
+			const gchar *mb = magic_buffer (task->cfg->libs_ctx->libmagic,
+					part->parsed_data.begin,
+					part->parsed_data.len);
+
+			if (mb) {
+				rspamd_ftok_t srch;
+
+				srch.begin = mb;
+				srch.len = strlen (mb);
+				part->detected_ct = rspamd_content_type_parse (srch.begin,
+						srch.len,
+						task->task_pool);
+			}
+		}
 	}
 
 	rspamd_images_process (task);
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 29f777c3b..25c88cc3a 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -47,6 +47,7 @@ struct rspamd_mime_multipart {
 
 struct rspamd_mime_part {
 	struct rspamd_content_type *ct;
+	struct rspamd_content_type *detected_ct;
 	struct rspamd_content_disposition *cd;
 	rspamd_ftok_t raw_data;
 	rspamd_ftok_t parsed_data;
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c
index 3617a145b..3019cf577 100644
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -333,6 +333,20 @@ LUA_FUNCTION_DEF (mimepart, get_type);
  */
 LUA_FUNCTION_DEF (mimepart, get_type_full);
 
+/***
+ * @method mime_part:get_detected_type()
+ * Extract the content type detected by libmagic for the mime part (nil, nil if none)
+ * @return {string,string} content type in form 'type','subtype'
+ */
+LUA_FUNCTION_DEF (mimepart, get_detected_type);
+
+/***
+ * @method mime_part:get_detected_type_full()
+ * Same as get_detected_type() but also returns a table of content type attributes
+ * @return {string,string,table} content type in form 'type','subtype', {attrs}
+ */
+LUA_FUNCTION_DEF (mimepart, get_detected_type_full);
+
 /***
  * @method mime_part:get_cte()
  * Extract content-transfer-encoding for a part
@@ -457,6 +471,8 @@ static const struct luaL_reg mimepartlib_m[] = {
 	LUA_INTERFACE_DEF (mimepart, get_length),
 	LUA_INTERFACE_DEF (mimepart, get_type),
 	LUA_INTERFACE_DEF (mimepart, get_type_full),
+	LUA_INTERFACE_DEF (mimepart, get_detected_type),
+	LUA_INTERFACE_DEF (mimepart, get_detected_type_full),
 	LUA_INTERFACE_DEF (mimepart, get_cte),
 	LUA_INTERFACE_DEF (mimepart, get_filename),
 	LUA_INTERFACE_DEF (mimepart, get_header),
@@ -1189,48 +1205,49 @@ lua_mimepart_get_length (lua_State * L)
 }
 
 static gint
-lua_mimepart_get_type_common (lua_State * L, gboolean full)
+lua_mimepart_get_type_common (lua_State * L, struct rspamd_content_type *ct,
+		gboolean full)
 {
-	struct rspamd_mime_part *part = lua_check_mimepart (L);
+
 	GHashTableIter it;
 	gpointer k, v;
 	struct rspamd_content_type_param *param;
 
-	if (part == NULL) {
+	if (ct == NULL) {
 		lua_pushnil (L);
 		lua_pushnil (L);
 		return 2;
 	}
 
-	lua_pushlstring (L, part->ct->type.begin, part->ct->type.len);
-	lua_pushlstring (L, part->ct->subtype.begin, part->ct->subtype.len);
+	lua_pushlstring (L, ct->type.begin, ct->type.len);
+	lua_pushlstring (L, ct->subtype.begin, ct->subtype.len);
 
 	if (!full) {
 		return 2;
 	}
 
-	lua_createtable (L, 0, 2 + (part->ct->attrs ?
-			g_hash_table_size (part->ct->attrs) : 0));
+	lua_createtable (L, 0, 2 + (ct->attrs ?
+			g_hash_table_size (ct->attrs) : 0));
 
-	if (part->ct->charset.len > 0) {
+	if (ct->charset.len > 0) {
 		lua_pushstring (L, "charset");
-		lua_pushlstring (L, part->ct->charset.begin, part->ct->charset.len);
+		lua_pushlstring (L, ct->charset.begin, ct->charset.len);
 		lua_settable (L, -3);
 	}
 
-	if (part->ct->boundary.len > 0) {
-		lua_pushstring (L, "charset");
-		lua_pushlstring (L, part->ct->boundary.begin, part->ct->boundary.len);
+	if (ct->boundary.len > 0) {
+		lua_pushstring (L, "boundary");
+		lua_pushlstring (L, ct->boundary.begin, ct->boundary.len);
 		lua_settable (L, -3);
 	}
 
-	if (part->ct->attrs) {
-		g_hash_table_iter_init (&it, part->ct->attrs);
+	if (ct->attrs) {
+		g_hash_table_iter_init (&it, ct->attrs);
 
 		while (g_hash_table_iter_next (&it, &k, &v)) {
 			param = v;
 
-			if (param->name.len > 0 && param->name.len > 0) {
+			if (param->name.len > 0 && param->value.len > 0) {
 				/* TODO: think about multiple values here */
 				lua_pushlstring (L, param->name.begin, param->name.len);
 				lua_pushlstring (L, param->value.begin, param->value.len);
@@ -1246,14 +1263,52 @@ static gint
 lua_mimepart_get_type (lua_State * L)
 {
 	LUA_TRACE_POINT;
-	return lua_mimepart_get_type_common (L, FALSE);
+	struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+	if (part == NULL) {
+		return luaL_error (L, "invalid arguments");
+	}
+
+	return lua_mimepart_get_type_common (L, part->ct, FALSE);
 }
 
 static gint
 lua_mimepart_get_type_full (lua_State * L)
 {
 	LUA_TRACE_POINT;
-	return lua_mimepart_get_type_common (L, TRUE);
+	struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+	if (part == NULL) {
+		return luaL_error (L, "invalid arguments");
+	}
+
+	return lua_mimepart_get_type_common (L, part->ct, TRUE);
+}
+
+static gint
+lua_mimepart_get_detected_type (lua_State * L)
+{
+	LUA_TRACE_POINT;
+	struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+	if (part == NULL) {
+		return luaL_error (L, "invalid arguments");
+	}
+
+	return lua_mimepart_get_type_common (L, part->detected_ct, FALSE);
+}
+
+static gint
+lua_mimepart_get_detected_type_full (lua_State * L)
+{
+	LUA_TRACE_POINT;
+	struct rspamd_mime_part *part = lua_check_mimepart (L);
+
+	if (part == NULL) {
+		return luaL_error (L, "invalid arguments");
+	}
+
+	return lua_mimepart_get_type_common (L, part->detected_ct, TRUE);
 }
 
 static gint
-- 
2.39.5