From: Vsevolod Stakhov
Date: Mon, 16 Aug 2021 10:19:40 +0000 (+0100)
Subject: [Fix] Add a special logic for text part with no text extraction
X-Git-Tag: 3.0~5
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=229cf1c94b1879d92028735d46da3a9021f97c29;p=rspamd.git

[Fix] Add a special logic for text part with no text extraction
---

diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua
index f082a53e5..09c93effc 100644
--- a/lualib/lua_magic/types.lua
+++ b/lualib/lua_magic/types.lua
@@ -287,6 +287,7 @@ local types = {
   xml = {
     ct = 'application/xml',
     type = 'text',
+    no_text = true,
   },
   txt = {
     type = 'text',
@@ -302,16 +303,19 @@ local types = {
     type = 'text',
     ct = 'text/csv',
     av_check = false,
+    no_text = true,
   },
   ics = {
     type = 'text',
     ct = 'text/calendar',
     av_check = false,
+    no_text = true,
   },
   vcf = {
     type = 'text',
     ct = 'text/vcard',
     av_check = false,
+    no_text = true,
   },
   eml = {
     type = 'message',
diff --git a/src/libmime/message.c b/src/libmime/message.c
index d788844a3..e6fc5be94 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -1446,6 +1446,19 @@ rspamd_message_process (struct rspamd_task *task)
 						part->detected_type = rspamd_mempool_strdup (task->task_pool,
 								lua_tostring (L, -1));
 					}
+
+					lua_pop (L, 1);
+
+					lua_pushstring (L, "no_text");
+					lua_gettable (L, -2);
+
+					if (lua_isboolean (L, -1)) {
+						if (!!lua_toboolean (L, -1)) {
+							part->flags |= RSPAMD_MIME_PART_NO_TEXT_EXTRACTION;
+						}
+					}
+
+					lua_pop (L, 1);
 				}
 			}
 
@@ -1479,7 +1492,8 @@ rspamd_message_process (struct rspamd_task *task)
 			rspamd_images_process_mime_part_maybe (task, part);
 
 			/* Still no content detected, try text heuristic */
-			if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) {
+			if (part->part_type == RSPAMD_MIME_PART_UNDEFINED &&
+					!(part->flags & RSPAMD_MIME_PART_NO_TEXT_EXTRACTION)) {
 				rspamd_message_process_text_part_maybe (task, part);
 			}
 		}
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 4549c056d..25bf70f77 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -31,9 +31,10 @@ struct rspamd_image;
 struct rspamd_archive;
 
 enum rspamd_mime_part_flags {
-	RSPAMD_MIME_PART_ATTACHEMENT = (1 << 1),
-	RSPAMD_MIME_PART_BAD_CTE = (1 << 4),
-	RSPAMD_MIME_PART_MISSING_CTE = (1 << 5),
+	RSPAMD_MIME_PART_ATTACHEMENT = (1u << 1u),
+	RSPAMD_MIME_PART_BAD_CTE = (1u << 4u),
+	RSPAMD_MIME_PART_MISSING_CTE = (1u << 5u),
+	RSPAMD_MIME_PART_NO_TEXT_EXTRACTION = (1u << 6u),
 };
 
 enum rspamd_mime_part_type {