[Fix] Add special logic for text parts with no text extraction

This commit is contained in:
Vsevolod Stakhov 2021-08-16 11:19:40 +01:00
parent ef16ff4ddd
commit 229cf1c94b
3 changed files with 23 additions and 4 deletions

View File

@ -287,6 +287,7 @@ local types = {
xml = {
ct = 'application/xml',
type = 'text',
no_text = true,
},
txt = {
type = 'text',
@ -302,16 +303,19 @@ local types = {
type = 'text',
ct = 'text/csv',
av_check = false,
no_text = true,
},
ics = {
type = 'text',
ct = 'text/calendar',
av_check = false,
no_text = true,
},
vcf = {
type = 'text',
ct = 'text/vcard',
av_check = false,
no_text = true,
},
eml = {
type = 'message',

View File

@ -1446,6 +1446,19 @@ rspamd_message_process (struct rspamd_task *task)
part->detected_type = rspamd_mempool_strdup (task->task_pool,
lua_tostring (L, -1));
}
lua_pop (L, 1);
lua_pushstring (L, "no_text");
lua_gettable (L, -2);
if (lua_isboolean (L, -1)) {
if (!!lua_toboolean (L, -1)) {
part->flags |= RSPAMD_MIME_PART_NO_TEXT_EXTRACTION;
}
}
lua_pop (L, 1);
}
}
@ -1479,7 +1492,8 @@ rspamd_message_process (struct rspamd_task *task)
rspamd_images_process_mime_part_maybe (task, part);
/* Still no content detected, try text heuristic */
if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) {
if (part->part_type == RSPAMD_MIME_PART_UNDEFINED &&
!(part->flags & RSPAMD_MIME_PART_NO_TEXT_EXTRACTION)) {
rspamd_message_process_text_part_maybe (task, part);
}
}

View File

@ -31,9 +31,10 @@ struct rspamd_image;
struct rspamd_archive;
enum rspamd_mime_part_flags {
RSPAMD_MIME_PART_ATTACHEMENT = (1 << 1),
RSPAMD_MIME_PART_BAD_CTE = (1 << 4),
RSPAMD_MIME_PART_MISSING_CTE = (1 << 5),
RSPAMD_MIME_PART_ATTACHEMENT = (1u << 1u),
RSPAMD_MIME_PART_BAD_CTE = (1u << 4u),
RSPAMD_MIME_PART_MISSING_CTE = (1u << 5u),
RSPAMD_MIME_PART_NO_TEXT_EXTRACTION = (1u << 6u),
};
enum rspamd_mime_part_type {