diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-03-12 21:13:04 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-03-12 21:13:04 +0000 |
commit | 3377d26289d64f828169c6dfbf4c71b3f5cdea09 (patch) | |
tree | cc76df2667f8fcccfe288ead0776e39405d01ec5 /src/libmime/mime_parser.c | |
parent | d879f9e5ba06e0e7ff37fd99edb8a9b39c58b39c (diff) | |
download | rspamd-3377d26289d64f828169c6dfbf4c71b3f5cdea09.tar.gz rspamd-3377d26289d64f828169c6dfbf4c71b3f5cdea09.zip |
[Minor] Treat absent Content-Type more wisely
Diffstat (limited to 'src/libmime/mime_parser.c')
-rw-r--r-- | src/libmime/mime_parser.c | 17 |
1 files changed, 15 insertions, 2 deletions
```diff
diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c
index 0d6659867..a89b76b3a 100644
--- a/src/libmime/mime_parser.c
+++ b/src/libmime/mime_parser.c
@@ -25,6 +25,7 @@
 #include "contrib/uthash/utlist.h"
 #include <openssl/cms.h>
 #include <openssl/pkcs7.h>
+#include "contrib/fastutf8/fastutf8.h"
 
 struct rspamd_mime_parser_lib_ctx {
 	struct rspamd_multipattern *mp_boundary;
@@ -610,8 +611,20 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task,
 				 * In theory, it is very unsafe to process it as a text part
 				 * as we unlikely get some sane result
 				 */
-				part->ct->flags &= ~RSPAMD_CONTENT_TYPE_TEXT;
-				part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
+
+				/*
+				 * On the other hand, there is an evidence that some
+				 * emails actually rely on that.
+				 * So we apply an expensive hack here:
+				 * if there are no 8bit characters -OR- the content is valid
+				 * UTF8, we can still imply Content-Type == text/plain
+				 */
+
+				if (rspamd_str_has_8bit (part->raw_data.begin, part->raw_data.len) &&
+					!rspamd_fast_utf8_validate (part->raw_data.begin, part->raw_data.len)) {
+					part->ct->flags &= ~RSPAMD_CONTENT_TYPE_TEXT;
+					part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
+				}
 			}
 		}
 
```