diff options
Diffstat (limited to 'src/libmime/mime_encoding.c')
-rw-r--r-- | src/libmime/mime_encoding.c | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c index fdcd19c7b..11f764e11 100644 --- a/src/libmime/mime_encoding.c +++ b/src/libmime/mime_encoding.c @@ -455,6 +455,36 @@ rspamd_mime_charset_utf_check (rspamd_ftok_t *charset, return FALSE; } +/* https://graphics.stanford.edu/~seander/bithacks.html#HasMoreInWord */ +/* When n is 127 the added term ~0UL/255*(127-(n)) is zero, so the expression reduces to testing the top bit of every byte of x. */ +#define hasmore(x,n) (((x)+~0UL/255*(127-(n))|(x))&~0UL/255*128) + +/* Returns TRUE if any of the len bytes starting at beg is greater than 127, FALSE otherwise. Whole unsigned long words are tested first with hasmore, then the remaining len % sizeof (unsigned long) bytes are checked one at a time. */ +/* NOTE(review): beg is cast to unsigned long * and dereferenced with no alignment check; confirm callers always pass suitably aligned buffers, or load each word with memcpy instead. */ +static inline gboolean +rspamd_mime_has_8bit (const guchar *beg, gsize len) +{ + unsigned long *w; + gsize i, leftover = len % sizeof (*w); + + w = (unsigned long *)beg; + + for (i = 0; i < len / sizeof (*w); i ++) { + if (hasmore (*w, 127)) { + return TRUE; + } + + w ++; + } + + beg = (const guchar *)w; + + for (i = 0; i < leftover; i ++) { + if (beg[i] > 127) { + return TRUE; + } + } + + return FALSE; +} + GByteArray * rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, struct rspamd_mime_text_part *text_part) @@ -468,12 +498,27 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task, rspamd_ftok_t charset_tok; struct rspamd_mime_part *part = text_part->mime_part; + if (rspamd_mime_has_8bit (text_part->raw.begin, text_part->raw.len)) { + text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_8BIT; + } + part_content = rspamd_mempool_alloc0 (task->task_pool, sizeof (GByteArray)); part_content->data = rspamd_mempool_alloc (task->task_pool, text_part->parsed.len); memcpy (part_content->data, text_part->parsed.begin, text_part->parsed.len); part_content->len = text_part->parsed.len; + if (rspamd_mime_has_8bit (text_part->parsed.begin, text_part->parsed.len)) { + text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED; + } + + if (!(text_part->flags & RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED)) { + /* We don't care anymore about encoding */ + SET_PART_UTF (text_part); + + return part_content; + } + if (task->cfg && task->cfg->raw_mode) { SET_PART_RAW (text_part); return part_content; |