#define RSPAMD_MIME_TEXT_PART_FLAG_BALANCED (1 << 1)
#define RSPAMD_MIME_TEXT_PART_FLAG_EMPTY (1 << 2)
#define RSPAMD_MIME_TEXT_PART_FLAG_HTML (1 << 3)
+#define RSPAMD_MIME_TEXT_PART_FLAG_8BIT (1 << 4)
+#define RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED (1 << 5)
#define IS_PART_EMPTY(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_EMPTY)
#define IS_PART_UTF(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_UTF)
return FALSE;
}
+/* https://graphics.stanford.edu/~seander/bithacks.html#HasMoreInWord */
+#define hasmore(x,n) (((x)+~0UL/255*(127-(n))|(x))&~0UL/255*128)
+
+static inline gboolean
+rspamd_mime_has_8bit (const guchar *beg, gsize len)
+{
+ unsigned long *w;
+ gsize i, leftover = len % sizeof (*w);
+
+ w = (unsigned long *)beg;
+
+ for (i = 0; i < len / sizeof (*w); i ++) {
+ if (hasmore (*w, 127)) {
+ return TRUE;
+ }
+
+ w ++;
+ }
+
+ beg = (const guchar *)w;
+
+ for (i = 0; i < leftover; i ++) {
+ if (beg[i] > 127) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
GByteArray *
rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
struct rspamd_mime_text_part *text_part)
rspamd_ftok_t charset_tok;
struct rspamd_mime_part *part = text_part->mime_part;
+ if (rspamd_mime_has_8bit (text_part->raw.begin, text_part->raw.len)) {
+ text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_8BIT;
+ }
+
part_content = rspamd_mempool_alloc0 (task->task_pool, sizeof (GByteArray));
part_content->data = rspamd_mempool_alloc (task->task_pool,
text_part->parsed.len);
memcpy (part_content->data, text_part->parsed.begin, text_part->parsed.len);
part_content->len = text_part->parsed.len;
+ if (rspamd_mime_has_8bit (text_part->parsed.begin, text_part->parsed.len)) {
+ text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED;
+ }
+
+ if (!(text_part->flags & RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED)) {
+ /* We don't care anymore about encoding */
+ SET_PART_UTF (text_part);
+
+ return part_content;
+ }
+
if (task->cfg && task->cfg->raw_mode) {
SET_PART_RAW (text_part);
return part_content;