aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2017-04-24 16:52:59 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2017-04-24 16:52:59 +0100
commitac7140f3be4c97d27f3da58500be1be1594b442c (patch)
tree7ea8b1ce1384244bb9d87850fedf7bbbaea0b75b
parent6336f4e4794fb3c492154fbc8898baf070b32b62 (diff)
downloadrspamd-ac7140f3be4c97d27f3da58500be1be1594b442c.tar.gz
rspamd-ac7140f3be4c97d27f3da58500be1be1594b442c.zip
[Feature] Add method that detects 8 bit characters in text parts
-rw-r--r--src/libmime/message.h2
-rw-r--r--src/libmime/mime_encoding.c45
2 files changed, 47 insertions, 0 deletions
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 22f4fd24d..a6a6f0595 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -69,6 +69,8 @@ struct rspamd_mime_part {
#define RSPAMD_MIME_TEXT_PART_FLAG_BALANCED (1 << 1)
#define RSPAMD_MIME_TEXT_PART_FLAG_EMPTY (1 << 2)
#define RSPAMD_MIME_TEXT_PART_FLAG_HTML (1 << 3)
+/* Set when the `raw` buffer of a text part contains bytes greater than 127 */
+#define RSPAMD_MIME_TEXT_PART_FLAG_8BIT (1 << 4)
+/* Set when the `parsed` buffer of a text part contains bytes greater than 127 */
+#define RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED (1 << 5)
#define IS_PART_EMPTY(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_EMPTY)
#define IS_PART_UTF(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_UTF)
diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c
index fdcd19c7b..11f764e11 100644
--- a/src/libmime/mime_encoding.c
+++ b/src/libmime/mime_encoding.c
@@ -455,6 +455,36 @@ rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
return FALSE;
}
+/*
+ * Evaluates to non-zero if any byte of the unsigned long word `x` is greater
+ * than `n` (intended for 0 <= n <= 127). With n == 127 the added constant is
+ * zero, so the expression reduces to testing the high bit of every byte.
+ * Note: `x` is evaluated twice, so it must not have side effects.
+ * https://graphics.stanford.edu/~seander/bithacks.html#HasMoreInWord
+ */
+#define hasmore(x,n) (((x)+~0UL/255*(127-(n))|(x))&~0UL/255*128)
+
+/**
+ * Checks whether a buffer contains at least one byte greater than 127.
+ * The bulk of the buffer is scanned one `unsigned long` at a time, the
+ * remaining tail is scanned byte by byte.
+ * @param beg start of the buffer; no particular alignment is required
+ * @param len number of bytes to scan
+ * @return TRUE if any byte has its high bit set, FALSE otherwise
+ */
+static inline gboolean
+rspamd_mime_has_8bit (const guchar *beg, gsize len)
+{
+	unsigned long w;
+	gsize i, nwords = len / sizeof (w), leftover = len % sizeof (w);
+
+	for (i = 0; i < nwords; i ++) {
+		/*
+		 * `beg` is an arbitrary byte pointer, so it must not be dereferenced
+		 * as `unsigned long *`: that would be a potentially misaligned access
+		 * and a strict aliasing violation. Copy the word out instead.
+		 */
+		memcpy (&w, beg, sizeof (w));
+
+		if (hasmore (w, 127)) {
+			return TRUE;
+		}
+
+		beg += sizeof (w);
+	}
+
+	/* Fewer than sizeof (w) bytes are left: check them one by one */
+	for (i = 0; i < leftover; i ++) {
+		if (beg[i] > 127) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
GByteArray *
rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
struct rspamd_mime_text_part *text_part)
@@ -468,12 +498,27 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
rspamd_ftok_t charset_tok;
struct rspamd_mime_part *part = text_part->mime_part;
+ if (rspamd_mime_has_8bit (text_part->raw.begin, text_part->raw.len)) {
+ text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_8BIT;
+ }
+
part_content = rspamd_mempool_alloc0 (task->task_pool, sizeof (GByteArray));
part_content->data = rspamd_mempool_alloc (task->task_pool,
text_part->parsed.len);
memcpy (part_content->data, text_part->parsed.begin, text_part->parsed.len);
part_content->len = text_part->parsed.len;
+ if (rspamd_mime_has_8bit (text_part->parsed.begin, text_part->parsed.len)) {
+ text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED;
+ }
+
+ if (!(text_part->flags & RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED)) {
+ /* We don't care anymore about encoding */
+ SET_PART_UTF (text_part);
+
+ return part_content;
+ }
+
if (task->cfg && task->cfg->raw_mode) {
SET_PART_RAW (text_part);
return part_content;