diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-01-12 14:53:19 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-01-12 14:53:19 +0000 |
commit | f03ded05654f6fd62028e3dcaea461fe0116b96c (patch) | |
tree | badfaaeb695f3e6a46f6d9e68c92b27c4b11e81e /src/libmime/message.c | |
parent | 12e3b592612d179a86e6ca566161ce467b497daa (diff) | |
download | rspamd-f03ded05654f6fd62028e3dcaea461fe0116b96c.tar.gz rspamd-f03ded05654f6fd62028e3dcaea461fe0116b96c.zip |
[Feature] Core: Ignore and mark invisible spaces
Diffstat (limited to 'src/libmime/message.c')
-rw-r--r-- | src/libmime/message.c | 39 |
1 file changed, 37 insertions, 2 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index a15485339..4cb9e07b3 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -215,13 +215,15 @@ rspamd_mime_part_detect_language (struct rspamd_task *task,
 }
 
 static void
-rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe,
+rspamd_strip_newlines_parse (struct rspamd_task *task,
+		const gchar *begin, const gchar *pe,
 		struct rspamd_mime_text_part *part)
 {
 	const gchar *p = begin, *c = begin;
 	gchar last_c = '\0';
 	gboolean crlf_added = FALSE;
 	gboolean url_open_bracket = FALSE;
+	UChar32 uc;
 
 	enum {
 		normal_char,
@@ -230,6 +232,39 @@ rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe,
 	} state = normal_char;
 
 	while (p < pe) {
+		if (IS_PART_UTF (part)) {
+			gint32 off = p - begin;
+			U8_NEXT (begin, off, pe - begin, uc);
+
+			if (uc != -1) {
+				while (p < pe) {
+					if (uc == 0x200b) {
+						/* Invisible space ! */
+						task->flags |= RSPAMD_TASK_FLAG_BAD_UNICODE;
+
+						if (p > c) {
+							g_byte_array_append (part->utf_stripped_content,
+									(const guint8 *) c, p - c);
+							c = begin + off;
+							p = c;
+						}
+
+						U8_NEXT (begin, off, pe - begin, uc);
+
+						if (uc != 0x200b) {
+							break;
+						}
+
+						p = begin + off;
+						c = p;
+					}
+					else {
+						break;
+					}
+				}
+			}
+		}
+
 		if (G_UNLIKELY (*p) == '\r') {
 			switch (state) {
 			case normal_char:
@@ -469,7 +504,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
 	p = (const gchar *)part->utf_content->data;
 	end = p + part->utf_content->len;
 
-	rspamd_strip_newlines_parse (p, end, part);
+	rspamd_strip_newlines_parse (task, p, end, part);
 
 	for (i = 0; i < part->newlines->len; i ++) {
 		ex = rspamd_mempool_alloc (task->task_pool, sizeof (*ex));