about | summary | refs | log | tree | commit | diff | stats
path: root/src/libmime/message.c
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-01-12 14:53:19 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-01-12 14:53:19 +0000
commit f03ded05654f6fd62028e3dcaea461fe0116b96c (patch)
tree badfaaeb695f3e6a46f6d9e68c92b27c4b11e81e /src/libmime/message.c
parent 12e3b592612d179a86e6ca566161ce467b497daa (diff)
download rspamd-f03ded05654f6fd62028e3dcaea461fe0116b96c.tar.gz
download rspamd-f03ded05654f6fd62028e3dcaea461fe0116b96c.zip
[Feature] Core: Ignore and mark invisible spaces
Diffstat (limited to 'src/libmime/message.c')
-rw-r--r-- src/libmime/message.c 39
1 files changed, 37 insertions, 2 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index a15485339..4cb9e07b3 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -215,13 +215,15 @@ rspamd_mime_part_detect_language (struct rspamd_task *task,
}
static void
-rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe,
+rspamd_strip_newlines_parse (struct rspamd_task *task,
+ const gchar *begin, const gchar *pe,
struct rspamd_mime_text_part *part)
{
const gchar *p = begin, *c = begin;
gchar last_c = '\0';
gboolean crlf_added = FALSE;
gboolean url_open_bracket = FALSE;
+ UChar32 uc;
enum {
normal_char,
@@ -230,6 +232,39 @@ rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe,
} state = normal_char;
while (p < pe) {
+ if (IS_PART_UTF (part)) {
+ gint32 off = p - begin;
+ U8_NEXT (begin, off, pe - begin, uc);
+
+ if (uc != -1) {
+ while (p < pe) {
+ if (uc == 0x200b) {
+ /* Invisible space ! */
+ task->flags |= RSPAMD_TASK_FLAG_BAD_UNICODE;
+
+ if (p > c) {
+ g_byte_array_append (part->utf_stripped_content,
+ (const guint8 *) c, p - c);
+ c = begin + off;
+ p = c;
+ }
+
+ U8_NEXT (begin, off, pe - begin, uc);
+
+ if (uc != 0x200b) {
+ break;
+ }
+
+ p = begin + off;
+ c = p;
+ }
+ else {
+ break;
+ }
+ }
+ }
+ }
+
if (G_UNLIKELY (*p) == '\r') {
switch (state) {
case normal_char:
@@ -469,7 +504,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
p = (const gchar *)part->utf_content->data;
end = p + part->utf_content->len;
- rspamd_strip_newlines_parse (p, end, part);
+ rspamd_strip_newlines_parse (task, p, end, part);
for (i = 0; i < part->newlines->len; i ++) {
ex = rspamd_mempool_alloc (task->task_pool, sizeof (*ex));