diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-12-22 13:07:01 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-12-22 13:07:01 +0000 |
commit | 40f6e53fe3c30d5272c1efd4bbd2f527a92990aa (patch) | |
tree | 1c9183eb7977c7f281e6c57c3d3af263a3de0be4 /src/libmime | |
parent | b75412f4eb755a236d8db83cf44d569c5aaf0aba (diff) | |
download | rspamd-40f6e53fe3c30d5272c1efd4bbd2f527a92990aa.tar.gz rspamd-40f6e53fe3c30d5272c1efd4bbd2f527a92990aa.zip |
[Feature] Filter non-utf chars from all decoded headers
Diffstat (limited to 'src/libmime')
-rw-r--r-- | src/libmime/mime_encoding.c | 54 | ||||
-rw-r--r-- | src/libmime/mime_encoding.h | 8 | ||||
-rw-r--r-- | src/libmime/mime_headers.c | 2 |
3 files changed, 40 insertions, 24 deletions
```diff
diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c
index b33312faf..adde740be 100644
--- a/src/libmime/mime_encoding.c
+++ b/src/libmime/mime_encoding.c
@@ -274,13 +274,39 @@ rspamd_mime_to_utf8_byte_array (GByteArray *in,
 	return TRUE;
 }
 
-gboolean
-rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
-		gchar *in, gsize len)
+void
+rspamd_mime_charset_utf_enforce (gchar *in, gsize len)
 {
 	const gchar *end, *p;
 	gsize remain = len;
 
+	/* Now we validate input and replace bad characters with '?' symbol */
+	p = in;
+
+	while (remain > 0 && !g_utf8_validate (p, remain, &end)) {
+		gchar *valid;
+
+		valid = g_utf8_find_next_char (end, in + len);
+
+		if (!valid) {
+			valid = in + len;
+		}
+
+		if (valid > end) {
+			memset ((gchar *)end, '?', valid - end);
+			p = valid;
+			remain = (in + len) - p;
+		}
+		else {
+			break;
+		}
+	}
+}
+
+gboolean
+rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
+		gchar *in, gsize len)
+{
 	if (utf_compatible_re == NULL) {
 		utf_compatible_re = rspamd_regexp_new (
 				"^(?:utf-?8.*)|(?:us-ascii)|(?:ascii)|(?:ansi)|(?:us)|(?:ISO-8859-1)|"
@@ -290,27 +316,7 @@ rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
 
 	if (rspamd_regexp_match (utf_compatible_re, charset->begin, charset->len,
 			TRUE)) {
-		/* Now we validate input and replace bad characters with '?' symbol */
-		p = in;
-
-		while (remain > 0 && !g_utf8_validate (p, remain, &end)) {
-			gchar *valid;
-
-			valid = g_utf8_find_next_char (end, in + len);
-
-			if (!valid) {
-				valid = in + len;
-			}
-
-			if (valid > end) {
-				memset ((gchar *)end, '?', valid - end);
-				p = valid;
-				remain = (in + len) - p;
-			}
-			else {
-				break;
-			}
-		}
+		rspamd_mime_charset_utf_enforce (in, len);
 		return TRUE;
 	}
 
diff --git a/src/libmime/mime_encoding.h b/src/libmime/mime_encoding.h
index a4999266f..c1a24eeca 100644
--- a/src/libmime/mime_encoding.h
+++ b/src/libmime/mime_encoding.h
@@ -78,4 +78,12 @@ GByteArray * rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
 gboolean rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
 		gchar *in, gsize len);
 
+/**
+ * Ensure that all characters in string are valid utf8 chars or replace them
+ * with '?'
+ * @param in
+ * @param len
+ */
+void rspamd_mime_charset_utf_enforce (gchar *in, gsize len);
+
 #endif /* SRC_LIBMIME_MIME_ENCODING_H_ */
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
index f2f2c2270..9b65f1ecf 100644
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c
@@ -227,6 +227,8 @@ rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target,
 				new->decoded = "";
 			}
 
+			/* We also validate utf8 and replace all non-valid utf8 chars */
+			rspamd_mime_charset_utf_enforce (new->decoded, strlen (new->decoded));
 			rspamd_mime_header_add (task, target, new);
 			state = 0;
 			break;
```