summary refs log tree commit diff stats
path: root/src/libmime
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-12-22 13:07:01 +0000
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-12-22 13:07:01 +0000
commit 40f6e53fe3c30d5272c1efd4bbd2f527a92990aa (patch)
tree 1c9183eb7977c7f281e6c57c3d3af263a3de0be4 /src/libmime
parent b75412f4eb755a236d8db83cf44d569c5aaf0aba (diff)
download rspamd-40f6e53fe3c30d5272c1efd4bbd2f527a92990aa.tar.gz
rspamd-40f6e53fe3c30d5272c1efd4bbd2f527a92990aa.zip
[Feature] Filter non-utf chars from all decoded headers
Diffstat (limited to 'src/libmime')
-rw-r--r-- src/libmime/mime_encoding.c 54
-rw-r--r-- src/libmime/mime_encoding.h 8
-rw-r--r-- src/libmime/mime_headers.c 2
3 files changed, 40 insertions, 24 deletions
diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c
index b33312faf..adde740be 100644
--- a/src/libmime/mime_encoding.c
+++ b/src/libmime/mime_encoding.c
@@ -274,13 +274,39 @@ rspamd_mime_to_utf8_byte_array (GByteArray *in,
return TRUE;
}
-gboolean
-rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
- gchar *in, gsize len)
+void
+rspamd_mime_charset_utf_enforce (gchar *in, gsize len)
{
const gchar *end, *p;
gsize remain = len;
+ /* Now we validate input and replace bad characters with '?' symbol */
+ p = in;
+
+ while (remain > 0 && !g_utf8_validate (p, remain, &end)) {
+ gchar *valid;
+
+ valid = g_utf8_find_next_char (end, in + len);
+
+ if (!valid) {
+ valid = in + len;
+ }
+
+ if (valid > end) {
+ memset ((gchar *)end, '?', valid - end);
+ p = valid;
+ remain = (in + len) - p;
+ }
+ else {
+ break;
+ }
+ }
+}
+
+gboolean
+rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
+ gchar *in, gsize len)
+{
if (utf_compatible_re == NULL) {
utf_compatible_re = rspamd_regexp_new (
"^(?:utf-?8.*)|(?:us-ascii)|(?:ascii)|(?:ansi)|(?:us)|(?:ISO-8859-1)|"
@@ -290,27 +316,7 @@ rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
if (rspamd_regexp_match (utf_compatible_re, charset->begin, charset->len,
TRUE)) {
- /* Now we validate input and replace bad characters with '?' symbol */
- p = in;
-
- while (remain > 0 && !g_utf8_validate (p, remain, &end)) {
- gchar *valid;
-
- valid = g_utf8_find_next_char (end, in + len);
-
- if (!valid) {
- valid = in + len;
- }
-
- if (valid > end) {
- memset ((gchar *)end, '?', valid - end);
- p = valid;
- remain = (in + len) - p;
- }
- else {
- break;
- }
- }
+ rspamd_mime_charset_utf_enforce (in, len);
return TRUE;
}
diff --git a/src/libmime/mime_encoding.h b/src/libmime/mime_encoding.h
index a4999266f..c1a24eeca 100644
--- a/src/libmime/mime_encoding.h
+++ b/src/libmime/mime_encoding.h
@@ -78,4 +78,12 @@ GByteArray * rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
gboolean rspamd_mime_charset_utf_check (rspamd_ftok_t *charset,
gchar *in, gsize len);
+/**
+ * Ensure that all characters in string are valid utf8 chars or replace them
+ * with '?'
+ * @param in
+ * @param len
+ */
+void rspamd_mime_charset_utf_enforce (gchar *in, gsize len);
+
#endif /* SRC_LIBMIME_MIME_ENCODING_H_ */
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
index f2f2c2270..9b65f1ecf 100644
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c
@@ -227,6 +227,8 @@ rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target,
new->decoded = "";
}
+ /* We also validate utf8 and replace all non-valid utf8 chars */
+ rspamd_mime_charset_utf_enforce (new->decoded, strlen (new->decoded));
rspamd_mime_header_add (task, target, new);
state = 0;
break;