aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2014-08-25 14:24:56 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2014-08-25 14:24:56 +0100
commit3daeefad81c5ba90c22809c47b7bcf5df168523a (patch)
tree8f4eae424a6bbdab8f7a7de3fb63cf99afcc4371 /src
parentd224a23261ce2de4298f0e846d7b7866d841bf84 (diff)
downloadrspamd-3daeefad81c5ba90c22809c47b7bcf5df168523a.tar.gz
rspamd-3daeefad81c5ba90c22809c47b7bcf5df168523a.zip
Validate charset attribute of Content-Type.
Diffstat (limited to 'src')
-rw-r--r--src/libmime/message.c52
1 files changed, 46 insertions, 6 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 7ee31a26d..7adde3d95 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -807,6 +807,40 @@ free_byte_array_callback (void *pointer)
g_byte_array_free (arr, TRUE);
}
+static gboolean
+charset_validate (rspamd_mempool_t *pool, const gchar *in, gchar **out)
+{
+ /*
+ * This is a simple routine to validate input charset
+ * we just check that charset starts with alphanumeric and ends
+ * with alphanumeric
+ */
+ const gchar *begin, *end;
+ gboolean changed = FALSE;
+
+ begin = in;
+
+ while (!g_ascii_isalnum (*begin)) {
+ begin ++;
+ changed = TRUE;
+ }
+ end = begin + strlen (begin) - 1;
+ while (!g_ascii_isalnum (*end)) {
+ end --;
+ changed = TRUE;
+ }
+
+ if (!changed) {
+ *out = (gchar *)in;
+ }
+ else {
+ *out = rspamd_mempool_alloc (pool, end - begin + 2);
+ rspamd_strlcpy (*out, begin, end - begin + 2);
+ }
+
+ return TRUE;
+}
+
static GByteArray *
convert_text_to_utf (struct rspamd_task *task,
GByteArray * part_content,
@@ -816,7 +850,7 @@ convert_text_to_utf (struct rspamd_task *task,
GError *err = NULL;
gsize read_bytes, write_bytes;
const gchar *charset;
- gchar *res_str;
+ gchar *res_str, *ocharset;
GByteArray *result_array;
if (task->cfg->raw_mode) {
@@ -829,9 +863,15 @@ convert_text_to_utf (struct rspamd_task *task,
text_part->is_raw = TRUE;
return part_content;
}
-
- if (g_ascii_strcasecmp (charset,
- "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) {
+ if (!charset_validate (task->task_pool, charset, &ocharset)) {
+ msg_info (
+ "<%s>: has invalid charset",
+ task->message_id);
+ text_part->is_raw = TRUE;
+ return part_content;
+ }
+ if (g_ascii_strcasecmp (ocharset,
+ "utf-8") == 0 || g_ascii_strcasecmp (ocharset, "utf8") == 0) {
if (g_utf8_validate (part_content->data, part_content->len, NULL)) {
text_part->is_raw = FALSE;
text_part->is_utf = TRUE;
@@ -849,7 +889,7 @@ convert_text_to_utf (struct rspamd_task *task,
res_str = g_convert_with_fallback (part_content->data,
part_content->len,
UTF8_CHARSET,
- charset,
+ ocharset,
NULL,
&read_bytes,
&write_bytes,
@@ -857,7 +897,7 @@ convert_text_to_utf (struct rspamd_task *task,
if (res_str == NULL) {
msg_warn ("<%s>: cannot convert from %s to utf8: %s",
task->message_id,
- charset,
+ ocharset,
err ? err->message : "unknown problem");
text_part->is_raw = TRUE;
return part_content;