diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-10-17 14:17:00 +0300 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2011-10-17 14:17:00 +0300 |
commit | b441439d550de340e892903b1309fb35bfba6312 (patch) | |
tree | e375e802f6a1173abe9dd29053e1e5ff2aae4123 /src/message.c | |
parent | 350af45ecb3cdd4fc08989ee5365f8e9a5044e83 (diff) | |
download | rspamd-b441439d550de340e892903b1309fb35bfba6312.tar.gz rspamd-b441439d550de340e892903b1309fb35bfba6312.zip |
Validate UTF-8 text before running GRegex checks, since GRegex assumes its input is a valid UTF-8 string.
Diffstat (limited to 'src/message.c')
-rw-r--r-- | src/message.c | 25 |
1 files changed, 17 insertions, 8 deletions
diff --git a/src/message.c b/src/message.c index 0298a97e5..19d56f7df 100644 --- a/src/message.c +++ b/src/message.c @@ -710,14 +710,21 @@ convert_text_to_utf (struct worker_task *task, GByteArray * part_content, GMimeC } if (g_ascii_strcasecmp (charset, "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) { - text_part->is_raw = FALSE; - text_part->is_utf = TRUE; - return part_content; + if (g_utf8_validate (part_content->data, part_content->len, NULL)) { + text_part->is_raw = FALSE; + text_part->is_utf = TRUE; + return part_content; + } + else { + msg_info ("<%s>: contains invalid utf8 characters, assume it as raw", task->message_id); + text_part->is_raw = TRUE; + return part_content; + } } res_str = g_convert_with_fallback (part_content->data, part_content->len, UTF8_CHARSET, charset, NULL, &read_bytes, &write_bytes, &err); if (res_str == NULL) { - msg_warn ("cannot convert from %s to utf8: %s", charset, err ? err->message : "unknown problem"); + msg_warn ("<%s>: cannot convert from %s to utf8: %s", task->message_id, charset, err ? err->message : "unknown problem"); text_part->is_raw = TRUE; return part_content; } @@ -986,6 +993,12 @@ process_message (struct worker_task *task) task->message = message; memory_pool_add_destructor (task->task_pool, (pool_destruct_func) destroy_message, task->message); + /* Save message id for future use */ + task->message_id = g_mime_message_get_message_id (task->message); + if (task->message_id == NULL) { + task->message_id = "undef"; + } + task->parser_recursion = 0; #ifdef GMIME24 g_mime_message_foreach (message, mime_foreach_callback, task); @@ -1003,10 +1016,6 @@ process_message (struct worker_task *task) if (task->queue_id == NULL) { task->queue_id = "undef"; } - task->message_id = g_mime_message_get_message_id (task->message); - if (task->message_id == NULL) { - task->message_id = "undef"; - } #ifdef GMIME24 task->raw_headers_str = g_mime_object_get_headers (GMIME_OBJECT (task->message)); |