From b441439d550de340e892903b1309fb35bfba6312 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 17 Oct 2011 14:17:00 +0300 Subject: [PATCH] Check utf8 characters before gregex checks as they assume input to be a utf8 valid string. --- src/lua/lua_regexp.c | 7 +++++++ src/message.c | 25 +++++++++++++++++-------- src/plugins/regexp.c | 15 +++++++++++++++ 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/src/lua/lua_regexp.c b/src/lua/lua_regexp.c index 100b106fe..ba9a4dabc 100644 --- a/src/lua/lua_regexp.c +++ b/src/lua/lua_regexp.c @@ -160,6 +160,13 @@ lua_regexp_match (lua_State *L) if (re) { data = luaL_checkstring (L, 2); if (data) { + if ((g_regex_get_compile_flags (re) & G_REGEX_RAW) == 0) { + /* Validate input */ + if (!g_utf8_validate (data, -1, NULL)) { + lua_pushnil (L); + return 1; + } + } if (g_regex_match_full (re, data, -1, 0, 0, &mi, NULL)) { matches = g_match_info_fetch_all (mi); lua_newtable (L); diff --git a/src/message.c b/src/message.c index 0298a97e5..19d56f7df 100644 --- a/src/message.c +++ b/src/message.c @@ -710,14 +710,21 @@ convert_text_to_utf (struct worker_task *task, GByteArray * part_content, GMimeC } if (g_ascii_strcasecmp (charset, "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) { - text_part->is_raw = FALSE; - text_part->is_utf = TRUE; - return part_content; + if (g_utf8_validate (part_content->data, part_content->len, NULL)) { + text_part->is_raw = FALSE; + text_part->is_utf = TRUE; + return part_content; + } + else { + msg_info ("<%s>: contains invalid utf8 characters, assume it as raw", task->message_id); + text_part->is_raw = TRUE; + return part_content; + } } res_str = g_convert_with_fallback (part_content->data, part_content->len, UTF8_CHARSET, charset, NULL, &read_bytes, &write_bytes, &err); if (res_str == NULL) { - msg_warn ("cannot convert from %s to utf8: %s", charset, err ? 
err->message : "unknown problem"); + msg_warn ("<%s>: cannot convert from %s to utf8: %s", task->message_id, charset, err ? err->message : "unknown problem"); text_part->is_raw = TRUE; return part_content; } @@ -986,6 +993,12 @@ process_message (struct worker_task *task) task->message = message; memory_pool_add_destructor (task->task_pool, (pool_destruct_func) destroy_message, task->message); + /* Save message id for future use */ + task->message_id = g_mime_message_get_message_id (task->message); + if (task->message_id == NULL) { + task->message_id = "undef"; + } + task->parser_recursion = 0; #ifdef GMIME24 g_mime_message_foreach (message, mime_foreach_callback, task); @@ -1003,10 +1016,6 @@ process_message (struct worker_task *task) if (task->queue_id == NULL) { task->queue_id = "undef"; } - task->message_id = g_mime_message_get_message_id (task->message); - if (task->message_id == NULL) { - task->message_id = "undef"; - } #ifdef GMIME24 task->raw_headers_str = g_mime_object_get_headers (GMIME_OBJECT (task->message)); diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c index df3675e93..519666662 100644 --- a/src/plugins/regexp.c +++ b/src/plugins/regexp.c @@ -696,6 +696,13 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar while (cur) { debug_task ("found header \"%s\" with value \"%s\"", re->header, (const gchar *)cur->data); /* Try to match regexp */ + if (!re->is_raw) { + /* Validate input */ + if (!g_utf8_validate (cur->data, -1, NULL)) { + cur = g_list_next (cur); + continue; + } + } if (cur->data && g_regex_match_full (re->regexp, cur->data, -1, 0, 0, NULL, &err) == TRUE) { if (G_UNLIKELY (re->is_test)) { msg_info ("process test regexp %s for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const gchar *)cur->data); @@ -746,6 +753,7 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar regexp = re->raw_regexp; } else { + /* This time there is no need to validate 
anything as conversion succeeds only for valid characters */ regexp = re->regexp; } /* Select data for regexp */ @@ -913,6 +921,13 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar debug_task ("found header \"%s\" with value \"%s\"", re->header, (const gchar *)cur->data); rh = cur->data; /* Try to match regexp */ + if (!re->is_raw) { + /* Validate input */ + if (!g_utf8_validate (rh->value, -1, NULL)) { + cur = g_list_next (cur); + continue; + } + } if (rh->value && g_regex_match_full (re->regexp, rh->value, -1, 0, 0, NULL, &err) == TRUE) { if (G_UNLIKELY (re->is_test)) { msg_info ("process test regexp %s for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const gchar *)cur->data); -- 2.39.5