source.dussan.org Git - rspamd.git/commitdiff
Check utf8 characters before gregex checks as they assume input to be a utf8 valid...
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 17 Oct 2011 11:17:00 +0000 (14:17 +0300)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Mon, 17 Oct 2011 11:17:00 +0000 (14:17 +0300)
src/lua/lua_regexp.c
src/message.c
src/plugins/regexp.c

index 100b106fe838433af7336ed568ad3a0ca7505c7c..ba9a4dabcc872adbc9997f9dfffe3494c9c7de3f 100644 (file)
@@ -160,6 +160,13 @@ lua_regexp_match (lua_State *L)
        if (re) {
                data = luaL_checkstring (L, 2);
                if (data) {
+                       if ((g_regex_get_compile_flags (re) & G_REGEX_RAW) == 0) {
+                               /* Validate input */
+                               if (!g_utf8_validate (data, -1, NULL)) {
+                                       lua_pushnil (L);
+                                       return 1;
+                               }
+                       }
                        if (g_regex_match_full (re, data, -1, 0, 0, &mi, NULL)) {
                                matches = g_match_info_fetch_all (mi);
                                lua_newtable (L);
index 0298a97e5cdb7051a49f2603be1c6b52fd777d02..19d56f7dffd58b0d3a387d5541dfa319f5a487c6 100644 (file)
@@ -710,14 +710,21 @@ convert_text_to_utf (struct worker_task *task, GByteArray * part_content, GMimeC
        }
 
        if (g_ascii_strcasecmp (charset, "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) {
-               text_part->is_raw = FALSE;
-               text_part->is_utf = TRUE;
-               return part_content;
+               if (g_utf8_validate (part_content->data, part_content->len, NULL)) {
+                       text_part->is_raw = FALSE;
+                       text_part->is_utf = TRUE;
+                       return part_content;
+               }
+               else {
+                       msg_info ("<%s>: contains invalid utf8 characters, assume it as raw", task->message_id);
+                       text_part->is_raw = TRUE;
+                       return part_content;
+               }
        }
 
        res_str = g_convert_with_fallback (part_content->data, part_content->len, UTF8_CHARSET, charset, NULL, &read_bytes, &write_bytes, &err);
        if (res_str == NULL) {
-               msg_warn ("cannot convert from %s to utf8: %s", charset, err ? err->message : "unknown problem");
+               msg_warn ("<%s>: cannot convert from %s to utf8: %s", task->message_id, charset, err ? err->message : "unknown problem");
                text_part->is_raw = TRUE;
                return part_content;
        }
@@ -986,6 +993,12 @@ process_message (struct worker_task *task)
                task->message = message;
                memory_pool_add_destructor (task->task_pool, (pool_destruct_func) destroy_message, task->message);
 
+               /* Save message id for future use */
+               task->message_id = g_mime_message_get_message_id (task->message);
+               if (task->message_id == NULL) {
+                       task->message_id = "undef";
+               }
+
                task->parser_recursion = 0;
 #ifdef GMIME24
                g_mime_message_foreach (message, mime_foreach_callback, task);
@@ -1003,10 +1016,6 @@ process_message (struct worker_task *task)
                if (task->queue_id == NULL) {
                        task->queue_id = "undef";
                }
-               task->message_id = g_mime_message_get_message_id (task->message);
-               if (task->message_id == NULL) {
-                       task->message_id = "undef";
-               }
 
 #ifdef GMIME24
                task->raw_headers_str = g_mime_object_get_headers (GMIME_OBJECT (task->message));
index df3675e93355ff31f6b7cf8c8136324ef7210f77..5196666621df9b96531f4b2203ebfb127a589b8f 100644 (file)
@@ -696,6 +696,13 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
                        while (cur) {
                                debug_task ("found header \"%s\" with value \"%s\"", re->header, (const gchar *)cur->data);
                                /* Try to match regexp */
+                               if (!re->is_raw) {
+                                       /* Validate input */
+                                       if (!g_utf8_validate (cur->data, -1, NULL)) {
+                                               cur = g_list_next (cur);
+                                               continue;
+                                       }
+                               }
                                if (cur->data && g_regex_match_full (re->regexp, cur->data, -1, 0, 0, NULL, &err) == TRUE) {
                                        if (G_UNLIKELY (re->is_test)) {
                                                msg_info ("process test regexp %s for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const gchar *)cur->data);
@@ -746,6 +753,7 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
                                regexp = re->raw_regexp;
                        }
                        else {
+                               /* This time there is no need to validate anything as conversion succeed only for valid characters */
                                regexp = re->regexp;
                        }
                        /* Select data for regexp */
@@ -913,6 +921,13 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task, const gchar
                                debug_task ("found header \"%s\" with value \"%s\"", re->header, (const gchar *)cur->data);
                                rh = cur->data;
                                /* Try to match regexp */
+                               if (!re->is_raw) {
+                                       /* Validate input */
+                                       if (!g_utf8_validate (rh->value, -1, NULL)) {
+                                               cur = g_list_next (cur);
+                                               continue;
+                                       }
+                               }
                                if (rh->value && g_regex_match_full (re->regexp, rh->value, -1, 0, 0, NULL, &err) == TRUE) {
                                        if (G_UNLIKELY (re->is_test)) {
                                                msg_info ("process test regexp %s for header %s with value '%s' returned TRUE", re->regexp_text, re->header, (const gchar *)cur->data);