* For MIME parts, set the 'raw' flag if we cannot determine the charset of the part or cannot convert it to UTF-8

author Vsevolod Stakhov <vsevolod@rambler-co.ru>

Wed, 15 Apr 2009 13:01:01 +0000 (17:01 +0400)

committer Vsevolod Stakhov <vsevolod@rambler-co.ru>

Wed, 15 Apr 2009 13:01:01 +0000 (17:01 +0400)
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Wed, 15 Apr 2009 13:01:01 +0000 (17:01 +0400)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Wed, 15 Apr 2009 13:01:01 +0000 (17:01 +0400)
diff --git a/src/cfg_file.h b/src/cfg_file.h

index b1cbd61252b423c819b334ecd95a924aea588ff7..c91b419ba02ccc4991aa12473ae17f0a3c5766e2 100644 (file)
--- a/src/cfg_file.h
+++ b/src/cfg_file.h
@@ -77,6 +77,7 @@ struct rspamd_regexp {
         enum rspamd_regexp_type type;                                   /**< regexp type                                                                                */
         char *regexp_text;                                                              /**< regexp text representation                                                 */
         GRegex *regexp;                                                                 /**< glib regexp structure                                                              */
+       GRegex *raw_regexp;                                                             /**< glib regexp structure for raw matching                             */
         char *header;                                                                   /**< header name for header regexps                                             */
  };
  
diff --git a/src/expressions.c b/src/expressions.c

index 8dfdc17b3cfa499b7e1496570339ec5d882ced5e..08570834280f654719f010902dfc6e789e5c9633 100644 (file)
--- a/src/expressions.c
+++ b/src/expressions.c
@@ -632,12 +632,20 @@ parse_regexp (memory_pool_t *pool, char *line)
         result->regexp = g_regex_new (begin, regexp_flags, 0, &err);
         result->regexp_text = memory_pool_strdup (pool, begin);
         memory_pool_add_destructor (pool, (pool_destruct_func)g_regex_unref, (void *)result->regexp);
-       *end = '/';
  
         if (result->regexp == NULL || err != NULL) {
+               *end = '/';
                 msg_warn ("parse_regexp: could not read regexp: %s while reading regexp %s", err->message, src);
                 return NULL;
         }
+       result->raw_regexp = g_regex_new (begin, regexp_flags | G_REGEX_RAW, 0, &err);
+       memory_pool_add_destructor (pool, (pool_destruct_func)g_regex_unref, (void *)result->raw_regexp);
+       *end = '/';
+
+       if (result->raw_regexp == NULL || err != NULL) {
+               msg_warn ("parse_regexp: could not read raw regexp: %s while reading regexp %s", err->message, src);
+               return NULL;
+       }
         
         /* Add to cache for further usage */
         re_cache_add (result->regexp_text, result);
diff --git a/src/message.c b/src/message.c

index 32d9bd6731746d01d7f7f22fef7aa19fd1c3036f..14f9245cb2450c5b08ada43b5706ec6f36fdce73 100644 (file)
--- a/src/message.c
+++ b/src/message.c
@@ -235,7 +235,7 @@ free_byte_array_callback (void *pointer)
  }
  
  static GByteArray *
-convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeContentType *type)
+convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeContentType *type, struct mime_text_part *text_part)
  {
         GError *err = NULL;
         gsize read_bytes, write_bytes;
@@ -244,10 +244,12 @@ convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeCo
         GByteArray *result_array;
  
         if ((charset = g_mime_content_type_get_parameter (type, "charset")) == NULL) {
-               charset = "ASCII";
+               text_part->is_raw = TRUE;
+               return part_content;
         }
         
         if (g_ascii_strcasecmp (charset, "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) {
+               text_part->is_raw = TRUE;
                 return part_content;
         }
         
@@ -256,6 +258,7 @@ convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeCo
                                                                           &read_bytes, &write_bytes, &err);
         if (res_str == NULL) {
                 msg_warn ("convert_text_to_utf: cannot convert from %s to utf8: %s", charset, err ? err->message : "unknown problem");
+               text_part->is_raw = TRUE;
                 return part_content;
         }
  
@@ -263,6 +266,7 @@ convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeCo
         result_array->data = res_str;
         result_array->len = write_bytes + 1;
         memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_free, res_str);
+       text_part->is_raw = FALSE;
  
         return result_array;
  }
@@ -277,7 +281,7 @@ process_text_part (struct worker_task *task, GByteArray *part_content, GMimeCont
                 url_parse_html (task, part_content);
  
                 text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
-               text_part->orig = convert_text_to_utf (task, part_content, type);
+               text_part->orig = convert_text_to_utf (task, part_content, type, text_part);
                 text_part->content = strip_html_tags (part_content, NULL);
                 text_part->is_html = TRUE;
                 text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
@@ -289,7 +293,7 @@ process_text_part (struct worker_task *task, GByteArray *part_content, GMimeCont
                 url_parse_text (task, part_content);
  
                 text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
-               text_part->orig = convert_text_to_utf (task, part_content, type);
+               text_part->orig = convert_text_to_utf (task, part_content, type, text_part);
                 text_part->content = part_content;
                 text_part->is_html = FALSE;
                 text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
diff --git a/src/message.h b/src/message.h

index c67d14589c4c569a3c15eb3161581156ddb1683a..9e9b5de1fb05c8f377c4b84344377393d204c7d0 100644 (file)
--- a/src/message.h
+++ b/src/message.h
@@ -16,6 +16,7 @@ struct mime_part {
  
  struct mime_text_part {
         gboolean is_html;
+       gboolean is_raw;
         GByteArray *orig;
         GByteArray *content;
         fuzzy_hash_t *fuzzy;
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c

index a05e0e0e066e34c3280237fbd132e65d35b15e88..fa9eafdd498dda54b3a570ff84bb2603a0ede9d5 100644 (file)
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -157,6 +157,7 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task)
         char *headerv, *c, t;
         struct mime_text_part *part;
         GList *cur, *headerlist;
+       GRegex *regexp;
         struct uri *url;
         int r;
  
@@ -209,7 +210,13 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task)
                         cur = g_list_first (task->text_parts);
                         while (cur) {
                                 part = (struct mime_text_part *)cur->data;
-                               if (g_regex_match_full (re->regexp, part->orig->data, part->orig->len, 0, 0, NULL, NULL) == TRUE) {
+                               if (part->is_raw) {
+                                       regexp = re->raw_regexp;
+                               }
+                               else {
+                                       regexp = re->regexp;
+                               }
+                               if (g_regex_match_full (regexp, part->orig->data, part->orig->len, 0, 0, NULL, NULL) == TRUE) {
                                         task_cache_add (task, re, 1);
                                         return 1;
                                 }
author	Vsevolod Stakhov <vsevolod@rambler-co.ru>
	Wed, 15 Apr 2009 13:01:01 +0000 (17:01 +0400)
committer	Vsevolod Stakhov <vsevolod@rambler-co.ru>
	Wed, 15 Apr 2009 13:01:01 +0000 (17:01 +0400)
src/cfg_file.h		patch \| blob \| history
src/expressions.c		patch \| blob \| history
src/message.c		patch \| blob \| history
src/message.h		patch \| blob \| history
src/plugins/regexp.c		patch \| blob \| history