diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-04-15 17:01:01 +0400 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-04-15 17:01:01 +0400 |
commit | d50dff03fdf56db4a24cf44e4e9eec70c69e81c3 (patch) | |
tree | 480a33e817a8bdcc66733f0712c101fd42c62947 /src | |
parent | 086e9da19d8ceaa605b1151c93b229a2e1040e79 (diff) | |
download | rspamd-d50dff03fdf56db4a24cf44e4e9eec70c69e81c3.tar.gz rspamd-d50dff03fdf56db4a24cf44e4e9eec70c69e81c3.zip |
* For MIME parts, set a 'raw' flag: if we cannot determine the charset of a part or cannot
convert it to UTF-8, just use raw regexps for such parts
Diffstat (limited to 'src')
-rw-r--r-- | src/cfg_file.h | 1 | ||||
-rw-r--r-- | src/expressions.c | 10 | ||||
-rw-r--r-- | src/message.c | 12 | ||||
-rw-r--r-- | src/message.h | 1 | ||||
-rw-r--r-- | src/plugins/regexp.c | 9 |
5 files changed, 27 insertions, 6 deletions
diff --git a/src/cfg_file.h b/src/cfg_file.h index b1cbd6125..c91b419ba 100644 --- a/src/cfg_file.h +++ b/src/cfg_file.h @@ -77,6 +77,7 @@ struct rspamd_regexp { enum rspamd_regexp_type type; /**< regexp type */ char *regexp_text; /**< regexp text representation */ GRegex *regexp; /**< glib regexp structure */ + GRegex *raw_regexp; /**< glib regexp structure for raw matching */ char *header; /**< header name for header regexps */ }; diff --git a/src/expressions.c b/src/expressions.c index 8dfdc17b3..085708342 100644 --- a/src/expressions.c +++ b/src/expressions.c @@ -632,12 +632,20 @@ parse_regexp (memory_pool_t *pool, char *line) result->regexp = g_regex_new (begin, regexp_flags, 0, &err); result->regexp_text = memory_pool_strdup (pool, begin); memory_pool_add_destructor (pool, (pool_destruct_func)g_regex_unref, (void *)result->regexp); - *end = '/'; if (result->regexp == NULL || err != NULL) { + *end = '/'; msg_warn ("parse_regexp: could not read regexp: %s while reading regexp %s", err->message, src); return NULL; } + result->raw_regexp = g_regex_new (begin, regexp_flags | G_REGEX_RAW, 0, &err); + memory_pool_add_destructor (pool, (pool_destruct_func)g_regex_unref, (void *)result->raw_regexp); + *end = '/'; + + if (result->raw_regexp == NULL || err != NULL) { + msg_warn ("parse_regexp: could not read raw regexp: %s while reading regexp %s", err->message, src); + return NULL; + } /* Add to cache for further usage */ re_cache_add (result->regexp_text, result); diff --git a/src/message.c b/src/message.c index 32d9bd673..14f9245cb 100644 --- a/src/message.c +++ b/src/message.c @@ -235,7 +235,7 @@ free_byte_array_callback (void *pointer) } static GByteArray * -convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeContentType *type) +convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeContentType *type, struct mime_text_part *text_part) { GError *err = NULL; gsize read_bytes, write_bytes; @@ -244,10 +244,12 @@ 
convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeCo GByteArray *result_array; if ((charset = g_mime_content_type_get_parameter (type, "charset")) == NULL) { - charset = "ASCII"; + text_part->is_raw = TRUE; + return part_content; } if (g_ascii_strcasecmp (charset, "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) { + text_part->is_raw = TRUE; return part_content; } @@ -256,6 +258,7 @@ convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeCo &read_bytes, &write_bytes, &err); if (res_str == NULL) { msg_warn ("convert_text_to_utf: cannot convert from %s to utf8: %s", charset, err ? err->message : "unknown problem"); + text_part->is_raw = TRUE; return part_content; } @@ -263,6 +266,7 @@ convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeCo result_array->data = res_str; result_array->len = write_bytes + 1; memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_free, res_str); + text_part->is_raw = FALSE; return result_array; } @@ -277,7 +281,7 @@ process_text_part (struct worker_task *task, GByteArray *part_content, GMimeCont url_parse_html (task, part_content); text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part)); - text_part->orig = convert_text_to_utf (task, part_content, type); + text_part->orig = convert_text_to_utf (task, part_content, type, text_part); text_part->content = strip_html_tags (part_content, NULL); text_part->is_html = TRUE; text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool); @@ -289,7 +293,7 @@ process_text_part (struct worker_task *task, GByteArray *part_content, GMimeCont url_parse_text (task, part_content); text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part)); - text_part->orig = convert_text_to_utf (task, part_content, type); + text_part->orig = convert_text_to_utf (task, part_content, type, text_part); text_part->content = part_content; text_part->is_html = FALSE; 
text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool); diff --git a/src/message.h b/src/message.h index c67d14589..9e9b5de1f 100644 --- a/src/message.h +++ b/src/message.h @@ -16,6 +16,7 @@ struct mime_part { struct mime_text_part { gboolean is_html; + gboolean is_raw; GByteArray *orig; GByteArray *content; fuzzy_hash_t *fuzzy; diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c index a05e0e0e0..fa9eafdd4 100644 --- a/src/plugins/regexp.c +++ b/src/plugins/regexp.c @@ -157,6 +157,7 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task) char *headerv, *c, t; struct mime_text_part *part; GList *cur, *headerlist; + GRegex *regexp; struct uri *url; int r; @@ -209,7 +210,13 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task) cur = g_list_first (task->text_parts); while (cur) { part = (struct mime_text_part *)cur->data; - if (g_regex_match_full (re->regexp, part->orig->data, part->orig->len, 0, 0, NULL, NULL) == TRUE) { + if (part->is_raw) { + regexp = re->raw_regexp; + } + else { + regexp = re->regexp; + } + if (g_regex_match_full (regexp, part->orig->data, part->orig->len, 0, 0, NULL, NULL) == TRUE) { task_cache_add (task, re, 1); return 1; } |