summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-04-15 17:01:01 +0400
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-04-15 17:01:01 +0400
commit d50dff03fdf56db4a24cf44e4e9eec70c69e81c3 (patch)
tree 480a33e817a8bdcc66733f0712c101fd42c62947 /src
parent 086e9da19d8ceaa605b1151c93b229a2e1040e79 (diff)
download rspamd-d50dff03fdf56db4a24cf44e4e9eec70c69e81c3.tar.gz
rspamd-d50dff03fdf56db4a24cf44e4e9eec70c69e81c3.zip
* Add a 'raw' flag to mime text parts: if we cannot determine the charset of a part or cannot
convert it to utf8, use raw regexps for that part
Diffstat (limited to 'src')
-rw-r--r-- src/cfg_file.h 1
-rw-r--r-- src/expressions.c 10
-rw-r--r-- src/message.c 12
-rw-r--r-- src/message.h 1
-rw-r--r-- src/plugins/regexp.c 9
5 files changed, 27 insertions, 6 deletions
diff --git a/src/cfg_file.h b/src/cfg_file.h
index b1cbd6125..c91b419ba 100644
--- a/src/cfg_file.h
+++ b/src/cfg_file.h
@@ -77,6 +77,7 @@ struct rspamd_regexp {
enum rspamd_regexp_type type; /**< regexp type */
char *regexp_text; /**< regexp text representation */
GRegex *regexp; /**< glib regexp structure */
+ GRegex *raw_regexp; /**< glib regexp structure for raw matching */
char *header; /**< header name for header regexps */
};
diff --git a/src/expressions.c b/src/expressions.c
index 8dfdc17b3..085708342 100644
--- a/src/expressions.c
+++ b/src/expressions.c
@@ -632,12 +632,20 @@ parse_regexp (memory_pool_t *pool, char *line)
result->regexp = g_regex_new (begin, regexp_flags, 0, &err);
result->regexp_text = memory_pool_strdup (pool, begin);
memory_pool_add_destructor (pool, (pool_destruct_func)g_regex_unref, (void *)result->regexp);
- *end = '/';
if (result->regexp == NULL || err != NULL) {
+ *end = '/';
msg_warn ("parse_regexp: could not read regexp: %s while reading regexp %s", err->message, src);
return NULL;
}
+ result->raw_regexp = g_regex_new (begin, regexp_flags | G_REGEX_RAW, 0, &err);
+ memory_pool_add_destructor (pool, (pool_destruct_func)g_regex_unref, (void *)result->raw_regexp);
+ *end = '/';
+
+ if (result->raw_regexp == NULL || err != NULL) {
+ msg_warn ("parse_regexp: could not read raw regexp: %s while reading regexp %s", err->message, src);
+ return NULL;
+ }
/* Add to cache for further usage */
re_cache_add (result->regexp_text, result);
diff --git a/src/message.c b/src/message.c
index 32d9bd673..14f9245cb 100644
--- a/src/message.c
+++ b/src/message.c
@@ -235,7 +235,7 @@ free_byte_array_callback (void *pointer)
}
static GByteArray *
-convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeContentType *type)
+convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeContentType *type, struct mime_text_part *text_part)
{
GError *err = NULL;
gsize read_bytes, write_bytes;
@@ -244,10 +244,12 @@ convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeCo
GByteArray *result_array;
if ((charset = g_mime_content_type_get_parameter (type, "charset")) == NULL) {
- charset = "ASCII";
+ text_part->is_raw = TRUE;
+ return part_content;
}
if (g_ascii_strcasecmp (charset, "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) {
+ text_part->is_raw = TRUE;
return part_content;
}
@@ -256,6 +258,7 @@ convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeCo
&read_bytes, &write_bytes, &err);
if (res_str == NULL) {
msg_warn ("convert_text_to_utf: cannot convert from %s to utf8: %s", charset, err ? err->message : "unknown problem");
+ text_part->is_raw = TRUE;
return part_content;
}
@@ -263,6 +266,7 @@ convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeCo
result_array->data = res_str;
result_array->len = write_bytes + 1;
memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_free, res_str);
+ text_part->is_raw = FALSE;
return result_array;
}
@@ -277,7 +281,7 @@ process_text_part (struct worker_task *task, GByteArray *part_content, GMimeCont
url_parse_html (task, part_content);
text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
- text_part->orig = convert_text_to_utf (task, part_content, type);
+ text_part->orig = convert_text_to_utf (task, part_content, type, text_part);
text_part->content = strip_html_tags (part_content, NULL);
text_part->is_html = TRUE;
text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
@@ -289,7 +293,7 @@ process_text_part (struct worker_task *task, GByteArray *part_content, GMimeCont
url_parse_text (task, part_content);
text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
- text_part->orig = convert_text_to_utf (task, part_content, type);
+ text_part->orig = convert_text_to_utf (task, part_content, type, text_part);
text_part->content = part_content;
text_part->is_html = FALSE;
text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
diff --git a/src/message.h b/src/message.h
index c67d14589..9e9b5de1f 100644
--- a/src/message.h
+++ b/src/message.h
@@ -16,6 +16,7 @@ struct mime_part {
struct mime_text_part {
gboolean is_html;
+ gboolean is_raw;
GByteArray *orig;
GByteArray *content;
fuzzy_hash_t *fuzzy;
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index a05e0e0e0..fa9eafdd4 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -157,6 +157,7 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task)
char *headerv, *c, t;
struct mime_text_part *part;
GList *cur, *headerlist;
+ GRegex *regexp;
struct uri *url;
int r;
@@ -209,7 +210,13 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task)
cur = g_list_first (task->text_parts);
while (cur) {
part = (struct mime_text_part *)cur->data;
- if (g_regex_match_full (re->regexp, part->orig->data, part->orig->len, 0, 0, NULL, NULL) == TRUE) {
+ if (part->is_raw) {
+ regexp = re->raw_regexp;
+ }
+ else {
+ regexp = re->regexp;
+ }
+ if (g_regex_match_full (regexp, part->orig->data, part->orig->len, 0, 0, NULL, NULL) == TRUE) {
task_cache_add (task, re, 1);
return 1;
}