* Set the 'raw' flag for MIME text parts: if we cannot determine the charset of a part or cannot

convert it to UTF-8, just use raw regexps for such parts
author: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-04-15 17:01:01 +0400
committer: Vsevolod Stakhov <vsevolod@rambler-co.ru> 2009-04-15 17:01:01 +0400
commit: d50dff03fdf56db4a24cf44e4e9eec70c69e81c3 (patch)
tree: 480a33e817a8bdcc66733f0712c101fd42c62947 /src/message.c
parent: 086e9da19d8ceaa605b1151c93b229a2e1040e79 (diff)
download: rspamd-d50dff03fdf56db4a24cf44e4e9eec70c69e81c3.tar.gz
rspamd-d50dff03fdf56db4a24cf44e4e9eec70c69e81c3.zip
1 files changed, 8 insertions, 4 deletions
diff --git a/src/message.c b/src/message.c
index 32d9bd673..14f9245cb 100644
--- a/src/message.c
+++ b/src/message.c
@@ -235,7 +235,7 @@ free_byte_array_callback (void *pointer)
 }
 
 static GByteArray *
-convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeContentType *type)
+convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeContentType *type, struct mime_text_part *text_part)
 {
 	GError *err = NULL;
 	gsize read_bytes, write_bytes;
@@ -244,10 +244,12 @@ convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeCo
 	GByteArray *result_array;
 
 	if ((charset = g_mime_content_type_get_parameter (type, "charset")) == NULL) {
-		charset = "ASCII";
+		text_part->is_raw = TRUE;
+		return part_content;
 	}
 	
 	if (g_ascii_strcasecmp (charset, "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) {
+		text_part->is_raw = TRUE;
 		return part_content;
 	}
 	
@@ -256,6 +258,7 @@ convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeCo
 									  &read_bytes, &write_bytes, &err);
 	if (res_str == NULL) {
 		msg_warn ("convert_text_to_utf: cannot convert from %s to utf8: %s", charset, err ? err->message : "unknown problem");
+		text_part->is_raw = TRUE;
 		return part_content;
 	}
 
@@ -263,6 +266,7 @@ convert_text_to_utf (struct worker_task *task, GByteArray *part_content, GMimeCo
 	result_array->data = res_str;
 	result_array->len = write_bytes + 1;
 	memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_free, res_str);
+	text_part->is_raw = FALSE;
 
 	return result_array;
 }
@@ -277,7 +281,7 @@ process_text_part (struct worker_task *task, GByteArray *part_content, GMimeCont
 		url_parse_html (task, part_content);
 
 		text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
-		text_part->orig = convert_text_to_utf (task, part_content, type);
+		text_part->orig = convert_text_to_utf (task, part_content, type, text_part);
 		text_part->content = strip_html_tags (part_content, NULL);
 		text_part->is_html = TRUE;
 		text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
@@ -289,7 +293,7 @@ process_text_part (struct worker_task *task, GByteArray *part_content, GMimeCont
 		url_parse_text (task, part_content);
 
 		text_part = memory_pool_alloc (task->task_pool, sizeof (struct mime_text_part));
-		text_part->orig = convert_text_to_utf (task, part_content, type);
+		text_part->orig = convert_text_to_utf (task, part_content, type, text_part);
 		text_part->content = part_content;
 		text_part->is_html = FALSE;
 		text_part->fuzzy = fuzzy_init_byte_array (text_part->content, task->task_pool);
author	Vsevolod Stakhov <vsevolod@rambler-co.ru>	2009-04-15 17:01:01 +0400
committer	Vsevolod Stakhov <vsevolod@rambler-co.ru>	2009-04-15 17:01:01 +0400
commit	d50dff03fdf56db4a24cf44e4e9eec70c69e81c3 (patch)
tree	480a33e817a8bdcc66733f0712c101fd42c62947 /src/message.c
parent	086e9da19d8ceaa605b1151c93b229a2e1040e79 (diff)
download	rspamd-d50dff03fdf56db4a24cf44e4e9eec70c69e81c3.tar.gz rspamd-d50dff03fdf56db4a24cf44e4e9eec70c69e81c3.zip