From cdfe9ccd2c199b038b2b9005be92865f16b6e360 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Wed, 19 Nov 2014 15:19:49 +0000 Subject: [PATCH] Fix raw vs parsed representations. Raw parts are now: - decoded b64/qp, but *NOT* converted to utf-8 Processed parts are now: - converted to UTF-8 - normalized if needed (e.g. HTML tags are stripped) --- src/libmime/message.c | 11 ++++++----- src/libserver/url.c | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/libmime/message.c b/src/libmime/message.c index f6c84e123..b93810cb0 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -1066,8 +1066,9 @@ process_text_part (struct rspamd_task *task, task->text_parts = g_list_prepend (task->text_parts, text_part); return; } - text_part->orig = convert_text_to_utf (task, - part_content, + text_part->orig = part_content; + part_content = convert_text_to_utf (task, + text_part->orig, type, text_part); text_part->is_balanced = TRUE; @@ -1077,7 +1078,7 @@ process_text_part (struct rspamd_task *task, text_part->content = strip_html_tags (task, task->task_pool, text_part, - text_part->orig, + part_content, NULL); if (text_part->html_nodes != NULL) { @@ -1107,11 +1108,11 @@ process_text_part (struct rspamd_task *task, task->text_parts = g_list_prepend (task->text_parts, text_part); return; } - text_part->orig = convert_text_to_utf (task, + text_part->content = convert_text_to_utf (task, part_content, type, text_part); - text_part->content = text_part->orig; + text_part->orig = part_content; url_parse_text (task->task_pool, task, text_part, FALSE); fuzzy_init_part (text_part, task->task_pool, task->cfg->max_diff); task->text_parts = g_list_prepend (task->text_parts, text_part); diff --git a/src/libserver/url.c b/src/libserver/url.c index 13c7cde7a..f64258625 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -1829,7 +1829,7 @@ url_parse_text (rspamd_mempool_t * pool, gchar *p, *end, *begin; - if (!part->orig->data || part->orig->len == 
0) { + if (part->content == NULL || part->content->len == 0) { msg_warn ("got empty text part"); return; } -- 2.39.5