From af5f57916e4345d988802794c84460960ee47d0c Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Thu, 6 Sep 2018 16:24:28 +0100 Subject: [PATCH] [Minor] Add UText wrapper for stripped content --- src/libmime/message.c | 15 ++++++++++++++- src/libmime/message.h | 2 ++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/libmime/message.c b/src/libmime/message.c index 388ab0aa3..e59d34b25 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -495,11 +495,11 @@ static void rspamd_normalize_text_part (struct rspamd_task *task, struct rspamd_mime_text_part *part) { - const gchar *p, *end; guint i; goffset off; struct rspamd_process_exception *ex; + UErrorCode uc_err = U_ZERO_ERROR; part->newlines = g_ptr_array_sized_new (128); @@ -526,6 +526,18 @@ rspamd_normalize_text_part (struct rspamd_task *task, } } + if (IS_PART_UTF (part)) { + utext_openUTF8 (&part->utf_stripped_text, + part->utf_stripped_content->data, + part->utf_stripped_content->len, + &uc_err); + + if (!U_SUCCESS (uc_err)) { + msg_warn_task ("cannot open text from utf content"); + /* Probably, should be an assertion */ + } + } + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) free_byte_array_callback, part->utf_stripped_content); @@ -833,6 +845,7 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task, text_part->raw.len = mime_part->raw_data.len; text_part->parsed.begin = mime_part->parsed_data.begin; text_part->parsed.len = mime_part->parsed_data.len; + text_part->utf_stripped_text = (UText)UTEXT_INITIALIZER; if (found_html) { if (!rspamd_message_process_html_text_part (task, text_part)) { diff --git a/src/libmime/message.h b/src/libmime/message.h index e4b5a3d4b..f4dbdaa72 100644 --- a/src/libmime/message.h +++ b/src/libmime/message.h @@ -14,6 +14,7 @@ #include "content_type.h" #include <unicode/uchar.h> +#include <unicode/utext.h> struct rspamd_task; struct controller_session; @@ -97,6 +98,7 @@ struct rspamd_mime_text_part { GByteArray *utf_stripped_content; /* utf content
with no newlines */ GArray *normalized_hashes; GArray *utf_words; + UText utf_stripped_text; /* Used by libicu to represent the utf8 content */ /* Unicode content, used by libicu */ GArray *unicode_raw_content; /* unicode raw content (of UChar) */ -- 2.39.5